diff --git a/examples/persistent-context/persistent-context-anthropic.py b/examples/persistent-context/persistent-context-anthropic.py
index c401becb2..791f00440 100644
--- a/examples/persistent-context/persistent-context-anthropic.py
+++ b/examples/persistent-context/persistent-context-anthropic.py
@@ -84,7 +84,7 @@ async def load_conversation(params: FunctionCallParams):
filename = params.arguments["filename"]
logger.debug(f"loading conversation from {filename}")
try:
- with open(filename, "r") as file:
+ with open(filename) as file:
params.context.set_messages(json.load(file))
logger.debug(
f"loaded conversation from {filename}\n{json.dumps(params.context.get_messages(), indent=4)}"
diff --git a/examples/persistent-context/persistent-context-aws-nova-sonic.py b/examples/persistent-context/persistent-context-aws-nova-sonic.py
index e4dc635bf..62b9e7eab 100644
--- a/examples/persistent-context/persistent-context-aws-nova-sonic.py
+++ b/examples/persistent-context/persistent-context-aws-nova-sonic.py
@@ -105,7 +105,7 @@ async def load_conversation(params: FunctionCallParams):
filename = params.arguments["filename"]
logger.debug(f"loading conversation from {filename}")
try:
- with open(filename, "r") as file:
+ with open(filename) as file:
messages = json.load(file)
# HACK: if using the older Nova Sonic (pre-2) model, you need a special way of
# triggering the first assistant response. The call to trigger_assistant_response(),
diff --git a/examples/persistent-context/persistent-context-gemini.py b/examples/persistent-context/persistent-context-gemini.py
index d236a15b1..2ec3cd0bb 100644
--- a/examples/persistent-context/persistent-context-gemini.py
+++ b/examples/persistent-context/persistent-context-gemini.py
@@ -110,7 +110,7 @@ async def load_conversation(params: FunctionCallParams):
filename = params.arguments["filename"]
logger.debug(f"loading conversation from {filename}")
try:
- with open(filename, "r") as file:
+ with open(filename) as file:
params.context.set_messages(json.load(file))
await params.result_callback(
{
diff --git a/examples/persistent-context/persistent-context-grok-realtime.py b/examples/persistent-context/persistent-context-grok-realtime.py
index 9942550e6..4a990d24e 100644
--- a/examples/persistent-context/persistent-context-grok-realtime.py
+++ b/examples/persistent-context/persistent-context-grok-realtime.py
@@ -94,7 +94,7 @@ async def load_conversation(params: FunctionCallParams):
filename = params.arguments["filename"]
logger.debug(f"loading conversation from {filename}")
try:
- with open(filename, "r") as file:
+ with open(filename) as file:
params.context.set_messages(json.load(file))
await params.llm.reset_conversation()
# Manually create a response since we've reset the conversation
diff --git a/examples/persistent-context/persistent-context-openai-realtime.py b/examples/persistent-context/persistent-context-openai-realtime.py
index ad27161bb..0a7a888a8 100644
--- a/examples/persistent-context/persistent-context-openai-realtime.py
+++ b/examples/persistent-context/persistent-context-openai-realtime.py
@@ -91,7 +91,7 @@ async def load_conversation(params: FunctionCallParams):
filename = params.arguments["filename"]
logger.debug(f"loading conversation from {filename}")
try:
- with open(filename, "r") as file:
+ with open(filename) as file:
params.context.set_messages(json.load(file))
await params.llm.reset_conversation()
# NOTE: we manually create a response here rather than relying
diff --git a/examples/persistent-context/persistent-context-openai-responses-http.py b/examples/persistent-context/persistent-context-openai-responses-http.py
index 0f13eda18..d77513049 100644
--- a/examples/persistent-context/persistent-context-openai-responses-http.py
+++ b/examples/persistent-context/persistent-context-openai-responses-http.py
@@ -85,7 +85,7 @@ async def load_conversation(params: FunctionCallParams):
filename = params.arguments["filename"]
logger.debug(f"loading conversation from {filename}")
try:
- with open(filename, "r") as file:
+ with open(filename) as file:
params.context.set_messages(json.load(file))
logger.debug(
f"loaded conversation from {filename}\n{json.dumps(params.context.get_messages(), indent=4)}"
diff --git a/examples/persistent-context/persistent-context-openai-responses.py b/examples/persistent-context/persistent-context-openai-responses.py
index 5fd9c7657..c89fe3ff1 100644
--- a/examples/persistent-context/persistent-context-openai-responses.py
+++ b/examples/persistent-context/persistent-context-openai-responses.py
@@ -85,7 +85,7 @@ async def load_conversation(params: FunctionCallParams):
filename = params.arguments["filename"]
logger.debug(f"loading conversation from {filename}")
try:
- with open(filename, "r") as file:
+ with open(filename) as file:
params.context.set_messages(json.load(file))
logger.debug(
f"loaded conversation from {filename}\n{json.dumps(params.context.get_messages(), indent=4)}"
diff --git a/examples/persistent-context/persistent-context-openai.py b/examples/persistent-context/persistent-context-openai.py
index 7f744fd46..22e160102 100644
--- a/examples/persistent-context/persistent-context-openai.py
+++ b/examples/persistent-context/persistent-context-openai.py
@@ -85,7 +85,7 @@ async def load_conversation(params: FunctionCallParams):
filename = params.arguments["filename"]
logger.debug(f"loading conversation from {filename}")
try:
- with open(filename, "r") as file:
+ with open(filename) as file:
params.context.set_messages(json.load(file))
logger.debug(
f"loaded conversation from {filename}\n{json.dumps(params.context.get_messages(), indent=4)}"
diff --git a/examples/rag/rag-gemini.py b/examples/rag/rag-gemini.py
index 5380f98d8..0f9a8ddb2 100644
--- a/examples/rag/rag-gemini.py
+++ b/examples/rag/rag-gemini.py
@@ -87,7 +87,7 @@ def get_rag_content():
"""Get the RAG content from the file."""
script_dir = os.path.dirname(os.path.abspath(__file__))
rag_content_path = os.path.join(script_dir, "assets", "rag-content.txt")
- with open(rag_content_path, "r") as f:
+ with open(rag_content_path) as f:
return f.read()
diff --git a/examples/transports/transports-small-webrtc.py b/examples/transports/transports-small-webrtc.py
index 286b1143a..18403940b 100644
--- a/examples/transports/transports-small-webrtc.py
+++ b/examples/transports/transports-small-webrtc.py
@@ -8,7 +8,6 @@ import argparse
import asyncio
import os
from contextlib import asynccontextmanager
-from typing import Dict
import uvicorn
from dotenv import load_dotenv
@@ -39,7 +38,7 @@ load_dotenv(override=True)
app = FastAPI()
# Store connections by pc_id
-pcs_map: Dict[str, SmallWebRTCConnection] = {}
+pcs_map: dict[str, SmallWebRTCConnection] = {}
ice_servers = [
IceServer(
diff --git a/examples/turn-management/turn-management-user-assistant-turns.py b/examples/turn-management/turn-management-user-assistant-turns.py
index afc45423b..380c4866d 100644
--- a/examples/turn-management/turn-management-user-assistant-turns.py
+++ b/examples/turn-management/turn-management-user-assistant-turns.py
@@ -45,13 +45,13 @@ class TranscriptHandler:
output_file: Optional path to file where transcript is saved. If None, outputs to log only.
"""
- def __init__(self, output_file: Optional[str] = None):
+ def __init__(self, output_file: str | None = None):
"""Initialize handler with optional file output.
Args:
output_file: Path to output file. If None, outputs to log only.
"""
- self.output_file: Optional[str] = output_file
+ self.output_file: str | None = output_file
logger.debug(
f"TranscriptHandler initialized {'with output_file=' + output_file if output_file else 'with log output only'}"
)
diff --git a/scripts/evals/eval.py b/scripts/evals/eval.py
index fc9997318..cf7ecf257 100644
--- a/scripts/evals/eval.py
+++ b/scripts/evals/eval.py
@@ -13,7 +13,7 @@ import wave
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
-from typing import Any, List, Optional, Tuple
+from typing import Any
import aiofiles
from loguru import logger
@@ -60,7 +60,7 @@ PIPELINE_IDLE_TIMEOUT_SECS = 60
EVAL_TIMEOUT_SECS = 120
EVAL_RESULT_TIMEOUT_SECS = 10
-EvalPrompt = str | Tuple[str, ImageFile]
+EvalPrompt = str | tuple[str, ImageFile]
@dataclass
@@ -68,7 +68,7 @@ class EvalConfig:
prompt: EvalPrompt
eval: str
eval_speaks_first: bool = False
- runner_args_body: Optional[Any] = None
+ runner_args_body: Any | None = None
class EvalRunner:
@@ -78,7 +78,7 @@ class EvalRunner:
examples_dir: Path,
pattern: str = "",
record_audio: bool = False,
- name: Optional[str] = None,
+ name: str | None = None,
log_level: str = "DEBUG",
):
self._examples_dir = examples_dir
@@ -86,8 +86,8 @@ class EvalRunner:
self._record_audio = record_audio
self._log_level = log_level
self._total_success = 0
- self._tests: List[EvalResult] = []
- self._result_future: Optional[asyncio.Future[bool]] = None
+ self._tests: list[EvalResult] = []
+ self._result_future: asyncio.Future[bool] | None = None
# We to save runner files.
name = name or f"{datetime.now().strftime('%Y%m%d_%H%M%S')}"
@@ -150,7 +150,7 @@ class EvalRunner:
try:
# Wait for the future to resolve.
result = await asyncio.wait_for(self._result_future, timeout=EVAL_RESULT_TIMEOUT_SECS)
- except asyncio.TimeoutError:
+ except TimeoutError:
logger.error(f"ERROR: Timeout waiting for eval result.")
result = False
@@ -282,7 +282,7 @@ async def run_eval_pipeline(
# Load example prompt depending on image.
example_prompt = ""
- example_image: Optional[ImageFile] = None
+ example_image: ImageFile | None = None
if isinstance(eval_config.prompt, str):
example_prompt = eval_config.prompt
elif isinstance(eval_config.prompt, tuple):
diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py
index 3bf6a0dfd..ebf5b36c2 100644
--- a/scripts/evals/run-release-evals.py
+++ b/scripts/evals/run-release-evals.py
@@ -7,7 +7,7 @@
import argparse
import asyncio
import sys
-from datetime import datetime, timezone
+from datetime import UTC, datetime, timezone
from pathlib import Path
from dotenv import load_dotenv
@@ -41,7 +41,7 @@ EVAL_WEATHER_AND_RESTAURANT = EvalConfig(
EVAL_ONLINE_SEARCH = EvalConfig(
prompt="What's the current date in UTC?",
- eval=f"Current date in UTC is {datetime.now(timezone.utc).strftime('%A, %B %d, %Y')}.",
+ eval=f"Current date in UTC is {datetime.now(UTC).strftime('%A, %B %d, %Y')}.",
)
EVAL_SWITCH_LANGUAGE = EvalConfig(
diff --git a/scripts/evals/utils.py b/scripts/evals/utils.py
index 8111d404f..7ce9dfe84 100644
--- a/scripts/evals/utils.py
+++ b/scripts/evals/utils.py
@@ -6,9 +6,9 @@
import importlib.util
import os
+from collections.abc import Sequence
from dataclasses import dataclass
from pathlib import Path
-from typing import Sequence
GREEN = "\033[92m"
RED = "\033[91m"
diff --git a/scripts/krisp/audio_file_utils.py b/scripts/krisp/audio_file_utils.py
index 620ffe6d1..d7b412173 100644
--- a/scripts/krisp/audio_file_utils.py
+++ b/scripts/krisp/audio_file_utils.py
@@ -5,13 +5,12 @@ handling format detection and conversion to int16 PCM format.
"""
import sys
-from typing import Tuple
import numpy as np
import soundfile as sf
-def read_audio_file(input_path: str, verbose: bool = False) -> Tuple[np.ndarray, int]:
+def read_audio_file(input_path: str, verbose: bool = False) -> tuple[np.ndarray, int]:
"""Read an audio file and convert to int16 mono format.
This function:
diff --git a/src/pipecat/adapters/base_llm_adapter.py b/src/pipecat/adapters/base_llm_adapter.py
index 9c6747766..5d1063c54 100644
--- a/src/pipecat/adapters/base_llm_adapter.py
+++ b/src/pipecat/adapters/base_llm_adapter.py
@@ -12,7 +12,7 @@ adapters that handle tool format conversion and standardization.
import warnings
from abc import ABC, abstractmethod
-from typing import Any, Dict, Generic, List, Optional, TypeVar
+from typing import Any, Generic, TypeVar
from loguru import logger
@@ -50,10 +50,10 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
def __init__(self):
"""Initialize the adapter."""
self._warned_system_instruction = False
- self._builtin_tools: Dict[str, FunctionSchema] = {}
+ self._builtin_tools: dict[str, FunctionSchema] = {}
@property
- def builtin_tools(self) -> Dict[str, FunctionSchema]:
+ def builtin_tools(self) -> dict[str, FunctionSchema]:
"""Built-in tools automatically merged into every inference request.
Keyed by tool name for O(1) lookup, insertion, and removal. The
@@ -90,7 +90,7 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
pass
@abstractmethod
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Any]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[Any]:
"""Convert tools schema to the provider's specific format.
Args:
@@ -102,7 +102,7 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
pass
@abstractmethod
- def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]:
"""Get messages from a universal LLM context in a format ready for logging about this provider.
Args:
@@ -127,7 +127,7 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
def get_messages(
self, context: LLMContext, *, truncate_large_values: bool = False
- ) -> List[LLMContextMessage]:
+ ) -> list[LLMContextMessage]:
"""Get messages from the LLM context, including standard and LLM-specific messages.
Args:
@@ -142,7 +142,7 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
self.id_for_llm_specific_messages, truncate_large_values=truncate_large_values
)
- def from_standard_tools(self, tools: Any) -> List[Any] | NotGiven:
+ def from_standard_tools(self, tools: Any) -> list[Any] | NotGiven:
"""Convert tools from standard format to provider format.
Built-in tools are automatically merged into the schema before conversion so that every
@@ -188,8 +188,8 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
self,
messages: list,
*,
- system_instruction: Optional[str] = None,
- ) -> Optional[str]:
+ system_instruction: str | None = None,
+ ) -> str | None:
"""Extract an initial ``"system"`` message for use as a system instruction.
Only useful for services that expect the system instruction as a
@@ -247,11 +247,11 @@ class BaseLLMAdapter(ABC, Generic[TLLMInvocationParams]):
def _resolve_system_instruction(
self,
- system_from_context: Optional[str],
- system_instruction: Optional[str],
+ system_from_context: str | None,
+ system_instruction: str | None,
*,
discard_context_system: bool,
- ) -> Optional[str]:
+ ) -> str | None:
"""Resolve conflict between ``system_instruction`` and an extracted context system message.
Args:
diff --git a/src/pipecat/adapters/schemas/direct_function.py b/src/pipecat/adapters/schemas/direct_function.py
index 62eb9f76d..47afaa897 100644
--- a/src/pipecat/adapters/schemas/direct_function.py
+++ b/src/pipecat/adapters/schemas/direct_function.py
@@ -15,16 +15,11 @@ formats).
import inspect
import types
+from collections.abc import Callable, Mapping
from typing import (
TYPE_CHECKING,
Any,
- Callable,
- Dict,
- List,
- Mapping,
Protocol,
- Set,
- Tuple,
Union,
get_args,
get_origin,
@@ -144,8 +139,8 @@ class BaseDirectFunctionWrapper:
# TODO: maybe to better support things like enums, check if each type is a pydantic type and use its convert-to-jsonschema function
def _get_parameters_as_jsonschema(
- self, func: Callable, docstring_params: List[docstring_parser.DocstringParam]
- ) -> Tuple[Dict[str, Any], List[str]]:
+ self, func: Callable, docstring_params: list[docstring_parser.DocstringParam]
+ ) -> tuple[dict[str, Any], list[str]]:
"""Get function parameters as a dictionary of JSON schemas and a list of required parameters.
Ignore the first parameter, as it's expected to be the "special" one.
@@ -193,7 +188,7 @@ class BaseDirectFunctionWrapper:
return properties, required
- def _typehint_to_jsonschema(self, type_hint: Any) -> Dict[str, Any]:
+ def _typehint_to_jsonschema(self, type_hint: Any) -> dict[str, Any]:
"""Convert a Python type hint to a JSON Schema.
Args:
@@ -216,9 +211,9 @@ class BaseDirectFunctionWrapper:
return {"type": "number"}
elif type_hint is bool:
return {"type": "boolean"}
- elif type_hint is dict or type_hint is Dict:
+        elif type_hint is dict or (get_origin(type_hint) is dict and not get_args(type_hint)):
return {"type": "object"}
- elif type_hint is list or type_hint is List:
+        elif type_hint is list or (get_origin(type_hint) is list and not get_args(type_hint)):
return {"type": "array"}
# Get origin and arguments for complex types
@@ -230,11 +225,11 @@ class BaseDirectFunctionWrapper:
return {"anyOf": [self._typehint_to_jsonschema(arg) for arg in args]}
# Handle List, Tuple, Set with specific item types
- if origin in (list, List, tuple, Tuple, set, Set) and args:
+        if origin in (list, tuple, set) and args:
return {"type": "array", "items": self._typehint_to_jsonschema(args[0])}
# Handle Dict with specific key/value types
- if origin in (dict, Dict) and len(args) == 2:
+        if origin is dict and len(args) == 2:
# For JSON Schema, keys must be strings
return {"type": "object", "additionalProperties": self._typehint_to_jsonschema(args[1])}
diff --git a/src/pipecat/adapters/schemas/function_schema.py b/src/pipecat/adapters/schemas/function_schema.py
index 5fba8dd57..046ac04f9 100644
--- a/src/pipecat/adapters/schemas/function_schema.py
+++ b/src/pipecat/adapters/schemas/function_schema.py
@@ -11,7 +11,7 @@ tools and functions used with AI models, ensuring consistent formatting
across different AI service providers.
"""
-from typing import Any, Dict, List
+from typing import Any
class FunctionSchema:
@@ -23,7 +23,7 @@ class FunctionSchema:
"""
def __init__(
- self, name: str, description: str, properties: Dict[str, Any], required: List[str]
+ self, name: str, description: str, properties: dict[str, Any], required: list[str]
) -> None:
"""Initialize the function schema.
@@ -38,7 +38,7 @@ class FunctionSchema:
self._properties = properties
self._required = required
- def to_default_dict(self) -> Dict[str, Any]:
+ def to_default_dict(self) -> dict[str, Any]:
"""Converts the function schema to a dictionary.
Returns:
@@ -73,7 +73,7 @@ class FunctionSchema:
return self._description
@property
- def properties(self) -> Dict[str, Any]:
+ def properties(self) -> dict[str, Any]:
"""Get the function properties.
Returns:
@@ -82,7 +82,7 @@ class FunctionSchema:
return self._properties
@property
- def required(self) -> List[str]:
+ def required(self) -> list[str]:
"""Get the required parameters.
Returns:
diff --git a/src/pipecat/adapters/schemas/tools_schema.py b/src/pipecat/adapters/schemas/tools_schema.py
index 1c1ba0dd3..28c2b9b88 100644
--- a/src/pipecat/adapters/schemas/tools_schema.py
+++ b/src/pipecat/adapters/schemas/tools_schema.py
@@ -11,7 +11,7 @@ and custom adapter-specific tools in the Pipecat framework.
"""
from enum import Enum
-from typing import Any, Dict, List, Optional
+from typing import Any
from pipecat.adapters.schemas.direct_function import DirectFunction, DirectFunctionWrapper
from pipecat.adapters.schemas.function_schema import FunctionSchema
@@ -39,8 +39,8 @@ class ToolsSchema:
def __init__(
self,
- standard_tools: List[FunctionSchema | DirectFunction],
- custom_tools: Optional[Dict[AdapterType, List[Dict[str, Any]]]] = None,
+ standard_tools: list[FunctionSchema | DirectFunction],
+ custom_tools: dict[AdapterType, list[dict[str, Any]]] | None = None,
) -> None:
"""Initialize the tools schema.
@@ -66,7 +66,7 @@ class ToolsSchema:
self._custom_tools = custom_tools
@property
- def standard_tools(self) -> List[FunctionSchema]:
+ def standard_tools(self) -> list[FunctionSchema]:
"""Get the list of standard function schema tools.
Returns:
@@ -75,7 +75,7 @@ class ToolsSchema:
return self._standard_tools
@property
- def custom_tools(self) -> Dict[AdapterType, List[Dict[str, Any]]]:
+ def custom_tools(self) -> dict[AdapterType, list[dict[str, Any]]]:
"""Get the custom tools dictionary.
Returns:
@@ -84,7 +84,7 @@ class ToolsSchema:
return self._custom_tools
@custom_tools.setter
- def custom_tools(self, value: Dict[AdapterType, List[Dict[str, Any]]]) -> None:
+ def custom_tools(self, value: dict[AdapterType, list[dict[str, Any]]]) -> None:
"""Set the custom tools dictionary.
Args:
diff --git a/src/pipecat/adapters/services/anthropic_adapter.py b/src/pipecat/adapters/services/anthropic_adapter.py
index 9617dadeb..067d1eb22 100644
--- a/src/pipecat/adapters/services/anthropic_adapter.py
+++ b/src/pipecat/adapters/services/anthropic_adapter.py
@@ -9,7 +9,7 @@
import copy
import json
from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from anthropic import NOT_GIVEN, NotGiven
from anthropic.types.message_param import MessageParam
@@ -31,8 +31,8 @@ class AnthropicLLMInvocationParams(TypedDict):
"""Context-based parameters for invoking Anthropic's LLM API."""
system: str | NotGiven
- messages: List[MessageParam]
- tools: List[ToolUnionParam]
+ messages: list[MessageParam]
+ tools: list[ToolUnionParam]
class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
@@ -51,7 +51,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
self,
context: LLMContext,
enable_prompt_caching: bool,
- system_instruction: Optional[str] = None,
+ system_instruction: str | None = None,
) -> AnthropicLLMInvocationParams:
"""Get Anthropic-specific LLM invocation parameters from a universal LLM context.
@@ -83,7 +83,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
"tools": self.from_standard_tools(context.tools) or [],
}
- def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]:
"""Get messages from a universal LLM context in a format ready for logging about Anthropic.
Removes or truncates sensitive data like image content for safe logging.
@@ -115,14 +115,14 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
class ConvertedMessages:
"""Container for Anthropic-formatted messages converted from universal context."""
- messages: List[MessageParam]
+ messages: list[MessageParam]
system: str | NotGiven
def _from_universal_context_messages(
self,
- universal_context_messages: List[LLMContextMessage],
+ universal_context_messages: list[LLMContextMessage],
*,
- system_instruction: Optional[str] = None,
+ system_instruction: str | None = None,
) -> ConvertedMessages:
system = NOT_GIVEN
@@ -333,7 +333,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
return message
- def _with_cache_control_markers(self, messages: List[MessageParam]) -> List[MessageParam]:
+ def _with_cache_control_markers(self, messages: list[MessageParam]) -> list[MessageParam]:
"""Add cache control markers to messages for prompt caching.
Args:
@@ -381,7 +381,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
return messages_with_markers
@staticmethod
- def _to_anthropic_function_format(function: FunctionSchema) -> Dict[str, Any]:
+ def _to_anthropic_function_format(function: FunctionSchema) -> dict[str, Any]:
"""Convert a single function schema to Anthropic's format.
Args:
@@ -400,7 +400,7 @@ class AnthropicLLMAdapter(BaseLLMAdapter[AnthropicLLMInvocationParams]):
},
}
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]:
"""Convert function schemas to Anthropic's function-calling format.
Args:
diff --git a/src/pipecat/adapters/services/aws_nova_sonic_adapter.py b/src/pipecat/adapters/services/aws_nova_sonic_adapter.py
index d0dca53c0..e38fe901b 100644
--- a/src/pipecat/adapters/services/aws_nova_sonic_adapter.py
+++ b/src/pipecat/adapters/services/aws_nova_sonic_adapter.py
@@ -10,7 +10,7 @@ import copy
import json
from dataclasses import dataclass
from enum import Enum
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from loguru import logger
@@ -55,9 +55,9 @@ class AWSNovaSonicLLMInvocationParams(TypedDict):
This is a placeholder until support for universal LLMContext machinery is added for AWS Nova Sonic.
"""
- system_instruction: Optional[str]
- messages: List[AWSNovaSonicConversationHistoryMessage]
- tools: List[Dict[str, Any]]
+ system_instruction: str | None
+ messages: list[AWSNovaSonicConversationHistoryMessage]
+ tools: list[dict[str, Any]]
class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
@@ -73,7 +73,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
return "aws-nova-sonic"
def get_llm_invocation_params(
- self, context: LLMContext, *, system_instruction: Optional[str] = None
+ self, context: LLMContext, *, system_instruction: str | None = None
) -> AWSNovaSonicLLMInvocationParams:
"""Get AWS Nova Sonic-specific LLM invocation parameters from a universal LLM context.
@@ -97,7 +97,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
"tools": self.from_standard_tools(context.tools) or [],
}
- def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context) -> list[dict[str, Any]]:
"""Get messages from a universal LLM context in a format ready for logging about AWS Nova Sonic.
Removes or truncates sensitive data like image content for safe logging.
@@ -116,11 +116,11 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
class ConvertedMessages:
"""Container for Google-formatted messages converted from universal context."""
- messages: List[AWSNovaSonicConversationHistoryMessage]
- system_instruction: Optional[str] = None
+ messages: list[AWSNovaSonicConversationHistoryMessage]
+ system_instruction: str | None = None
def _from_universal_context_messages(
- self, universal_context_messages: List[LLMContextMessage]
+ self, universal_context_messages: list[LLMContextMessage]
) -> ConvertedMessages:
system_instruction = None
messages = []
@@ -187,7 +187,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
# Sonic conversation history
@staticmethod
- def _to_aws_nova_sonic_function_format(function: FunctionSchema) -> Dict[str, Any]:
+ def _to_aws_nova_sonic_function_format(function: FunctionSchema) -> dict[str, Any]:
"""Convert a function schema to AWS Nova Sonic format.
Args:
@@ -212,7 +212,7 @@ class AWSNovaSonicLLMAdapter(BaseLLMAdapter[AWSNovaSonicLLMInvocationParams]):
}
}
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]:
"""Convert tools schema to AWS Nova Sonic function-calling format.
Args:
diff --git a/src/pipecat/adapters/services/bedrock_adapter.py b/src/pipecat/adapters/services/bedrock_adapter.py
index 3150e6458..bb1223880 100644
--- a/src/pipecat/adapters/services/bedrock_adapter.py
+++ b/src/pipecat/adapters/services/bedrock_adapter.py
@@ -10,7 +10,7 @@ import base64
import copy
import json
from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from loguru import logger
@@ -29,9 +29,9 @@ from pipecat.processors.aggregators.llm_context import (
class AWSBedrockLLMInvocationParams(TypedDict):
"""Context-based parameters for invoking AWS Bedrock's LLM API."""
- system: Optional[List[dict[str, Any]]] # [{"text": "system message"}]
- messages: List[dict[str, Any]]
- tools: List[dict[str, Any]]
+ system: list[dict[str, Any]] | None # [{"text": "system message"}]
+ messages: list[dict[str, Any]]
+ tools: list[dict[str, Any]]
tool_choice: LLMContextToolChoice
@@ -48,7 +48,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
return "aws"
def get_llm_invocation_params(
- self, context: LLMContext, *, system_instruction: Optional[str] = None
+ self, context: LLMContext, *, system_instruction: str | None = None
) -> AWSBedrockLLMInvocationParams:
"""Get AWS Bedrock-specific LLM invocation parameters from a universal LLM context.
@@ -79,7 +79,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
"tool_choice": context.tool_choice,
}
- def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context) -> list[dict[str, Any]]:
"""Get messages from a universal LLM context in a format ready for logging about AWS Bedrock.
Removes or truncates sensitive data like image content for safe logging.
@@ -109,14 +109,14 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
class ConvertedMessages:
"""Container for Bedrock-formatted messages converted from universal context."""
- messages: List[dict[str, Any]]
- system: Optional[str]
+ messages: list[dict[str, Any]]
+ system: str | None
def _from_universal_context_messages(
self,
- universal_context_messages: List[LLMContextMessage],
+ universal_context_messages: list[LLMContextMessage],
*,
- system_instruction: Optional[str] = None,
+ system_instruction: str | None = None,
) -> ConvertedMessages:
system = None
@@ -305,7 +305,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
return message
@staticmethod
- def _to_bedrock_function_format(function: FunctionSchema) -> Dict[str, Any]:
+ def _to_bedrock_function_format(function: FunctionSchema) -> dict[str, Any]:
"""Convert a function schema to Bedrock's tool format.
Args:
@@ -328,7 +328,7 @@ class AWSBedrockLLMAdapter(BaseLLMAdapter[AWSBedrockLLMInvocationParams]):
}
}
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]:
"""Convert function schemas to Bedrock's function-calling format.
Args:
diff --git a/src/pipecat/adapters/services/gemini_adapter.py b/src/pipecat/adapters/services/gemini_adapter.py
index 565d0d0b8..aede18e7c 100644
--- a/src/pipecat/adapters/services/gemini_adapter.py
+++ b/src/pipecat/adapters/services/gemini_adapter.py
@@ -9,7 +9,7 @@
import base64
import json
from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from loguru import logger
from openai import NotGiven
@@ -34,9 +34,9 @@ except ModuleNotFoundError as e:
class GeminiLLMInvocationParams(TypedDict):
"""Context-based parameters for invoking Gemini LLM."""
- system_instruction: Optional[str]
- messages: List[Content]
- tools: List[Any] | NotGiven
+ system_instruction: str | None
+ messages: list[Content]
+ tools: list[Any] | NotGiven
class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
@@ -54,7 +54,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
return "google"
def get_llm_invocation_params(
- self, context: LLMContext, *, system_instruction: Optional[str] = None
+ self, context: LLMContext, *, system_instruction: str | None = None
) -> GeminiLLMInvocationParams:
"""Get Gemini-specific LLM invocation parameters from a universal LLM context.
@@ -81,7 +81,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
"tools": self.from_standard_tools(context.tools),
}
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]:
"""Convert tool schemas to Gemini's function-calling format.
Args:
@@ -92,7 +92,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
Includes both converted standard tools and any custom Gemini-specific tools.
"""
- def _strip_additional_properties(schema: Dict[str, Any]) -> Dict[str, Any]:
+ def _strip_additional_properties(schema: dict[str, Any]) -> dict[str, Any]:
"""Recursively remove "additionalProperties" fields from JSON schema, as they're not supported by Gemini.
Args:
@@ -139,7 +139,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
return formatted_standard_tools + custom_gemini_tools
- def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]:
"""Get messages from a universal LLM context in a format ready for logging about Gemini.
Removes or truncates sensitive data like image content for safe logging.
@@ -173,8 +173,8 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
class ConvertedMessages:
"""Container for Google-formatted messages converted from universal context."""
- messages: List[Content]
- system_instruction: Optional[str] = None
+ messages: list[Content]
+ system_instruction: str | None = None
@dataclass
class MessageConversionResult:
@@ -184,20 +184,20 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
for any tool calls discovered in the message.
"""
- content: Optional[Content] = None
- tool_call_id_to_name_mapping: Dict[str, str] = field(default_factory=dict)
+ content: Content | None = None
+ tool_call_id_to_name_mapping: dict[str, str] = field(default_factory=dict)
@dataclass
class MessageConversionParams:
"""Parameters for converting a single universal context message to Google format."""
- tool_call_id_to_name_mapping: Dict[str, str]
+ tool_call_id_to_name_mapping: dict[str, str]
def _from_universal_context_messages(
self,
- universal_context_messages: List[LLMContextMessage],
+ universal_context_messages: list[LLMContextMessage],
*,
- system_instruction: Optional[str] = None,
+ system_instruction: str | None = None,
) -> ConvertedMessages:
"""Restructures messages to ensure proper Google format and message ordering.
@@ -443,8 +443,8 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
)
def _merge_parallel_tool_calls_for_thinking(
- self, thought_signature_dicts: List[dict], messages: List[Content]
- ) -> List[Content]:
+ self, thought_signature_dicts: list[dict], messages: list[Content]
+ ) -> list[Content]:
"""Merge parallel tool calls into single Content objects when thinking is enabled.
Gemini expects parallel tool calls (multiple function calls made
@@ -540,7 +540,7 @@ class GeminiLLMAdapter(BaseLLMAdapter[GeminiLLMInvocationParams]):
return merged_messages
def _apply_thought_signatures_to_messages(
- self, thought_signature_dicts: List[dict], messages: List[Content]
+ self, thought_signature_dicts: list[dict], messages: list[Content]
) -> None:
"""Apply thought signatures to corresponding assistant messages.
diff --git a/src/pipecat/adapters/services/grok_realtime_adapter.py b/src/pipecat/adapters/services/grok_realtime_adapter.py
index cc98887f8..75ca61030 100644
--- a/src/pipecat/adapters/services/grok_realtime_adapter.py
+++ b/src/pipecat/adapters/services/grok_realtime_adapter.py
@@ -13,7 +13,7 @@ Grok's Voice Agent API.
import copy
import json
from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from loguru import logger
@@ -33,9 +33,9 @@ class GrokRealtimeLLMInvocationParams(TypedDict):
tools: List of tool definitions (function, web_search, x_search, file_search).
"""
- system_instruction: Optional[str]
- messages: List[events.ConversationItem]
- tools: List[Dict[str, Any]]
+ system_instruction: str | None
+ messages: list[events.ConversationItem]
+ tools: list[dict[str, Any]]
class GrokRealtimeLLMAdapter(BaseLLMAdapter):
@@ -51,7 +51,7 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter):
return "grok-realtime"
def get_llm_invocation_params(
- self, context: LLMContext, *, system_instruction: Optional[str] = None
+ self, context: LLMContext, *, system_instruction: str | None = None
) -> GrokRealtimeLLMInvocationParams:
"""Get Grok Realtime-specific LLM invocation parameters from a universal LLM context.
@@ -74,7 +74,7 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter):
"tools": self.from_standard_tools(context.tools) or [],
}
- def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context) -> list[dict[str, Any]]:
"""Get messages from context in a format safe for logging.
Binary data (images, audio) is replaced with short placeholders.
@@ -91,11 +91,11 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter):
class ConvertedMessages:
"""Container for Grok-formatted messages converted from universal context."""
- messages: List[events.ConversationItem]
- system_instruction: Optional[str] = None
+ messages: list[events.ConversationItem]
+ system_instruction: str | None = None
def _from_universal_context_messages(
- self, universal_context_messages: List[LLMContextMessage]
+ self, universal_context_messages: list[LLMContextMessage]
) -> ConvertedMessages:
"""Convert universal context messages to Grok Realtime format.
@@ -211,7 +211,7 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter):
logger.error(f"Unhandled message type in _from_universal_context_message: {message}")
@staticmethod
- def _to_grok_function_format(function: FunctionSchema) -> Dict[str, Any]:
+ def _to_grok_function_format(function: FunctionSchema) -> dict[str, Any]:
"""Convert a function schema to Grok Realtime function format.
Args:
@@ -231,7 +231,7 @@ class GrokRealtimeLLMAdapter(BaseLLMAdapter):
},
}
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]:
"""Convert tool schemas to Grok Realtime format.
Supports both standard function tools and Grok-specific tools
diff --git a/src/pipecat/adapters/services/inworld_realtime_adapter.py b/src/pipecat/adapters/services/inworld_realtime_adapter.py
index b022afe6b..db07256f5 100644
--- a/src/pipecat/adapters/services/inworld_realtime_adapter.py
+++ b/src/pipecat/adapters/services/inworld_realtime_adapter.py
@@ -13,7 +13,7 @@ Inworld's Realtime API.
import copy
import json
from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from loguru import logger
@@ -33,9 +33,9 @@ class InworldRealtimeLLMInvocationParams(TypedDict):
tools: List of tool definitions.
"""
- system_instruction: Optional[str]
- messages: List[events.ConversationItem]
- tools: List[Dict[str, Any]]
+ system_instruction: str | None
+ messages: list[events.ConversationItem]
+ tools: list[dict[str, Any]]
class InworldRealtimeLLMAdapter(BaseLLMAdapter):
@@ -51,7 +51,7 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter):
return "inworld-realtime"
def get_llm_invocation_params(
- self, context: LLMContext, *, system_instruction: Optional[str] = None
+ self, context: LLMContext, *, system_instruction: str | None = None
) -> InworldRealtimeLLMInvocationParams:
"""Get Inworld Realtime-specific LLM invocation parameters from a universal LLM context.
@@ -74,7 +74,7 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter):
"tools": self.from_standard_tools(context.tools) or [],
}
- def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context) -> list[dict[str, Any]]:
"""Get messages from context in a format safe for logging.
Binary data (images, audio) is replaced with short placeholders.
@@ -91,11 +91,11 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter):
class ConvertedMessages:
"""Container for Inworld-formatted messages converted from universal context."""
- messages: List[events.ConversationItem]
- system_instruction: Optional[str] = None
+ messages: list[events.ConversationItem]
+ system_instruction: str | None = None
def _from_universal_context_messages(
- self, universal_context_messages: List[LLMContextMessage]
+ self, universal_context_messages: list[LLMContextMessage]
) -> ConvertedMessages:
"""Convert universal context messages to Inworld Realtime format.
@@ -211,7 +211,7 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter):
logger.error(f"Unhandled message type in _from_universal_context_message: {message}")
@staticmethod
- def _to_inworld_function_format(function: FunctionSchema) -> Dict[str, Any]:
+ def _to_inworld_function_format(function: FunctionSchema) -> dict[str, Any]:
"""Convert a function schema to Inworld Realtime function format.
Args:
@@ -231,7 +231,7 @@ class InworldRealtimeLLMAdapter(BaseLLMAdapter):
},
}
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]:
"""Convert tool schemas to Inworld Realtime format.
Args:
diff --git a/src/pipecat/adapters/services/open_ai_adapter.py b/src/pipecat/adapters/services/open_ai_adapter.py
index a52fb84a6..335843d16 100644
--- a/src/pipecat/adapters/services/open_ai_adapter.py
+++ b/src/pipecat/adapters/services/open_ai_adapter.py
@@ -6,7 +6,7 @@
"""OpenAI LLM adapter for Pipecat."""
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from openai._types import NotGiven as OpenAINotGiven
from openai.types.chat import (
@@ -29,8 +29,8 @@ from pipecat.processors.aggregators.llm_context import (
class OpenAILLMInvocationParams(TypedDict):
"""Context-based parameters for invoking OpenAI ChatCompletion API."""
- messages: List[ChatCompletionMessageParam]
- tools: List[ChatCompletionToolParam] | OpenAINotGiven
+ messages: list[ChatCompletionMessageParam]
+ tools: list[ChatCompletionToolParam] | OpenAINotGiven
tool_choice: ChatCompletionToolChoiceOptionParam | OpenAINotGiven
@@ -54,7 +54,7 @@ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]):
self,
context: LLMContext,
*,
- system_instruction: Optional[str] = None,
+ system_instruction: str | None = None,
convert_developer_to_user: bool,
) -> OpenAILLMInvocationParams:
"""Get OpenAI-specific LLM invocation parameters from a universal LLM context.
@@ -95,7 +95,7 @@ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]):
"tool_choice": context.tool_choice,
}
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[ChatCompletionToolParam]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[ChatCompletionToolParam]:
"""Convert function schemas to OpenAI's function-calling format.
Args:
@@ -115,7 +115,7 @@ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]):
custom_openai_tools = tools_schema.custom_tools.get(AdapterType.OPENAI, [])
return formatted_standard_tools + custom_openai_tools
- def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]:
"""Get messages from a universal LLM context in a format ready for logging about OpenAI.
Binary data (images, audio) is replaced with short placeholders.
@@ -130,10 +130,10 @@ class OpenAILLMAdapter(BaseLLMAdapter[OpenAILLMInvocationParams]):
def _from_universal_context_messages(
self,
- messages: List[LLMContextMessage],
+ messages: list[LLMContextMessage],
*,
convert_developer_to_user: bool,
- ) -> List[ChatCompletionMessageParam]:
+ ) -> list[ChatCompletionMessageParam]:
result = []
for message in messages:
if isinstance(message, LLMSpecificMessage):
diff --git a/src/pipecat/adapters/services/open_ai_realtime_adapter.py b/src/pipecat/adapters/services/open_ai_realtime_adapter.py
index 41f3ce89d..7df7e45c5 100644
--- a/src/pipecat/adapters/services/open_ai_realtime_adapter.py
+++ b/src/pipecat/adapters/services/open_ai_realtime_adapter.py
@@ -9,7 +9,7 @@
import copy
import json
from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from loguru import logger
@@ -26,9 +26,9 @@ class OpenAIRealtimeLLMInvocationParams(TypedDict):
This is a placeholder until support for universal LLMContext machinery is added for OpenAI Realtime.
"""
- system_instruction: Optional[str]
- messages: List[events.ConversationItem]
- tools: List[Dict[str, Any]]
+ system_instruction: str | None
+ messages: list[events.ConversationItem]
+ tools: list[dict[str, Any]]
class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
@@ -44,7 +44,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
return "openai-realtime"
def get_llm_invocation_params(
- self, context: LLMContext, *, system_instruction: Optional[str] = None
+ self, context: LLMContext, *, system_instruction: str | None = None
) -> OpenAIRealtimeLLMInvocationParams:
"""Get OpenAI Realtime-specific LLM invocation parameters from a universal LLM context.
@@ -68,7 +68,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
"tools": self.from_standard_tools(context.tools) or [],
}
- def get_messages_for_logging(self, context) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context) -> list[dict[str, Any]]:
"""Get messages from a universal LLM context in a format ready for logging about OpenAI Realtime.
Binary data (images, audio) is replaced with short placeholders.
@@ -87,11 +87,11 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
class ConvertedMessages:
"""Container for OpenAI-formatted messages converted from universal context."""
- messages: List[events.ConversationItem]
- system_instruction: Optional[str] = None
+ messages: list[events.ConversationItem]
+ system_instruction: str | None = None
def _from_universal_context_messages(
- self, universal_context_messages: List[LLMContextMessage]
+ self, universal_context_messages: list[LLMContextMessage]
) -> ConvertedMessages:
# We can't load a long conversation history into the openai realtime api yet. (The API/model
# forgets that it can do audio, if you do a series of `conversation.item.create` calls.) So
@@ -188,7 +188,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
logger.error(f"Unhandled message type in _from_universal_context_message: {message}")
@staticmethod
- def _to_openai_realtime_function_format(function: FunctionSchema) -> Dict[str, Any]:
+ def _to_openai_realtime_function_format(function: FunctionSchema) -> dict[str, Any]:
"""Convert a function schema to OpenAI Realtime format.
Args:
@@ -208,7 +208,7 @@ class OpenAIRealtimeLLMAdapter(BaseLLMAdapter):
},
}
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[Dict[str, Any]]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[dict[str, Any]]:
"""Convert tool schemas to OpenAI Realtime function-calling format.
Args:
diff --git a/src/pipecat/adapters/services/open_ai_responses_adapter.py b/src/pipecat/adapters/services/open_ai_responses_adapter.py
index f3dd67e03..c5c6cbc7a 100644
--- a/src/pipecat/adapters/services/open_ai_responses_adapter.py
+++ b/src/pipecat/adapters/services/open_ai_responses_adapter.py
@@ -6,7 +6,7 @@
"""OpenAI Responses API adapter for Pipecat."""
-from typing import Any, Dict, List, Optional, TypedDict
+from typing import Any, TypedDict
from openai._types import NotGiven as OpenAINotGiven
from openai.types.responses import FunctionToolParam, ResponseInputItemParam, ToolParam
@@ -23,8 +23,8 @@ from pipecat.processors.aggregators.llm_context import (
class OpenAIResponsesLLMInvocationParams(TypedDict, total=False):
"""Context-based parameters for invoking OpenAI Responses API."""
- input: List[ResponseInputItemParam]
- tools: List[ToolParam] | OpenAINotGiven
+ input: list[ResponseInputItemParam]
+ tools: list[ToolParam] | OpenAINotGiven
instructions: str
@@ -47,7 +47,7 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam
self,
context: LLMContext,
*,
- system_instruction: Optional[str] = None,
+ system_instruction: str | None = None,
) -> OpenAIResponsesLLMInvocationParams:
"""Get Responses API invocation parameters from a universal LLM context.
@@ -105,7 +105,7 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam
return params
- def to_provider_tools_format(self, tools_schema: ToolsSchema) -> List[ToolParam]:
+ def to_provider_tools_format(self, tools_schema: ToolsSchema) -> list[ToolParam]:
"""Convert function schemas to Responses API function tool format.
Args:
@@ -132,7 +132,7 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam
custom_openai_tools = tools_schema.custom_tools.get(AdapterType.OPENAI, [])
return result + custom_openai_tools
- def get_messages_for_logging(self, context: LLMContext) -> List[Dict[str, Any]]:
+ def get_messages_for_logging(self, context: LLMContext) -> list[dict[str, Any]]:
"""Get messages from context in a format ready for logging.
Binary data (images, audio) is replaced with short placeholders.
@@ -146,8 +146,8 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam
return self.get_messages(context, truncate_large_values=True)
def _convert_messages_to_input(
- self, messages: List[LLMContextMessage]
- ) -> List[ResponseInputItemParam]:
+ self, messages: list[LLMContextMessage]
+ ) -> list[ResponseInputItemParam]:
"""Convert LLMContext messages to Responses API input items.
Args:
@@ -156,7 +156,7 @@ class OpenAIResponsesLLMAdapter(BaseLLMAdapter[OpenAIResponsesLLMInvocationParam
Returns:
List of Responses API input items.
"""
- result: List[ResponseInputItemParam] = []
+ result: list[ResponseInputItemParam] = []
for message in messages:
if isinstance(message, LLMSpecificMessage):
diff --git a/src/pipecat/adapters/services/perplexity_adapter.py b/src/pipecat/adapters/services/perplexity_adapter.py
index 18ebea648..188092b78 100644
--- a/src/pipecat/adapters/services/perplexity_adapter.py
+++ b/src/pipecat/adapters/services/perplexity_adapter.py
@@ -28,7 +28,6 @@ the messages are sent to Perplexity's API.
"""
import copy
-from typing import List, Optional
from openai.types.chat import ChatCompletionMessageParam
@@ -53,7 +52,7 @@ class PerplexityLLMAdapter(OpenAILLMAdapter):
self,
context: LLMContext,
*,
- system_instruction: Optional[str] = None,
+ system_instruction: str | None = None,
convert_developer_to_user: bool,
) -> OpenAILLMInvocationParams:
"""Get OpenAI-compatible invocation parameters with Perplexity message fixes applied.
@@ -78,8 +77,8 @@ class PerplexityLLMAdapter(OpenAILLMAdapter):
return params
def _transform_messages(
- self, messages: List[ChatCompletionMessageParam]
- ) -> List[ChatCompletionMessageParam]:
+ self, messages: list[ChatCompletionMessageParam]
+ ) -> list[ChatCompletionMessageParam]:
"""Transform messages to satisfy Perplexity's API constraints.
Applies three transformation steps in order:
diff --git a/src/pipecat/audio/dtmf/types.py b/src/pipecat/audio/dtmf/types.py
index 1b6eea7a0..aaec06f8c 100644
--- a/src/pipecat/audio/dtmf/types.py
+++ b/src/pipecat/audio/dtmf/types.py
@@ -11,10 +11,10 @@ key on the telephone keypad, facilitating the handling of input in
telecommunication applications.
"""
-from enum import Enum
+from enum import StrEnum
-class KeypadEntry(str, Enum):
+class KeypadEntry(StrEnum):
"""DTMF keypad entries for phone system integration.
Parameters:
diff --git a/src/pipecat/audio/dtmf/utils.py b/src/pipecat/audio/dtmf/utils.py
index eff2aa12f..22026759e 100644
--- a/src/pipecat/audio/dtmf/utils.py
+++ b/src/pipecat/audio/dtmf/utils.py
@@ -15,7 +15,6 @@ import asyncio
import io
import wave
from importlib.resources import files
-from typing import Dict, Optional
import aiofiles
@@ -24,8 +23,8 @@ from pipecat.audio.resamplers.base_audio_resampler import BaseAudioResampler
from pipecat.audio.utils import create_file_resampler
__DTMF_LOCK__ = asyncio.Lock()
-__DTMF_AUDIO__: Dict[KeypadEntry, bytes] = {}
-__DTMF_RESAMPLER__: Optional[BaseAudioResampler] = None
+__DTMF_AUDIO__: dict[KeypadEntry, bytes] = {}
+__DTMF_RESAMPLER__: BaseAudioResampler | None = None
__DTMF_FILE_NAME = {
KeypadEntry.POUND: "dtmf-pound.wav",
diff --git a/src/pipecat/audio/filters/aic_filter.py b/src/pipecat/audio/filters/aic_filter.py
index 752f6f3fa..1bdf723ae 100644
--- a/src/pipecat/audio/filters/aic_filter.py
+++ b/src/pipecat/audio/filters/aic_filter.py
@@ -18,7 +18,6 @@ Classes:
import asyncio
from pathlib import Path
from threading import Lock
-from typing import List, Optional, Tuple
import numpy as np
from aic_sdk import (
@@ -44,14 +43,14 @@ class AICModelManager:
acquires on first use and releases when the last reference is dropped.
"""
- _cache: dict[str, Tuple[Model, int]] = {} # key -> (model, ref_count)
+ _cache: dict[str, tuple[Model, int]] = {} # key -> (model, ref_count)
_lock = Lock()
_loading: dict[
str, asyncio.Task[Model]
] = {} # key -> load task (deduplicates concurrent loads)
@classmethod
- def _increment_reference(cls, cache_key: str, entry: Tuple[Model, int]) -> Tuple[Model, str]:
+ def _increment_reference(cls, cache_key: str, entry: tuple[Model, int]) -> tuple[Model, str]:
"""Increment reference count for cached entry. Caller must hold _lock."""
cached_model, ref_count = entry
cls._cache[cache_key] = (cached_model, ref_count + 1)
@@ -59,7 +58,7 @@ class AICModelManager:
return cached_model, cache_key
@classmethod
- def _store_new_reference(cls, cache_key: str, model: Model) -> Tuple[Model, str]:
+ def _store_new_reference(cls, cache_key: str, model: Model) -> tuple[Model, str]:
"""Store new model in cache with ref count 1. Caller must hold _lock."""
cls._cache[cache_key] = (model, 1)
logger.debug(f"AIC model cached key={cache_key!r} ref_count=1")
@@ -70,9 +69,9 @@ class AICModelManager:
cls,
cache_key: str,
*,
- model_path: Optional[Path] = None,
- model_id: Optional[str] = None,
- model_download_dir: Optional[Path] = None,
+ model_path: Path | None = None,
+ model_id: str | None = None,
+ model_download_dir: Path | None = None,
) -> Model:
"""Run the actual load (file or download). Separate to allow create_task and deduplication."""
if model_path is not None:
@@ -94,9 +93,9 @@ class AICModelManager:
@staticmethod
def _get_cache_key(
*,
- model_path: Optional[Path] = None,
- model_id: Optional[str] = None,
- model_download_dir: Optional[Path] = None,
+ model_path: Path | None = None,
+ model_id: str | None = None,
+ model_download_dir: Path | None = None,
) -> str:
"""Build a stable cache key for the model.
@@ -120,10 +119,10 @@ class AICModelManager:
async def acquire(
cls,
*,
- model_path: Optional[Path] = None,
- model_id: Optional[str] = None,
- model_download_dir: Optional[Path] = None,
- ) -> Tuple[Model, str]:
+ model_path: Path | None = None,
+ model_id: str | None = None,
+ model_download_dir: Path | None = None,
+ ) -> tuple[Model, str]:
"""Get or load a Model and increment its reference count.
Call this when starting a filter. Store the returned key and pass it
@@ -218,10 +217,10 @@ class AICFilter(BaseAudioFilter):
self,
*,
license_key: str,
- model_id: Optional[str] = None,
- model_path: Optional[Path] = None,
- model_download_dir: Optional[Path] = None,
- enhancement_level: Optional[float] = None,
+ model_id: str | None = None,
+ model_path: Path | None = None,
+ model_download_dir: Path | None = None,
+ enhancement_level: float | None = None,
) -> None:
"""Initialize the AIC filter.
@@ -274,7 +273,7 @@ class AICFilter(BaseAudioFilter):
)
# AIC SDK objects; model is shared via AICModelManager
- self._model_cache_key: Optional[str] = None
+ self._model_cache_key: str | None = None
self._model = None
self._processor = None
self._processor_ctx = None
@@ -298,9 +297,9 @@ class AICFilter(BaseAudioFilter):
def create_vad_analyzer(
self,
*,
- speech_hold_duration: Optional[float] = None,
- minimum_speech_duration: Optional[float] = None,
- sensitivity: Optional[float] = None,
+ speech_hold_duration: float | None = None,
+ minimum_speech_duration: float | None = None,
+ sensitivity: float | None = None,
):
"""Return an analyzer that will lazily instantiate the AIC VAD when ready.
@@ -491,7 +490,7 @@ class AICFilter(BaseAudioFilter):
blocks_data = bytes(self._audio_buffer[:total_size])
self._audio_buffer = self._audio_buffer[total_size:]
- filtered_chunks: List[bytes] = []
+ filtered_chunks: list[bytes] = []
for i in range(num_blocks):
start = i * block_size
diff --git a/src/pipecat/audio/filters/koala_filter.py b/src/pipecat/audio/filters/koala_filter.py
index cd5525f98..51dd5871d 100644
--- a/src/pipecat/audio/filters/koala_filter.py
+++ b/src/pipecat/audio/filters/koala_filter.py
@@ -10,7 +10,7 @@ This module provides an audio filter implementation using PicoVoice's Koala
Noise Suppression engine to reduce background noise in audio streams.
"""
-from typing import Sequence
+from collections.abc import Sequence
import numpy as np
from loguru import logger
diff --git a/src/pipecat/audio/mixers/soundfile_mixer.py b/src/pipecat/audio/mixers/soundfile_mixer.py
index 846e845e6..6d00c1c3f 100644
--- a/src/pipecat/audio/mixers/soundfile_mixer.py
+++ b/src/pipecat/audio/mixers/soundfile_mixer.py
@@ -12,7 +12,8 @@ runtime configuration changes.
"""
import asyncio
-from typing import Any, Dict, Mapping
+from collections.abc import Mapping
+from typing import Any
import numpy as np
from loguru import logger
@@ -70,7 +71,7 @@ class SoundfileMixer(BaseAudioMixer):
self._sample_rate = 0
self._sound_pos = 0
- self._sounds: Dict[str, Any] = {}
+ self._sounds: dict[str, Any] = {}
self._current_sound = default_sound
self._mixing = mixing
self._loop = loop
diff --git a/src/pipecat/audio/turn/base_turn_analyzer.py b/src/pipecat/audio/turn/base_turn_analyzer.py
index e8f6b9d13..6bf9f5dcf 100644
--- a/src/pipecat/audio/turn/base_turn_analyzer.py
+++ b/src/pipecat/audio/turn/base_turn_analyzer.py
@@ -12,7 +12,6 @@ when a user has finished speaking in a conversation.
from abc import ABC, abstractmethod
from enum import Enum
-from typing import Optional, Tuple
from pydantic import BaseModel
@@ -44,7 +43,7 @@ class BaseTurnAnalyzer(ABC):
while still defining an abstract interface through abstract methods.
"""
- def __init__(self, *, sample_rate: Optional[int] = None):
+ def __init__(self, *, sample_rate: int | None = None):
"""Initialize the turn analyzer.
Args:
@@ -108,7 +107,7 @@ class BaseTurnAnalyzer(ABC):
pass
@abstractmethod
- async def analyze_end_of_turn(self) -> Tuple[EndOfTurnState, Optional[MetricsData]]:
+ async def analyze_end_of_turn(self) -> tuple[EndOfTurnState, MetricsData | None]:
"""Analyzes if an end of turn has occurred based on the audio input.
Returns:
diff --git a/src/pipecat/audio/turn/krisp_viva_turn.py b/src/pipecat/audio/turn/krisp_viva_turn.py
index 3aa540491..5235a94be 100644
--- a/src/pipecat/audio/turn/krisp_viva_turn.py
+++ b/src/pipecat/audio/turn/krisp_viva_turn.py
@@ -16,7 +16,6 @@ passed directly to the constructor.
import os
import time
-from typing import Optional, Tuple
import numpy as np
from loguru import logger
@@ -61,9 +60,9 @@ class KrispVivaTurn(BaseTurnAnalyzer):
def __init__(
self,
*,
- model_path: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[KrispTurnParams] = None,
+ model_path: str | None = None,
+ sample_rate: int | None = None,
+ params: KrispTurnParams | None = None,
api_key: str = "",
) -> None:
"""Initialize the Krisp turn analyzer.
@@ -119,9 +118,9 @@ class KrispVivaTurn(BaseTurnAnalyzer):
self._last_probability = None
self._frame_probabilities = []
self._last_state = EndOfTurnState.INCOMPLETE
- self._speech_stopped_time: Optional[float] = None
- self._e2e_processing_time_ms: Optional[float] = None
- self._last_metrics: Optional[TurnMetricsData] = None
+ self._speech_stopped_time: float | None = None
+ self._e2e_processing_time_ms: float | None = None
+ self._last_metrics: TurnMetricsData | None = None
# Create session with provided sample rate or default to 16000 Hz
# This preloads the model to improve latency when set_sample_rate is called later
@@ -214,7 +213,7 @@ class KrispVivaTurn(BaseTurnAnalyzer):
return self._frame_probabilities
@property
- def last_probability(self) -> Optional[float]:
+ def last_probability(self) -> float | None:
"""Get the last turn probability value computed.
Returns:
@@ -348,7 +347,7 @@ class KrispVivaTurn(BaseTurnAnalyzer):
self._last_state = error_state
return error_state
- async def analyze_end_of_turn(self) -> Tuple[EndOfTurnState, Optional[MetricsData]]:
+ async def analyze_end_of_turn(self) -> tuple[EndOfTurnState, MetricsData | None]:
"""Analyze the current audio state to determine if turn has ended.
Returns:
diff --git a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py
index fa652d884..c1f4e53a2 100644
--- a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py
@@ -15,7 +15,7 @@ import asyncio
import time
from abc import abstractmethod
from concurrent.futures import ThreadPoolExecutor
-from typing import Any, Dict, Optional, Tuple
+from typing import Any
import numpy as np
from loguru import logger
@@ -57,9 +57,7 @@ class BaseSmartTurn(BaseTurnAnalyzer):
implement the specific model prediction logic.
"""
- def __init__(
- self, *, sample_rate: Optional[int] = None, params: Optional[SmartTurnParams] = None
- ):
+ def __init__(self, *, sample_rate: int | None = None, params: SmartTurnParams | None = None):
"""Initialize the smart turn analyzer.
Args:
@@ -146,7 +144,7 @@ class BaseSmartTurn(BaseTurnAnalyzer):
return state
- async def analyze_end_of_turn(self) -> Tuple[EndOfTurnState, Optional[MetricsData]]:
+ async def analyze_end_of_turn(self) -> tuple[EndOfTurnState, MetricsData | None]:
"""Analyze the current audio state to determine if turn has ended.
Returns:
@@ -178,7 +176,7 @@ class BaseSmartTurn(BaseTurnAnalyzer):
self._speech_start_time = 0
self._silence_ms = 0
- def _process_speech_segment(self, audio_buffer) -> Tuple[EndOfTurnState, Optional[MetricsData]]:
+ def _process_speech_segment(self, audio_buffer) -> tuple[EndOfTurnState, MetricsData | None]:
"""Process accumulated audio segment using ML model."""
state = EndOfTurnState.INCOMPLETE
@@ -248,6 +246,6 @@ class BaseSmartTurn(BaseTurnAnalyzer):
return state, result_data
@abstractmethod
- def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+ def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]:
"""Predict end-of-turn using ML model from audio data."""
pass
diff --git a/src/pipecat/audio/turn/smart_turn/http_smart_turn.py b/src/pipecat/audio/turn/smart_turn/http_smart_turn.py
index 6b5f4a84d..0d4b717b3 100644
--- a/src/pipecat/audio/turn/smart_turn/http_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/http_smart_turn.py
@@ -12,7 +12,7 @@ HTTP endpoints for ML-based end-of-turn detection.
import asyncio
import io
-from typing import Any, Dict, Optional
+from typing import Any
import aiohttp
import numpy as np
@@ -33,7 +33,7 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn):
*,
url: str,
aiohttp_session: aiohttp.ClientSession,
- headers: Optional[Dict[str, str]] = None,
+ headers: dict[str, str] | None = None,
**kwargs,
):
"""Initialize the HTTP smart turn analyzer.
@@ -58,7 +58,7 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn):
logger.trace(f"Serialized size: {len(serialized_bytes)} bytes")
return serialized_bytes
- async def _send_raw_request(self, data_bytes: bytes) -> Dict[str, Any]:
+ async def _send_raw_request(self, data_bytes: bytes) -> dict[str, Any]:
"""Send raw audio data to the HTTP endpoint for prediction."""
headers = {"Content-Type": "application/octet-stream"}
headers.update(self._headers)
@@ -97,14 +97,14 @@ class HttpSmartTurnAnalyzer(BaseSmartTurn):
logger.trace(text)
raise Exception(f"Non-JSON response: {text}")
- except asyncio.TimeoutError:
+ except TimeoutError:
logger.error(f"Request timed out after {self._params.stop_secs} seconds")
raise SmartTurnTimeoutException(f"Request exceeded {self._params.stop_secs} seconds.")
except aiohttp.ClientError as e:
logger.error(f"Failed to send raw request to Daily Smart Turn: {e}")
raise Exception("Failed to send raw request to Daily Smart Turn.")
- def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+ def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]:
"""Predict end-of-turn using remote HTTP ML service."""
try:
serialized_array = self._serialize_array(audio_array)
diff --git a/src/pipecat/audio/turn/smart_turn/local_coreml_smart_turn.py b/src/pipecat/audio/turn/smart_turn/local_coreml_smart_turn.py
index 18310c386..f424ae9a2 100644
--- a/src/pipecat/audio/turn/smart_turn/local_coreml_smart_turn.py
+++ b/src/pipecat/audio/turn/smart_turn/local_coreml_smart_turn.py
@@ -11,7 +11,7 @@ local end-of-turn detection without requiring network connectivity.
"""
import warnings
-from typing import Any, Dict
+from typing import Any
import numpy as np
from loguru import logger
@@ -76,7 +76,7 @@ class LocalCoreMLSmartTurnAnalyzer(BaseSmartTurn):
self._turn_model = ct.models.MLModel(core_ml_model_path)
logger.debug("Loaded Local Smart Turn")
- async def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+ async def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]:
"""Predict end-of-turn using local CoreML model."""
inputs = self._turn_processor(
audio_array,
diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py
index 8d584ecd2..2ef36b240 100644
--- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py
+++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v2.py
@@ -11,7 +11,7 @@ local end-of-turn detection without requiring network connectivity.
"""
import warnings
-from typing import Any, Dict
+from typing import Any
import numpy as np
from loguru import logger
@@ -87,7 +87,7 @@ class LocalSmartTurnAnalyzerV2(BaseSmartTurn):
self._turn_model.eval()
logger.debug("Loaded Local Smart Turn v2")
- def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+ def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]:
"""Predict end-of-turn using local PyTorch model."""
inputs = self._turn_processor(
audio_array,
diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py
index a8cc249fd..a94af41e6 100644
--- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py
+++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py
@@ -10,7 +10,7 @@ This module provides a smart turn analyzer that uses an ONNX model for
local end-of-turn detection without requiring network connectivity.
"""
-from typing import Any, Dict, Optional
+from typing import Any
import numpy as np
import onnxruntime as ort
@@ -32,9 +32,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
enabling offline operation without network dependencies.
"""
- def __init__(
- self, *, smart_turn_model_path: Optional[str] = None, cpu_count: int = 1, **kwargs
- ):
+ def __init__(self, *, smart_turn_model_path: str | None = None, cpu_count: int = 1, **kwargs):
"""Initialize the local ONNX smart-turn-v3 analyzer.
Args:
@@ -138,7 +136,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn):
return soxr.resample(audio_array, actual_rate, _MODEL_SAMPLE_RATE, quality="VHQ")
- def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]:
+ def _predict_endpoint(self, audio_array: np.ndarray) -> dict[str, Any]:
"""Predict end-of-turn using local ONNX model."""
def truncate_audio_to_last_n_seconds(
diff --git a/src/pipecat/audio/vad/aic_vad.py b/src/pipecat/audio/vad/aic_vad.py
index 813029e2b..68f4fc67d 100644
--- a/src/pipecat/audio/vad/aic_vad.py
+++ b/src/pipecat/audio/vad/aic_vad.py
@@ -7,7 +7,8 @@ Classes:
AICVADAnalyzer: For aic-sdk (uses 'aic_sdk' module)
"""
-from typing import Any, Callable, Optional
+from collections.abc import Callable
+from typing import Any
from aic_sdk import VadParameter
from loguru import logger
@@ -46,10 +47,10 @@ class AICVADAnalyzer(VADAnalyzer):
def __init__(
self,
*,
- vad_context_factory: Optional[Callable[[], Any]] = None,
- speech_hold_duration: Optional[float] = None,
- minimum_speech_duration: Optional[float] = None,
- sensitivity: Optional[float] = None,
+ vad_context_factory: Callable[[], Any] | None = None,
+ speech_hold_duration: float | None = None,
+ minimum_speech_duration: float | None = None,
+ sensitivity: float | None = None,
):
"""Create an AIC VAD analyzer.
@@ -77,10 +78,10 @@ class AICVADAnalyzer(VADAnalyzer):
super().__init__(sample_rate=None, params=fixed_params)
self._vad_context_factory = vad_context_factory
- self._vad_ctx: Optional[Any] = None
- self._pending_speech_hold_duration: Optional[float] = speech_hold_duration
- self._pending_minimum_speech_duration: Optional[float] = minimum_speech_duration
- self._pending_sensitivity: Optional[float] = sensitivity
+ self._vad_ctx: Any | None = None
+ self._pending_speech_hold_duration: float | None = speech_hold_duration
+ self._pending_minimum_speech_duration: float | None = minimum_speech_duration
+ self._pending_sensitivity: float | None = sensitivity
def bind_vad_context_factory(self, vad_context_factory: Callable[[], Any]):
"""Attach or replace the factory post-construction."""
diff --git a/src/pipecat/audio/vad/krisp_viva_vad.py b/src/pipecat/audio/vad/krisp_viva_vad.py
index 2bcc13ab2..42e787bc8 100644
--- a/src/pipecat/audio/vad/krisp_viva_vad.py
+++ b/src/pipecat/audio/vad/krisp_viva_vad.py
@@ -12,7 +12,6 @@ Supports 8kHz, 16kHz, 32kHz, 44.1kHz and 48kHz sample rates.
"""
import os
-from typing import Optional
import numpy as np
from loguru import logger
@@ -38,10 +37,10 @@ class KrispVivaVadAnalyzer(VADAnalyzer):
def __init__(
self,
*,
- model_path: Optional[str] = None,
+ model_path: str | None = None,
frame_duration: int = 10,
- sample_rate: Optional[int] = None,
- params: Optional[VADParams] = None,
+ sample_rate: int | None = None,
+ params: VADParams | None = None,
):
"""Initialize the Krisp VIVA VAD analyzer.
diff --git a/src/pipecat/audio/vad/silero.py b/src/pipecat/audio/vad/silero.py
index c15ba5b90..2b3d3d629 100644
--- a/src/pipecat/audio/vad/silero.py
+++ b/src/pipecat/audio/vad/silero.py
@@ -12,7 +12,6 @@ Supports 8kHz and 16kHz sample rates.
"""
import time
-from typing import Optional
import numpy as np
from loguru import logger
@@ -135,7 +134,7 @@ class SileroVADAnalyzer(VADAnalyzer):
with automatic model state management and periodic resets.
"""
- def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None):
+ def __init__(self, *, sample_rate: int | None = None, params: VADParams | None = None):
"""Initialize the Silero VAD analyzer.
Args:
diff --git a/src/pipecat/audio/vad/vad_analyzer.py b/src/pipecat/audio/vad/vad_analyzer.py
index c519b0861..32b879957 100644
--- a/src/pipecat/audio/vad/vad_analyzer.py
+++ b/src/pipecat/audio/vad/vad_analyzer.py
@@ -15,7 +15,6 @@ import asyncio
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor
from enum import Enum
-from typing import Optional
from loguru import logger
from pydantic import BaseModel
@@ -68,7 +67,7 @@ class VADAnalyzer(ABC):
Subclasses must implement the core voice confidence calculation.
"""
- def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None):
+ def __init__(self, *, sample_rate: int | None = None, params: VADParams | None = None):
"""Initialize the VAD analyzer.
Args:
diff --git a/src/pipecat/audio/vad/vad_controller.py b/src/pipecat/audio/vad/vad_controller.py
index fefe3bec1..2f16a0c62 100644
--- a/src/pipecat/audio/vad/vad_controller.py
+++ b/src/pipecat/audio/vad/vad_controller.py
@@ -12,7 +12,6 @@ and emit events when speech starts, stops, or is actively detected.
import asyncio
import time
-from typing import Optional, Type
from loguru import logger
@@ -90,7 +89,7 @@ class VADController(BaseObject):
self._vad_analyzer = vad_analyzer
self._vad_state: VADState = VADState.QUIET
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
# Last time a on_speech_activity was triggered.
self._speech_activity_time = 0
@@ -102,7 +101,7 @@ class VADController(BaseObject):
# while in SPEAKING state (e.g. user mutes mic mid-speech).
self._last_audio_time: float = 0.0
self._audio_idle_timeout = audio_idle_timeout
- self._audio_idle_task: Optional[asyncio.Task] = None
+ self._audio_idle_task: asyncio.Task | None = None
self._register_event_handler("on_speech_started", sync=True)
self._register_event_handler("on_speech_stopped", sync=True)
@@ -234,7 +233,7 @@ class VADController(BaseObject):
"""
await self._call_event_handler("on_push_frame", frame, direction)
- async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs):
+ async def broadcast_frame(self, frame_cls: type[Frame], **kwargs):
"""Request a frame to be broadcast upstream and downstream.
This emits an on_broadcast_frame event that must be handled by a processor
diff --git a/src/pipecat/extensions/ivr/ivr_navigator.py b/src/pipecat/extensions/ivr/ivr_navigator.py
index a2ff0cde5..f6b36655b 100644
--- a/src/pipecat/extensions/ivr/ivr_navigator.py
+++ b/src/pipecat/extensions/ivr/ivr_navigator.py
@@ -11,7 +11,6 @@ using LLM-based decision making and DTMF tone generation.
"""
from enum import Enum
-from typing import List, Optional
from loguru import logger
@@ -72,7 +71,7 @@ class IVRProcessor(FrameProcessor):
*,
classifier_prompt: str,
ivr_prompt: str,
- ivr_vad_params: Optional[VADParams] = None,
+ ivr_vad_params: VADParams | None = None,
):
"""Initialize the IVR processor.
@@ -88,7 +87,7 @@ class IVRProcessor(FrameProcessor):
self._classifier_prompt = classifier_prompt
# Store saved context messages
- self._saved_messages: List[dict] = []
+ self._saved_messages: list[dict] = []
# XML pattern aggregation
self._aggregator = PatternPairAggregator()
@@ -98,7 +97,7 @@ class IVRProcessor(FrameProcessor):
self._register_event_handler("on_conversation_detected")
self._register_event_handler("on_ivr_status_changed")
- def update_saved_messages(self, messages: List[dict]) -> None:
+ def update_saved_messages(self, messages: list[dict]) -> None:
"""Update the saved context messages.
Sets the messages that are saved when switching between
@@ -109,7 +108,7 @@ class IVRProcessor(FrameProcessor):
"""
self._saved_messages = messages
- def _get_conversation_history(self) -> List[dict]:
+ def _get_conversation_history(self) -> list[dict]:
"""Get saved context messages without the system message.
Returns:
@@ -409,7 +408,7 @@ Remember: Respond with `NUMBER` (single or multiple for sequences),
*,
llm: LLMService,
ivr_prompt: str,
- ivr_vad_params: Optional[VADParams] = None,
+ ivr_vad_params: VADParams | None = None,
):
"""Initialize the IVR navigator.
diff --git a/src/pipecat/extensions/voicemail/voicemail_detector.py b/src/pipecat/extensions/voicemail/voicemail_detector.py
index 470f5dd54..3ab7f2f7d 100644
--- a/src/pipecat/extensions/voicemail/voicemail_detector.py
+++ b/src/pipecat/extensions/voicemail/voicemail_detector.py
@@ -16,7 +16,6 @@ Note:
"""
import asyncio
-from typing import List, Optional
from loguru import logger
@@ -71,7 +70,7 @@ class NotifierGate(FrameProcessor):
self._notifier = notifier
self._task_name = task_name
self._gate_opened = True
- self._gate_task: Optional[asyncio.Task] = None
+ self._gate_task: asyncio.Task | None = None
async def setup(self, setup: FrameProcessorSetup):
"""Set up the processor with required components.
@@ -143,7 +142,7 @@ class ClassifierGate(NotifierGate):
super().__init__(gate_notifier, task_name="classifier_gate")
self._conversation_notifier = conversation_notifier
self._conversation_detected = False
- self._conversation_task: Optional[asyncio.Task] = None
+ self._conversation_task: asyncio.Task | None = None
async def setup(self, setup: FrameProcessorSetup):
"""Set up the processor with required components.
@@ -267,7 +266,7 @@ class ClassificationProcessor(FrameProcessor):
# Voicemail timing state
self._voicemail_detected = False
- self._voicemail_task: Optional[asyncio.Task] = None
+ self._voicemail_task: asyncio.Task | None = None
self._voicemail_event = asyncio.Event()
self._voicemail_event.set()
@@ -390,7 +389,7 @@ class ClassificationProcessor(FrameProcessor):
self._voicemail_event.wait(), timeout=self._voicemail_response_delay
)
await asyncio.sleep(0.1)
- except asyncio.TimeoutError:
+ except TimeoutError:
await self._call_event_handler("on_voicemail_detected")
break
@@ -423,10 +422,10 @@ class TTSGate(FrameProcessor):
super().__init__()
self._conversation_notifier = conversation_notifier
self._voicemail_notifier = voicemail_notifier
- self._frame_buffer: List[tuple[Frame, FrameDirection]] = []
+ self._frame_buffer: list[tuple[Frame, FrameDirection]] = []
self._gating_active = True
- self._conversation_task: Optional[asyncio.Task] = None
- self._voicemail_task: Optional[asyncio.Task] = None
+ self._conversation_task: asyncio.Task | None = None
+ self._voicemail_task: asyncio.Task | None = None
async def setup(self, setup: FrameProcessorSetup):
"""Set up the processor with required components.
@@ -591,7 +590,7 @@ VOICEMAIL SYSTEM (respond "VOICEMAIL"):
*,
llm: LLMService,
voicemail_response_delay: float = 2.0,
- custom_system_prompt: Optional[str] = None,
+ custom_system_prompt: str | None = None,
):
"""Initialize the voicemail detector with classification and buffering components.
diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py
index 21780d6cb..7fd215caf 100644
--- a/src/pipecat/frames/frames.py
+++ b/src/pipecat/frames/frames.py
@@ -11,20 +11,16 @@ including data frames, system frames, and control frames for audio, video, text,
and LLM processing.
"""
+from __future__ import annotations
+
import time
+from collections.abc import Awaitable, Callable, Mapping, Sequence
from dataclasses import dataclass, field
from typing import (
TYPE_CHECKING,
Any,
- Awaitable,
- Callable,
- Dict,
- List,
Literal,
- Mapping,
Optional,
- Sequence,
- Tuple,
)
from pipecat.adapters.schemas.tools_schema import ToolsSchema
@@ -45,7 +41,7 @@ if TYPE_CHECKING:
from pipecat.utils.tracing.tracing_context import TracingContext
-def format_pts(pts: Optional[int]):
+def format_pts(pts: int | None):
"""Format presentation timestamp (PTS) in nanoseconds to a human-readable string.
Converts a PTS value in nanoseconds to a string representation.
@@ -77,20 +73,20 @@ class Frame:
id: int = field(init=False)
name: str = field(init=False)
- pts: Optional[int] = field(init=False)
- broadcast_sibling_id: Optional[int] = field(init=False)
- metadata: Dict[str, Any] = field(init=False)
- transport_source: Optional[str] = field(init=False)
- transport_destination: Optional[str] = field(init=False)
+ pts: int | None = field(init=False)
+ broadcast_sibling_id: int | None = field(init=False)
+ metadata: dict[str, Any] = field(init=False)
+ transport_source: str | None = field(init=False)
+ transport_destination: str | None = field(init=False)
def __post_init__(self):
self.id: int = obj_id()
self.name: str = f"{self.__class__.__name__}#{obj_count(self)}"
- self.pts: Optional[int] = None
- self.broadcast_sibling_id: Optional[int] = None
- self.metadata: Dict[str, Any] = {}
- self.transport_source: Optional[str] = None
- self.transport_destination: Optional[str] = None
+ self.pts: int | None = None
+ self.broadcast_sibling_id: int | None = None
+ self.metadata: dict[str, Any] = {}
+ self.transport_source: str | None = None
+ self.transport_destination: str | None = None
def __str__(self):
return self.name
@@ -183,8 +179,8 @@ class ImageRawFrame:
"""
image: bytes
- size: Tuple[int, int]
- format: Optional[str]
+ size: tuple[int, int]
+ format: str | None
#
@@ -242,7 +238,7 @@ class TTSAudioRawFrame(OutputAudioRawFrame):
context_id: Unique identifier for the TTS context that generated this audio.
"""
- context_id: Optional[str] = None
+ context_id: str | None = None
@dataclass
@@ -268,7 +264,7 @@ class URLImageRawFrame(OutputImageRawFrame):
url: URL where the image can be downloaded from.
"""
- url: Optional[str] = None
+ url: str | None = None
def __str__(self):
pts = format_pts(self.pts)
@@ -287,7 +283,7 @@ class SpriteFrame(DataFrame):
images: List of image frames that make up the sprite animation.
"""
- images: List[OutputImageRawFrame]
+ images: list[OutputImageRawFrame]
def __str__(self):
pts = format_pts(self.pts)
@@ -312,7 +308,7 @@ class TextFrame(DataFrame):
"""
text: str
- skip_tts: Optional[bool] = field(init=False)
+ skip_tts: bool | None = field(init=False)
# Whether any necessary inter-frame (leading/trailing) spaces are already
# included in the text.
# NOTE: Ideally this would be available at init time with a default value,
@@ -357,7 +353,7 @@ class AggregatedTextFrame(TextFrame):
"""
aggregated_by: AggregationType | str
- context_id: Optional[str] = None
+ context_id: str | None = None
@dataclass
@@ -375,7 +371,7 @@ class TTSTextFrame(AggregatedTextFrame):
context_id: Unique identifier for the TTS context that generated this text.
"""
- context_id: Optional[str] = None
+ context_id: str | None = None
@dataclass
@@ -396,8 +392,8 @@ class TranscriptionFrame(TextFrame):
user_id: str
timestamp: str
- language: Optional[Language] = None
- result: Optional[Any] = None
+ language: Language | None = None
+ result: Any | None = None
finalized: bool = False
def __str__(self):
@@ -422,8 +418,8 @@ class InterimTranscriptionFrame(TextFrame):
text: str
user_id: str
timestamp: str
- language: Optional[Language] = None
- result: Optional[Any] = None
+ language: Language | None = None
+ result: Any | None = None
def __str__(self):
return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})"
@@ -444,7 +440,7 @@ class TranslationFrame(TextFrame):
user_id: str
timestamp: str
- language: Optional[Language] = None
+ language: Language | None = None
def __str__(self):
return f"{self.name}(user: {self.user_id}, text: [{self.text}], language: {self.language}, timestamp: {self.timestamp})"
@@ -472,7 +468,7 @@ class LLMContextFrame(Frame):
context: The LLM context containing messages, tools, and configuration.
"""
- context: "LLMContext"
+ context: LLMContext
@dataclass
@@ -489,7 +485,7 @@ class LLMThoughtStartFrame(ControlFrame):
"""
append_to_context: bool = False
- llm: Optional[str] = None
+ llm: str | None = None
def __post_init__(self):
super().__post_init__()
@@ -567,8 +563,8 @@ class LLMMessagesAppendFrame(DataFrame):
run_llm: Whether the context update should be sent to the LLM.
"""
- messages: List[dict]
- run_llm: Optional[bool] = None
+ messages: list[dict]
+ run_llm: bool | None = None
@dataclass
@@ -583,8 +579,8 @@ class LLMMessagesUpdateFrame(DataFrame):
run_llm: Whether the context update should be sent to the LLM.
"""
- messages: List[dict]
- run_llm: Optional[bool] = None
+ messages: list[dict]
+ run_llm: bool | None = None
@dataclass
@@ -600,8 +596,8 @@ class LLMMessagesTransformFrame(DataFrame):
run_llm: Whether the context update should be sent to the LLM.
"""
- transform: Callable[[List["LLMContextMessage"]], List["LLMContextMessage"]]
- run_llm: Optional[bool] = None
+ transform: Callable[[list[LLMContextMessage]], list[LLMContextMessage]]
+ run_llm: bool | None = None
@dataclass
@@ -616,7 +612,7 @@ class LLMSetToolsFrame(DataFrame):
tools: List of tool/function definitions for the LLM.
"""
- tools: List[dict] | ToolsSchema | "NotGiven"
+ tools: list[dict] | ToolsSchema | NotGiven
@dataclass
@@ -668,8 +664,8 @@ class FunctionCallResultProperties:
Only meaningful for async function calls (``cancel_on_interruption=False``).
"""
- run_llm: Optional[bool] = None
- on_context_updated: Optional[Callable[[], Awaitable[None]]] = None
+ run_llm: bool | None = None
+ on_context_updated: Callable[[], Awaitable[None]] | None = None
is_final: bool = True
@@ -694,8 +690,8 @@ class FunctionCallResultFrame(DataFrame, UninterruptibleFrame):
tool_call_id: str
arguments: Any
result: Any
- run_llm: Optional[bool] = None
- properties: Optional[FunctionCallResultProperties] = None
+ run_llm: bool | None = None
+ properties: FunctionCallResultProperties | None = None
@dataclass
@@ -711,7 +707,7 @@ class TTSSpeakFrame(DataFrame):
"""
text: str
- append_to_context: Optional[bool] = None
+ append_to_context: bool | None = None
@dataclass
@@ -752,8 +748,8 @@ class OutputDTMFFrame(DTMFFrame, DataFrame):
:meth:`from_string` to build this from a string like ``"123#"``.
"""
- button: Optional[KeypadEntry] = None
- buttons: Optional[List[KeypadEntry]] = None
+ button: KeypadEntry | None = None
+ buttons: list[KeypadEntry] | None = None
def __post_init__(self):
super().__post_init__()
@@ -766,7 +762,7 @@ class OutputDTMFFrame(DTMFFrame, DataFrame):
return f"{self.name}(buttons: {self.to_string()})"
@classmethod
- def from_string(cls, buttons: str, **kwargs) -> "OutputDTMFFrame":
+ def from_string(cls, buttons: str, **kwargs) -> OutputDTMFFrame:
"""Build an ``OutputDTMFFrame`` from a string of DTMF characters.
Args:
@@ -820,7 +816,7 @@ class StartFrame(SystemFrame):
enable_tracing: bool = False
enable_usage_metrics: bool = False
report_only_initial_ttfb: bool = False
- tracing_context: Optional["TracingContext"] = None
+ tracing_context: TracingContext | None = None
@dataclass
@@ -834,7 +830,7 @@ class CancelFrame(SystemFrame):
reason: Optional reason for pushing a cancel frame.
"""
- reason: Optional[Any] = None
+ reason: Any | None = None
def __str__(self):
return f"{self.name}(reason: {self.reason})"
@@ -857,8 +853,8 @@ class ErrorFrame(SystemFrame):
error: str
fatal: bool = False
- processor: Optional["FrameProcessor"] = None
- exception: Optional[Exception] = None
+ processor: FrameProcessor | None = None
+ exception: Exception | None = None
def __str__(self):
return f"{self.name}(error: {self.error}, fatal: {self.fatal})"
@@ -891,7 +887,7 @@ class FrameProcessorPauseUrgentFrame(SystemFrame):
processor: The frame processor to pause.
"""
- processor: "FrameProcessor"
+ processor: FrameProcessor
@dataclass
@@ -906,7 +902,7 @@ class FrameProcessorResumeUrgentFrame(SystemFrame):
processor: The frame processor to resume.
"""
- processor: "FrameProcessor"
+ processor: FrameProcessor
@dataclass
@@ -1050,7 +1046,7 @@ class MetricsFrame(SystemFrame):
data: List of metrics data collected by the processor.
"""
- data: List[MetricsData]
+ data: list[MetricsData]
@dataclass
@@ -1156,12 +1152,12 @@ class UserImageRequestFrame(SystemFrame):
"""
user_id: str
- text: Optional[str] = None
- append_to_context: Optional[bool] = None
- video_source: Optional[str] = None
- function_name: Optional[str] = None
- tool_call_id: Optional[str] = None
- result_callback: Optional[Any] = None
+ text: str | None = None
+ append_to_context: bool | None = None
+ video_source: str | None = None
+ function_name: str | None = None
+ tool_call_id: str | None = None
+ result_callback: Any | None = None
def __str__(self):
return f"{self.name}(user: {self.user_id}, text: {self.text}, append_to_context: {self.append_to_context}, {self.video_source})"
@@ -1244,9 +1240,9 @@ class UserImageRawFrame(InputImageRawFrame):
"""
user_id: str = ""
- text: Optional[str] = None
- append_to_context: Optional[bool] = None
- request: Optional[UserImageRequestFrame] = None
+ text: str | None = None
+ append_to_context: bool | None = None
+ request: UserImageRequestFrame | None = None
def __str__(self):
pts = format_pts(self.pts)
@@ -1266,8 +1262,8 @@ class AssistantImageRawFrame(OutputImageRawFrame):
original_mime_type: The MIME type of the original image data.
"""
- original_data: Optional[bytes] = None
- original_mime_type: Optional[str] = None
+ original_data: bytes | None = None
+ original_mime_type: str | None = None
@dataclass
@@ -1296,8 +1292,8 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame):
:meth:`from_string` to build this from a string like ``"123#"``.
"""
- button: Optional[KeypadEntry] = None
- buttons: Optional[List[KeypadEntry]] = None
+ button: KeypadEntry | None = None
+ buttons: list[KeypadEntry] | None = None
def __post_init__(self):
super().__post_init__()
@@ -1310,7 +1306,7 @@ class OutputDTMFUrgentFrame(DTMFFrame, SystemFrame):
return f"{self.name}(buttons: {self.to_string()})"
@classmethod
- def from_string(cls, buttons: str, **kwargs) -> "OutputDTMFUrgentFrame":
+ def from_string(cls, buttons: str, **kwargs) -> OutputDTMFUrgentFrame:
"""Build an ``OutputDTMFUrgentFrame`` from a string of DTMF characters.
Args:
@@ -1349,8 +1345,8 @@ class SpeechControlParamsFrame(SystemFrame):
turn_params: Current turn-taking analysis parameters.
"""
- vad_params: Optional[VADParams] = None
- turn_params: Optional[BaseTurnParams] = None
+ vad_params: VADParams | None = None
+ turn_params: BaseTurnParams | None = None
@dataclass
@@ -1396,7 +1392,7 @@ class ServiceSwitcherRequestMetadataFrame(ControlFrame):
service: The target service that should re-emit its metadata.
"""
- service: "FrameProcessor"
+ service: FrameProcessor
#
@@ -1444,7 +1440,7 @@ class EndTaskFrame(TaskFrame, UninterruptibleFrame):
reason: Optional reason for pushing an end frame.
"""
- reason: Optional[Any] = None
+ reason: Any | None = None
def __str__(self):
return f"{self.name}(reason: {self.reason})"
@@ -1475,7 +1471,7 @@ class CancelTaskFrame(TaskSystemFrame):
reason: Optional reason for pushing a cancel frame.
"""
- reason: Optional[Any] = None
+ reason: Any | None = None
def __str__(self):
return f"{self.name}(reason: {self.reason})"
@@ -1516,7 +1512,7 @@ class EndFrame(ControlFrame, UninterruptibleFrame):
reason: Optional reason for pushing an end frame.
"""
- reason: Optional[Any] = None
+ reason: Any | None = None
def __str__(self):
return f"{self.name}(reason: {self.reason})"
@@ -1598,7 +1594,7 @@ class FrameProcessorPauseFrame(ControlFrame):
processor: The frame processor to pause.
"""
- processor: "FrameProcessor"
+ processor: FrameProcessor
@dataclass
@@ -1613,7 +1609,7 @@ class FrameProcessorResumeFrame(ControlFrame):
processor: The frame processor to resume.
"""
- processor: "FrameProcessor"
+ processor: FrameProcessor
@dataclass
@@ -1624,7 +1620,7 @@ class LLMFullResponseStartFrame(ControlFrame):
more TextFrames and a final LLMFullResponseEndFrame.
"""
- skip_tts: Optional[bool] = field(init=False)
+ skip_tts: bool | None = field(init=False)
def __post_init__(self):
super().__post_init__()
@@ -1635,7 +1631,7 @@ class LLMFullResponseStartFrame(ControlFrame):
class LLMFullResponseEndFrame(ControlFrame):
"""Frame indicating the end of an LLM response."""
- skip_tts: Optional[bool] = field(init=False)
+ skip_tts: bool | None = field(init=False)
def __post_init__(self):
super().__post_init__()
@@ -1665,7 +1661,7 @@ class LLMSummarizeContextFrame(ControlFrame):
is used.
"""
- config: Optional["LLMContextSummaryConfig"] = None
+ config: LLMContextSummaryConfig | None = None
@dataclass
@@ -1692,11 +1688,11 @@ class LLMContextSummaryRequestFrame(ControlFrame):
"""
request_id: str
- context: "LLMContext"
+ context: LLMContext
min_messages_to_keep: int
target_context_tokens: int
summarization_prompt: str
- summarization_timeout: Optional[float] = None
+ summarization_timeout: float | None = None
@dataclass
@@ -1718,7 +1714,7 @@ class LLMContextSummaryResultFrame(ControlFrame, UninterruptibleFrame):
request_id: str
summary: str
last_summarized_index: int
- error: Optional[str] = None
+ error: str | None = None
@dataclass
@@ -1745,7 +1741,7 @@ class FunctionCallInProgressFrame(ControlFrame, UninterruptibleFrame):
tool_call_id: str
arguments: Any
cancel_on_interruption: bool = False
- group_id: Optional[str] = None
+ group_id: str | None = None
@dataclass
@@ -1781,7 +1777,7 @@ class TTSStartedFrame(ControlFrame):
context_id: Unique identifier for this TTS context.
"""
- context_id: Optional[str] = None
+ context_id: str | None = None
@dataclass
@@ -1792,7 +1788,7 @@ class TTSStoppedFrame(ControlFrame):
context_id: Unique identifier for this TTS context.
"""
- context_id: Optional[str] = None
+ context_id: str | None = None
@dataclass
@@ -1817,8 +1813,8 @@ class ServiceUpdateSettingsFrame(ControlFrame, UninterruptibleFrame):
"""
settings: Mapping[str, Any] = field(default_factory=dict)
- delta: Optional["ServiceSettings"] = None
- service: Optional["FrameProcessor"] = None
+ delta: ServiceSettings | None = None
+ service: FrameProcessor | None = None
@dataclass
@@ -1942,4 +1938,4 @@ class ManuallySwitchServiceFrame(ServiceSwitcherFrame):
Handled by ServiceSwitcherStrategyManual to switch the active service.
"""
- service: "FrameProcessor"
+ service: FrameProcessor
diff --git a/src/pipecat/metrics/metrics.py b/src/pipecat/metrics/metrics.py
index 2030306e5..5d0dbddc8 100644
--- a/src/pipecat/metrics/metrics.py
+++ b/src/pipecat/metrics/metrics.py
@@ -11,8 +11,6 @@ collected throughout the pipeline, including timing, token usage, and
processing statistics.
"""
-from typing import Optional
-
from pydantic import BaseModel
@@ -25,7 +23,7 @@ class MetricsData(BaseModel):
"""
processor: str
- model: Optional[str] = None
+ model: str | None = None
class TTFBMetricsData(MetricsData):
@@ -62,9 +60,9 @@ class LLMTokenUsage(BaseModel):
prompt_tokens: int
completion_tokens: int
total_tokens: int
- cache_read_input_tokens: Optional[int] = None
- cache_creation_input_tokens: Optional[int] = None
- reasoning_tokens: Optional[int] = None
+ cache_read_input_tokens: int | None = None
+ cache_creation_input_tokens: int | None = None
+ reasoning_tokens: int | None = None
class LLMUsageMetricsData(MetricsData):
diff --git a/src/pipecat/observers/base_observer.py b/src/pipecat/observers/base_observer.py
index 70c79224a..69d81f0fe 100644
--- a/src/pipecat/observers/base_observer.py
+++ b/src/pipecat/observers/base_observer.py
@@ -12,8 +12,7 @@ for logging, debugging, analytics, and monitoring pipeline behavior.
"""
from dataclasses import dataclass
-
-from typing_extensions import TYPE_CHECKING
+from typing import TYPE_CHECKING
from pipecat.frames.frames import Frame
from pipecat.utils.base_object import BaseObject
diff --git a/src/pipecat/observers/loggers/debug_log_observer.py b/src/pipecat/observers/loggers/debug_log_observer.py
index c5704a33b..1267dd0bf 100644
--- a/src/pipecat/observers/loggers/debug_log_observer.py
+++ b/src/pipecat/observers/loggers/debug_log_observer.py
@@ -13,7 +13,6 @@ understanding frame flow between processors.
from dataclasses import fields, is_dataclass
from enum import Enum, auto
-from typing import Dict, Optional, Set, Tuple, Type, Union
from loguru import logger
@@ -75,10 +74,10 @@ class DebugLogObserver(BaseObserver):
def __init__(
self,
- frame_types: Optional[
- Union[Tuple[Type[Frame], ...], Dict[Type[Frame], Optional[Tuple[Type, FrameEndpoint]]]]
- ] = None,
- exclude_fields: Optional[Set[str]] = None,
+ frame_types: tuple[type[Frame], ...]
+ | dict[type[Frame], tuple[type, FrameEndpoint] | None]
+ | None = None,
+ exclude_fields: set[str] | None = None,
**kwargs,
):
"""Initialize the debug log observer.
diff --git a/src/pipecat/observers/loggers/metrics_log_observer.py b/src/pipecat/observers/loggers/metrics_log_observer.py
index 7f4c1635c..8b3072894 100644
--- a/src/pipecat/observers/loggers/metrics_log_observer.py
+++ b/src/pipecat/observers/loggers/metrics_log_observer.py
@@ -11,8 +11,6 @@ allowing developers to monitor performance metrics, token usage, and other
statistics in real-time.
"""
-from typing import Optional, Set, Type
-
from loguru import logger
from pipecat.frames.frames import MetricsFrame
@@ -60,7 +58,7 @@ class MetricsLogObserver(BaseObserver):
def __init__(
self,
- include_metrics: Optional[Set[Type[MetricsData]]] = None,
+ include_metrics: set[type[MetricsData]] | None = None,
**kwargs,
):
"""Initialize the metrics log observer.
diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py
index a1ea04d47..6c6eb8204 100644
--- a/src/pipecat/observers/startup_timing_observer.py
+++ b/src/pipecat/observers/startup_timing_observer.py
@@ -36,7 +36,6 @@ Example::
import time
from dataclasses import dataclass
-from typing import Dict, List, Optional, Tuple, Type
from pydantic import BaseModel, Field
@@ -84,7 +83,7 @@ class StartupTimingReport(BaseModel):
start_time: float
total_duration_secs: float
- processor_timings: List[ProcessorStartupTiming] = Field(default_factory=list)
+ processor_timings: list[ProcessorStartupTiming] = Field(default_factory=list)
class TransportTimingReport(BaseModel):
@@ -98,8 +97,8 @@ class TransportTimingReport(BaseModel):
"""
start_time: float
- bot_connected_secs: Optional[float] = None
- client_connected_secs: Optional[float] = None
+ bot_connected_secs: float | None = None
+ client_connected_secs: float | None = None
class StartupTimingObserver(BaseObserver):
@@ -157,7 +156,7 @@ class StartupTimingObserver(BaseObserver):
def __init__(
self,
*,
- processor_types: Optional[Tuple[Type[FrameProcessor], ...]] = None,
+ processor_types: tuple[type[FrameProcessor], ...] | None = None,
**kwargs,
):
"""Initialize the startup timing observer.
@@ -171,13 +170,13 @@ class StartupTimingObserver(BaseObserver):
self._processor_types = processor_types
# Map processor ID -> arrival info.
- self._arrivals: Dict[int, _ArrivalInfo] = {}
+ self._arrivals: dict[int, _ArrivalInfo] = {}
# Collected timings in pipeline order.
- self._timings: List[ProcessorStartupTiming] = []
+ self._timings: list[ProcessorStartupTiming] = []
# Lock onto the first StartFrame we see (by frame ID).
- self._start_frame_id: Optional[str] = None
+ self._start_frame_id: str | None = None
# Whether we've already emitted the startup timing report.
self._startup_timing_reported = False
@@ -186,13 +185,13 @@ class StartupTimingObserver(BaseObserver):
self._transport_timing_reported = False
# Timestamp (ns) when we first see a StartFrame arrive at a processor.
- self._start_frame_arrival_ns: Optional[int] = None
+ self._start_frame_arrival_ns: int | None = None
# Bot connected timing (stored for inclusion in the transport report).
- self._bot_connected_secs: Optional[float] = None
+ self._bot_connected_secs: float | None = None
# Wall clock time when the StartFrame was first seen.
- self._start_wall_clock: Optional[float] = None
+ self._start_wall_clock: float | None = None
self._register_event_handler("on_startup_timing_report")
self._register_event_handler("on_transport_timing_report")
diff --git a/src/pipecat/observers/user_bot_latency_observer.py b/src/pipecat/observers/user_bot_latency_observer.py
index 0672b689c..5e4084406 100644
--- a/src/pipecat/observers/user_bot_latency_observer.py
+++ b/src/pipecat/observers/user_bot_latency_observer.py
@@ -14,7 +14,6 @@ is measured. Optionally collects per-service latency breakdown metrics
import time
from collections import deque
-from typing import Dict, List, Optional
from pydantic import BaseModel, Field
@@ -48,7 +47,7 @@ class TTFBBreakdownMetrics(BaseModel):
"""
processor: str
- model: Optional[str] = None
+ model: str | None = None
start_time: float
duration_secs: float
@@ -105,13 +104,13 @@ class LatencyBreakdown(BaseModel):
this cycle. Empty if no function calls occurred.
"""
- ttfb: List[TTFBBreakdownMetrics] = Field(default_factory=list)
- text_aggregation: Optional[TextAggregationBreakdownMetrics] = None
- user_turn_start_time: Optional[float] = None
- user_turn_secs: Optional[float] = None
- function_calls: List[FunctionCallMetrics] = Field(default_factory=list)
+ ttfb: list[TTFBBreakdownMetrics] = Field(default_factory=list)
+ text_aggregation: TextAggregationBreakdownMetrics | None = None
+ user_turn_start_time: float | None = None
+ user_turn_secs: float | None = None
+ function_calls: list[FunctionCallMetrics] = Field(default_factory=list)
- def chronological_events(self) -> List[str]:
+ def chronological_events(self) -> list[str]:
"""Return human-readable event labels sorted by start time.
Collects all sub-metrics into a flat list, sorts by ``start_time``,
@@ -120,7 +119,7 @@ class LatencyBreakdown(BaseModel):
Returns:
List of formatted strings, one per event, in chronological order.
"""
- events: List[tuple] = []
+ events: list[tuple] = []
if self.user_turn_start_time is not None and self.user_turn_secs is not None:
events.append((self.user_turn_start_time, f"User turn: {self.user_turn_secs:.3f}s"))
@@ -181,12 +180,12 @@ class UserBotLatencyObserver(BaseObserver):
**kwargs: Additional arguments passed to parent class.
"""
super().__init__(**kwargs)
- self._user_stopped_time: Optional[float] = None
- self._user_turn_start_time: Optional[float] = None
- self._user_turn: Optional[float] = None
+ self._user_stopped_time: float | None = None
+ self._user_turn_start_time: float | None = None
+ self._user_turn: float | None = None
# First bot speech tracking
- self._client_connected_time: Optional[float] = None
+ self._client_connected_time: float | None = None
self._first_bot_speech_measured: bool = False
# Frame deduplication (bounded deque + set pattern)
@@ -194,10 +193,10 @@ class UserBotLatencyObserver(BaseObserver):
self._frame_history: deque = deque(maxlen=max_frames)
# Per-cycle metric accumulators
- self._ttfb: List[TTFBBreakdownMetrics] = []
- self._text_aggregation: Optional[TextAggregationBreakdownMetrics] = None
- self._function_call_starts: Dict[str, tuple[str, float]] = {}
- self._function_call_metrics: List[FunctionCallMetrics] = []
+ self._ttfb: list[TTFBBreakdownMetrics] = []
+ self._text_aggregation: TextAggregationBreakdownMetrics | None = None
+ self._function_call_starts: dict[str, tuple[str, float]] = {}
+ self._function_call_metrics: list[FunctionCallMetrics] = []
self._register_event_handler("on_latency_measured")
self._register_event_handler("on_latency_breakdown")
diff --git a/src/pipecat/pipeline/base_task.py b/src/pipecat/pipeline/base_task.py
index 788342482..b5ba06645 100644
--- a/src/pipecat/pipeline/base_task.py
+++ b/src/pipecat/pipeline/base_task.py
@@ -12,8 +12,8 @@ tasks that manage the lifecycle and execution of frame processing pipelines.
import asyncio
from abc import abstractmethod
+from collections.abc import AsyncIterable, Iterable
from dataclasses import dataclass
-from typing import AsyncIterable, Iterable
from pipecat.frames.frames import Frame
from pipecat.utils.base_object import BaseObject
diff --git a/src/pipecat/pipeline/llm_switcher.py b/src/pipecat/pipeline/llm_switcher.py
index d65d50d08..71a7c7974 100644
--- a/src/pipecat/pipeline/llm_switcher.py
+++ b/src/pipecat/pipeline/llm_switcher.py
@@ -6,7 +6,7 @@
"""LLM switcher for switching between different LLMs at runtime, with different switching strategies."""
-from typing import Any, List, Optional, Type
+from typing import Any
from pipecat.adapters.schemas.direct_function import DirectFunction
from pipecat.pipeline.service_switcher import (
@@ -28,8 +28,8 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]):
def __init__(
self,
- llms: List[LLMService],
- strategy_type: Type[StrategyType] = ServiceSwitcherStrategyManual,
+ llms: list[LLMService],
+ strategy_type: type[StrategyType] = ServiceSwitcherStrategyManual,
):
"""Initialize the service switcher with a list of LLMs and a switching strategy.
@@ -41,7 +41,7 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]):
super().__init__(llms, strategy_type)
@property
- def llms(self) -> List[LLMService]:
+ def llms(self) -> list[LLMService]:
"""Get the list of LLMs managed by this switcher.
Returns:
@@ -58,7 +58,7 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]):
"""
return self.strategy.active_service
- async def run_inference(self, context: LLMContext, **kwargs) -> Optional[str]:
+ async def run_inference(self, context: LLMContext, **kwargs) -> str | None:
"""Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context, using the currently active LLM.
Args:
@@ -75,11 +75,11 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]):
def register_function(
self,
- function_name: Optional[str],
+ function_name: str | None,
handler: Any,
*,
cancel_on_interruption: bool = True,
- timeout_secs: Optional[float] = None,
+ timeout_secs: float | None = None,
):
"""Register a function handler for LLM function calls, on all LLMs, active or not.
@@ -105,7 +105,7 @@ class LLMSwitcher(ServiceSwitcher[StrategyType]):
handler: DirectFunction,
*,
cancel_on_interruption: bool = True,
- timeout_secs: Optional[float] = None,
+ timeout_secs: float | None = None,
):
"""Register a direct function handler for LLM function calls, on all LLMs, active or not.
diff --git a/src/pipecat/pipeline/parallel_pipeline.py b/src/pipecat/pipeline/parallel_pipeline.py
index 1e2e03a8f..d92fdada9 100644
--- a/src/pipecat/pipeline/parallel_pipeline.py
+++ b/src/pipecat/pipeline/parallel_pipeline.py
@@ -12,7 +12,6 @@ handling of pipeline lifecycle events.
"""
from itertools import chain
-from typing import Dict, List
from loguru import logger
@@ -51,7 +50,7 @@ class ParallelPipeline(BasePipeline):
self._pipelines = []
self._seen_ids = set()
- self._frame_counter: Dict[int, int] = {}
+ self._frame_counter: dict[int, int] = {}
self._synchronizing: bool = False
self._buffered_frames: list[tuple[Frame, FrameDirection]] = []
@@ -93,7 +92,7 @@ class ParallelPipeline(BasePipeline):
return self._pipelines
@property
- def entry_processors(self) -> List["FrameProcessor"]:
+ def entry_processors(self) -> list["FrameProcessor"]:
"""Return the list of entry processors for this processor.
Entry processors are the first processors in a compound processor
@@ -106,7 +105,7 @@ class ParallelPipeline(BasePipeline):
"""
return self._pipelines
- def processors_with_metrics(self) -> List[FrameProcessor]:
+ def processors_with_metrics(self) -> list[FrameProcessor]:
"""Collect processors that can generate metrics from all parallel branches.
Returns:
diff --git a/src/pipecat/pipeline/pipeline.py b/src/pipecat/pipeline/pipeline.py
index 9114b9b09..325cedb82 100644
--- a/src/pipecat/pipeline/pipeline.py
+++ b/src/pipecat/pipeline/pipeline.py
@@ -11,7 +11,7 @@ in sequence and manages frame flow between them, along with helper classes
for pipeline source and sink operations.
"""
-from typing import Callable, Coroutine, List, Optional
+from collections.abc import Callable, Coroutine
from pipecat.frames.frames import Frame
from pipecat.pipeline.base_pipeline import BasePipeline
@@ -98,10 +98,10 @@ class Pipeline(BasePipeline):
def __init__(
self,
- processors: List[FrameProcessor],
+ processors: list[FrameProcessor],
*,
- source: Optional[FrameProcessor] = None,
- sink: Optional[FrameProcessor] = None,
+ source: FrameProcessor | None = None,
+ sink: FrameProcessor | None = None,
):
"""Initialize the pipeline with a list of processors.
@@ -116,7 +116,7 @@ class Pipeline(BasePipeline):
# downstream outside of the pipeline.
self._source = source or PipelineSource(self.push_frame, name=f"{self}::Source")
self._sink = sink or PipelineSink(self.push_frame, name=f"{self}::Sink")
- self._processors: List[FrameProcessor] = [self._source] + processors + [self._sink]
+ self._processors: list[FrameProcessor] = [self._source] + processors + [self._sink]
self._link_processors()
@@ -137,7 +137,7 @@ class Pipeline(BasePipeline):
return self._processors
@property
- def entry_processors(self) -> List["FrameProcessor"]:
+ def entry_processors(self) -> list["FrameProcessor"]:
"""Return the list of entry processors for this processor.
Entry processors are the first processors in a compound processor
diff --git a/src/pipecat/pipeline/runner.py b/src/pipecat/pipeline/runner.py
index b584377c6..db1767866 100644
--- a/src/pipecat/pipeline/runner.py
+++ b/src/pipecat/pipeline/runner.py
@@ -14,7 +14,6 @@ management.
import asyncio
import gc
import signal
-from typing import Optional
from loguru import logger
@@ -34,11 +33,11 @@ class PipelineRunner(BaseObject):
def __init__(
self,
*,
- name: Optional[str] = None,
+ name: str | None = None,
handle_sigint: bool = True,
handle_sigterm: bool = False,
force_gc: bool = False,
- loop: Optional[asyncio.AbstractEventLoop] = None,
+ loop: asyncio.AbstractEventLoop | None = None,
):
"""Initialize the pipeline runner.
diff --git a/src/pipecat/pipeline/service_switcher.py b/src/pipecat/pipeline/service_switcher.py
index 9d2e2a56e..a7f98f9b1 100644
--- a/src/pipecat/pipeline/service_switcher.py
+++ b/src/pipecat/pipeline/service_switcher.py
@@ -6,7 +6,7 @@
"""Service switcher for switching between different services at runtime, with different switching strategies."""
-from typing import Any, Generic, List, Optional, Type, TypeVar
+from typing import Any, Generic, TypeVar
from loguru import logger
@@ -42,7 +42,7 @@ class ServiceSwitcherStrategy(BaseObject):
...
"""
- def __init__(self, services: List[FrameProcessor]):
+ def __init__(self, services: list[FrameProcessor]):
"""Initialize the service switcher strategy with a list of services.
Note:
@@ -62,7 +62,7 @@ class ServiceSwitcherStrategy(BaseObject):
self._register_event_handler("on_service_switched")
@property
- def services(self) -> List[FrameProcessor]:
+ def services(self) -> list[FrameProcessor]:
"""Return the list of available services."""
return self._services
@@ -73,7 +73,7 @@ class ServiceSwitcherStrategy(BaseObject):
async def handle_frame(
self, frame: ServiceSwitcherFrame, direction: FrameDirection
- ) -> Optional[FrameProcessor]:
+ ) -> FrameProcessor | None:
"""Handle a frame that controls service switching.
The base implementation returns ``None`` for all frames. Subclasses
@@ -88,7 +88,7 @@ class ServiceSwitcherStrategy(BaseObject):
"""
return None
- async def handle_error(self, error: ErrorFrame) -> Optional[FrameProcessor]:
+ async def handle_error(self, error: ErrorFrame) -> FrameProcessor | None:
"""Handle an error from the active service.
Called by ``ServiceSwitcher`` when a non-fatal ``ErrorFrame`` is pushed
@@ -103,7 +103,7 @@ class ServiceSwitcherStrategy(BaseObject):
"""
return None
- async def _set_active_if_available(self, service: FrameProcessor) -> Optional[FrameProcessor]:
+ async def _set_active_if_available(self, service: FrameProcessor) -> FrameProcessor | None:
"""Set the active service to the given one, if it is in the list of available services.
If it's not in the list, the request is ignored, as it may have been
@@ -139,7 +139,7 @@ class ServiceSwitcherStrategyManual(ServiceSwitcherStrategy):
async def handle_frame(
self, frame: ServiceSwitcherFrame, direction: FrameDirection
- ) -> Optional[FrameProcessor]:
+ ) -> FrameProcessor | None:
"""Handle a frame that controls service switching.
Args:
@@ -179,7 +179,7 @@ class ServiceSwitcherStrategyFailover(ServiceSwitcherStrategyManual):
...
"""
- async def handle_error(self, error: ErrorFrame) -> Optional[FrameProcessor]:
+ async def handle_error(self, error: ErrorFrame) -> FrameProcessor | None:
"""Handle an error from the active service by failing over.
Switches to the next service in the list. The failed service remains
@@ -223,8 +223,8 @@ class ServiceSwitcher(ParallelPipeline, Generic[StrategyType]):
def __init__(
self,
- services: List[FrameProcessor],
- strategy_type: Type[StrategyType] = ServiceSwitcherStrategyManual,
+ services: list[FrameProcessor],
+ strategy_type: type[StrategyType] = ServiceSwitcherStrategyManual,
):
"""Initialize the service switcher with a list of services and a switching strategy.
@@ -244,14 +244,14 @@ class ServiceSwitcher(ParallelPipeline, Generic[StrategyType]):
return self._strategy
@property
- def services(self) -> List[FrameProcessor]:
+ def services(self) -> list[FrameProcessor]:
"""Return the list of available services."""
return self._services
@staticmethod
def _make_pipeline_definitions(
- services: List[FrameProcessor], strategy: ServiceSwitcherStrategy
- ) -> List[Any]:
+ services: list[FrameProcessor], strategy: ServiceSwitcherStrategy
+ ) -> list[Any]:
pipelines = []
for service in services:
pipelines.append(ServiceSwitcher._make_pipeline_definition(service, strategy))
diff --git a/src/pipecat/pipeline/sync_parallel_pipeline.py b/src/pipecat/pipeline/sync_parallel_pipeline.py
index 74cfdfdb9..b265e9eb3 100644
--- a/src/pipecat/pipeline/sync_parallel_pipeline.py
+++ b/src/pipecat/pipeline/sync_parallel_pipeline.py
@@ -20,7 +20,6 @@ import asyncio
from dataclasses import dataclass
from enum import Enum
from itertools import chain
-from typing import List
from loguru import logger
@@ -215,7 +214,7 @@ class SyncParallelPipeline(BasePipeline):
return self._pipelines
@property
- def entry_processors(self) -> List["FrameProcessor"]:
+ def entry_processors(self) -> list["FrameProcessor"]:
"""Return the list of entry processors for this processor.
Entry processors are the first processors in a compound processor
@@ -228,7 +227,7 @@ class SyncParallelPipeline(BasePipeline):
"""
return [s["processor"] for s in self._sources]
- def processors_with_metrics(self) -> List[FrameProcessor]:
+ def processors_with_metrics(self) -> list[FrameProcessor]:
"""Collect processors that can generate metrics from all parallel pipelines.
Returns:
diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py
index 1f9de78a6..394b3d2e2 100644
--- a/src/pipecat/pipeline/task.py
+++ b/src/pipecat/pipeline/task.py
@@ -14,8 +14,9 @@ including heartbeats, idle detection, and observer integration.
import asyncio
import importlib.util
import os
+from collections.abc import AsyncIterable, Iterable
from pathlib import Path
-from typing import Any, AsyncIterable, Dict, Iterable, List, Optional, Set, Tuple, Type, TypeVar
+from typing import Any, TypeVar
from loguru import logger
from pydantic import BaseModel, ConfigDict, Field
@@ -74,7 +75,7 @@ class IdleFrameObserver(BaseObserver):
"""
- def __init__(self, *, idle_event: asyncio.Event, idle_timeout_frames: Tuple[Type[Frame], ...]):
+ def __init__(self, *, idle_event: asyncio.Event, idle_timeout_frames: tuple[type[Frame], ...]):
"""Initialize the observer.
Args:
@@ -134,7 +135,7 @@ class PipelineParams(BaseModel):
heartbeats_monitor_secs: float = HEARTBEAT_MONITOR_SECS
report_only_initial_ttfb: bool = False
send_initial_empty_metrics: bool = True
- start_metadata: Dict[str, Any] = Field(default_factory=dict)
+ start_metadata: dict[str, Any] = Field(default_factory=dict)
class PipelineTask(BasePipelineTask):
@@ -190,22 +191,22 @@ class PipelineTask(BasePipelineTask):
self,
pipeline: BasePipeline,
*,
- params: Optional[PipelineParams] = None,
- additional_span_attributes: Optional[dict] = None,
+ params: PipelineParams | None = None,
+ additional_span_attributes: dict | None = None,
cancel_on_idle_timeout: bool = True,
cancel_timeout_secs: float = CANCEL_TIMEOUT_SECS,
check_dangling_tasks: bool = True,
- clock: Optional[BaseClock] = None,
- conversation_id: Optional[str] = None,
+ clock: BaseClock | None = None,
+ conversation_id: str | None = None,
enable_tracing: bool = False,
enable_turn_tracking: bool = True,
enable_rtvi: bool = True,
- idle_timeout_frames: Tuple[Type[Frame], ...] = (BotSpeakingFrame, UserSpeakingFrame),
- idle_timeout_secs: Optional[float] = IDLE_TIMEOUT_SECS,
- observers: Optional[List[BaseObserver]] = None,
- rtvi_processor: Optional[RTVIProcessor] = None,
- rtvi_observer_params: Optional[RTVIObserverParams] = None,
- task_manager: Optional[BaseTaskManager] = None,
+ idle_timeout_frames: tuple[type[Frame], ...] = (BotSpeakingFrame, UserSpeakingFrame),
+ idle_timeout_secs: float | None = IDLE_TIMEOUT_SECS,
+ observers: list[BaseObserver] | None = None,
+ rtvi_processor: RTVIProcessor | None = None,
+ rtvi_observer_params: RTVIObserverParams | None = None,
+ task_manager: BaseTaskManager | None = None,
):
"""Initialize the PipelineTask.
@@ -246,10 +247,10 @@ class PipelineTask(BasePipelineTask):
self._enable_turn_tracking = enable_turn_tracking
self._idle_timeout_secs = idle_timeout_secs
observers = observers or []
- self._turn_tracking_observer: Optional[TurnTrackingObserver] = None
- self._user_bot_latency_observer: Optional[UserBotLatencyObserver] = None
- self._turn_trace_observer: Optional[TurnTraceObserver] = None
- self._tracing_context: Optional[TracingContext] = None
+ self._turn_tracking_observer: TurnTrackingObserver | None = None
+ self._user_bot_latency_observer: UserBotLatencyObserver | None = None
+ self._turn_trace_observer: TurnTraceObserver | None = None
+ self._tracing_context: TracingContext | None = None
if self._enable_turn_tracking:
self._turn_tracking_observer = TurnTrackingObserver()
observers.append(self._turn_tracking_observer)
@@ -278,13 +279,13 @@ class PipelineTask(BasePipelineTask):
# This queue is the queue used to push frames to the pipeline.
self._push_queue = asyncio.Queue()
- self._process_push_task: Optional[asyncio.Task] = None
+ self._process_push_task: asyncio.Task | None = None
# This is the heartbeat queue. When a heartbeat frame is received in the
# down queue we add it to the heartbeat queue for processing.
self._heartbeat_queue = asyncio.Queue()
- self._heartbeat_push_task: Optional[asyncio.Task] = None
- self._heartbeat_monitor_task: Optional[asyncio.Task] = None
+ self._heartbeat_push_task: asyncio.Task | None = None
+ self._heartbeat_monitor_task: asyncio.Task | None = None
# RTVI support
self._rtvi = None
@@ -323,7 +324,7 @@ class PipelineTask(BasePipelineTask):
# processor we consider the pipeline is not idle. We use an observer
# which will be listening any part of the pipeline.
self._idle_event = asyncio.Event()
- self._idle_monitor_task: Optional[asyncio.Task] = None
+ self._idle_monitor_task: asyncio.Task | None = None
if self._idle_timeout_secs:
idle_frame_observer = IdleFrameObserver(
idle_event=self._idle_event,
@@ -365,8 +366,8 @@ class PipelineTask(BasePipelineTask):
# in. This is mainly for efficiency reason because each event handler
# creates a task and most likely you only care about one or two frame
# types.
- self._reached_upstream_types: Set[Type[Frame]] = set()
- self._reached_downstream_types: Set[Type[Frame]] = set()
+ self._reached_upstream_types: set[type[Frame]] = set()
+ self._reached_downstream_types: set[type[Frame]] = set()
self._register_event_handler("on_frame_reached_upstream")
self._register_event_handler("on_frame_reached_downstream")
self._register_event_handler("on_idle_timeout")
@@ -395,7 +396,7 @@ class PipelineTask(BasePipelineTask):
return self._pipeline
@property
- def turn_tracking_observer(self) -> Optional[TurnTrackingObserver]:
+ def turn_tracking_observer(self) -> TurnTrackingObserver | None:
"""Get the turn tracking observer if enabled.
Returns:
@@ -404,7 +405,7 @@ class PipelineTask(BasePipelineTask):
return self._turn_tracking_observer
@property
- def turn_trace_observer(self) -> Optional[TurnTraceObserver]:
+ def turn_trace_observer(self) -> TurnTraceObserver | None:
"""Get the turn trace observer if enabled.
Returns:
@@ -424,7 +425,7 @@ class PipelineTask(BasePipelineTask):
return self._rtvi
@property
- def reached_upstream_types(self) -> Tuple[Type[Frame], ...]:
+ def reached_upstream_types(self) -> tuple[type[Frame], ...]:
"""Get the currently configured upstream frame type filters.
Returns:
@@ -433,7 +434,7 @@ class PipelineTask(BasePipelineTask):
return tuple(self._reached_upstream_types)
@property
- def reached_downstream_types(self) -> Tuple[Type[Frame], ...]:
+ def reached_downstream_types(self) -> tuple[type[Frame], ...]:
"""Get the currently configured downstream frame type filters.
Returns:
@@ -457,7 +458,7 @@ class PipelineTask(BasePipelineTask):
"""
await self._observer.remove_observer(observer)
- def set_reached_upstream_filter(self, types: Tuple[Type[Frame], ...]):
+ def set_reached_upstream_filter(self, types: tuple[type[Frame], ...]):
"""Set which frame types trigger the on_frame_reached_upstream event.
Args:
@@ -465,7 +466,7 @@ class PipelineTask(BasePipelineTask):
"""
self._reached_upstream_types = set(types)
- def set_reached_downstream_filter(self, types: Tuple[Type[Frame], ...]):
+ def set_reached_downstream_filter(self, types: tuple[type[Frame], ...]):
"""Set which frame types trigger the on_frame_reached_downstream event.
Args:
@@ -473,7 +474,7 @@ class PipelineTask(BasePipelineTask):
"""
self._reached_downstream_types = set(types)
- def add_reached_upstream_filter(self, types: Tuple[Type[Frame], ...]):
+ def add_reached_upstream_filter(self, types: tuple[type[Frame], ...]):
"""Add frame types to trigger the on_frame_reached_upstream event.
Args:
@@ -481,7 +482,7 @@ class PipelineTask(BasePipelineTask):
"""
self._reached_upstream_types.update(types)
- def add_reached_downstream_filter(self, types: Tuple[Type[Frame], ...]):
+ def add_reached_downstream_filter(self, types: tuple[type[Frame], ...]):
"""Add frame types to trigger the on_frame_reached_downstream event.
Args:
@@ -509,7 +510,7 @@ class PipelineTask(BasePipelineTask):
logger.debug(f"Task {self} scheduled to stop when done")
await self.queue_frame(EndFrame())
- async def cancel(self, *, reason: Optional[str] = None):
+ async def cancel(self, *, reason: str | None = None):
"""Request the running pipeline to cancel.
Args:
@@ -597,7 +598,7 @@ class PipelineTask(BasePipelineTask):
for frame in frames:
await self.queue_frame(frame, direction)
- async def _cancel(self, *, reason: Optional[str] = None):
+ async def _cancel(self, *, reason: str | None = None):
"""Internal cancellation logic for the pipeline task.
Args:
@@ -685,7 +686,7 @@ class PipelineTask(BasePipelineTask):
self._pipeline_end_event.wait(), timeout=self._cancel_timeout_secs
)
logger.debug(f"{self}: {frame} reached the end of the pipeline.")
- except asyncio.TimeoutError:
+ except TimeoutError:
logger.warning(
f"{self}: timeout waiting for {frame} to reach the end of the pipeline (being blocked somewhere?)."
)
@@ -895,7 +896,7 @@ class PipelineTask(BasePipelineTask):
process_time = (self._clock.get_time() - frame.timestamp) / 1_000_000_000
logger.trace(f"{self}: heartbeat frame processed in {process_time} seconds")
self._heartbeat_queue.task_done()
- except asyncio.TimeoutError:
+ except TimeoutError:
logger.warning(
f"{self}: heartbeat frame not received for more than {wait_time} seconds"
)
@@ -913,7 +914,7 @@ class PipelineTask(BasePipelineTask):
try:
await asyncio.wait_for(self._idle_event.wait(), timeout=self._idle_timeout_secs)
self._idle_event.clear()
- except asyncio.TimeoutError:
+ except TimeoutError:
running = await self._idle_timeout_detected()
async def _idle_timeout_detected(self) -> bool:
@@ -972,7 +973,7 @@ class PipelineTask(BasePipelineTask):
if tasks:
logger.warning(f"{self} dangling tasks detected: {tasks}")
- def _create_start_metadata(self) -> Dict[str, Any]:
+ def _create_start_metadata(self) -> dict[str, Any]:
"""Build and return start metadata including user-provided values."""
start_metadata = {}
@@ -981,7 +982,7 @@ class PipelineTask(BasePipelineTask):
return start_metadata
- def _find_processor(self, processor: FrameProcessor, processor_type: Type[T]) -> Optional[T]:
+ def _find_processor(self, processor: FrameProcessor, processor_type: type[T]) -> T | None:
"""Recursively find a processor of the given type in the pipeline."""
if isinstance(processor, processor_type):
return processor
diff --git a/src/pipecat/pipeline/task_observer.py b/src/pipecat/pipeline/task_observer.py
index 44326d7c3..c6603c1d8 100644
--- a/src/pipecat/pipeline/task_observer.py
+++ b/src/pipecat/pipeline/task_observer.py
@@ -12,7 +12,7 @@ the main pipeline execution.
"""
import asyncio
-from typing import Any, Dict, List, Optional
+from typing import Any
from attr import dataclass
@@ -61,7 +61,7 @@ class TaskObserver(BaseObserver):
def __init__(
self,
*,
- observers: Optional[List[BaseObserver]] = None,
+ observers: list[BaseObserver] | None = None,
task_manager: BaseTaskManager,
**kwargs,
):
@@ -75,7 +75,7 @@ class TaskObserver(BaseObserver):
super().__init__(**kwargs)
self._observers = observers or []
self._task_manager = task_manager
- self._proxies: Optional[Dict[BaseObserver, Proxy]] = (
+ self._proxies: dict[BaseObserver, Proxy] | None = (
None # Becomes a dict after start() is called
)
@@ -164,7 +164,7 @@ class TaskObserver(BaseObserver):
proxy = Proxy(queue=queue, task=task, observer=observer)
return proxy
- def _create_proxies(self, observers: List[BaseObserver]) -> Dict[BaseObserver, Proxy]:
+ def _create_proxies(self, observers: list[BaseObserver]) -> dict[BaseObserver, Proxy]:
"""Create proxies for all observers."""
proxies = {}
for observer in observers:
diff --git a/src/pipecat/processors/aggregators/dtmf_aggregator.py b/src/pipecat/processors/aggregators/dtmf_aggregator.py
index ea56ba6fc..289aca085 100644
--- a/src/pipecat/processors/aggregators/dtmf_aggregator.py
+++ b/src/pipecat/processors/aggregators/dtmf_aggregator.py
@@ -12,7 +12,6 @@ for downstream processing by LLM context aggregators.
"""
import asyncio
-from typing import Optional
from pipecat.audio.dtmf.types import KeypadEntry
from pipecat.frames.frames import (
@@ -62,7 +61,7 @@ class DTMFAggregator(FrameProcessor):
self._prefix = prefix
self._digit_event = asyncio.Event()
- self._aggregation_task: Optional[asyncio.Task] = None
+ self._aggregation_task: asyncio.Task | None = None
async def cleanup(self) -> None:
"""Clean up resources."""
@@ -130,7 +129,7 @@ class DTMFAggregator(FrameProcessor):
try:
await asyncio.wait_for(self._digit_event.wait(), timeout=self._idle_timeout)
self._digit_event.clear()
- except asyncio.TimeoutError:
+ except TimeoutError:
if self._aggregation:
await self._flush_aggregation()
diff --git a/src/pipecat/processors/aggregators/gated.py b/src/pipecat/processors/aggregators/gated.py
index 6a37fce66..5bb73e334 100644
--- a/src/pipecat/processors/aggregators/gated.py
+++ b/src/pipecat/processors/aggregators/gated.py
@@ -11,8 +11,6 @@ custom gate open/close functions, allowing for conditional frame buffering
and release in frame processing pipelines.
"""
-from typing import List, Tuple
-
from loguru import logger
from pipecat.frames.frames import Frame, SystemFrame
@@ -48,7 +46,7 @@ class GatedAggregator(FrameProcessor):
self._gate_close_fn = gate_close_fn
self._gate_open = start_open
self._direction = direction
- self._accumulator: List[Tuple[Frame, FrameDirection]] = []
+ self._accumulator: list[tuple[Frame, FrameDirection]] = []
async def process_frame(self, frame: Frame, direction: FrameDirection):
"""Process incoming frames with gated accumulation logic.
diff --git a/src/pipecat/processors/aggregators/llm_context.py b/src/pipecat/processors/aggregators/llm_context.py
index 7bcb68c86..b34dbfaec 100644
--- a/src/pipecat/processors/aggregators/llm_context.py
+++ b/src/pipecat/processors/aggregators/llm_context.py
@@ -19,8 +19,9 @@ import base64
import copy
import io
import wave
+from collections.abc import Callable
from dataclasses import dataclass
-from typing import Any, Callable, List, Optional, TypeAlias, Union
+from typing import Any, TypeAlias
from loguru import logger
from openai._types import NOT_GIVEN as OPEN_AI_NOT_GIVEN
@@ -57,7 +58,7 @@ class LLMSpecificMessage:
message: Any
-LLMContextMessage: TypeAlias = Union[LLMStandardMessage, LLMSpecificMessage]
+LLMContextMessage: TypeAlias = LLMStandardMessage | LLMSpecificMessage
class LLMContext:
@@ -70,7 +71,7 @@ class LLMContext:
def __init__(
self,
- messages: Optional[List[LLMContextMessage]] = None,
+ messages: list[LLMContextMessage] | None = None,
tools: ToolsSchema | NotGiven = NOT_GIVEN,
tool_choice: LLMContextToolChoice | NotGiven = NOT_GIVEN,
):
@@ -81,7 +82,7 @@ class LLMContext:
tools: Available tools for the LLM to use.
tool_choice: Tool selection strategy for the LLM.
"""
- self._messages: List[LLMContextMessage] = messages if messages else []
+ self._messages: list[LLMContextMessage] = messages if messages else []
self._tools: ToolsSchema | NotGiven = LLMContext._normalize_and_validate_tools(tools)
self._tool_choice: LLMContextToolChoice | NotGiven = tool_choice
@@ -90,7 +91,7 @@ class LLMContext:
*,
role: str = "user",
url: str,
- text: Optional[str] = None,
+ text: str | None = None,
) -> LLMContextMessage:
"""Create a context message containing an image URL.
@@ -114,7 +115,7 @@ class LLMContext:
format: str,
size: tuple[int, int],
image: bytes,
- text: Optional[str] = None,
+ text: str | None = None,
) -> LLMContextMessage:
"""Create a context message containing an image.
@@ -187,7 +188,7 @@ class LLMContext:
return {"role": role, "content": content}
@property
- def messages(self) -> List[LLMContextMessage]:
+ def messages(self) -> list[LLMContextMessage]:
"""Get the current messages list.
NOTE: This is equivalent to calling `get_messages()` with no filter. If
@@ -201,10 +202,10 @@ class LLMContext:
def get_messages(
self,
- llm_specific_filter: Optional[str] = None,
+ llm_specific_filter: str | None = None,
*,
truncate_large_values: bool = False,
- ) -> List[LLMContextMessage]:
+ ) -> list[LLMContextMessage]:
"""Get the current messages list.
Args:
@@ -242,8 +243,8 @@ class LLMContext:
@staticmethod
def _truncate_large_values_from_messages(
- messages: List[LLMContextMessage],
- ) -> List[LLMContextMessage]:
+ messages: list[LLMContextMessage],
+ ) -> list[LLMContextMessage]:
"""Return deep copies of messages with large values replaced by placeholders.
For standard (universal-format) messages, the following known binary
@@ -344,7 +345,7 @@ class LLMContext:
"""
self._messages.append(message)
- def add_messages(self, messages: List[LLMContextMessage]):
+ def add_messages(self, messages: list[LLMContextMessage]):
"""Add multiple messages to the context.
Args:
@@ -352,7 +353,7 @@ class LLMContext:
"""
self._messages.extend(messages)
- def set_messages(self, messages: List[LLMContextMessage]):
+ def set_messages(self, messages: list[LLMContextMessage]):
"""Replace all messages in the context.
Args:
@@ -361,7 +362,7 @@ class LLMContext:
self._messages[:] = messages
def transform_messages(
- self, transform: Callable[[List[LLMContextMessage]], List[LLMContextMessage]]
+ self, transform: Callable[[list[LLMContextMessage]], list[LLMContextMessage]]
):
"""Transform the current messages using the provided function.
@@ -393,7 +394,7 @@ class LLMContext:
format: str,
size: tuple[int, int],
image: bytes,
- text: Optional[str] = None,
+ text: str | None = None,
role: str = "user",
):
"""Add a message containing an image frame.
diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py
index 40be383fa..516b6062f 100644
--- a/src/pipecat/processors/aggregators/llm_context_summarizer.py
+++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py
@@ -9,7 +9,7 @@
import asyncio
import uuid
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
from loguru import logger
@@ -101,7 +101,7 @@ class LLMContextSummarizer(BaseObject):
self,
*,
context: LLMContext,
- config: Optional[LLMAutoContextSummarizationConfig] = None,
+ config: LLMAutoContextSummarizationConfig | None = None,
auto_trigger: bool = True,
):
"""Initialize the context summarizer.
@@ -122,10 +122,10 @@ class LLMContextSummarizer(BaseObject):
self._auto_config = config or LLMAutoContextSummarizationConfig()
self._auto_trigger = auto_trigger
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
self._summarization_in_progress = False
- self._pending_summary_request_id: Optional[str] = None
+ self._pending_summary_request_id: str | None = None
self._register_event_handler("on_request_summarization", sync=True)
self._register_event_handler("on_summary_applied")
@@ -269,9 +269,7 @@ class LLMContextSummarizer(BaseObject):
logger.debug(f"{self}: ✓ Summarization needed - {', '.join(reason)}")
return True
- async def _request_summarization(
- self, config_override: Optional[LLMContextSummaryConfig] = None
- ):
+ async def _request_summarization(self, config_override: LLMContextSummaryConfig | None = None):
"""Request context summarization from LLM service.
Creates a summarization request frame and either handles it directly
@@ -338,7 +336,7 @@ class LLMContextSummarizer(BaseObject):
summary=summary,
last_summarized_index=last_index,
)
- except asyncio.TimeoutError:
+ except TimeoutError:
error = f"Context summarization timed out after {timeout}s"
logger.error(f"{self}: {error}")
result_frame = LLMContextSummaryResultFrame(
diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py
index bc4129b43..bf910a0c4 100644
--- a/src/pipecat/processors/aggregators/llm_response_universal.py
+++ b/src/pipecat/processors/aggregators/llm_response_universal.py
@@ -15,8 +15,9 @@ import asyncio
import json
import warnings
from abc import abstractmethod
+from collections.abc import Callable
from dataclasses import dataclass, field
-from typing import Any, Callable, Dict, List, Literal, Optional, Set, Type
+from typing import Any, Literal
from loguru import logger
@@ -119,14 +120,14 @@ class LLMUserAggregatorParams:
filter_incomplete_user_turns is True.
"""
- user_turn_strategies: Optional[UserTurnStrategies] = None
- user_mute_strategies: List[BaseUserMuteStrategy] = field(default_factory=list)
+ user_turn_strategies: UserTurnStrategies | None = None
+ user_mute_strategies: list[BaseUserMuteStrategy] = field(default_factory=list)
user_turn_stop_timeout: float = 5.0
user_idle_timeout: float = 0
- vad_analyzer: Optional[VADAnalyzer] = None
+ vad_analyzer: VADAnalyzer | None = None
audio_idle_timeout: float = 1.0
filter_incomplete_user_turns: bool = False
- user_turn_completion_config: Optional[UserTurnCompletionConfig] = None
+ user_turn_completion_config: UserTurnCompletionConfig | None = None
@dataclass
@@ -145,14 +146,14 @@ class LLMAssistantAggregatorParams:
"""
enable_auto_context_summarization: bool = False
- auto_context_summarization_config: Optional[LLMAutoContextSummarizationConfig] = None
+ auto_context_summarization_config: LLMAutoContextSummarizationConfig | None = None
# ---------------------------------------------------------------------------
# Deprecated field names — kept for backward compatibility.
# Use enable_auto_context_summarization and auto_context_summarization_config instead.
# ---------------------------------------------------------------------------
- enable_context_summarization: Optional[bool] = None
- context_summarization_config: Optional[LLMContextSummarizationConfig] = None
+ enable_context_summarization: bool | None = None
+ context_summarization_config: LLMContextSummarizationConfig | None = None
def __post_init__(self):
if self.enable_context_summarization is not None:
@@ -198,7 +199,7 @@ class UserTurnStoppedMessage:
content: str
timestamp: str
- user_id: Optional[str] = None
+ user_id: str | None = None
@dataclass
@@ -259,10 +260,10 @@ class LLMContextAggregator(FrameProcessor):
self._context = context
self._role = role
- self._aggregation: List[TextPartForConcatenation] = []
+ self._aggregation: list[TextPartForConcatenation] = []
@property
- def messages(self) -> List[LLMContextMessage]:
+ def messages(self) -> list[LLMContextMessage]:
"""Get messages from the LLM context.
Returns:
@@ -322,7 +323,7 @@ class LLMContextAggregator(FrameProcessor):
self._context.set_messages(messages)
def transform_messages(
- self, transform: Callable[[List[LLMContextMessage]], List[LLMContextMessage]]
+ self, transform: Callable[[list[LLMContextMessage]], list[LLMContextMessage]]
):
"""Transform the context messages using a provided function.
@@ -423,7 +424,7 @@ class LLMUserAggregator(LLMContextAggregator):
self,
context: LLMContext,
*,
- params: Optional[LLMUserAggregatorParams] = None,
+ params: LLMUserAggregatorParams | None = None,
**kwargs,
):
"""Initialize the user context aggregator.
@@ -473,7 +474,7 @@ class LLMUserAggregator(LLMContextAggregator):
self._user_idle_controller.add_event_handler("on_user_turn_idle", self._on_user_turn_idle)
# VAD controller
- self._vad_controller: Optional[VADController] = None
+ self._vad_controller: VADController | None = None
if self._params.vad_analyzer:
self._vad_controller = VADController(
self._params.vad_analyzer,
@@ -681,7 +682,7 @@ class LLMUserAggregator(LLMContextAggregator):
)
)
- async def _queued_broadcast_frame(self, frame_cls: Type[Frame], **kwargs):
+ async def _queued_broadcast_frame(self, frame_cls: type[Frame], **kwargs):
"""Broadcasts a frame upstream and queues it for internal processing.
Queues the frame so it flows through `process_frame` and is handled
@@ -701,7 +702,7 @@ class LLMUserAggregator(LLMContextAggregator):
):
await self.queue_frame(frame, direction)
- async def _on_broadcast_frame(self, controller, frame_cls: Type[Frame], **kwargs):
+ async def _on_broadcast_frame(self, controller, frame_cls: type[Frame], **kwargs):
await self._queued_broadcast_frame(frame_cls, **kwargs)
async def _on_vad_speech_started(self, controller):
@@ -768,7 +769,7 @@ class LLMUserAggregator(LLMContextAggregator):
async def _maybe_emit_user_turn_stopped(
self,
- strategy: Optional[BaseUserTurnStopStrategy] = None,
+ strategy: BaseUserTurnStopStrategy | None = None,
on_session_end: bool = False,
):
"""Maybe emit user turn stopped event.
@@ -832,7 +833,7 @@ class LLMAssistantAggregator(LLMContextAggregator):
self,
context: LLMContext,
*,
- params: Optional[LLMAssistantAggregatorParams] = None,
+ params: LLMAssistantAggregatorParams | None = None,
**kwargs,
):
"""Initialize the assistant context aggregator.
@@ -845,9 +846,9 @@ class LLMAssistantAggregator(LLMContextAggregator):
super().__init__(context=context, role="assistant", **kwargs)
self._params = params or LLMAssistantAggregatorParams()
- self._function_calls_in_progress: Dict[str, Optional[FunctionCallInProgressFrame]] = {}
- self._function_calls_image_results: Dict[str, UserImageRawFrame] = {}
- self._context_updated_tasks: Set[asyncio.Task] = set()
+ self._function_calls_in_progress: dict[str, FunctionCallInProgressFrame | None] = {}
+ self._function_calls_image_results: dict[str, UserImageRawFrame] = {}
+ self._context_updated_tasks: set[asyncio.Task] = set()
self._user_speaking: bool = False
self._bot_speaking: bool = False
@@ -862,14 +863,14 @@ class LLMAssistantAggregator(LLMContextAggregator):
self._thought_append_to_context = False
self._thought_llm: str = ""
- self._thought_aggregation: List[TextPartForConcatenation] = []
+ self._thought_aggregation: list[TextPartForConcatenation] = []
self._thought_start_time: str = ""
# Context summarization — always create the summarizer so that manually
# pushed LLMSummarizeContextFrame frames are always handled.
# Auto-triggering based on thresholds is only enabled when
# enable_auto_context_summarization is True.
- self._summarizer: Optional[LLMContextSummarizer] = LLMContextSummarizer(
+ self._summarizer: LLMContextSummarizer | None = LLMContextSummarizer(
context=self._context,
config=self._params.auto_context_summarization_config,
auto_trigger=self._params.enable_auto_context_summarization,
@@ -1475,8 +1476,8 @@ class LLMContextAggregatorPair:
self,
context: LLMContext,
*,
- user_params: Optional[LLMUserAggregatorParams] = None,
- assistant_params: Optional[LLMAssistantAggregatorParams] = None,
+ user_params: LLMUserAggregatorParams | None = None,
+ assistant_params: LLMAssistantAggregatorParams | None = None,
):
"""Initialize the LLM context aggregator pair.
diff --git a/src/pipecat/processors/aggregators/llm_text_processor.py b/src/pipecat/processors/aggregators/llm_text_processor.py
index dce739656..862cf138b 100644
--- a/src/pipecat/processors/aggregators/llm_text_processor.py
+++ b/src/pipecat/processors/aggregators/llm_text_processor.py
@@ -13,8 +13,6 @@ components such as TTS services or context aggregators. It can be used to pre-ag
and categorize, modify, or filter direct output tokens from the LLM.
"""
-from typing import Optional
-
from pipecat.frames.frames import (
AggregatedTextFrame,
EndFrame,
@@ -38,7 +36,7 @@ class LLMTextProcessor(FrameProcessor):
output tokens from the LLM.
"""
- def __init__(self, *, text_aggregator: Optional[BaseTextAggregator] = None, **kwargs):
+ def __init__(self, *, text_aggregator: BaseTextAggregator | None = None, **kwargs):
"""Initialize the LLM text processor.
Args:
@@ -91,7 +89,7 @@ class LLMTextProcessor(FrameProcessor):
out_frame.skip_tts = in_frame.skip_tts
await self.push_frame(out_frame)
- async def _handle_llm_end(self, skip_tts: Optional[bool] = None):
+ async def _handle_llm_end(self, skip_tts: bool | None = None):
# Flush any remaining text
remaining = await self._text_aggregator.flush()
if remaining:
diff --git a/src/pipecat/processors/async_generator.py b/src/pipecat/processors/async_generator.py
index 4fac1a9d4..643b5eacc 100644
--- a/src/pipecat/processors/async_generator.py
+++ b/src/pipecat/processors/async_generator.py
@@ -7,7 +7,8 @@
"""Async generator processor for frame serialization and streaming."""
import asyncio
-from typing import Any, AsyncGenerator
+from collections.abc import AsyncGenerator
+from typing import Any
from pipecat.frames.frames import (
CancelFrame,
diff --git a/src/pipecat/processors/audio/audio_buffer_processor.py b/src/pipecat/processors/audio/audio_buffer_processor.py
index 40a907224..21d6a4528 100644
--- a/src/pipecat/processors/audio/audio_buffer_processor.py
+++ b/src/pipecat/processors/audio/audio_buffer_processor.py
@@ -11,8 +11,6 @@ of audio from both user input and bot output sources, with support for various a
configurations and event-driven processing.
"""
-from typing import Optional
-
from pipecat.audio.utils import create_stream_resampler, interleave_stereo_audio, mix_audio
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
@@ -55,7 +53,7 @@ class AudioBufferProcessor(FrameProcessor):
def __init__(
self,
*,
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
num_channels: int = 1,
buffer_size: int = 0,
enable_turn_audio: bool = False,
@@ -263,7 +261,7 @@ class AudioBufferProcessor(FrameProcessor):
silence_needed = target_position - current_len
buffer.extend(b"\x00" * silence_needed)
- async def _process_turn_recording(self, frame: Frame, resampled_audio: Optional[bytes] = None):
+ async def _process_turn_recording(self, frame: Frame, resampled_audio: bytes | None = None):
"""Process frames for turn-based audio recording."""
# Speaking state (_user_speaking / _bot_speaking) is maintained by
# _process_recording so it is always up-to-date here.
diff --git a/src/pipecat/processors/audio/vad_processor.py b/src/pipecat/processors/audio/vad_processor.py
index aaa769061..75bcaf1d9 100644
--- a/src/pipecat/processors/audio/vad_processor.py
+++ b/src/pipecat/processors/audio/vad_processor.py
@@ -10,8 +10,6 @@ This module provides a VADProcessor that wraps a VADController to process
audio frames and push VAD-related frames into the pipeline.
"""
-from typing import Type
-
from loguru import logger
from pipecat.audio.vad.vad_analyzer import VADAnalyzer
@@ -94,7 +92,7 @@ class VADProcessor(FrameProcessor):
await self.push_frame(frame, direction)
@self._vad_controller.event_handler("on_broadcast_frame")
- async def on_broadcast_frame(_controller, frame_cls: Type[Frame], **kwargs):
+ async def on_broadcast_frame(_controller, frame_cls: type[Frame], **kwargs):
await self.broadcast_frame(frame_cls, **kwargs)
async def cleanup(self):
diff --git a/src/pipecat/processors/consumer_processor.py b/src/pipecat/processors/consumer_processor.py
index f7349031a..8600a6ec4 100644
--- a/src/pipecat/processors/consumer_processor.py
+++ b/src/pipecat/processors/consumer_processor.py
@@ -7,7 +7,7 @@
"""Consumer processor for consuming frames from ProducerProcessor queues."""
import asyncio
-from typing import Awaitable, Callable, Optional
+from collections.abc import Awaitable, Callable
from pipecat.frames.frames import CancelFrame, EndFrame, Frame, StartFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -42,7 +42,7 @@ class ConsumerProcessor(FrameProcessor):
self._transformer = transformer
self._direction = direction
self._producer = producer
- self._consumer_task: Optional[asyncio.Task] = None
+ self._consumer_task: asyncio.Task | None = None
async def process_frame(self, frame: Frame, direction: FrameDirection):
"""Process incoming frames and handle lifecycle events.
diff --git a/src/pipecat/processors/filters/frame_filter.py b/src/pipecat/processors/filters/frame_filter.py
index 3784409a6..67dd5b7b4 100644
--- a/src/pipecat/processors/filters/frame_filter.py
+++ b/src/pipecat/processors/filters/frame_filter.py
@@ -6,8 +6,6 @@
"""Frame filtering processor for the Pipecat framework."""
-from typing import Tuple, Type
-
from pipecat.frames.frames import EndFrame, Frame, SystemFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -20,7 +18,7 @@ class FrameFilter(FrameProcessor):
automatically allowed to pass through to maintain pipeline integrity.
"""
- def __init__(self, types: Tuple[Type[Frame], ...]):
+ def __init__(self, types: tuple[type[Frame], ...]):
"""Initialize the frame filter.
Args:
diff --git a/src/pipecat/processors/filters/function_filter.py b/src/pipecat/processors/filters/function_filter.py
index 46b1945ce..d955b8f9e 100644
--- a/src/pipecat/processors/filters/function_filter.py
+++ b/src/pipecat/processors/filters/function_filter.py
@@ -10,7 +10,7 @@ This module provides a processor that filters frames based on a custom function,
allowing for flexible frame filtering logic in processing pipelines.
"""
-from typing import Awaitable, Callable, Optional
+from collections.abc import Awaitable, Callable
from pipecat.frames.frames import CancelFrame, EndFrame, Frame, StartFrame, SystemFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -29,7 +29,7 @@ class FunctionFilter(FrameProcessor):
def __init__(
self,
filter: FilterType,
- direction: Optional[FrameDirection] = FrameDirection.DOWNSTREAM,
+ direction: FrameDirection | None = FrameDirection.DOWNSTREAM,
filter_system_frames: bool = False,
**kwargs,
):
diff --git a/src/pipecat/processors/filters/wake_check_filter.py b/src/pipecat/processors/filters/wake_check_filter.py
index 6a9e524e6..e5159f4d5 100644
--- a/src/pipecat/processors/filters/wake_check_filter.py
+++ b/src/pipecat/processors/filters/wake_check_filter.py
@@ -18,7 +18,6 @@ import re
import time
import warnings
from enum import Enum
-from typing import List
from loguru import logger
@@ -71,7 +70,7 @@ class WakeCheckFilter(FrameProcessor):
self.wake_timer = 0.0
self.accumulator = ""
- def __init__(self, wake_phrases: List[str], keepalive_timeout: float = 3):
+ def __init__(self, wake_phrases: list[str], keepalive_timeout: float = 3):
"""Initialize the wake phrase filter.
.. deprecated:: 0.0.106
diff --git a/src/pipecat/processors/filters/wake_notifier_filter.py b/src/pipecat/processors/filters/wake_notifier_filter.py
index 91c9b5969..f244dff43 100644
--- a/src/pipecat/processors/filters/wake_notifier_filter.py
+++ b/src/pipecat/processors/filters/wake_notifier_filter.py
@@ -6,7 +6,7 @@
"""Wake notifier filter for conditional frame-based notifications."""
-from typing import Awaitable, Callable, Tuple, Type
+from collections.abc import Awaitable, Callable
from pipecat.frames.frames import Frame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -25,7 +25,7 @@ class WakeNotifierFilter(FrameProcessor):
self,
notifier: BaseNotifier,
*,
- types: Tuple[Type[Frame], ...],
+ types: tuple[type[Frame], ...],
filter: Callable[[Frame], Awaitable[bool]],
**kwargs,
):
diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py
index 77a35fb29..92cf920ae 100644
--- a/src/pipecat/processors/frame_processor.py
+++ b/src/pipecat/processors/frame_processor.py
@@ -11,21 +11,17 @@ audio/video processing pipelines. It includes frame processors, pipeline
management, and frame flow control mechanisms.
"""
+from __future__ import annotations
+
import asyncio
import dataclasses
import traceback
+from collections.abc import Awaitable, Callable, Coroutine
from dataclasses import dataclass
from enum import Enum
from typing import (
Any,
- Awaitable,
- Callable,
- Coroutine,
- List,
Optional,
- Tuple,
- Type,
- Union,
)
from loguru import logger
@@ -79,7 +75,7 @@ class FrameProcessorSetup:
clock: BaseClock
task_manager: BaseTaskManager
- observer: Optional[BaseObserver] = None
+ observer: BaseObserver | None = None
class FrameProcessorQueue(asyncio.PriorityQueue):
@@ -100,7 +96,7 @@ class FrameProcessorQueue(asyncio.PriorityQueue):
self.__high_counter = 0
self.__low_counter = 0
- async def put(self, item: Tuple[Frame, FrameDirection, FrameCallback]):
+ async def put(self, item: tuple[Frame, FrameDirection, FrameCallback]):
"""Put an item into the priority queue.
System frames (`SystemFrame`) have higher priority than any other
@@ -160,9 +156,9 @@ class FrameProcessor(BaseObject):
def __init__(
self,
*,
- name: Optional[str] = None,
+ name: str | None = None,
enable_direct_mode: bool = False,
- metrics: Optional[FrameProcessorMetrics] = None,
+ metrics: FrameProcessorMetrics | None = None,
**kwargs,
):
"""Initialize the frame processor.
@@ -174,20 +170,20 @@ class FrameProcessor(BaseObject):
**kwargs: Additional arguments passed to parent class.
"""
super().__init__(name=name, **kwargs)
- self._prev: Optional["FrameProcessor"] = None
- self._next: Optional["FrameProcessor"] = None
+ self._prev: FrameProcessor | None = None
+ self._next: FrameProcessor | None = None
# Enable direct mode to skip queues and process frames right away.
self._enable_direct_mode = enable_direct_mode
# Clock
- self._clock: Optional[BaseClock] = None
+ self._clock: BaseClock | None = None
# Task Manager
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
# Observer
- self._observer: Optional[BaseObserver] = None
+ self._observer: BaseObserver | None = None
# Other properties
self._enable_metrics = False
@@ -221,8 +217,8 @@ class FrameProcessor(BaseObject):
# frames right away and queues non-system frames for later processing.
self.__should_block_system_frames = False
self.__input_queue = FrameProcessorQueue()
- self.__input_event: Optional[asyncio.Event] = None
- self.__input_frame_task: Optional[asyncio.Task] = None
+ self.__input_event: asyncio.Event | None = None
+ self.__input_frame_task: asyncio.Task | None = None
# The process task processes non-system frames. Non-system frames will
# be processed as soon as they are received by the processing task
@@ -231,9 +227,9 @@ class FrameProcessor(BaseObject):
# `resume_processing_frames()` which will wake up the event.
self.__should_block_frames = False
self.__process_queue = FrameQueue(frame_getter=lambda item: item[0])
- self.__process_event: Optional[asyncio.Event] = None
- self.__process_frame_task: Optional[asyncio.Task] = None
- self.__process_current_frame: Optional[Frame] = None
+ self.__process_event: asyncio.Event | None = None
+ self.__process_frame_task: asyncio.Task | None = None
+ self.__process_current_frame: Frame | None = None
# Frame processor events.
self._register_event_handler("on_before_process_frame", sync=True)
@@ -261,7 +257,7 @@ class FrameProcessor(BaseObject):
return self._name
@property
- def processors(self) -> List["FrameProcessor"]:
+ def processors(self) -> list[FrameProcessor]:
"""Return the list of sub-processors contained within this processor.
Only compound processors (e.g. pipelines and parallel pipelines) have
@@ -273,7 +269,7 @@ class FrameProcessor(BaseObject):
return []
@property
- def entry_processors(self) -> List["FrameProcessor"]:
+ def entry_processors(self) -> list[FrameProcessor]:
"""Return the list of entry processors for this processor.
Entry processors are the first processors in a compound processor
@@ -287,7 +283,7 @@ class FrameProcessor(BaseObject):
return []
@property
- def next(self) -> Optional["FrameProcessor"]:
+ def next(self) -> FrameProcessor | None:
"""Get the next processor.
Returns:
@@ -296,7 +292,7 @@ class FrameProcessor(BaseObject):
return self._next
@property
- def previous(self) -> Optional["FrameProcessor"]:
+ def previous(self) -> FrameProcessor | None:
"""Get the previous processor.
Returns:
@@ -372,7 +368,7 @@ class FrameProcessor(BaseObject):
"""
self._metrics.set_core_metrics_data(data)
- async def start_ttfb_metrics(self, *, start_time: Optional[float] = None):
+ async def start_ttfb_metrics(self, *, start_time: float | None = None):
"""Start time-to-first-byte metrics collection.
Args:
@@ -384,7 +380,7 @@ class FrameProcessor(BaseObject):
start_time=start_time, report_only_initial_ttfb=self._report_only_initial_ttfb
)
- async def stop_ttfb_metrics(self, *, end_time: Optional[float] = None):
+ async def stop_ttfb_metrics(self, *, end_time: float | None = None):
"""Stop time-to-first-byte metrics collection and push results.
Args:
@@ -396,7 +392,7 @@ class FrameProcessor(BaseObject):
if frame:
await self.push_frame(frame)
- async def start_processing_metrics(self, *, start_time: Optional[float] = None):
+ async def start_processing_metrics(self, *, start_time: float | None = None):
"""Start processing metrics collection.
Args:
@@ -406,7 +402,7 @@ class FrameProcessor(BaseObject):
if self.can_generate_metrics() and self.metrics_enabled:
await self._metrics.start_processing_metrics(start_time=start_time)
- async def stop_processing_metrics(self, *, end_time: Optional[float] = None):
+ async def stop_processing_metrics(self, *, end_time: float | None = None):
"""Stop processing metrics collection and push results.
Args:
@@ -458,7 +454,7 @@ class FrameProcessor(BaseObject):
await self.stop_processing_metrics()
await self.stop_text_aggregation_metrics()
- def create_task(self, coroutine: Coroutine, name: Optional[str] = None) -> asyncio.Task:
+ def create_task(self, coroutine: Coroutine, name: str | None = None) -> asyncio.Task:
"""Create a new task managed by this processor.
Args:
@@ -474,7 +470,7 @@ class FrameProcessor(BaseObject):
name = f"{self}::{coroutine.cr_code.co_name}"
return self.task_manager.create_task(coroutine, name)
- async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = 1.0):
+ async def cancel_task(self, task: asyncio.Task, timeout: float | None = 1.0):
"""Cancel a task managed by this processor.
A default timeout if 1 second is used in order to avoid potential
@@ -511,7 +507,7 @@ class FrameProcessor(BaseObject):
if self._metrics is not None:
await self._metrics.cleanup()
- def link(self, processor: "FrameProcessor"):
+ def link(self, processor: FrameProcessor):
"""Link this processor to the next processor in the pipeline.
Args:
@@ -546,7 +542,7 @@ class FrameProcessor(BaseObject):
self,
frame: Frame,
direction: FrameDirection = FrameDirection.DOWNSTREAM,
- callback: Optional[FrameCallback] = None,
+ callback: FrameCallback | None = None,
):
"""Queue a frame for processing.
@@ -622,7 +618,7 @@ class FrameProcessor(BaseObject):
async def push_error(
self,
error_msg: str,
- exception: Optional[Exception] = None,
+ exception: Exception | None = None,
fatal: bool = False,
):
"""Creates and pushes an ErrorFrame upstream.
@@ -720,7 +716,7 @@ class FrameProcessor(BaseObject):
await self.broadcast_interruption()
- async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs):
+ async def broadcast_frame(self, frame_cls: type[Frame], **kwargs):
"""Broadcasts a frame of the specified class upstream and downstream.
This method creates two instances of the given frame class using the
@@ -929,7 +925,7 @@ class FrameProcessor(BaseObject):
"""Reset non-system frame processing queue."""
self.__process_queue.reset()
- def has_queued_frame(self, frame_type: Union[Type[Frame], Type[UninterruptibleFrame]]) -> bool:
+ def has_queued_frame(self, frame_type: type[Frame] | type[UninterruptibleFrame]) -> bool:
"""Return True if a frame of the given type is waiting in the processing queue.
Delegates to :meth:`FrameQueue.has_frame` so the check is O(distinct
@@ -951,7 +947,7 @@ class FrameProcessor(BaseObject):
self.__process_frame_task = None
async def __process_frame(
- self, frame: Frame, direction: FrameDirection, callback: Optional[FrameCallback]
+ self, frame: Frame, direction: FrameDirection, callback: FrameCallback | None
):
try:
await self._call_event_handler("on_before_process_frame", frame)
diff --git a/src/pipecat/processors/frameworks/langchain.py b/src/pipecat/processors/frameworks/langchain.py
index 165f749ea..4400327fc 100644
--- a/src/pipecat/processors/frameworks/langchain.py
+++ b/src/pipecat/processors/frameworks/langchain.py
@@ -6,8 +6,6 @@
"""Langchain integration processor for Pipecat."""
-from typing import Optional, Union
-
from loguru import logger
from pipecat.frames.frames import (
@@ -45,7 +43,7 @@ class LangchainProcessor(FrameProcessor):
super().__init__()
self._chain = chain
self._transcript_key = transcript_key
- self._participant_id: Optional[str] = None
+ self._participant_id: str | None = None
def set_participant_id(self, participant_id: str):
"""Set the participant ID for session tracking.
@@ -76,7 +74,7 @@ class LangchainProcessor(FrameProcessor):
await self.push_frame(frame, direction)
@staticmethod
- def __get_token_value(text: Union[str, AIMessageChunk]) -> str:
+ def __get_token_value(text: str | AIMessageChunk) -> str:
"""Extract token value from various text types.
Args:
diff --git a/src/pipecat/processors/frameworks/rtvi/frames.py b/src/pipecat/processors/frameworks/rtvi/frames.py
index 36070e6e8..092755510 100644
--- a/src/pipecat/processors/frameworks/rtvi/frames.py
+++ b/src/pipecat/processors/frameworks/rtvi/frames.py
@@ -7,7 +7,7 @@
"""RTVI pipeline frame definitions."""
from dataclasses import dataclass
-from typing import Any, Optional
+from typing import Any
from pipecat.frames.frames import SystemFrame
@@ -37,7 +37,7 @@ class RTVIClientMessageFrame(SystemFrame):
msg_id: str
type: str
- data: Optional[Any] = None
+ data: Any | None = None
@dataclass
@@ -53,5 +53,5 @@ class RTVIServerResponseFrame(SystemFrame):
"""
client_msg: RTVIClientMessageFrame
- data: Optional[Any] = None
- error: Optional[str] = None
+ data: Any | None = None
+ error: str | None = None
diff --git a/src/pipecat/processors/frameworks/rtvi/models.py b/src/pipecat/processors/frameworks/rtvi/models.py
index 9e54dd227..81c1b2aae 100644
--- a/src/pipecat/processors/frameworks/rtvi/models.py
+++ b/src/pipecat/processors/frameworks/rtvi/models.py
@@ -14,12 +14,10 @@ Import this module under the ``RTVI`` alias to use as a namespace::
msg = RTVI.BotReady(id="1", data=RTVI.BotReadyData(version=RTVI.PROTOCOL_VERSION))
"""
+from collections.abc import Mapping
from typing import (
Any,
- Dict,
Literal,
- Mapping,
- Optional,
)
from pydantic import BaseModel
@@ -46,7 +44,7 @@ class Message(BaseModel):
label: MessageLiteral = MESSAGE_LABEL
type: str
id: str
- data: Optional[Dict[str, Any]] = None
+ data: dict[str, Any] | None = None
# -- Client -> Pipecat messages.
@@ -56,7 +54,7 @@ class RawClientMessageData(BaseModel):
"""Data structure expected from client messages sent to the RTVI server."""
t: str
- d: Optional[Any] = None
+ d: Any | None = None
class ClientMessage(BaseModel):
@@ -64,14 +62,14 @@ class ClientMessage(BaseModel):
msg_id: str
type: str
- data: Optional[Any] = None
+ data: Any | None = None
class RawServerResponseData(BaseModel):
"""Data structure for server responses to client messages."""
t: str
- d: Optional[Any] = None
+ d: Any | None = None
class ServerResponse(BaseModel):
@@ -94,10 +92,10 @@ class AboutClientData(BaseModel):
"""
library: str
- library_version: Optional[str] = None
- platform: Optional[str] = None
- platform_version: Optional[str] = None
- platform_details: Optional[Any] = None
+ library_version: str | None = None
+ platform: str | None = None
+ platform_version: str | None = None
+ platform_details: Any | None = None
class ClientReadyData(BaseModel):
@@ -165,7 +163,7 @@ class BotReadyData(BaseModel):
"""
version: str
- about: Optional[Mapping[str, Any]] = None
+ about: Mapping[str, Any] | None = None
class BotReady(BaseModel):
@@ -226,7 +224,7 @@ class SendTextData(BaseModel):
"""
content: str
- options: Optional[SendTextOptions] = None
+ options: SendTextOptions | None = None
class LLMFunctionCallStartMessageData(BaseModel):
@@ -236,7 +234,7 @@ class LLMFunctionCallStartMessageData(BaseModel):
the configured function_call_report_level for security.
"""
- function_name: Optional[str] = None
+ function_name: str | None = None
class LLMFunctionCallStartMessage(BaseModel):
@@ -270,8 +268,8 @@ class LLMFunctionCallInProgressMessageData(BaseModel):
"""
tool_call_id: str
- function_name: Optional[str] = None
- arguments: Optional[Mapping[str, Any]] = None
+ function_name: str | None = None
+ arguments: Mapping[str, Any] | None = None
class LLMFunctionCallInProgressMessage(BaseModel):
@@ -295,8 +293,8 @@ class LLMFunctionCallStoppedMessageData(BaseModel):
tool_call_id: str
cancelled: bool
- function_name: Optional[str] = None
- result: Optional[Any] = None
+ function_name: str | None = None
+ result: Any | None = None
class LLMFunctionCallStoppedMessage(BaseModel):
diff --git a/src/pipecat/processors/frameworks/rtvi/observer.py b/src/pipecat/processors/frameworks/rtvi/observer.py
index 958ba8841..7b952530d 100644
--- a/src/pipecat/processors/frameworks/rtvi/observer.py
+++ b/src/pipecat/processors/frameworks/rtvi/observer.py
@@ -7,17 +7,12 @@
"""RTVI observer for converting pipeline frames to outgoing RTVI messages."""
import time
+from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
-from enum import Enum
+from enum import Enum, StrEnum
from typing import (
TYPE_CHECKING,
- Awaitable,
- Callable,
- Dict,
- List,
Optional,
- Set,
- Tuple,
)
from loguru import logger
@@ -71,7 +66,7 @@ if TYPE_CHECKING:
from pipecat.processors.frameworks.rtvi.processor import RTVIProcessor
-class RTVIFunctionCallReportLevel(str, Enum):
+class RTVIFunctionCallReportLevel(StrEnum):
"""Level of detail to include in function call RTVI events.
Controls what information is exposed in function call events for security.
@@ -148,18 +143,14 @@ class RTVIObserverParams:
user_audio_level_enabled: bool = False
metrics_enabled: bool = True
system_logs_enabled: bool = False
- ignored_sources: List[FrameProcessor] = field(default_factory=list)
- skip_aggregator_types: Optional[List[AggregationType | str]] = None
- bot_output_transforms: Optional[
- List[
- Tuple[
- AggregationType | str,
- Callable[[str, AggregationType | str], Awaitable[str]],
- ]
- ]
- ] = None
+ ignored_sources: list[FrameProcessor] = field(default_factory=list)
+ skip_aggregator_types: list[AggregationType | str] | None = None
+ bot_output_transforms: (
+ list[tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]]]
+ | None
+ ) = None
audio_level_period_secs: float = 0.15
- function_call_report_level: Dict[str, RTVIFunctionCallReportLevel] = field(
+ function_call_report_level: dict[str, RTVIFunctionCallReportLevel] = field(
default_factory=lambda: {"*": RTVIFunctionCallReportLevel.NONE}
)
@@ -180,7 +171,7 @@ class RTVIObserver(BaseObserver):
self,
rtvi: Optional["RTVIProcessor"] = None,
*,
- params: Optional[RTVIObserverParams] = None,
+ params: RTVIObserverParams | None = None,
**kwargs,
):
"""Initialize the RTVI observer.
@@ -194,7 +185,7 @@ class RTVIObserver(BaseObserver):
self._rtvi = rtvi
self._params = params or RTVIObserverParams()
- self._ignored_sources: Set[FrameProcessor] = set(self._params.ignored_sources)
+ self._ignored_sources: set[FrameProcessor] = set(self._params.ignored_sources)
self._frames_seen = set()
self._bot_transcription = ""
@@ -203,13 +194,13 @@ class RTVIObserver(BaseObserver):
# Track bot speaking state for queuing aggregated text frames
self._bot_is_speaking = False
- self._queued_aggregated_text_frames: List[AggregatedTextFrame] = []
+ self._queued_aggregated_text_frames: list[AggregatedTextFrame] = []
if self._params.system_logs_enabled:
self._system_logger_id = logger.add(self._logger_sink)
- self._aggregation_transforms: List[
- Tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]]
+ self._aggregation_transforms: list[
+ tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]]
] = self._params.bot_output_transforms or []
def add_bot_output_transformer(
diff --git a/src/pipecat/processors/frameworks/rtvi/processor.py b/src/pipecat/processors/frameworks/rtvi/processor.py
index 0a05560c0..5586ec8ae 100644
--- a/src/pipecat/processors/frameworks/rtvi/processor.py
+++ b/src/pipecat/processors/frameworks/rtvi/processor.py
@@ -8,7 +8,8 @@
import asyncio
import base64
-from typing import Any, Mapping, Optional
+from collections.abc import Mapping
+from typing import Any
from loguru import logger
from pydantic import BaseModel, ValidationError
@@ -51,7 +52,7 @@ class RTVIProcessor(FrameProcessor):
def __init__(
self,
*,
- transport: Optional[BaseTransport] = None,
+ transport: BaseTransport | None = None,
**kwargs,
):
"""Initialize the RTVI processor.
@@ -70,7 +71,7 @@ class RTVIProcessor(FrameProcessor):
self._llm_skip_tts: bool = False # Keep in sync with llm_service.py's configuration.
# A task to process incoming transport messages.
- self._message_task: Optional[asyncio.Task] = None
+ self._message_task: asyncio.Task | None = None
self._register_event_handler("on_bot_started")
self._register_event_handler("on_client_ready")
@@ -84,7 +85,7 @@ class RTVIProcessor(FrameProcessor):
self._input_transport = input_transport
self._input_transport.enable_audio_in_stream_on_start(False)
- def create_rtvi_observer(self, *, params: Optional[RTVIObserverParams] = None, **kwargs):
+ def create_rtvi_observer(self, *, params: RTVIObserverParams | None = None, **kwargs):
"""Creates a new RTVI Observer.
Args:
diff --git a/src/pipecat/processors/frameworks/strands_agents.py b/src/pipecat/processors/frameworks/strands_agents.py
index eb1edbfdc..7383cd089 100644
--- a/src/pipecat/processors/frameworks/strands_agents.py
+++ b/src/pipecat/processors/frameworks/strands_agents.py
@@ -4,8 +4,6 @@ This module provides integration with Strands Agents for handling conversational
interactions. It supports both single agent and multi-agent graphs.
"""
-from typing import Optional
-
from loguru import logger
from pipecat.frames.frames import (
@@ -38,9 +36,9 @@ class StrandsAgentsProcessor(FrameProcessor):
def __init__(
self,
- agent: Optional[Agent] = None,
- graph: Optional[Graph] = None,
- graph_exit_node: Optional[str] = None,
+ agent: Agent | None = None,
+ graph: Graph | None = None,
+ graph_exit_node: str | None = None,
):
"""Initialize the Strands Agents processor.
diff --git a/src/pipecat/processors/gstreamer/pipeline_source.py b/src/pipecat/processors/gstreamer/pipeline_source.py
index ec7ef0ec7..c2a819f69 100644
--- a/src/pipecat/processors/gstreamer/pipeline_source.py
+++ b/src/pipecat/processors/gstreamer/pipeline_source.py
@@ -7,7 +7,6 @@
"""GStreamer pipeline source integration for Pipecat."""
import asyncio
-from typing import Optional
from loguru import logger
from pydantic import BaseModel
@@ -58,11 +57,11 @@ class GStreamerPipelineSource(FrameProcessor):
video_width: int = 1280
video_height: int = 720
- audio_sample_rate: Optional[int] = None
+ audio_sample_rate: int | None = None
audio_channels: int = 1
clock_sync: bool = True
- def __init__(self, *, pipeline: str, out_params: Optional[OutputParams] = None, **kwargs):
+ def __init__(self, *, pipeline: str, out_params: OutputParams | None = None, **kwargs):
"""Initialize the GStreamer pipeline source.
Args:
diff --git a/src/pipecat/processors/idle_frame_processor.py b/src/pipecat/processors/idle_frame_processor.py
index 3a7f1b860..9426006be 100644
--- a/src/pipecat/processors/idle_frame_processor.py
+++ b/src/pipecat/processors/idle_frame_processor.py
@@ -7,7 +7,7 @@
"""Idle frame processor for timeout-based callback execution."""
import asyncio
-from typing import Awaitable, Callable, List, Optional
+from collections.abc import Awaitable, Callable
from pipecat.frames.frames import Frame, StartFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -26,7 +26,7 @@ class IdleFrameProcessor(FrameProcessor):
*,
callback: Callable[["IdleFrameProcessor"], Awaitable[None]],
timeout: float,
- types: Optional[List[type]] = None,
+ types: list[type] | None = None,
**kwargs,
):
"""Initialize the idle frame processor.
@@ -86,5 +86,5 @@ class IdleFrameProcessor(FrameProcessor):
try:
await asyncio.wait_for(self._idle_event.wait(), timeout=self._timeout)
self._idle_event.clear()
- except asyncio.TimeoutError:
+ except TimeoutError:
await self._callback(self)
diff --git a/src/pipecat/processors/logger.py b/src/pipecat/processors/logger.py
index 2e50c5a75..6cfea1b02 100644
--- a/src/pipecat/processors/logger.py
+++ b/src/pipecat/processors/logger.py
@@ -6,8 +6,6 @@
"""Frame logging utilities for debugging and monitoring frame flow in Pipecat pipelines."""
-from typing import Optional, Tuple, Type
-
from loguru import logger
from pipecat.frames.frames import (
@@ -33,8 +31,8 @@ class FrameLogger(FrameProcessor):
def __init__(
self,
prefix="Frame",
- color: Optional[str] = None,
- ignored_frame_types: Tuple[Type[Frame], ...] = (
+ color: str | None = None,
+ ignored_frame_types: tuple[type[Frame], ...] = (
BotSpeakingFrame,
UserSpeakingFrame,
InputAudioRawFrame,
diff --git a/src/pipecat/processors/metrics/frame_processor_metrics.py b/src/pipecat/processors/metrics/frame_processor_metrics.py
index 7a52895a2..18ef7f580 100644
--- a/src/pipecat/processors/metrics/frame_processor_metrics.py
+++ b/src/pipecat/processors/metrics/frame_processor_metrics.py
@@ -7,7 +7,6 @@
"""Frame processor metrics collection and reporting."""
import time
-from typing import Optional
from loguru import logger
@@ -70,7 +69,7 @@ class FrameProcessorMetrics(BaseObject):
return self._task_manager
@property
- def ttfb(self) -> Optional[float]:
+ def ttfb(self) -> float | None:
"""Get the current TTFB value in seconds.
Returns:
@@ -110,7 +109,7 @@ class FrameProcessorMetrics(BaseObject):
self._core_metrics_data = MetricsData(processor=name)
async def start_ttfb_metrics(
- self, *, start_time: Optional[float] = None, report_only_initial_ttfb: bool
+ self, *, start_time: float | None = None, report_only_initial_ttfb: bool
):
"""Start measuring time-to-first-byte (TTFB).
@@ -124,7 +123,7 @@ class FrameProcessorMetrics(BaseObject):
self._last_ttfb_time = 0
self._should_report_ttfb = not report_only_initial_ttfb
- async def stop_ttfb_metrics(self, *, end_time: Optional[float] = None):
+ async def stop_ttfb_metrics(self, *, end_time: float | None = None):
"""Stop TTFB measurement and generate metrics frame.
Args:
@@ -147,7 +146,7 @@ class FrameProcessorMetrics(BaseObject):
self._start_ttfb_time = 0
return MetricsFrame(data=[ttfb])
- async def start_processing_metrics(self, *, start_time: Optional[float] = None):
+ async def start_processing_metrics(self, *, start_time: float | None = None):
"""Start measuring processing time.
Args:
@@ -156,7 +155,7 @@ class FrameProcessorMetrics(BaseObject):
"""
self._start_processing_time = start_time or time.time()
- async def stop_processing_metrics(self, *, end_time: Optional[float] = None):
+ async def stop_processing_metrics(self, *, end_time: float | None = None):
"""Stop processing time measurement and generate metrics frame.
Args:
diff --git a/src/pipecat/processors/metrics/sentry.py b/src/pipecat/processors/metrics/sentry.py
index c865ee470..b043b9058 100644
--- a/src/pipecat/processors/metrics/sentry.py
+++ b/src/pipecat/processors/metrics/sentry.py
@@ -7,7 +7,6 @@
"""Sentry integration for frame processor metrics."""
import asyncio
-from typing import Optional
from loguru import logger
@@ -72,7 +71,7 @@ class SentryMetrics(FrameProcessorMetrics):
sentry_sdk.flush(timeout=5.0)
async def start_ttfb_metrics(
- self, *, start_time: Optional[float] = None, report_only_initial_ttfb: bool
+ self, *, start_time: float | None = None, report_only_initial_ttfb: bool
):
"""Start tracking time-to-first-byte metrics.
@@ -93,7 +92,7 @@ class SentryMetrics(FrameProcessorMetrics):
f"{self} Sentry transaction started (ID: {self._ttfb_metrics_tx.span_id} Name: {self._ttfb_metrics_tx.name})"
)
- async def stop_ttfb_metrics(self, *, end_time: Optional[float] = None):
+ async def stop_ttfb_metrics(self, *, end_time: float | None = None):
"""Stop tracking time-to-first-byte metrics.
Args:
@@ -105,7 +104,7 @@ class SentryMetrics(FrameProcessorMetrics):
await self._sentry_queue.put(self._ttfb_metrics_tx)
self._ttfb_metrics_tx = None
- async def start_processing_metrics(self, *, start_time: Optional[float] = None):
+ async def start_processing_metrics(self, *, start_time: float | None = None):
"""Start tracking frame processing metrics.
Args:
@@ -122,7 +121,7 @@ class SentryMetrics(FrameProcessorMetrics):
f"{self} Sentry transaction started (ID: {self._processing_metrics_tx.span_id} Name: {self._processing_metrics_tx.name})"
)
- async def stop_processing_metrics(self, *, end_time: Optional[float] = None):
+ async def stop_processing_metrics(self, *, end_time: float | None = None):
"""Stop tracking frame processing metrics.
Args:
diff --git a/src/pipecat/processors/producer_processor.py b/src/pipecat/processors/producer_processor.py
index 3de6efc40..c70c066f1 100644
--- a/src/pipecat/processors/producer_processor.py
+++ b/src/pipecat/processors/producer_processor.py
@@ -7,7 +7,7 @@
"""Producer processor for frame filtering and distribution."""
import asyncio
-from typing import Awaitable, Callable, List
+from collections.abc import Awaitable, Callable
from pipecat.frames.frames import Frame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -55,7 +55,7 @@ class ProducerProcessor(FrameProcessor):
self._filter = filter
self._transformer = transformer
self._passthrough = passthrough
- self._consumers: List[asyncio.Queue] = []
+ self._consumers: list[asyncio.Queue] = []
def add_consumer(self):
"""Add a new consumer and return its associated queue.
diff --git a/src/pipecat/processors/text_transformer.py b/src/pipecat/processors/text_transformer.py
index c97ef4e46..dffee7ac4 100644
--- a/src/pipecat/processors/text_transformer.py
+++ b/src/pipecat/processors/text_transformer.py
@@ -6,7 +6,7 @@
"""Stateless text transformation processor for Pipecat."""
-from typing import Callable, Coroutine, Union
+from collections.abc import Callable, Coroutine
from pipecat.frames.frames import Frame, TextFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -21,7 +21,7 @@ class StatelessTextTransformer(FrameProcessor):
"""
def __init__(
- self, transform_fn: Union[Callable[[str], str], Callable[[str], Coroutine[None, None, str]]]
+ self, transform_fn: Callable[[str], str] | Callable[[str], Coroutine[None, None, str]]
):
"""Initialize the text transformer.
diff --git a/src/pipecat/runner/daily.py b/src/pipecat/runner/daily.py
index 082d7271c..bc80f0641 100644
--- a/src/pipecat/runner/daily.py
+++ b/src/pipecat/runner/daily.py
@@ -37,7 +37,6 @@ Example::
import os
import time
import uuid
-from typing import Dict, List, Optional
import aiohttp
from loguru import logger
@@ -64,7 +63,7 @@ class DailyRoomConfig(BaseModel):
room_url: str
token: str
- sip_endpoint: Optional[str] = None
+ sip_endpoint: str | None = None
def __iter__(self):
"""Enable tuple unpacking for backward compatibility.
@@ -78,18 +77,18 @@ class DailyRoomConfig(BaseModel):
async def configure(
aiohttp_session: aiohttp.ClientSession,
*,
- api_key: Optional[str] = None,
+ api_key: str | None = None,
room_exp_duration: float = 2.0,
token_exp_duration: float = 2.0,
- sip_caller_phone: Optional[str] = None,
+ sip_caller_phone: str | None = None,
sip_enable_video: bool = False,
sip_num_endpoints: int = 1,
enable_dialout: bool = False,
- sip_codecs: Optional[Dict[str, List[str]]] = None,
- sip_provider: Optional[str] = None,
- room_geo: Optional[str] = None,
- room_properties: Optional[DailyRoomProperties] = None,
- token_properties: Optional[DailyMeetingTokenProperties] = None,
+ sip_codecs: dict[str, list[str]] | None = None,
+ sip_provider: str | None = None,
+ room_geo: str | None = None,
+ room_properties: DailyRoomProperties | None = None,
+ token_properties: DailyMeetingTokenProperties | None = None,
) -> DailyRoomConfig:
"""Configure Daily room URL and token with optional SIP capabilities.
diff --git a/src/pipecat/runner/livekit.py b/src/pipecat/runner/livekit.py
index 4eef59207..1be30f3d0 100644
--- a/src/pipecat/runner/livekit.py
+++ b/src/pipecat/runner/livekit.py
@@ -30,7 +30,6 @@ Example::
import argparse
import os
-from typing import Optional
from livekit import api
from loguru import logger
@@ -98,7 +97,7 @@ async def configure():
return (url, token, room_name)
-async def configure_with_args(parser: Optional[argparse.ArgumentParser] = None):
+async def configure_with_args(parser: argparse.ArgumentParser | None = None):
"""Configure LiveKit room with command-line argument parsing.
Args:
diff --git a/src/pipecat/runner/run.py b/src/pipecat/runner/run.py
index 90f7b8ce1..c6d43fbbd 100644
--- a/src/pipecat/runner/run.py
+++ b/src/pipecat/runner/run.py
@@ -74,7 +74,7 @@ import uuid
from contextlib import asynccontextmanager
from http import HTTPMethod
from pathlib import Path
-from typing import Any, Dict, List, Optional, TypedDict, Union
+from typing import Any, TypedDict
import aiohttp
from fastapi.responses import FileResponse, Response
@@ -106,7 +106,7 @@ os.environ["ENV"] = "local"
TELEPHONY_TRANSPORTS = ["twilio", "telnyx", "plivo", "exotel"]
-RUNNER_DOWNLOADS_FOLDER: Optional[str] = None
+RUNNER_DOWNLOADS_FOLDER: str | None = None
RUNNER_HOST: str = "localhost"
RUNNER_PORT: int = 7860
@@ -220,17 +220,17 @@ def _setup_webrtc_routes(app: FastAPI, args: argparse.Namespace):
return
class IceServer(TypedDict, total=False):
- urls: Union[str, List[str]]
+ urls: str | list[str]
class IceConfig(TypedDict):
- iceServers: List[IceServer]
+ iceServers: list[IceServer]
class StartBotResult(TypedDict, total=False):
sessionId: str
- iceConfig: Optional[IceConfig]
+ iceConfig: IceConfig | None
# In-memory store of active sessions: session_id -> session info
- active_sessions: Dict[str, Dict[str, Any]] = {}
+ active_sessions: dict[str, dict[str, Any]] = {}
# Mount the frontend
app.mount("/client", SmallWebRTCPrebuiltUI)
@@ -418,7 +418,7 @@ def _setup_whatsapp_routes(app: FastAPI, args: argparse.Namespace):
return
# Global WhatsApp client instance
- whatsapp_client: Optional[WhatsAppClient] = None
+ whatsapp_client: WhatsAppClient | None = None
@app.get(
"/whatsapp",
@@ -857,7 +857,7 @@ def _validate_and_clean_proxy(proxy: str) -> str:
return proxy
-def runner_downloads_folder() -> Optional[str]:
+def runner_downloads_folder() -> str | None:
"""Returns the folder where files are stored for later download."""
return RUNNER_DOWNLOADS_FOLDER
@@ -872,7 +872,7 @@ def runner_port() -> int:
return RUNNER_PORT
-def main(parser: Optional[argparse.ArgumentParser] = None):
+def main(parser: argparse.ArgumentParser | None = None):
"""Start the Pipecat development runner.
Parses command-line arguments and starts a FastAPI server configured
diff --git a/src/pipecat/runner/types.py b/src/pipecat/runner/types.py
index e48f10a08..055824a22 100644
--- a/src/pipecat/runner/types.py
+++ b/src/pipecat/runner/types.py
@@ -12,7 +12,7 @@ information to bot functions.
import argparse
from dataclasses import dataclass, field
-from typing import Any, Dict, Optional
+from typing import Any
from fastapi import WebSocket
from pydantic import BaseModel
@@ -34,9 +34,9 @@ class DialinSettings(BaseModel):
call_id: str
call_domain: str
- To: Optional[str] = None
- From: Optional[str] = None
- sip_headers: Optional[Dict[str, str]] = None
+ To: str | None = None
+ From: str | None = None
+ sip_headers: dict[str, str] | None = None
class DailyDialinRequest(BaseModel):
@@ -64,8 +64,8 @@ class RunnerArguments:
handle_sigint: bool = field(init=False, kw_only=True)
handle_sigterm: bool = field(init=False, kw_only=True)
pipeline_idle_timeout_secs: int = field(init=False, kw_only=True)
- body: Optional[Any] = field(default_factory=dict, kw_only=True)
- cli_args: Optional[argparse.Namespace] = field(default=None, init=False, kw_only=True)
+ body: Any | None = field(default_factory=dict, kw_only=True)
+ cli_args: argparse.Namespace | None = field(default=None, init=False, kw_only=True)
def __post_init__(self):
self.handle_sigint = False
@@ -84,7 +84,7 @@ class DailyRunnerArguments(RunnerArguments):
"""
room_url: str
- token: Optional[str] = None
+ token: str | None = None
@dataclass
@@ -122,4 +122,4 @@ class LiveKitRunnerArguments(RunnerArguments):
room_name: str
url: str
- token: Optional[str] = None
+ token: str | None = None
diff --git a/src/pipecat/runner/utils.py b/src/pipecat/runner/utils.py
index d0bb44a88..7a4b3034c 100644
--- a/src/pipecat/runner/utils.py
+++ b/src/pipecat/runner/utils.py
@@ -32,7 +32,8 @@ Example::
import json
import os
import re
-from typing import Any, Callable, Dict, Optional
+from collections.abc import Callable
+from typing import Any
from fastapi import WebSocket
from loguru import logger
@@ -373,7 +374,7 @@ def _smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str:
return "\r\n".join(result) + "\r\n"
-def smallwebrtc_sdp_munging(sdp: str, host: Optional[str]) -> str:
+def smallwebrtc_sdp_munging(sdp: str, host: str | None) -> str:
"""Apply SDP modifications for SmallWebRTC compatibility.
Args:
@@ -389,7 +390,7 @@ def smallwebrtc_sdp_munging(sdp: str, host: Optional[str]) -> str:
return sdp
-def _get_transport_params(transport_key: str, transport_params: Dict[str, Callable]) -> Any:
+def _get_transport_params(transport_key: str, transport_params: dict[str, Callable]) -> Any:
"""Get transport parameters from factory function.
Args:
@@ -415,7 +416,7 @@ def _get_transport_params(transport_key: str, transport_params: Dict[str, Callab
async def _create_telephony_transport(
websocket: WebSocket,
- params: Optional[Any] = None,
+ params: Any | None = None,
transport_type: str = None,
call_data: dict = None,
) -> BaseTransport:
@@ -488,7 +489,7 @@ async def _create_telephony_transport(
async def create_transport(
- runner_args: Any, transport_params: Dict[str, Callable]
+ runner_args: Any, transport_params: dict[str, Callable]
) -> BaseTransport:
"""Create a transport from runner arguments using factory functions.
diff --git a/src/pipecat/serializers/base_serializer.py b/src/pipecat/serializers/base_serializer.py
index d9414e43d..7c354d253 100644
--- a/src/pipecat/serializers/base_serializer.py
+++ b/src/pipecat/serializers/base_serializer.py
@@ -7,7 +7,6 @@
"""Frame serialization interfaces for Pipecat."""
from abc import abstractmethod
-from typing import Optional
from pydantic import BaseModel
@@ -39,7 +38,7 @@ class FrameSerializer(BaseObject):
ignore_rtvi_messages: bool = True
- def __init__(self, params: Optional[InputParams] = None, **kwargs):
+ def __init__(self, params: InputParams | None = None, **kwargs):
"""Initialize the FrameSerializer.
Args:
diff --git a/src/pipecat/serializers/exotel.py b/src/pipecat/serializers/exotel.py
index abf170d65..ff2510f57 100644
--- a/src/pipecat/serializers/exotel.py
+++ b/src/pipecat/serializers/exotel.py
@@ -8,7 +8,6 @@
import base64
import json
-from typing import Optional
from loguru import logger
@@ -48,10 +47,10 @@ class ExotelFrameSerializer(FrameSerializer):
"""
exotel_sample_rate: int = 8000
- sample_rate: Optional[int] = None
+ sample_rate: int | None = None
def __init__(
- self, stream_sid: str, call_sid: Optional[str] = None, params: Optional[InputParams] = None
+ self, stream_sid: str, call_sid: str | None = None, params: InputParams | None = None
):
"""Initialize the ExotelFrameSerializer.
diff --git a/src/pipecat/serializers/genesys.py b/src/pipecat/serializers/genesys.py
index 0cfdba22b..e52abc6aa 100644
--- a/src/pipecat/serializers/genesys.py
+++ b/src/pipecat/serializers/genesys.py
@@ -23,8 +23,8 @@ Audio Format:
import json
import uuid
from datetime import timedelta
-from enum import Enum
-from typing import Any, Dict, List, Optional
+from enum import StrEnum
+from typing import Any
from loguru import logger
@@ -46,7 +46,7 @@ from pipecat.frames.frames import (
from pipecat.serializers.base_serializer import FrameSerializer
-class AudioHookMessageType(str, Enum):
+class AudioHookMessageType(StrEnum):
"""AudioHook protocol message types."""
OPEN = "open"
@@ -63,7 +63,7 @@ class AudioHookMessageType(str, Enum):
DISCONNECT = "disconnect"
-class AudioHookChannel(str, Enum):
+class AudioHookChannel(StrEnum):
"""AudioHook audio channel configuration."""
EXTERNAL = "external" # Customer audio only (mono)
@@ -71,7 +71,7 @@ class AudioHookChannel(str, Enum):
BOTH = "both" # Stereo: external=left, internal=right
-class AudioHookMediaFormat(str, Enum):
+class AudioHookMediaFormat(StrEnum):
"""Supported audio formats."""
PCMU = "PCMU" # μ-law, 8kHz
@@ -146,18 +146,18 @@ class GenesysAudioHookSerializer(FrameSerializer):
"""
genesys_sample_rate: int = 8000
- sample_rate: Optional[int] = None
+ sample_rate: int | None = None
channel: AudioHookChannel = AudioHookChannel.EXTERNAL
media_format: AudioHookMediaFormat = AudioHookMediaFormat.PCMU
process_external: bool = True
process_internal: bool = False
- supported_languages: Optional[List[str]] = None
- selected_language: Optional[str] = None
+ supported_languages: list[str] | None = None
+ selected_language: str | None = None
start_paused: bool = False
def __init__(
self,
- params: Optional[InputParams] = None,
+ params: InputParams | None = None,
**kwargs,
):
"""Initialize the GenesysAudioHookSerializer.
@@ -185,12 +185,12 @@ class GenesysAudioHookSerializer(FrameSerializer):
self._position = timedelta(0)
# Session metadata
- self._conversation_id: Optional[str] = None
- self._participant: Optional[Dict[str, Any]] = None
- self._custom_config: Optional[Dict[str, Any]] = None
- self._media_info: Optional[List[Dict[str, Any]]] = None
- self._input_variables: Optional[Dict[str, Any]] = None # Custom input from Genesys
- self._output_variables: Optional[Dict[str, Any]] = None # Custom output to Genesys
+ self._conversation_id: str | None = None
+ self._participant: dict[str, Any] | None = None
+ self._custom_config: dict[str, Any] | None = None
+ self._media_info: list[dict[str, Any]] | None = None
+ self._input_variables: dict[str, Any] | None = None # Custom input from Genesys
+ self._output_variables: dict[str, Any] | None = None # Custom output to Genesys
# Event handlers
self._register_event_handler("on_open")
@@ -207,7 +207,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
return self._session_id
@property
- def conversation_id(self) -> Optional[str]:
+ def conversation_id(self) -> str | None:
"""Get the Genesys conversation ID."""
return self._conversation_id
@@ -222,21 +222,21 @@ class GenesysAudioHookSerializer(FrameSerializer):
return self._is_paused
@property
- def participant(self) -> Optional[Dict[str, Any]]:
+ def participant(self) -> dict[str, Any] | None:
"""Get participant info (ani, dnis, etc.) from the open message."""
return self._participant
@property
- def input_variables(self) -> Optional[Dict[str, Any]]:
+ def input_variables(self) -> dict[str, Any] | None:
"""Get custom input variables from the open message."""
return self._input_variables
@property
- def output_variables(self) -> Optional[Dict[str, Any]]:
+ def output_variables(self) -> dict[str, Any] | None:
"""Get custom output variables to send back to Genesys."""
return self._output_variables
- def set_output_variables(self, variables: Dict[str, Any]) -> None:
+ def set_output_variables(self, variables: dict[str, Any]) -> None:
"""Set custom output variables to send back to Genesys on close.
These variables will be included in the 'closed' response when Genesys
@@ -305,9 +305,9 @@ class GenesysAudioHookSerializer(FrameSerializer):
def _create_message(
self,
msg_type: AudioHookMessageType,
- parameters: Optional[Dict[str, Any]] = None,
+ parameters: dict[str, Any] | None = None,
include_position: bool = True,
- ) -> Dict[str, Any]:
+ ) -> dict[str, Any]:
"""Create a protocol message with common fields.
Based on the Genesys AudioHook protocol, responses include:
@@ -341,9 +341,9 @@ class GenesysAudioHookSerializer(FrameSerializer):
def create_opened_response(
self,
start_paused: bool = False,
- supported_languages: Optional[List[str]] = None,
- selected_language: Optional[str] = None,
- ) -> Dict[str, Any]:
+ supported_languages: list[str] | None = None,
+ selected_language: str | None = None,
+ ) -> dict[str, Any]:
"""Create an 'opened' response message for the client.
This should be sent in response to an 'open' message from Genesys.
@@ -397,8 +397,8 @@ class GenesysAudioHookSerializer(FrameSerializer):
def create_closed_response(
self,
- output_variables: Optional[Dict[str, Any]] = None,
- ) -> Dict[str, Any]:
+ output_variables: dict[str, Any] | None = None,
+ ) -> dict[str, Any]:
"""Create a 'closed' response message.
This should be sent in response to a 'close' message from Genesys.
@@ -422,7 +422,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
}
)
"""
- parameters: Optional[Dict[str, Any]] = None
+ parameters: dict[str, Any] | None = None
if output_variables:
parameters = {"outputVariables": output_variables}
@@ -437,7 +437,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
return msg
- def create_pong_response(self) -> Dict[str, Any]:
+ def create_pong_response(self) -> dict[str, Any]:
"""Create a 'pong' response message.
This should be sent in response to a 'ping' message from Genesys.
@@ -448,7 +448,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
msg = self._create_message(AudioHookMessageType.PONG)
return msg
- def create_resumed_response(self) -> Dict[str, Any]:
+ def create_resumed_response(self) -> dict[str, Any]:
"""Create a 'resumed' response message.
This should be sent in response to a 'pause' message when ready to resume.
@@ -463,7 +463,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
return msg
- def create_barge_in_event(self) -> Dict[str, Any]:
+ def create_barge_in_event(self) -> dict[str, Any]:
"""Create a barge-in event message.
This notifies Genesys Cloud that the user has interrupted the bot's
@@ -485,9 +485,9 @@ class GenesysAudioHookSerializer(FrameSerializer):
self,
reason: str = "completed",
action: str = "transfer",
- output_variables: Optional[Dict[str, Any]] = None,
- info: Optional[str] = None,
- ) -> Dict[str, Any]:
+ output_variables: dict[str, Any] | None = None,
+ info: str | None = None,
+ ) -> dict[str, Any]:
"""Create a 'disconnect' message to initiate session termination.
Args:
@@ -499,7 +499,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
Returns:
Dictionary of the disconnect message.
"""
- parameters: Dict[str, Any] = {"reason": reason}
+ parameters: dict[str, Any] = {"reason": reason}
# Build outputVariables
out_vars = {"action": action}
@@ -523,7 +523,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
code: int,
message: str,
retryable: bool = False,
- ) -> Dict[str, Any]:
+ ) -> dict[str, Any]:
"""Create an 'error' message.
Args:
@@ -700,7 +700,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
return audio_frame
- async def _handle_control_message(self, message: Dict[str, Any]) -> Frame | None:
+ async def _handle_control_message(self, message: dict[str, Any]) -> Frame | None:
"""Handle a JSON control message from Genesys.
Args:
@@ -748,7 +748,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
logger.warning(f"Unknown AudioHook message type: {msg_type}")
return None
- async def _handle_open(self, message: Dict[str, Any]) -> Frame | None:
+ async def _handle_open(self, message: dict[str, Any]) -> Frame | None:
"""Handle an 'open' message from Genesys.
This initializes the session with metadata from Genesys Cloud and
@@ -781,7 +781,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
# media is a list like: [{"type": "audio", "format": "PCMU", "channels": ["external"], "rate": 8000}]
media_list = self._media_info
if media_list and isinstance(media_list, list) and len(media_list) > 0:
- audio_media: Dict[str, Any] = media_list[0] # Get first media entry
+ audio_media: dict[str, Any] = media_list[0] # Get first media entry
channels = audio_media.get("channels", [])
logger.debug(
f"📡 Genesys audio config: format={audio_media.get('format')}, channels={channels}, rate={audio_media.get('rate')}"
@@ -815,7 +815,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
)
)
- async def _handle_close(self, message: Dict[str, Any]) -> Frame | None:
+ async def _handle_close(self, message: dict[str, Any]) -> Frame | None:
"""Handle a 'close' message from Genesys.
Automatically responds with a 'closed' message. If output_variables
@@ -846,7 +846,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
message=self.create_closed_response(output_variables=self._output_variables)
)
- async def _handle_ping(self, message: Dict[str, Any]) -> Frame | None:
+ async def _handle_ping(self, message: dict[str, Any]) -> Frame | None:
"""Handle a 'ping' message from Genesys.
Automatically responds with a 'pong' message to maintain the connection.
@@ -864,7 +864,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
# Return as urgent frame to be sent through pipeline immediately
return OutputTransportMessageUrgentFrame(message=self.create_pong_response())
- async def _handle_pause(self, message: Dict[str, Any]) -> Frame | None:
+ async def _handle_pause(self, message: dict[str, Any]) -> Frame | None:
"""Handle a 'pause' message from Genesys.
This is used when audio streaming is temporarily suspended
@@ -888,7 +888,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
# Note: Application should call create_resumed_response() when ready
return None
- async def _handle_update(self, message: Dict[str, Any]) -> Frame | None:
+ async def _handle_update(self, message: dict[str, Any]) -> Frame | None:
"""Handle an 'update' message from Genesys.
Updates may include changes to participants or configuration.
@@ -910,7 +910,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
return None
- async def _handle_error(self, message: Dict[str, Any]) -> Frame | None:
+ async def _handle_error(self, message: dict[str, Any]) -> Frame | None:
"""Handle an 'error' message from Genesys.
Args:
@@ -929,7 +929,7 @@ class GenesysAudioHookSerializer(FrameSerializer):
return None
- async def _handle_dtmf(self, message: Dict[str, Any]) -> Frame | None:
+ async def _handle_dtmf(self, message: dict[str, Any]) -> Frame | None:
"""Handle a 'dtmf' message from Genesys.
DTMF (Dual-Tone Multi-Frequency) events are sent when the user
diff --git a/src/pipecat/serializers/plivo.py b/src/pipecat/serializers/plivo.py
index b6346d542..e86d18b8c 100644
--- a/src/pipecat/serializers/plivo.py
+++ b/src/pipecat/serializers/plivo.py
@@ -8,7 +8,6 @@
import base64
import json
-from typing import Optional
from loguru import logger
@@ -52,16 +51,16 @@ class PlivoFrameSerializer(FrameSerializer):
"""
plivo_sample_rate: int = 8000
- sample_rate: Optional[int] = None
+ sample_rate: int | None = None
auto_hang_up: bool = True
def __init__(
self,
stream_id: str,
- call_id: Optional[str] = None,
- auth_id: Optional[str] = None,
- auth_token: Optional[str] = None,
- params: Optional[InputParams] = None,
+ call_id: str | None = None,
+ auth_id: str | None = None,
+ auth_token: str | None = None,
+ params: InputParams | None = None,
):
"""Initialize the PlivoFrameSerializer.
diff --git a/src/pipecat/serializers/protobuf.py b/src/pipecat/serializers/protobuf.py
index 66f4d0daa..78d20fa24 100644
--- a/src/pipecat/serializers/protobuf.py
+++ b/src/pipecat/serializers/protobuf.py
@@ -8,7 +8,6 @@
import dataclasses
import json
-from typing import Optional
from loguru import logger
@@ -61,7 +60,7 @@ class ProtobufFrameSerializer(FrameSerializer):
}
DESERIALIZABLE_FIELDS = {v: k for k, v in DESERIALIZABLE_TYPES.items()}
- def __init__(self, params: Optional[FrameSerializer.InputParams] = None):
+ def __init__(self, params: FrameSerializer.InputParams | None = None):
"""Initialize the Protobuf frame serializer.
Args:
diff --git a/src/pipecat/serializers/telnyx.py b/src/pipecat/serializers/telnyx.py
index 1c0405ade..0d74664ab 100644
--- a/src/pipecat/serializers/telnyx.py
+++ b/src/pipecat/serializers/telnyx.py
@@ -8,7 +8,6 @@
import base64
import json
-from typing import Optional
import aiohttp
from loguru import logger
@@ -59,7 +58,7 @@ class TelnyxFrameSerializer(FrameSerializer):
"""
telnyx_sample_rate: int = 8000
- sample_rate: Optional[int] = None
+ sample_rate: int | None = None
inbound_encoding: str = "PCMU"
outbound_encoding: str = "PCMU"
auto_hang_up: bool = True
@@ -69,9 +68,9 @@ class TelnyxFrameSerializer(FrameSerializer):
stream_id: str,
outbound_encoding: str,
inbound_encoding: str,
- call_control_id: Optional[str] = None,
- api_key: Optional[str] = None,
- params: Optional[InputParams] = None,
+ call_control_id: str | None = None,
+ api_key: str | None = None,
+ params: InputParams | None = None,
):
"""Initialize the TelnyxFrameSerializer.
diff --git a/src/pipecat/serializers/twilio.py b/src/pipecat/serializers/twilio.py
index 4d4b5344a..857610b4e 100644
--- a/src/pipecat/serializers/twilio.py
+++ b/src/pipecat/serializers/twilio.py
@@ -8,7 +8,6 @@
import base64
import json
-from typing import Optional
from loguru import logger
@@ -52,18 +51,18 @@ class TwilioFrameSerializer(FrameSerializer):
"""
twilio_sample_rate: int = 8000
- sample_rate: Optional[int] = None
+ sample_rate: int | None = None
auto_hang_up: bool = True
def __init__(
self,
stream_sid: str,
- call_sid: Optional[str] = None,
- account_sid: Optional[str] = None,
- auth_token: Optional[str] = None,
- region: Optional[str] = None,
- edge: Optional[str] = None,
- params: Optional[InputParams] = None,
+ call_sid: str | None = None,
+ account_sid: str | None = None,
+ auth_token: str | None = None,
+ region: str | None = None,
+ edge: str | None = None,
+ params: InputParams | None = None,
):
"""Initialize the TwilioFrameSerializer.
diff --git a/src/pipecat/serializers/vonage.py b/src/pipecat/serializers/vonage.py
index c14ae4025..d778cf62c 100644
--- a/src/pipecat/serializers/vonage.py
+++ b/src/pipecat/serializers/vonage.py
@@ -7,7 +7,6 @@
"""Vonage Audio Connector WebSocket serializer for Pipecat."""
import json
-from typing import Optional
from loguru import logger
@@ -47,9 +46,9 @@ class VonageFrameSerializer(FrameSerializer):
"""
vonage_sample_rate: int = 16000
- sample_rate: Optional[int] = None
+ sample_rate: int | None = None
- def __init__(self, params: Optional[InputParams] = None):
+ def __init__(self, params: InputParams | None = None):
"""Initialize the VonageFrameSerializer.
Args:
diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py
index dd9ef1dba..5d914dd00 100644
--- a/src/pipecat/services/ai_service.py
+++ b/src/pipecat/services/ai_service.py
@@ -11,7 +11,8 @@ model management, settings handling, and frame processing lifecycle methods.
"""
import warnings
-from typing import Any, AsyncGenerator, Dict
+from collections.abc import AsyncGenerator
+from typing import Any
from loguru import logger
@@ -51,7 +52,7 @@ class AIService(FrameProcessor):
or ServiceSettings()
)
self._sync_model_name_to_metrics()
- self._session_properties: Dict[str, Any] = {}
+ self._session_properties: dict[str, Any] = {}
self._tracing_enabled: bool = False
self._tracing_context = None
@@ -104,7 +105,7 @@ class AIService(FrameProcessor):
"""
pass
- async def _update_settings(self, delta: ServiceSettings) -> Dict[str, Any]:
+ async def _update_settings(self, delta: ServiceSettings) -> dict[str, Any]:
"""Apply a settings delta and return the changed fields.
The delta is applied to ``_settings`` and a dict mapping each changed
diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py
index f51e7927e..03286bee3 100644
--- a/src/pipecat/services/anthropic/llm.py
+++ b/src/pipecat/services/anthropic/llm.py
@@ -14,7 +14,7 @@ import asyncio
import json
import re
from dataclasses import dataclass, field
-from typing import Any, Dict, Literal, Optional, Union
+from typing import Any, Literal, Optional, Union
import httpx
from loguru import logger
@@ -66,7 +66,7 @@ class AnthropicThinkingConfig(BaseModel):
# No client-side validation on budget_tokens — we let the server
# enforce the rules so we stay forward-compatible if they change.
- budget_tokens: Optional[int] = None
+ budget_tokens: int | None = None
@dataclass
@@ -133,26 +133,26 @@ class AnthropicLLMService(LLMService):
extra: Additional parameters to pass to the API.
"""
- enable_prompt_caching: Optional[bool] = None
- max_tokens: Optional[int] = Field(default_factory=lambda: 4096, ge=1)
- temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
- top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0)
- top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
+ enable_prompt_caching: bool | None = None
+ max_tokens: int | None = Field(default_factory=lambda: 4096, ge=1)
+ temperature: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
+ top_k: int | None = Field(default_factory=lambda: NOT_GIVEN, ge=0)
+ top_p: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
thinking: Optional["AnthropicLLMService.ThinkingConfig"] = Field(
default_factory=lambda: NOT_GIVEN
)
- extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
+ extra: dict[str, Any] | None = Field(default_factory=dict)
def __init__(
self,
*,
api_key: str,
- model: Optional[str] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
client=None,
- retry_timeout_secs: Optional[float] = 5.0,
- retry_on_timeout: Optional[bool] = False,
+ retry_timeout_secs: float | None = 5.0,
+ retry_on_timeout: bool | None = False,
**kwargs,
):
"""Initialize the Anthropic LLM service.
@@ -251,7 +251,7 @@ class AnthropicLLMService(LLMService):
api_call(**params), timeout=self._retry_timeout_secs
)
return response
- except (APITimeoutError, asyncio.TimeoutError):
+ except (TimeoutError, APITimeoutError):
# Retry, this time without a timeout so we get a response
logger.debug(f"{self}: Retrying message creation due to timeout")
response = await api_call(**params)
@@ -263,9 +263,9 @@ class AnthropicLLMService(LLMService):
async def run_inference(
self,
context: LLMContext,
- max_tokens: Optional[int] = None,
- system_instruction: Optional[str] = None,
- ) -> Optional[str]:
+ max_tokens: int | None = None,
+ system_instruction: str | None = None,
+ ) -> str | None:
"""Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
Args:
diff --git a/src/pipecat/services/assemblyai/models.py b/src/pipecat/services/assemblyai/models.py
index cffebcf06..dd223e2be 100644
--- a/src/pipecat/services/assemblyai/models.py
+++ b/src/pipecat/services/assemblyai/models.py
@@ -10,7 +10,7 @@ This module defines Pydantic models for handling AssemblyAI's real-time
transcription WebSocket messages and connection configuration.
"""
-from typing import List, Literal, Optional
+from typing import Literal
from loguru import logger
from pydantic import BaseModel, ConfigDict, Field, model_validator
@@ -85,10 +85,10 @@ class TurnMessage(BaseMessage):
end_of_turn: bool
transcript: str
end_of_turn_confidence: float
- words: List[Word]
- language_code: Optional[str] = None
- language_confidence: Optional[float] = None
- speaker: Optional[str] = Field(default=None, alias="speaker_label")
+ words: list[Word]
+ language_code: str | None = None
+ language_confidence: float | None = None
+ speaker: str | None = Field(default=None, alias="speaker_label")
class SpeechStartedMessage(BaseMessage):
@@ -158,19 +158,19 @@ class AssemblyAIConnectionParams(BaseModel):
sample_rate: int = 16000
encoding: Literal["pcm_s16le", "pcm_mulaw"] = "pcm_s16le"
- end_of_turn_confidence_threshold: Optional[float] = None
- min_turn_silence: Optional[int] = None
- min_end_of_turn_silence_when_confident: Optional[int] = None # Deprecated
- max_turn_silence: Optional[int] = None
- keyterms_prompt: Optional[List[str]] = None
- prompt: Optional[str] = None
+ end_of_turn_confidence_threshold: float | None = None
+ min_turn_silence: int | None = None
+ min_end_of_turn_silence_when_confident: int | None = None # Deprecated
+ max_turn_silence: int | None = None
+ keyterms_prompt: list[str] | None = None
+ prompt: str | None = None
speech_model: Literal[
"universal-streaming-english", "universal-streaming-multilingual", "u3-rt-pro"
] = "u3-rt-pro"
- language_detection: Optional[bool] = None
+ language_detection: bool | None = None
format_turns: bool = True
- speaker_labels: Optional[bool] = None
- vad_threshold: Optional[float] = None
+ speaker_labels: bool | None = None
+ vad_threshold: float | None = None
@model_validator(mode="after")
def handle_deprecated_param(self):
diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py
index 8b273188f..dbaa61c86 100644
--- a/src/pipecat/services/assemblyai/stt.py
+++ b/src/pipecat/services/assemblyai/stt.py
@@ -12,8 +12,9 @@ WebSocket API for streaming audio transcription.
import asyncio
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Dict, List, Optional
+from typing import Any
from urllib.parse import urlencode
from loguru import logger
@@ -114,7 +115,7 @@ class AssemblyAISTTSettings(STTSettings):
)
min_turn_silence: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
max_turn_silence: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- keyterms_prompt: List[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ keyterms_prompt: list[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
prompt: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
language_detection: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
format_turns: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -148,16 +149,16 @@ class AssemblyAISTTService(WebsocketSTTService):
self,
*,
api_key: str,
- language: Optional[Language] = None,
+ language: Language | None = None,
api_endpoint_base_url: str = "wss://streaming.assemblyai.com/v3/ws",
sample_rate: int = 16000,
encoding: str = "pcm_s16le",
- connection_params: Optional[AssemblyAIConnectionParams] = None,
+ connection_params: AssemblyAIConnectionParams | None = None,
vad_force_turn_endpoint: bool = True,
should_interrupt: bool = True,
- speaker_format: Optional[str] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = ASSEMBLYAI_TTFS_P99,
+ speaker_format: str | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = ASSEMBLYAI_TTFS_P99,
**kwargs,
):
"""Initialize the AssemblyAI STT service.
@@ -543,7 +544,7 @@ class AssemblyAISTTService(WebsocketSTTService):
try:
await asyncio.wait_for(self._termination_event.wait(), timeout=5.0)
- except asyncio.TimeoutError:
+ except TimeoutError:
logger.warning("Timed out waiting for termination message from server")
except Exception as e:
@@ -625,7 +626,7 @@ class AssemblyAISTTService(WebsocketSTTService):
except json.JSONDecodeError:
logger.warning(f"Received non-JSON message: {message}")
- def _parse_message(self, message: Dict[str, Any]) -> BaseMessage:
+ def _parse_message(self, message: dict[str, Any]) -> BaseMessage:
"""Parse a raw message into the appropriate message type."""
msg_type = message.get("type")
@@ -640,7 +641,7 @@ class AssemblyAISTTService(WebsocketSTTService):
else:
raise ValueError(f"Unknown message type: {msg_type}")
- async def _handle_message(self, message: Dict[str, Any]):
+ async def _handle_message(self, message: dict[str, Any]):
"""Handle AssemblyAI WebSocket messages."""
try:
parsed_message = self._parse_message(message)
diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py
index 0f140281d..3d8750f0c 100644
--- a/src/pipecat/services/asyncai/tts.py
+++ b/src/pipecat/services/asyncai/tts.py
@@ -9,8 +9,9 @@
import asyncio
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
import aiohttp
from loguru import logger
@@ -41,7 +42,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_async_language(language: Language) -> Optional[str]:
+def language_to_async_language(language: Language) -> str | None:
"""Convert a Language enum to Async language code.
Args:
@@ -98,23 +99,23 @@ class AsyncAITTSService(WebsocketTTSService):
language: Language to use for synthesis.
"""
- language: Optional[Language] = None
+ language: Language | None = None
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
version: str = "v1",
url: str = "wss://api.async.com/text_to_speech/websocket/ws",
- model: Optional[str] = None,
- sample_rate: Optional[int] = None,
+ model: str | None = None,
+ sample_rate: int | None = None,
encoding: str = "pcm_s16le",
container: str = "raw",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- aggregate_sentences: Optional[bool] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ aggregate_sentences: bool | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
**kwargs,
):
"""Initialize the Async TTS service.
@@ -222,7 +223,7 @@ class AsyncAITTSService(WebsocketTTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Async language format.
Args:
@@ -339,7 +340,7 @@ class AsyncAITTSService(WebsocketTTSService):
return self._websocket
raise Exception("Websocket not connected")
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio.
Args:
@@ -499,22 +500,22 @@ class AsyncAIHttpTTSService(TTSService):
language: Language to use for synthesis.
"""
- language: Optional[Language] = None
+ language: Language | None = None
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
aiohttp_session: aiohttp.ClientSession,
- model: Optional[str] = None,
+ model: str | None = None,
url: str = "https://api.async.com",
version: str = "v1",
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
encoding: str = "pcm_s16le",
container: str = "raw",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Async TTS service.
@@ -598,7 +599,7 @@ class AsyncAIHttpTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Async language format.
Args:
diff --git a/src/pipecat/services/aws/agent_core.py b/src/pipecat/services/aws/agent_core.py
index d66af9c5b..2f1560b7c 100644
--- a/src/pipecat/services/aws/agent_core.py
+++ b/src/pipecat/services/aws/agent_core.py
@@ -13,7 +13,7 @@ Amazon Bedrock AgentCore Runtime and streams their responses as LLMTextFrames.
import asyncio
import json
import os
-from typing import Callable, Optional
+from collections.abc import Callable
import aioboto3
from loguru import logger
@@ -31,7 +31,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
def default_context_to_payload_transformer(
context: LLMContext,
-) -> Optional[str]:
+) -> str | None:
"""Default transformer to create AgentCore payload from LLM context.
Extracts the latest user or system message text and wraps it in {"prompt": ""}.
@@ -68,7 +68,7 @@ def default_context_to_payload_transformer(
return json.dumps({"prompt": prompt})
-def default_response_to_output_transformer(response_line: str) -> Optional[str]:
+def default_response_to_output_transformer(response_line: str) -> str | None:
"""Default transformer to extract output text from AgentCore response.
Expects responses with {"response": ""} format.
@@ -110,12 +110,12 @@ class AWSAgentCoreProcessor(FrameProcessor):
def __init__(
self,
agentArn: str,
- aws_access_key: Optional[str] = None,
- aws_secret_key: Optional[str] = None,
- aws_session_token: Optional[str] = None,
- aws_region: Optional[str] = None,
- context_to_payload_transformer: Optional[Callable[[LLMContext], Optional[str]]] = None,
- response_to_output_transformer: Optional[Callable[[str], Optional[str]]] = None,
+ aws_access_key: str | None = None,
+ aws_secret_key: str | None = None,
+ aws_session_token: str | None = None,
+ aws_region: str | None = None,
+ context_to_payload_transformer: Callable[[LLMContext], str | None] | None = None,
+ response_to_output_transformer: Callable[[str], str | None] | None = None,
**kwargs,
):
"""Initialize the AWS AgentCore processor.
@@ -157,8 +157,8 @@ class AWSAgentCoreProcessor(FrameProcessor):
# State for managing output response bookends
self._output_response_open = False
- self._last_text_frame_time: Optional[float] = None
- self._close_task: Optional[asyncio.Task] = None
+ self._last_text_frame_time: float | None = None
+ self._close_task: asyncio.Task | None = None
self._output_response_timeout = 1.0 # seconds
async def _close_output_response_after_timeout(self):
diff --git a/src/pipecat/services/aws/llm.py b/src/pipecat/services/aws/llm.py
index ef726e39b..b146bd11a 100644
--- a/src/pipecat/services/aws/llm.py
+++ b/src/pipecat/services/aws/llm.py
@@ -16,7 +16,7 @@ import json
import os
import re
from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
+from typing import Any
from loguru import logger
from pydantic import BaseModel, Field
@@ -66,10 +66,10 @@ class AWSBedrockLLMSettings(LLMSettings):
additional_model_request_fields: Additional model-specific parameters.
"""
- stop_sequences: List[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ stop_sequences: list[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
latency: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
enable_prompt_caching: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- additional_model_request_fields: Dict[str, Any] | _NotGiven = field(
+ additional_model_request_fields: dict[str, Any] | _NotGiven = field(
default_factory=lambda: NOT_GIVEN
)
@@ -104,27 +104,27 @@ class AWSBedrockLLMService(LLMService):
additional_model_request_fields: Additional model-specific parameters.
"""
- max_tokens: Optional[int] = Field(default=None, ge=1)
- temperature: Optional[float] = Field(default=None, ge=0.0, le=1.0)
- top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0)
- stop_sequences: Optional[List[str]] = Field(default_factory=lambda: [])
- latency: Optional[str] = Field(default=None)
- additional_model_request_fields: Optional[Dict[str, Any]] = Field(default_factory=dict)
+ max_tokens: int | None = Field(default=None, ge=1)
+ temperature: float | None = Field(default=None, ge=0.0, le=1.0)
+ top_p: float | None = Field(default=None, ge=0.0, le=1.0)
+ stop_sequences: list[str] | None = Field(default_factory=lambda: [])
+ latency: str | None = Field(default=None)
+ additional_model_request_fields: dict[str, Any] | None = Field(default_factory=dict)
def __init__(
self,
*,
- model: Optional[str] = None,
- aws_access_key: Optional[str] = None,
- aws_secret_key: Optional[str] = None,
- aws_session_token: Optional[str] = None,
- aws_region: Optional[str] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- stop_sequences: Optional[List[str]] = None,
- client_config: Optional[Config] = None,
- retry_timeout_secs: Optional[float] = 5.0,
- retry_on_timeout: Optional[bool] = False,
+ model: str | None = None,
+ aws_access_key: str | None = None,
+ aws_secret_key: str | None = None,
+ aws_session_token: str | None = None,
+ aws_region: str | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ stop_sequences: list[str] | None = None,
+ client_config: Config | None = None,
+ retry_timeout_secs: float | None = 5.0,
+ retry_on_timeout: bool | None = False,
**kwargs,
):
"""Initialize the AWS Bedrock LLM service.
@@ -239,7 +239,7 @@ class AWSBedrockLLMService(LLMService):
"""
return True
- def _build_inference_config(self) -> Dict[str, Any]:
+ def _build_inference_config(self) -> dict[str, Any]:
"""Build inference config with only the parameters that are set.
This prevents conflicts with models (e.g., Claude Sonnet 4.5) that don't
@@ -262,9 +262,9 @@ class AWSBedrockLLMService(LLMService):
async def run_inference(
self,
context: LLMContext,
- max_tokens: Optional[int] = None,
- system_instruction: Optional[str] = None,
- ) -> Optional[str]:
+ max_tokens: int | None = None,
+ system_instruction: str | None = None,
+ ) -> str | None:
"""Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
Args:
@@ -344,7 +344,7 @@ class AWSBedrockLLMService(LLMService):
client.converse_stream(**request_params), timeout=self._retry_timeout_secs
)
return response
- except (ReadTimeoutError, asyncio.TimeoutError) as e:
+ except (TimeoutError, ReadTimeoutError) as e:
# Retry, this time without a timeout so we get a response
logger.debug(f"{self}: Retrying converse_stream due to timeout")
response = await client.converse_stream(**request_params)
@@ -553,7 +553,7 @@ class AWSBedrockLLMService(LLMService):
# also get cancelled.
use_completion_tokens_estimate = True
raise
- except (ReadTimeoutError, asyncio.TimeoutError):
+ except (TimeoutError, ReadTimeoutError):
await self._call_event_handler("on_completion_timeout")
except Exception as e:
await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e)
diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py
index fd35375c2..a99de2d8a 100644
--- a/src/pipecat/services/aws/nova_sonic/llm.py
+++ b/src/pipecat/services/aws/nova_sonic/llm.py
@@ -19,7 +19,7 @@ import wave
from dataclasses import dataclass, field
from enum import Enum
from importlib.resources import files
-from typing import Any, List, Optional
+from typing import Any
from loguru import logger
from pydantic import BaseModel, Field
@@ -163,22 +163,22 @@ class Params(BaseModel):
"""
# Audio input
- input_sample_rate: Optional[int] = Field(default=16000)
- input_sample_size: Optional[int] = Field(default=16)
- input_channel_count: Optional[int] = Field(default=1)
+ input_sample_rate: int | None = Field(default=16000)
+ input_sample_size: int | None = Field(default=16)
+ input_channel_count: int | None = Field(default=1)
# Audio output
- output_sample_rate: Optional[int] = Field(default=24000)
- output_sample_size: Optional[int] = Field(default=16)
- output_channel_count: Optional[int] = Field(default=1)
+ output_sample_rate: int | None = Field(default=24000)
+ output_sample_size: int | None = Field(default=16)
+ output_channel_count: int | None = Field(default=1)
# Inference
- max_tokens: Optional[int] = Field(default=1024)
- top_p: Optional[float] = Field(default=0.9)
- temperature: Optional[float] = Field(default=0.7)
+ max_tokens: int | None = Field(default=1024)
+ top_p: float | None = Field(default=0.9)
+ temperature: float | None = Field(default=0.7)
# Turn-taking
- endpointing_sensitivity: Optional[str] = Field(default=None)
+ endpointing_sensitivity: str | None = Field(default=None)
@property
def audio_config(self) -> "AudioConfig":
@@ -206,14 +206,14 @@ class AudioConfig(BaseModel):
"""
# Input
- input_sample_rate: Optional[int] = Field(default=16000)
- input_sample_size: Optional[int] = Field(default=16)
- input_channel_count: Optional[int] = Field(default=1)
+ input_sample_rate: int | None = Field(default=16000)
+ input_sample_size: int | None = Field(default=16)
+ input_channel_count: int | None = Field(default=1)
# Output
- output_sample_rate: Optional[int] = Field(default=24000)
- output_sample_size: Optional[int] = Field(default=16)
- output_channel_count: Optional[int] = Field(default=1)
+ output_sample_rate: int | None = Field(default=24000)
+ output_sample_size: int | None = Field(default=16)
+ output_channel_count: int | None = Field(default=1)
@dataclass
@@ -248,15 +248,15 @@ class AWSNovaSonicLLMService(LLMService):
*,
secret_access_key: str,
access_key_id: str,
- session_token: Optional[str] = None,
+ session_token: str | None = None,
region: str,
model: str = "amazon.nova-2-sonic-v1:0",
voice_id: str = "matthew",
- params: Optional[Params] = None,
- audio_config: Optional[AudioConfig] = None,
- settings: Optional[Settings] = None,
- system_instruction: Optional[str] = None,
- tools: Optional[ToolsSchema] = None,
+ params: Params | None = None,
+ audio_config: AudioConfig | None = None,
+ settings: Settings | None = None,
+ system_instruction: str | None = None,
+ tools: ToolsSchema | None = None,
**kwargs,
):
"""Initializes the AWS Nova Sonic LLM service.
@@ -363,7 +363,7 @@ class AWSNovaSonicLLMService(LLMService):
self._access_key_id = access_key_id
self._session_token = session_token
self._region = region
- self._client: Optional[BedrockRuntimeClient] = None
+ self._client: BedrockRuntimeClient | None = None
# Audio I/O config (hardware settings, not runtime-tunable)
# Priority: audio_config > params (deprecated) > defaults
@@ -383,29 +383,30 @@ class AWSNovaSonicLLMService(LLMService):
)
self._settings.endpointing_sensitivity = None
- self._context: Optional[LLMContext] = None
- self._stream: Optional[
+ self._context: LLMContext | None = None
+ self._stream: (
DuplexEventStream[
InvokeModelWithBidirectionalStreamInput,
InvokeModelWithBidirectionalStreamOutput,
InvokeModelWithBidirectionalStreamOperationOutput,
]
- ] = None
- self._receive_task: Optional[asyncio.Task] = None
- self._prompt_name: Optional[str] = None
- self._input_audio_content_name: Optional[str] = None
- self._content_being_received: Optional[CurrentContent] = None
+ | None
+ ) = None
+ self._receive_task: asyncio.Task | None = None
+ self._prompt_name: str | None = None
+ self._input_audio_content_name: str | None = None
+ self._content_being_received: CurrentContent | None = None
self._assistant_is_responding = False
self._ready_to_send_context = False
self._triggering_assistant_response = False
self._waiting_for_trigger_transcription = False
self._disconnecting = False
- self._connected_time: Optional[float] = None
+ self._connected_time: float | None = None
self._wants_connection = False
self._user_text_buffer = ""
self._completed_tool_calls = set()
self._audio_input_started = False
- self._pending_speculative_text: Optional[str] = None
+ self._pending_speculative_text: str | None = None
file_path = files("pipecat.services.aws.nova_sonic").joinpath("ready.wav")
with wave.open(file_path.open("rb"), "rb") as wav_file:
@@ -762,7 +763,7 @@ class AWSNovaSonicLLMService(LLMService):
"""
await self._send_client_event(session_start)
- async def _send_prompt_start_event(self, tools: List[Any]):
+ async def _send_prompt_start_event(self, tools: list[Any]):
if not self._prompt_name:
return
diff --git a/src/pipecat/services/aws/sagemaker/bidi_client.py b/src/pipecat/services/aws/sagemaker/bidi_client.py
index 10382acae..8d7bdeaa1 100644
--- a/src/pipecat/services/aws/sagemaker/bidi_client.py
+++ b/src/pipecat/services/aws/sagemaker/bidi_client.py
@@ -12,7 +12,6 @@ and JSON data to SageMaker model endpoints and receiving streaming responses.
"""
import os
-from typing import Optional
from loguru import logger
@@ -80,10 +79,10 @@ class SageMakerBidiClient:
self.model_invocation_path = model_invocation_path
self.model_query_string = model_query_string
self.bidi_endpoint = f"https://runtime.sagemaker.{region}.amazonaws.com:8443"
- self._client: Optional[SageMakerRuntimeHTTP2Client] = None
- self._stream: Optional[
- DuplexEventStream[RequestStreamEventPayloadPart, ResponseStreamEvent, any]
- ] = None
+ self._client: SageMakerRuntimeHTTP2Client | None = None
+ self._stream: (
+ DuplexEventStream[RequestStreamEventPayloadPart, ResponseStreamEvent, any] | None
+ ) = None
self._output_stream = None
self._is_active = False
@@ -161,7 +160,7 @@ class SageMakerBidiClient:
self._is_active = False
raise RuntimeError(f"Failed to start SageMaker BiDi session: {e}")
- async def send_data(self, data_bytes: bytes, data_type: Optional[str] = None):
+ async def send_data(self, data_bytes: bytes, data_type: str | None = None):
"""Send a chunk of data to the stream.
Generic method for sending any type of data to the SageMaker endpoint.
@@ -232,7 +231,7 @@ class SageMakerBidiClient:
await self.send_data(json.dumps(data).encode("utf-8"), data_type="UTF8")
- async def receive_response(self) -> Optional[ResponseStreamEvent]:
+ async def receive_response(self) -> ResponseStreamEvent | None:
"""Receive a response from the stream.
Blocks until a response is available from the SageMaker endpoint. Returns
diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py
index ace05090d..2c791bc97 100644
--- a/src/pipecat/services/aws/stt.py
+++ b/src/pipecat/services/aws/stt.py
@@ -14,8 +14,9 @@ import json
import os
import random
import string
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
@@ -66,14 +67,14 @@ class AWSTranscribeSTTService(WebsocketSTTService):
def __init__(
self,
*,
- api_key: Optional[str] = None,
- aws_access_key_id: Optional[str] = None,
- aws_session_token: Optional[str] = None,
- region: Optional[str] = None,
- sample_rate: Optional[int] = None,
- language: Optional[Language] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = AWS_TRANSCRIBE_TTFS_P99,
+ api_key: str | None = None,
+ aws_access_key_id: str | None = None,
+ aws_session_token: str | None = None,
+ region: str | None = None,
+ sample_rate: int | None = None,
+ language: Language | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = AWS_TRANSCRIBE_TTFS_P99,
**kwargs,
):
"""Initialize the AWS Transcribe STT service.
@@ -496,7 +497,7 @@ class AWSTranscribeSTTService(WebsocketSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[str] = None
+ self, transcript: str, is_final: bool, language: str | None = None
):
pass
diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py
index f46539b9b..93919c860 100644
--- a/src/pipecat/services/aws/tts.py
+++ b/src/pipecat/services/aws/tts.py
@@ -11,8 +11,8 @@ supporting multiple languages, voices, and SSML features.
"""
import os
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import AsyncGenerator, List, Optional
from loguru import logger
from pydantic import BaseModel
@@ -37,7 +37,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_aws_language(language: Language) -> Optional[str]:
+def language_to_aws_language(language: Language) -> str | None:
"""Convert a Language enum to AWS Polly language code.
Args:
@@ -137,7 +137,7 @@ class AWSPollyTTSSettings(TTSSettings):
pitch: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
volume: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- lexicon_names: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ lexicon_names: list[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
class AWSPollyTTSService(TTSService):
@@ -166,24 +166,24 @@ class AWSPollyTTSService(TTSService):
lexicon_names: List of pronunciation lexicons to apply.
"""
- engine: Optional[str] = None
- language: Optional[Language] = Language.EN
- pitch: Optional[str] = None
- rate: Optional[str] = None
- volume: Optional[str] = None
- lexicon_names: Optional[List[str]] = None
+ engine: str | None = None
+ language: Language | None = Language.EN
+ pitch: str | None = None
+ rate: str | None = None
+ volume: str | None = None
+ lexicon_names: list[str] | None = None
def __init__(
self,
*,
- api_key: Optional[str] = None,
- aws_access_key_id: Optional[str] = None,
- aws_session_token: Optional[str] = None,
- region: Optional[str] = None,
- voice_id: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ api_key: str | None = None,
+ aws_access_key_id: str | None = None,
+ aws_session_token: str | None = None,
+ region: str | None = None,
+ voice_id: str | None = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initializes the AWS Polly TTS service.
@@ -268,7 +268,7 @@ class AWSPollyTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to AWS Polly language format.
Args:
diff --git a/src/pipecat/services/aws/utils.py b/src/pipecat/services/aws/utils.py
index 4a4fc6db5..2b69cf035 100644
--- a/src/pipecat/services/aws/utils.py
+++ b/src/pipecat/services/aws/utils.py
@@ -17,21 +17,20 @@ import hmac
import json
import struct
import urllib.parse
-from typing import Dict, Optional
def get_presigned_url(
*,
region: str,
- credentials: Dict[str, Optional[str]],
+ credentials: dict[str, str | None],
language_code: str,
media_encoding: str = "pcm",
sample_rate: int = 16000,
number_of_channels: int = 1,
enable_partial_results_stabilization: bool = True,
partial_results_stability: str = "high",
- vocabulary_name: Optional[str] = None,
- vocabulary_filter_name: Optional[str] = None,
+ vocabulary_name: str | None = None,
+ vocabulary_filter_name: str | None = None,
show_speaker_label: bool = False,
enable_channel_identification: bool = False,
) -> str:
@@ -199,7 +198,7 @@ class AWSTranscribePresignedURL:
self.canonical_querystring += "&vocabulary-name=" + vocabulary_name
# Create payload hash
- self.payload_hash = hashlib.sha256("".encode("utf-8")).hexdigest()
+ self.payload_hash = hashlib.sha256(b"").hexdigest()
# Create canonical request
self.canonical_request = f"{self.method}\n{self.canonical_uri}\n{self.canonical_querystring}\n{self.canonical_headers}\n{self.signed_headers}\n{self.payload_hash}"
@@ -213,7 +212,7 @@ class AWSTranscribePresignedURL:
# Calculate signature
k_date = hmac.new(
- f"AWS4{self.secret_key}".encode("utf-8"), self.datestamp.encode("utf-8"), hashlib.sha256
+ f"AWS4{self.secret_key}".encode(), self.datestamp.encode("utf-8"), hashlib.sha256
).digest()
k_region = hmac.new(k_date, self.region.encode("utf-8"), hashlib.sha256).digest()
k_service = hmac.new(k_region, self.service.encode("utf-8"), hashlib.sha256).digest()
diff --git a/src/pipecat/services/azure/common.py b/src/pipecat/services/azure/common.py
index dc7aaa359..8bb48cd04 100644
--- a/src/pipecat/services/azure/common.py
+++ b/src/pipecat/services/azure/common.py
@@ -6,12 +6,10 @@
"""Language conversion utilities for Azure services."""
-from typing import Optional
-
from pipecat.transcriptions.language import Language, resolve_language
-def language_to_azure_language(language: Language) -> Optional[str]:
+def language_to_azure_language(language: Language) -> str | None:
"""Convert a Language enum to Azure language code.
Args:
diff --git a/src/pipecat/services/azure/image.py b/src/pipecat/services/azure/image.py
index fc50d710a..b7fa732a3 100644
--- a/src/pipecat/services/azure/image.py
+++ b/src/pipecat/services/azure/image.py
@@ -12,8 +12,8 @@ using REST endpoints for creating images from text prompts.
import asyncio
import io
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import AsyncGenerator, Optional
import aiohttp
from PIL import Image
@@ -49,13 +49,13 @@ class AzureImageGenServiceREST(ImageGenService):
def __init__(
self,
*,
- image_size: Optional[str] = None,
+ image_size: str | None = None,
api_key: str,
endpoint: str,
- model: Optional[str] = None,
+ model: str | None = None,
aiohttp_session: aiohttp.ClientSession,
api_version="2023-06-01-preview",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
):
"""Initialize the AzureImageGenServiceREST.
diff --git a/src/pipecat/services/azure/llm.py b/src/pipecat/services/azure/llm.py
index 8b5050e5b..f542dfe86 100644
--- a/src/pipecat/services/azure/llm.py
+++ b/src/pipecat/services/azure/llm.py
@@ -7,7 +7,6 @@
"""Azure OpenAI service implementation for the Pipecat AI framework."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
from openai import AsyncAzureOpenAI
@@ -37,9 +36,9 @@ class AzureLLMService(OpenAILLMService):
*,
api_key: str,
endpoint: str,
- model: Optional[str] = None,
+ model: str | None = None,
api_version: str = "2024-09-01-preview",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Azure LLM service.
diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py
index 57306e06a..9b2247793 100644
--- a/src/pipecat/services/azure/stt.py
+++ b/src/pipecat/services/azure/stt.py
@@ -11,8 +11,9 @@ Speech SDK for real-time audio transcription.
"""
import asyncio
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
@@ -73,13 +74,13 @@ class AzureSTTService(STTService):
self,
*,
api_key: str,
- region: Optional[str] = None,
- language: Optional[Language] = Language.EN_US,
- sample_rate: Optional[int] = None,
- private_endpoint: Optional[str] = None,
- endpoint_id: Optional[str] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = AZURE_TTFS_P99,
+ region: str | None = None,
+ language: Language | None = Language.EN_US,
+ sample_rate: int | None = None,
+ private_endpoint: str | None = None,
+ endpoint_id: str | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = AZURE_TTFS_P99,
**kwargs,
):
"""Initialize the Azure STT service.
@@ -165,7 +166,7 @@ class AzureSTTService(STTService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Azure service-specific language code.
Args:
@@ -272,7 +273,7 @@ class AzureSTTService(STTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
await self.stop_processing_metrics()
diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py
index 79dc8a2e1..dd54a1b41 100644
--- a/src/pipecat/services/azure/tts.py
+++ b/src/pipecat/services/azure/tts.py
@@ -7,8 +7,8 @@
"""Azure Cognitive Services Text-to-Speech service implementations."""
import asyncio
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import AsyncGenerator, Optional
from loguru import logger
from pydantic import BaseModel
@@ -127,14 +127,14 @@ class AzureBaseTTSService:
volume: Volume level (e.g., "+20%", "loud", "x-soft").
"""
- emphasis: Optional[str] = None
- language: Optional[Language] = Language.EN_US
- pitch: Optional[str] = None
- rate: Optional[str] = None
- role: Optional[str] = None
- style: Optional[str] = None
- style_degree: Optional[str] = None
- volume: Optional[str] = None
+ emphasis: str | None = None
+ language: Language | None = Language.EN_US
+ pitch: str | None = None
+ rate: str | None = None
+ role: str | None = None
+ style: str | None = None
+ style_degree: str | None = None
+ volume: str | None = None
def _init_azure_base(
self,
@@ -154,7 +154,7 @@ class AzureBaseTTSService:
self._region = region
self._speech_synthesizer = None
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Azure language format.
Args:
@@ -254,12 +254,12 @@ class AzureTTSService(TTSService, AzureBaseTTSService):
*,
api_key: str,
region: str,
- voice: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[AzureBaseTTSService.InputParams] = None,
- settings: Optional[Settings] = None,
- aggregate_sentences: Optional[bool] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
+ voice: str | None = None,
+ sample_rate: int | None = None,
+ params: AzureBaseTTSService.InputParams | None = None,
+ settings: Settings | None = None,
+ aggregate_sentences: bool | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
**kwargs,
):
"""Initialize the Azure streaming TTS service.
@@ -350,11 +350,9 @@ class AzureTTSService(TTSService, AzureBaseTTSService):
self._current_sentence_max_word_offset: float = (
0.0 # Max word boundary offset seen in current sentence (for 8kHz workaround)
)
- self._last_word: Optional[str] = None # Track last word for punctuation merging
- self._last_timestamp: Optional[float] = None # Track last timestamp
- self._current_context_id: Optional[str] = (
- None # Track current context_id for word timestamps
- )
+ self._last_word: str | None = None # Track last word for punctuation merging
+ self._last_timestamp: float | None = None # Track last timestamp
+ self._current_context_id: str | None = None # Track current context_id for word timestamps
def can_generate_metrics(self) -> bool:
"""Check if this service can generate processing metrics.
@@ -622,7 +620,7 @@ class AzureTTSService(TTSService, AzureBaseTTSService):
self._last_timestamp = None
self._current_context_id = None
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio data."""
logger.trace(f"{self}: flushing audio")
@@ -753,10 +751,10 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService):
*,
api_key: str,
region: str,
- voice: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[AzureBaseTTSService.InputParams] = None,
- settings: Optional[Settings] = None,
+ voice: str | None = None,
+ sample_rate: int | None = None,
+ params: AzureBaseTTSService.InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Azure HTTP TTS service.
diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py
index b6b83a928..f2bc094b9 100644
--- a/src/pipecat/services/camb/tts.py
+++ b/src/pipecat/services/camb/tts.py
@@ -16,8 +16,9 @@ Features:
- Model-specific sample rates: mars-pro (48kHz), mars-flash (22.05kHz)
"""
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Dict, Optional
+from typing import Any
from camb import StreamTtsOutputConfiguration
from camb.client import AsyncCambAI
@@ -36,14 +37,14 @@ from pipecat.transcriptions.language import Language, resolve_language
from pipecat.utils.tracing.service_decorators import traced_tts
# Model-specific sample rates
-MODEL_SAMPLE_RATES: Dict[str, int] = {
+MODEL_SAMPLE_RATES: dict[str, int] = {
"mars-flash": 22050, # 22.05kHz
"mars-pro": 48000, # 48kHz
"mars-instruct": 22050, # 22.05kHz
}
-def language_to_camb_language(language: Language) -> Optional[str]:
+def language_to_camb_language(language: Language) -> str | None:
"""Convert a Pipecat Language enum to Camb.ai language code.
Args:
@@ -193,8 +194,8 @@ class CambTTSService(TTSService):
Ignored for other models. Max 1000 characters.
"""
- language: Optional[Language] = Language.EN
- user_instructions: Optional[str] = Field(
+ language: Language | None = Language.EN
+ user_instructions: str | None = Field(
default=None,
max_length=1000,
description="Custom instructions for mars-instruct model only. "
@@ -205,12 +206,12 @@ class CambTTSService(TTSService):
self,
*,
api_key: str,
- voice_id: Optional[int] = None,
- model: Optional[str] = None,
+ voice_id: int | None = None,
+ model: str | None = None,
timeout: float = 60.0,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Camb.ai TTS service.
@@ -297,7 +298,7 @@ class CambTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Camb.ai language format.
Args:
@@ -342,7 +343,7 @@ class CambTTSService(TTSService):
try:
# Build SDK parameters
- tts_kwargs: Dict[str, Any] = {
+ tts_kwargs: dict[str, Any] = {
"text": text,
"voice_id": self._settings.voice,
"language": self._settings.language,
diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py
index 2606e0d6d..8e66eb965 100644
--- a/src/pipecat/services/cartesia/stt.py
+++ b/src/pipecat/services/cartesia/stt.py
@@ -12,8 +12,9 @@ the Cartesia Live transcription API for real-time speech recognition.
import json
import urllib.parse
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
@@ -155,10 +156,10 @@ class CartesiaSTTService(WebsocketSTTService):
api_key: str,
base_url: str = "",
encoding: str = "pcm_s16le",
- sample_rate: Optional[int] = None,
- live_options: Optional[CartesiaLiveOptions] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = CARTESIA_TTFS_P99,
+ sample_rate: int | None = None,
+ live_options: CartesiaLiveOptions | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = CARTESIA_TTFS_P99,
**kwargs,
):
"""Initialize CartesiaSTTService with API key and options.
@@ -389,7 +390,7 @@ class CartesiaSTTService(WebsocketSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py
index e8e033ee9..222939104 100644
--- a/src/pipecat/services/cartesia/tts.py
+++ b/src/pipecat/services/cartesia/tts.py
@@ -8,9 +8,10 @@
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, AsyncGenerator, List, Optional
+from enum import StrEnum
+from typing import Any
import aiohttp
from loguru import logger
@@ -56,12 +57,12 @@ class GenerationConfig(BaseModel):
and Marian.
"""
- volume: Optional[float] = None
- speed: Optional[float] = None
- emotion: Optional[str] = None
+ volume: float | None = None
+ speed: float | None = None
+ emotion: str | None = None
-def language_to_cartesia_language(language: Language) -> Optional[str]:
+def language_to_cartesia_language(language: Language) -> str | None:
"""Convert a Language enum to Cartesia language code.
Args:
@@ -118,7 +119,7 @@ def language_to_cartesia_language(language: Language) -> Optional[str]:
return resolve_language(language, LANGUAGE_MAP, use_base_code=True)
-class CartesiaEmotion(str, Enum):
+class CartesiaEmotion(StrEnum):
"""Predefined Emotions supported by Cartesia."""
# Primary emotions supported by Cartesia
@@ -222,25 +223,25 @@ class CartesiaTTSService(WebsocketTTSService):
pronunciation_dict_id: The ID of the pronunciation dictionary to use for custom pronunciations.
"""
- language: Optional[Language] = Language.EN
- generation_config: Optional[GenerationConfig] = None
- pronunciation_dict_id: Optional[str] = None
+ language: Language | None = Language.EN
+ generation_config: GenerationConfig | None = None
+ pronunciation_dict_id: str | None = None
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
cartesia_version: str = "2025-04-16",
url: str = "wss://api.cartesia.ai/tts/websocket",
- model: Optional[str] = None,
- sample_rate: Optional[int] = None,
+ model: str | None = None,
+ sample_rate: int | None = None,
encoding: str = "pcm_s16le",
container: str = "raw",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
- aggregate_sentences: Optional[bool] = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
+ aggregate_sentences: bool | None = None,
**kwargs,
):
"""Initialize the Cartesia TTS service.
@@ -362,7 +363,7 @@ class CartesiaTTSService(WebsocketTTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Cartesia language format.
Args:
@@ -408,8 +409,8 @@ class CartesiaTTSService(WebsocketTTSService):
return base_lang in cjk_languages
def _process_word_timestamps_for_language(
- self, words: List[str], starts: List[float]
- ) -> List[tuple[str, float]]:
+ self, words: list[str], starts: list[float]
+ ) -> list[tuple[str, float]]:
"""Process word timestamps based on the current language.
For CJK languages, Cartesia groups related characters in the same timestamp message.
@@ -576,7 +577,7 @@ class CartesiaTTSService(WebsocketTTSService):
"""
await super().on_audio_context_completed(context_id)
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio and finalize the current context.
Args:
@@ -715,24 +716,24 @@ class CartesiaHttpTTSService(TTSService):
pronunciation_dict_id: The ID of the pronunciation dictionary to use for custom pronunciations.
"""
- language: Optional[Language] = Language.EN
- generation_config: Optional[GenerationConfig] = None
- pronunciation_dict_id: Optional[str] = None
+ language: Language | None = Language.EN
+ generation_config: GenerationConfig | None = None
+ pronunciation_dict_id: str | None = None
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
- model: Optional[str] = None,
+ voice_id: str | None = None,
+ model: str | None = None,
base_url: str = "https://api.cartesia.ai",
cartesia_version: str = "2026-03-01",
- aiohttp_session: Optional[aiohttp.ClientSession] = None,
- sample_rate: Optional[int] = None,
+ aiohttp_session: aiohttp.ClientSession | None = None,
+ sample_rate: int | None = None,
encoding: str = "pcm_s16le",
container: str = "raw",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Cartesia HTTP TTS service.
@@ -825,7 +826,7 @@ class CartesiaHttpTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Cartesia language format.
Args:
diff --git a/src/pipecat/services/cerebras/llm.py b/src/pipecat/services/cerebras/llm.py
index 5b883ecd3..0476cc120 100644
--- a/src/pipecat/services/cerebras/llm.py
+++ b/src/pipecat/services/cerebras/llm.py
@@ -7,7 +7,6 @@
"""Cerebras LLM service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -42,8 +41,8 @@ class CerebrasLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.cerebras.ai/v1",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Cerebras LLM service.
diff --git a/src/pipecat/services/deepgram/flux/base.py b/src/pipecat/services/deepgram/flux/base.py
index cc58ad477..baefbd060 100644
--- a/src/pipecat/services/deepgram/flux/base.py
+++ b/src/pipecat/services/deepgram/flux/base.py
@@ -10,8 +10,8 @@ import asyncio
import time
from abc import abstractmethod
from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, Dict, Optional
+from enum import StrEnum
+from typing import Any
from urllib.parse import urlencode
from loguru import logger
@@ -32,7 +32,7 @@ from pipecat.utils.time import time_now_iso8601
from pipecat.utils.tracing.service_decorators import traced_stt
-class FluxMessageType(str, Enum):
+class FluxMessageType(StrEnum):
"""Deepgram Flux WebSocket message types.
These are the top-level message types that can be received from the
@@ -46,7 +46,7 @@ class FluxMessageType(str, Enum):
CONFIGURE_FAILURE = "ConfigureFailure"
-class FluxEventType(str, Enum):
+class FluxEventType(StrEnum):
"""Deepgram Flux TurnInfo event types.
These events are contained within TurnInfo messages and indicate
@@ -99,8 +99,8 @@ class DeepgramFluxSTTBase(STTService):
self,
*,
encoding: str = "linear16",
- mip_opt_out: Optional[bool] = None,
- tag: Optional[list] = None,
+ mip_opt_out: bool | None = None,
+ tag: list | None = None,
should_interrupt: bool = True,
settings: Settings,
**kwargs,
@@ -128,8 +128,8 @@ class DeepgramFluxSTTBase(STTService):
self._connection_established_event = asyncio.Event()
# Watchdog state — see _watchdog_task_handler for details
- self._last_stt_time: Optional[float] = None
- self._watchdog_task: Optional[asyncio.Task] = None
+ self._last_stt_time: float | None = None
+ self._watchdog_task: asyncio.Task | None = None
self._user_is_speaking = False
# Flux event handlers
@@ -340,7 +340,7 @@ class DeepgramFluxSTTBase(STTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
@@ -349,7 +349,7 @@ class DeepgramFluxSTTBase(STTService):
# Message handling
# ------------------------------------------------------------------
- def _validate_message(self, data: Dict[str, Any]) -> bool:
+ def _validate_message(self, data: dict[str, Any]) -> bool:
"""Validate basic message structure from Deepgram Flux.
Ensures the received message has the expected structure before processing.
@@ -370,7 +370,7 @@ class DeepgramFluxSTTBase(STTService):
return True
- async def _handle_message(self, data: Dict[str, Any]):
+ async def _handle_message(self, data: dict[str, Any]):
"""Handle a parsed message from Deepgram Flux.
Routes messages to appropriate handlers based on their type. Validates
@@ -416,7 +416,7 @@ class DeepgramFluxSTTBase(STTService):
# Notify connection is established
self._connection_established_event.set()
- async def _handle_fatal_error(self, data: Dict[str, Any]):
+ async def _handle_fatal_error(self, data: dict[str, Any]):
"""Handle fatal error messages from Deepgram Flux.
Fatal errors indicate unrecoverable issues with the connection or
@@ -435,7 +435,7 @@ class DeepgramFluxSTTBase(STTService):
# Error will be handled by the transport's receive loop error handler
raise Exception(deepgram_error)
- async def _handle_turn_info(self, data: Dict[str, Any]):
+ async def _handle_turn_info(self, data: dict[str, Any]):
"""Handle TurnInfo events from Deepgram Flux.
TurnInfo messages contain various turn-based events that indicate
@@ -504,7 +504,7 @@ class DeepgramFluxSTTBase(STTService):
logger.trace(f"Received event TurnResumed: {event}")
await self._call_event_handler("on_turn_resumed")
- def _calculate_average_confidence(self, transcript_data) -> Optional[float]:
+ def _calculate_average_confidence(self, transcript_data) -> float | None:
"""Calculate the average confidence from transcript data.
Return None if the data is missing or invalid.
@@ -520,7 +520,7 @@ class DeepgramFluxSTTBase(STTService):
return None
return sum(confidences) / len(confidences)
- async def _handle_end_of_turn(self, transcript: str, data: Dict[str, Any]):
+ async def _handle_end_of_turn(self, transcript: str, data: dict[str, Any]):
"""Handle EndOfTurn events from Deepgram Flux.
EndOfTurn events are fired when Deepgram Flux determines that a speaking
@@ -567,7 +567,7 @@ class DeepgramFluxSTTBase(STTService):
await self.broadcast_frame(UserStoppedSpeakingFrame)
await self._call_event_handler("on_end_of_turn", transcript)
- async def _handle_eager_end_of_turn(self, transcript: str, data: Dict[str, Any]):
+ async def _handle_eager_end_of_turn(self, transcript: str, data: dict[str, Any]):
"""Handle EagerEndOfTurn events from Deepgram Flux.
EagerEndOfTurn events are fired when the end-of-turn confidence reaches the
diff --git a/src/pipecat/services/deepgram/flux/sagemaker/stt.py b/src/pipecat/services/deepgram/flux/sagemaker/stt.py
index ffff24d04..da61b169a 100644
--- a/src/pipecat/services/deepgram/flux/sagemaker/stt.py
+++ b/src/pipecat/services/deepgram/flux/sagemaker/stt.py
@@ -9,8 +9,8 @@
import asyncio
import json
import time
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import AsyncGenerator, Optional
from loguru import logger
@@ -86,11 +86,11 @@ class DeepgramFluxSageMakerSTTService(DeepgramFluxSTTBase):
endpoint_name: str,
region: str,
encoding: str = "linear16",
- sample_rate: Optional[int] = None,
- mip_opt_out: Optional[bool] = None,
- tag: Optional[list] = None,
+ sample_rate: int | None = None,
+ mip_opt_out: bool | None = None,
+ tag: list | None = None,
should_interrupt: bool = True,
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Deepgram Flux SageMaker STT service.
@@ -137,8 +137,8 @@ class DeepgramFluxSageMakerSTTService(DeepgramFluxSTTBase):
self._endpoint_name = endpoint_name
self._region = region
- self._client: Optional[SageMakerBidiClient] = None
- self._response_task: Optional[asyncio.Task] = None
+ self._client: SageMakerBidiClient | None = None
+ self._response_task: asyncio.Task | None = None
# ------------------------------------------------------------------
# Transport interface implementation
diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py
index 32854f294..5b0b16472 100644
--- a/src/pipecat/services/deepgram/flux/stt.py
+++ b/src/pipecat/services/deepgram/flux/stt.py
@@ -8,7 +8,7 @@
import json
import time
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
from loguru import logger
from pydantic import BaseModel
@@ -90,27 +90,27 @@ class DeepgramFluxSTTService(DeepgramFluxSTTBase, WebsocketService):
min_confidence: Optional. Minimum confidence required confidence to create a TranscriptionFrame
"""
- eager_eot_threshold: Optional[float] = None
- eot_threshold: Optional[float] = None
- eot_timeout_ms: Optional[int] = None
+ eager_eot_threshold: float | None = None
+ eot_threshold: float | None = None
+ eot_timeout_ms: int | None = None
keyterm: list = []
- mip_opt_out: Optional[bool] = None
+ mip_opt_out: bool | None = None
tag: list = []
- min_confidence: Optional[float] = None # New parameter
+ min_confidence: float | None = None # New parameter
def __init__(
self,
*,
api_key: str,
url: str = "wss://api.deepgram.com/v2/listen",
- sample_rate: Optional[int] = None,
- mip_opt_out: Optional[bool] = None,
- model: Optional[str] = None,
+ sample_rate: int | None = None,
+ mip_opt_out: bool | None = None,
+ model: str | None = None,
flux_encoding: str = "linear16",
- tag: Optional[list] = None,
- params: Optional[InputParams] = None,
+ tag: list | None = None,
+ params: InputParams | None = None,
should_interrupt: bool = True,
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Deepgram Flux STT service.
diff --git a/src/pipecat/services/deepgram/sagemaker/stt.py b/src/pipecat/services/deepgram/sagemaker/stt.py
index 1087b124f..c837dc30b 100644
--- a/src/pipecat/services/deepgram/sagemaker/stt.py
+++ b/src/pipecat/services/deepgram/sagemaker/stt.py
@@ -14,8 +14,9 @@ languages, and various Deepgram features.
import asyncio
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, fields
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
@@ -89,11 +90,11 @@ class DeepgramSageMakerSTTService(STTService):
encoding: str = "linear16",
channels: int = 1,
multichannel: bool = False,
- sample_rate: Optional[int] = None,
- mip_opt_out: Optional[bool] = None,
- live_options: Optional[LiveOptions] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = DEEPGRAM_SAGEMAKER_TTFS_P99,
+ sample_rate: int | None = None,
+ mip_opt_out: bool | None = None,
+ live_options: LiveOptions | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = DEEPGRAM_SAGEMAKER_TTFS_P99,
**kwargs,
):
"""Initialize the Deepgram SageMaker STT service.
@@ -196,9 +197,9 @@ class DeepgramSageMakerSTTService(STTService):
self._multichannel = multichannel
self._mip_opt_out = mip_opt_out
- self._client: Optional[SageMakerBidiClient] = None
- self._response_task: Optional[asyncio.Task] = None
- self._keepalive_task: Optional[asyncio.Task] = None
+ self._client: SageMakerBidiClient | None = None
+ self._response_task: asyncio.Task | None = None
+ self._keepalive_task: asyncio.Task | None = None
def can_generate_metrics(self) -> bool:
"""Check if this service can generate processing metrics.
@@ -484,7 +485,7 @@ class DeepgramSageMakerSTTService(STTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing.
diff --git a/src/pipecat/services/deepgram/sagemaker/tts.py b/src/pipecat/services/deepgram/sagemaker/tts.py
index 6f43475dc..5585992b1 100644
--- a/src/pipecat/services/deepgram/sagemaker/tts.py
+++ b/src/pipecat/services/deepgram/sagemaker/tts.py
@@ -14,8 +14,9 @@ streaming audio output.
import asyncio
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
@@ -72,10 +73,10 @@ class DeepgramSageMakerTTSService(TTSService):
*,
endpoint_name: str,
region: str,
- voice: Optional[str] = None,
- sample_rate: Optional[int] = None,
+ voice: str | None = None,
+ sample_rate: int | None = None,
encoding: str = "linear16",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Deepgram SageMaker TTS service.
@@ -122,8 +123,8 @@ class DeepgramSageMakerTTSService(TTSService):
self._region = region
self._encoding = encoding
- self._client: Optional[SageMakerBidiClient] = None
- self._response_task: Optional[asyncio.Task] = None
+ self._client: SageMakerBidiClient | None = None
+ self._response_task: asyncio.Task | None = None
def can_generate_metrics(self) -> bool:
"""Check if this service can generate processing metrics.
@@ -311,7 +312,7 @@ class DeepgramSageMakerTTSService(TTSService):
logger.error(f"{self} error sending Clear message: {e}")
await super().on_audio_context_interrupted(context_id)
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis by sending Flush command.
This should be called when the LLM finishes a complete response to force
diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py
index 3a7669707..66b1d70da 100644
--- a/src/pipecat/services/deepgram/stt.py
+++ b/src/pipecat/services/deepgram/stt.py
@@ -7,8 +7,9 @@
"""Deepgram speech-to-text service implementation."""
import asyncio
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field, fields
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
@@ -64,33 +65,33 @@ class LiveOptions:
def __init__(
self,
*,
- callback: Optional[str] = None,
- callback_method: Optional[str] = None,
- channels: Optional[int] = None,
- detect_entities: Optional[bool] = None,
- diarize: Optional[bool] = None,
- dictation: Optional[bool] = None,
- encoding: Optional[str] = None,
- endpointing: Optional[Any] = None,
- extra: Optional[Any] = None,
- interim_results: Optional[bool] = None,
- keyterm: Optional[Any] = None,
- keywords: Optional[Any] = None,
- language: Optional[str] = None,
- mip_opt_out: Optional[bool] = None,
- model: Optional[str] = None,
- multichannel: Optional[bool] = None,
- numerals: Optional[bool] = None,
- profanity_filter: Optional[bool] = None,
- punctuate: Optional[bool] = None,
- redact: Optional[Any] = None,
- replace: Optional[Any] = None,
- sample_rate: Optional[int] = None,
- search: Optional[Any] = None,
- smart_format: Optional[bool] = None,
- tag: Optional[Any] = None,
- utterance_end_ms: Optional[int] = None,
- version: Optional[str] = None,
+ callback: str | None = None,
+ callback_method: str | None = None,
+ channels: int | None = None,
+ detect_entities: bool | None = None,
+ diarize: bool | None = None,
+ dictation: bool | None = None,
+ encoding: str | None = None,
+ endpointing: Any | None = None,
+ extra: Any | None = None,
+ interim_results: bool | None = None,
+ keyterm: Any | None = None,
+ keywords: Any | None = None,
+ language: str | None = None,
+ mip_opt_out: bool | None = None,
+ model: str | None = None,
+ multichannel: bool | None = None,
+ numerals: bool | None = None,
+ profanity_filter: bool | None = None,
+ punctuate: bool | None = None,
+ redact: Any | None = None,
+ replace: Any | None = None,
+ sample_rate: int | None = None,
+ search: Any | None = None,
+ smart_format: bool | None = None,
+ tag: Any | None = None,
+ utterance_end_ms: int | None = None,
+ version: str | None = None,
**kwargs,
):
"""Initialize live transcription options.
@@ -298,15 +299,15 @@ class DeepgramSTTService(STTService):
encoding: str = "linear16",
channels: int = 1,
multichannel: bool = False,
- sample_rate: Optional[int] = None,
- callback: Optional[str] = None,
- callback_method: Optional[str] = None,
- tag: Optional[Any] = None,
- mip_opt_out: Optional[bool] = None,
- live_options: Optional[LiveOptions] = None,
- addons: Optional[dict] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = DEEPGRAM_TTFS_P99,
+ sample_rate: int | None = None,
+ callback: str | None = None,
+ callback_method: str | None = None,
+ tag: Any | None = None,
+ mip_opt_out: bool | None = None,
+ live_options: LiveOptions | None = None,
+ addons: dict | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = DEEPGRAM_TTFS_P99,
**kwargs,
):
"""Initialize the Deepgram STT service.
@@ -668,7 +669,7 @@ class DeepgramSTTService(STTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py
index 3b5d04202..cc7b88455 100644
--- a/src/pipecat/services/deepgram/tts.py
+++ b/src/pipecat/services/deepgram/tts.py
@@ -11,8 +11,9 @@ for generating speech from text using various voice models.
"""
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
import aiohttp
from loguru import logger
@@ -65,11 +66,11 @@ class DeepgramTTSService(WebsocketTTSService):
self,
*,
api_key: str,
- voice: Optional[str] = None,
+ voice: str | None = None,
base_url: str = "wss://api.deepgram.com",
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
encoding: str = "linear16",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Deepgram WebSocket TTS service.
@@ -315,7 +316,7 @@ class DeepgramTTSService(WebsocketTTSService):
except json.JSONDecodeError:
logger.error(f"Invalid JSON message: {message}")
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis by sending Flush command.
This should be called when the LLM finishes a complete response to force
@@ -374,12 +375,12 @@ class DeepgramHttpTTSService(TTSService):
self,
*,
api_key: str,
- voice: Optional[str] = None,
+ voice: str | None = None,
aiohttp_session: aiohttp.ClientSession,
base_url: str = "https://api.deepgram.com",
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
encoding: str = "linear16",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Deepgram TTS service.
diff --git a/src/pipecat/services/deepseek/llm.py b/src/pipecat/services/deepseek/llm.py
index 177a87e63..7485168f9 100644
--- a/src/pipecat/services/deepseek/llm.py
+++ b/src/pipecat/services/deepseek/llm.py
@@ -7,7 +7,6 @@
"""DeepSeek LLM service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -42,8 +41,8 @@ class DeepSeekLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.deepseek.com/v1",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the DeepSeek LLM service.
diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py
index aa7fd0659..ac6c01876 100644
--- a/src/pipecat/services/elevenlabs/stt.py
+++ b/src/pipecat/services/elevenlabs/stt.py
@@ -15,9 +15,10 @@ import asyncio
import base64
import io
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, AsyncGenerator, Optional
+from enum import StrEnum
+from typing import Any
import aiohttp
from loguru import logger
@@ -53,7 +54,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_elevenlabs_language(language: Language) -> Optional[str]:
+def language_to_elevenlabs_language(language: Language) -> str | None:
"""Convert a Language enum to ElevenLabs language code.
Source:
@@ -170,7 +171,7 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]:
return resolve_language(language, LANGUAGE_MAP, use_base_code=False)
-class CommitStrategy(str, Enum):
+class CommitStrategy(StrEnum):
"""Commit strategies for transcript segmentation."""
MANUAL = "manual"
@@ -230,7 +231,7 @@ class ElevenLabsSTTService(SegmentedSTTService):
tag_audio_events: Whether to include audio events like (laughter), (coughing), in the transcription.
"""
- language: Optional[Language] = None
+ language: Language | None = None
tag_audio_events: bool = True
def __init__(
@@ -239,11 +240,11 @@ class ElevenLabsSTTService(SegmentedSTTService):
api_key: str,
aiohttp_session: aiohttp.ClientSession,
base_url: str = "https://api.elevenlabs.io",
- model: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = ELEVENLABS_TTFS_P99,
+ model: str | None = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = ELEVENLABS_TTFS_P99,
**kwargs,
):
"""Initialize the ElevenLabs STT service.
@@ -312,7 +313,7 @@ class ElevenLabsSTTService(SegmentedSTTService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to ElevenLabs service-specific language code.
Args:
@@ -364,7 +365,7 @@ class ElevenLabsSTTService(SegmentedSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[str] = None
+ self, transcript: str, is_final: bool, language: str | None = None
):
"""Handle a transcription result with tracing."""
await self.stop_processing_metrics()
@@ -474,12 +475,12 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
include_language_detection: Whether to include language detection in transcripts.
"""
- language_code: Optional[str] = None
+ language_code: str | None = None
commit_strategy: CommitStrategy = CommitStrategy.MANUAL
- vad_silence_threshold_secs: Optional[float] = None
- vad_threshold: Optional[float] = None
- min_speech_duration_ms: Optional[int] = None
- min_silence_duration_ms: Optional[int] = None
+ vad_silence_threshold_secs: float | None = None
+ vad_threshold: float | None = None
+ min_speech_duration_ms: int | None = None
+ min_silence_duration_ms: int | None = None
include_timestamps: bool = False
enable_logging: bool = False
include_language_detection: bool = False
@@ -490,14 +491,14 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
api_key: str,
base_url: str = "api.elevenlabs.io",
commit_strategy: CommitStrategy = CommitStrategy.MANUAL,
- model: Optional[str] = None,
- sample_rate: Optional[int] = None,
+ model: str | None = None,
+ sample_rate: int | None = None,
include_timestamps: bool = False,
enable_logging: bool = False,
include_language_detection: bool = False,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = ELEVENLABS_REALTIME_TTFS_P99,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = ELEVENLABS_REALTIME_TTFS_P99,
**kwargs,
):
"""Initialize the ElevenLabs Realtime STT service.
@@ -908,7 +909,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[str] = None
+ self, transcript: str, is_final: bool, language: str | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py
index d2cb3786d..02e6383ff 100644
--- a/src/pipecat/services/elevenlabs/tts.py
+++ b/src/pipecat/services/elevenlabs/tts.py
@@ -13,17 +13,12 @@ with support for streaming audio, word timestamps, and voice customization.
import asyncio
import base64
import json
+from collections.abc import AsyncGenerator, Mapping
from dataclasses import dataclass, field
from typing import (
Any,
- AsyncGenerator,
ClassVar,
- Dict,
- List,
Literal,
- Mapping,
- Optional,
- Tuple,
Union,
)
@@ -74,7 +69,7 @@ ELEVENLABS_MULTILINGUAL_MODELS = {
}
-def language_to_elevenlabs_language(language: Language) -> Optional[str]:
+def language_to_elevenlabs_language(language: Language) -> str | None:
"""Convert a Language enum to ElevenLabs language code.
Args:
@@ -152,8 +147,8 @@ def output_format_from_sample_rate(sample_rate: int) -> str:
def build_elevenlabs_voice_settings(
- settings: Union[Dict[str, Any], "TTSSettings"],
-) -> Optional[Dict[str, Union[float, bool]]]:
+ settings: Union[dict[str, Any], "TTSSettings"],
+) -> dict[str, float | bool] | None:
"""Build voice settings dictionary for ElevenLabs based on provided settings.
Args:
@@ -255,7 +250,7 @@ def calculate_word_times(
cumulative_time: float,
partial_word: str = "",
partial_word_start_time: float = 0.0,
-) -> tuple[List[Tuple[str, float]], str, float]:
+) -> tuple[list[tuple[str, float]], str, float]:
"""Calculate word timestamps from character alignment information.
Args:
@@ -341,34 +336,34 @@ class ElevenLabsTTSService(WebsocketTTSService):
pronunciation_dictionary_locators: List of pronunciation dictionary locators to use.
"""
- language: Optional[Language] = None
- stability: Optional[float] = None
- similarity_boost: Optional[float] = None
- style: Optional[float] = None
- use_speaker_boost: Optional[bool] = None
- speed: Optional[float] = None
- auto_mode: Optional[bool] = True
- enable_ssml_parsing: Optional[bool] = None
- enable_logging: Optional[bool] = None
- apply_text_normalization: Optional[Literal["auto", "on", "off"]] = None
- pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None
+ language: Language | None = None
+ stability: float | None = None
+ similarity_boost: float | None = None
+ style: float | None = None
+ use_speaker_boost: bool | None = None
+ speed: float | None = None
+ auto_mode: bool | None = True
+ enable_ssml_parsing: bool | None = None
+ enable_logging: bool | None = None
+ apply_text_normalization: Literal["auto", "on", "off"] | None = None
+ pronunciation_dictionary_locators: list[PronunciationDictionaryLocator] | None = None
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
- model: Optional[str] = None,
+ voice_id: str | None = None,
+ model: str | None = None,
url: str = "wss://api.elevenlabs.io",
- sample_rate: Optional[int] = None,
- auto_mode: Optional[bool] = None,
- enable_ssml_parsing: Optional[bool] = None,
- enable_logging: Optional[bool] = None,
- pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
- aggregate_sentences: Optional[bool] = None,
+ sample_rate: int | None = None,
+ auto_mode: bool | None = None,
+ enable_ssml_parsing: bool | None = None,
+ enable_logging: bool | None = None,
+ pronunciation_dictionary_locators: list[PronunciationDictionaryLocator] | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
+ aggregate_sentences: bool | None = None,
**kwargs,
):
"""Initialize the ElevenLabs TTS service.
@@ -534,7 +529,7 @@ class ElevenLabsTTSService(WebsocketTTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to ElevenLabs language format.
Args:
@@ -625,7 +620,7 @@ class ElevenLabsTTSService(WebsocketTTSService):
await super().cancel(frame)
await self._disconnect()
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio and finalize the current context.
Args:
@@ -935,31 +930,31 @@ class ElevenLabsHttpTTSService(TTSService):
pronunciation_dictionary_locators: List of pronunciation dictionary locators to use.
"""
- language: Optional[Language] = None
- optimize_streaming_latency: Optional[int] = None
- stability: Optional[float] = None
- similarity_boost: Optional[float] = None
- style: Optional[float] = None
- use_speaker_boost: Optional[bool] = None
- speed: Optional[float] = None
- apply_text_normalization: Optional[Literal["auto", "on", "off"]] = None
- pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None
+ language: Language | None = None
+ optimize_streaming_latency: int | None = None
+ stability: float | None = None
+ similarity_boost: float | None = None
+ style: float | None = None
+ use_speaker_boost: bool | None = None
+ speed: float | None = None
+ apply_text_normalization: Literal["auto", "on", "off"] | None = None
+ pronunciation_dictionary_locators: list[PronunciationDictionaryLocator] | None = None
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
aiohttp_session: aiohttp.ClientSession,
- model: Optional[str] = None,
+ model: str | None = None,
base_url: str = "https://api.elevenlabs.io",
- sample_rate: Optional[int] = None,
- enable_logging: Optional[bool] = None,
- pronunciation_dictionary_locators: Optional[List[PronunciationDictionaryLocator]] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
- aggregate_sentences: Optional[bool] = None,
+ sample_rate: int | None = None,
+ enable_logging: bool | None = None,
+ pronunciation_dictionary_locators: list[PronunciationDictionaryLocator] | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
+ aggregate_sentences: bool | None = None,
**kwargs,
):
"""Initialize the ElevenLabs HTTP TTS service.
@@ -1078,7 +1073,7 @@ class ElevenLabsHttpTTSService(TTSService):
self._partial_word = ""
self._partial_word_start_time = 0.0
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert pipecat Language to ElevenLabs language code.
Args:
@@ -1147,7 +1142,7 @@ class ElevenLabsHttpTTSService(TTSService):
# End of turn - reset previous text
self._previous_text = ""
- def calculate_word_times(self, alignment_info: Mapping[str, Any]) -> List[Tuple[str, float]]:
+ def calculate_word_times(self, alignment_info: Mapping[str, Any]) -> list[tuple[str, float]]:
"""Calculate word timing from character alignment data.
This method handles partial words that may span across multiple alignment chunks.
@@ -1228,7 +1223,7 @@ class ElevenLabsHttpTTSService(TTSService):
# Use the with-timestamps endpoint
url = f"{self._base_url}/v1/text-to-speech/{self._settings.voice}/stream/with-timestamps"
- payload: Dict[str, Union[str, Dict[str, Union[float, bool]]]] = {
+ payload: dict[str, str | dict[str, float | bool]] = {
"text": text,
"model_id": self._settings.model,
}
diff --git a/src/pipecat/services/fal/image.py b/src/pipecat/services/fal/image.py
index 31af55440..8a608de77 100644
--- a/src/pipecat/services/fal/image.py
+++ b/src/pipecat/services/fal/image.py
@@ -13,8 +13,9 @@ for creating images from text prompts using various AI models.
import asyncio
import io
import os
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Dict, Optional, Union
+from typing import Any
import aiohttp
from loguru import logger
@@ -44,14 +45,14 @@ class FalImageGenSettings(ImageGenSettings):
seed: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
num_inference_steps: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
num_images: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- image_size: str | Dict[str, int] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ image_size: str | dict[str, int] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
expand_prompt: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
enable_safety_checker: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- def to_api_arguments(self) -> Dict[str, Any]:
+ def to_api_arguments(self) -> dict[str, Any]:
"""Build the Fal API arguments dict from settings, excluding None values."""
- args: Dict[str, Any] = {}
+ args: dict[str, Any] = {}
if self.seed is not None:
args["seed"] = self.seed
args["num_inference_steps"] = self.num_inference_steps
@@ -89,10 +90,10 @@ class FalImageGenService(ImageGenService):
format: Output image format. Defaults to "png".
"""
- seed: Optional[int] = None
+ seed: int | None = None
num_inference_steps: int = 8
num_images: int = 1
- image_size: Union[str, Dict[str, int]] = "square_hd"
+ image_size: str | dict[str, int] = "square_hd"
expand_prompt: bool = False
enable_safety_checker: bool = True
format: str = "png"
@@ -102,11 +103,11 @@ class FalImageGenService(ImageGenService):
def __init__(
self,
*,
- params: Optional[InputParams] = None,
+ params: InputParams | None = None,
aiohttp_session: aiohttp.ClientSession,
- model: Optional[str] = None,
- key: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ key: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the FalImageGenService.
diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py
index 65df7e3ab..1e18c7f84 100644
--- a/src/pipecat/services/fal/stt.py
+++ b/src/pipecat/services/fal/stt.py
@@ -12,8 +12,8 @@ transcription using segmented audio processing.
import base64
import os
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import AsyncGenerator, Optional
import aiohttp
from loguru import logger
@@ -28,7 +28,7 @@ from pipecat.utils.time import time_now_iso8601
from pipecat.utils.tracing.service_decorators import traced_stt
-def language_to_fal_language(language: Language) -> Optional[str]:
+def language_to_fal_language(language: Language) -> str | None:
"""Convert a Language enum to Fal's Wizper language code.
Args:
@@ -171,7 +171,7 @@ class FalSTTService(SegmentedSTTService):
version: Version of Wizper model to use. Defaults to '3'.
"""
- language: Optional[Language] = Language.EN
+ language: Language | None = Language.EN
task: str = "transcribe"
chunk_level: str = "segment"
version: str = "3"
@@ -179,15 +179,15 @@ class FalSTTService(SegmentedSTTService):
def __init__(
self,
*,
- api_key: Optional[str] = None,
- aiohttp_session: Optional[aiohttp.ClientSession] = None,
+ api_key: str | None = None,
+ aiohttp_session: aiohttp.ClientSession | None = None,
task: str = "transcribe",
chunk_level: str = "segment",
version: str = "3",
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = FAL_TTFS_P99,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = FAL_TTFS_P99,
**kwargs,
):
"""Initialize the FalSTTService with API key and parameters.
@@ -266,7 +266,7 @@ class FalSTTService(SegmentedSTTService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Fal's service-specific language code.
Args:
@@ -279,7 +279,7 @@ class FalSTTService(SegmentedSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[str] = None
+ self, transcript: str, is_final: bool, language: str | None = None
):
"""Handle a transcription result with tracing."""
await self.stop_processing_metrics()
diff --git a/src/pipecat/services/fireworks/llm.py b/src/pipecat/services/fireworks/llm.py
index 7d2997987..c51daf0ea 100644
--- a/src/pipecat/services/fireworks/llm.py
+++ b/src/pipecat/services/fireworks/llm.py
@@ -7,7 +7,6 @@
"""Fireworks AI service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -37,9 +36,9 @@ class FireworksLLMService(OpenAILLMService):
self,
*,
api_key: str,
- model: Optional[str] = None,
+ model: str | None = None,
base_url: str = "https://api.fireworks.ai/inference/v1",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Fireworks LLM service.
diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py
index f6738fbb8..5f4252fb2 100644
--- a/src/pipecat/services/fish/tts.py
+++ b/src/pipecat/services/fish/tts.py
@@ -10,8 +10,9 @@ This module provides integration with Fish Audio's real-time TTS WebSocket API
for streaming text-to-speech synthesis with customizable voice parameters.
"""
+from collections.abc import AsyncGenerator, Mapping
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Literal, Mapping, Optional, Self
+from typing import Any, Literal, Self
from loguru import logger
from pydantic import BaseModel
@@ -99,22 +100,22 @@ class FishAudioTTSService(InterruptibleTTSService):
prosody_volume: Volume adjustment in dB. Defaults to 0.
"""
- language: Optional[Language] = Language.EN
- latency: Optional[str] = "normal" # "normal" or "balanced"
- normalize: Optional[bool] = True
- prosody_speed: Optional[float] = 1.0 # Speech speed (0.5-2.0)
- prosody_volume: Optional[int] = 0 # Volume adjustment in dB
+ language: Language | None = Language.EN
+ latency: str | None = "normal" # "normal" or "balanced"
+ normalize: bool | None = True
+ prosody_speed: float | None = 1.0 # Speech speed (0.5-2.0)
+ prosody_volume: int | None = 0 # Volume adjustment in dB
def __init__(
self,
*,
api_key: str,
- reference_id: Optional[str] = None, # This is the voice ID
- model_id: Optional[str] = None,
+ reference_id: str | None = None, # This is the voice ID
+ model_id: str | None = None,
output_format: FishAudioOutputFormat = "pcm",
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Fish Audio TTS service.
@@ -321,7 +322,7 @@ class FishAudioTTSService(InterruptibleTTSService):
self._websocket = None
await self._call_event_handler("on_disconnected")
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any buffered audio by sending a flush event to Fish Audio."""
logger.trace(f"{self}: Flushing audio buffers")
if not self._websocket or self._websocket.state is State.CLOSED:
diff --git a/src/pipecat/services/gladia/config.py b/src/pipecat/services/gladia/config.py
index 917309594..dd46927dc 100644
--- a/src/pipecat/services/gladia/config.py
+++ b/src/pipecat/services/gladia/config.py
@@ -6,7 +6,7 @@
"""Configuration for the Gladia STT service."""
-from typing import Any, Dict, List, Optional, Union
+from typing import Any
from pydantic import BaseModel
@@ -19,8 +19,8 @@ class LanguageConfig(BaseModel):
code_switching: Whether to auto-detect language changes during transcription
"""
- languages: Optional[List[str]] = None
- code_switching: Optional[bool] = None
+ languages: list[str] | None = None
+ code_switching: bool | None = None
class PreProcessingConfig(BaseModel):
@@ -31,8 +31,8 @@ class PreProcessingConfig(BaseModel):
speech_threshold: Sensitivity for speech detection (0-1)
"""
- audio_enhancer: Optional[bool] = None
- speech_threshold: Optional[float] = None
+ audio_enhancer: bool | None = None
+ speech_threshold: float | None = None
class CustomVocabularyItem(BaseModel):
@@ -47,8 +47,8 @@ class CustomVocabularyItem(BaseModel):
value: str
intensity: float
- pronunciations: Optional[List[str]] = None
- language: Optional[str] = None
+ pronunciations: list[str] | None = None
+ language: str | None = None
class CustomVocabularyConfig(BaseModel):
@@ -59,8 +59,8 @@ class CustomVocabularyConfig(BaseModel):
default_intensity: Default intensity for simple string vocabulary items
"""
- vocabulary: Optional[List[Union[str, CustomVocabularyItem]]] = None
- default_intensity: Optional[float] = None
+ vocabulary: list[str | CustomVocabularyItem] | None = None
+ default_intensity: float | None = None
class CustomSpellingConfig(BaseModel):
@@ -70,7 +70,7 @@ class CustomSpellingConfig(BaseModel):
spelling_dictionary: Mapping of correct spellings to phonetic variations
"""
- spelling_dictionary: Optional[Dict[str, List[str]]] = None
+ spelling_dictionary: dict[str, list[str]] | None = None
class TranslationConfig(BaseModel):
@@ -86,13 +86,13 @@ class TranslationConfig(BaseModel):
informal: Force informal language forms when available
"""
- target_languages: Optional[List[str]] = None
- model: Optional[str] = None
- match_original_utterances: Optional[bool] = None
- lipsync: Optional[bool] = None
- context_adaptation: Optional[bool] = None
- context: Optional[str] = None
- informal: Optional[bool] = None
+ target_languages: list[str] | None = None
+ model: str | None = None
+ match_original_utterances: bool | None = None
+ lipsync: bool | None = None
+ context_adaptation: bool | None = None
+ context: str | None = None
+ informal: bool | None = None
class RealtimeProcessingConfig(BaseModel):
@@ -110,15 +110,15 @@ class RealtimeProcessingConfig(BaseModel):
sentiment_analysis: Whether to enable sentiment analysis
"""
- words_accurate_timestamps: Optional[bool] = None
- custom_vocabulary: Optional[bool] = None
- custom_vocabulary_config: Optional[CustomVocabularyConfig] = None
- custom_spelling: Optional[bool] = None
- custom_spelling_config: Optional[CustomSpellingConfig] = None
- translation: Optional[bool] = None
- translation_config: Optional[TranslationConfig] = None
- named_entity_recognition: Optional[bool] = None
- sentiment_analysis: Optional[bool] = None
+ words_accurate_timestamps: bool | None = None
+ custom_vocabulary: bool | None = None
+ custom_vocabulary_config: CustomVocabularyConfig | None = None
+ custom_spelling: bool | None = None
+ custom_spelling_config: CustomSpellingConfig | None = None
+ translation: bool | None = None
+ translation_config: TranslationConfig | None = None
+ named_entity_recognition: bool | None = None
+ sentiment_analysis: bool | None = None
class MessagesConfig(BaseModel):
@@ -136,15 +136,15 @@ class MessagesConfig(BaseModel):
receive_lifecycle_events: Whether to receive lifecycle events
"""
- receive_partial_transcripts: Optional[bool] = None
- receive_final_transcripts: Optional[bool] = None
- receive_speech_events: Optional[bool] = None
- receive_pre_processing_events: Optional[bool] = None
- receive_realtime_processing_events: Optional[bool] = None
- receive_post_processing_events: Optional[bool] = None
- receive_acknowledgments: Optional[bool] = None
- receive_errors: Optional[bool] = None
- receive_lifecycle_events: Optional[bool] = None
+ receive_partial_transcripts: bool | None = None
+ receive_final_transcripts: bool | None = None
+ receive_speech_events: bool | None = None
+ receive_pre_processing_events: bool | None = None
+ receive_realtime_processing_events: bool | None = None
+ receive_post_processing_events: bool | None = None
+ receive_acknowledgments: bool | None = None
+ receive_errors: bool | None = None
+ receive_lifecycle_events: bool | None = None
class GladiaInputParams(BaseModel):
@@ -170,14 +170,14 @@ class GladiaInputParams(BaseModel):
and stopped frames. Defaults to False.
"""
- encoding: Optional[str] = "wav/pcm"
- bit_depth: Optional[int] = 16
- channels: Optional[int] = 1
- custom_metadata: Optional[Dict[str, Any]] = None
- endpointing: Optional[float] = None
- maximum_duration_without_endpointing: Optional[int] = 5
- language_config: Optional[LanguageConfig] = None
- pre_processing: Optional[PreProcessingConfig] = None
- realtime_processing: Optional[RealtimeProcessingConfig] = None
- messages_config: Optional[MessagesConfig] = None
+ encoding: str | None = "wav/pcm"
+ bit_depth: int | None = 16
+ channels: int | None = 1
+ custom_metadata: dict[str, Any] | None = None
+ endpointing: float | None = None
+ maximum_duration_without_endpointing: int | None = 5
+ language_config: LanguageConfig | None = None
+ pre_processing: PreProcessingConfig | None = None
+ realtime_processing: RealtimeProcessingConfig | None = None
+ messages_config: MessagesConfig | None = None
enable_vad: bool = False
diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py
index f939ff4ba..8c57e47e2 100644
--- a/src/pipecat/services/gladia/stt.py
+++ b/src/pipecat/services/gladia/stt.py
@@ -13,8 +13,9 @@ supporting multiple languages, custom vocabulary, and various audio processing o
import asyncio
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Literal, Optional
+from typing import Any, Literal
import aiohttp
from loguru import logger
@@ -55,7 +56,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_gladia_language(language: Language) -> Optional[str]:
+def language_to_gladia_language(language: Language) -> str | None:
"""Convert a Language enum to Gladia's language code format.
Args:
@@ -223,13 +224,13 @@ class GladiaSTTService(WebsocketSTTService):
encoding: str = "wav/pcm",
bit_depth: int = 16,
channels: int = 1,
- sample_rate: Optional[int] = None,
- model: Optional[str] = None,
- params: Optional[GladiaInputParams] = None,
+ sample_rate: int | None = None,
+ model: str | None = None,
+ params: GladiaInputParams | None = None,
max_buffer_size: int = 1024 * 1024 * 20, # 20MB default buffer
should_interrupt: bool = True,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = GLADIA_TTFS_P99,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = GLADIA_TTFS_P99,
**kwargs,
):
"""Initialize the Gladia STT service.
@@ -353,7 +354,7 @@ class GladiaSTTService(WebsocketSTTService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert pipecat Language enum to Gladia's language code.
Args:
@@ -587,7 +588,7 @@ class GladiaSTTService(WebsocketSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[str] = None
+ self, transcript: str, is_final: bool, language: str | None = None
):
await self.stop_processing_metrics()
diff --git a/src/pipecat/services/google/frames.py b/src/pipecat/services/google/frames.py
index 47fcb3365..fbb32bf01 100644
--- a/src/pipecat/services/google/frames.py
+++ b/src/pipecat/services/google/frames.py
@@ -12,7 +12,6 @@ models that support web search and fact grounding capabilities.
"""
from dataclasses import dataclass, field
-from typing import List, Optional
from pipecat.frames.frames import DataFrame
@@ -27,7 +26,7 @@ class LLMSearchResult:
"""
text: str
- confidence: List[float] = field(default_factory=list)
+ confidence: list[float] = field(default_factory=list)
@dataclass
@@ -40,9 +39,9 @@ class LLMSearchOrigin:
results: List of search results from this origin.
"""
- site_uri: Optional[str] = None
- site_title: Optional[str] = None
- results: List[LLMSearchResult] = field(default_factory=list)
+ site_uri: str | None = None
+ site_title: str | None = None
+ results: list[LLMSearchResult] = field(default_factory=list)
@dataclass
@@ -60,9 +59,9 @@ class LLMSearchResponseFrame(DataFrame):
origins: List of search result origins with detailed information.
"""
- search_result: Optional[str] = None
- rendered_content: Optional[str] = None
- origins: List[LLMSearchOrigin] = field(default_factory=list)
+ search_result: str | None = None
+ rendered_content: str | None = None
+ origins: list[LLMSearchOrigin] = field(default_factory=list)
def __str__(self):
"""Return string representation of the search response frame.
diff --git a/src/pipecat/services/google/gemini_live/file_api.py b/src/pipecat/services/google/gemini_live/file_api.py
index 0c9fa49d3..6dcec5b19 100644
--- a/src/pipecat/services/google/gemini_live/file_api.py
+++ b/src/pipecat/services/google/gemini_live/file_api.py
@@ -12,7 +12,7 @@ this API can be referenced in Gemini generative model calls.
"""
import mimetypes
-from typing import Any, Dict, Optional
+from typing import Any
import aiohttp
from loguru import logger
@@ -43,9 +43,7 @@ class GeminiFileAPI:
# Upload URL uses the /upload/ path
self.upload_base_url = "https://generativelanguage.googleapis.com/upload/v1beta/files"
- async def upload_file(
- self, file_path: str, display_name: Optional[str] = None
- ) -> Dict[str, Any]:
+ async def upload_file(self, file_path: str, display_name: str | None = None) -> dict[str, Any]:
"""Upload a file to the Gemini File API using the correct resumable upload protocol.
Args:
@@ -116,7 +114,7 @@ class GeminiFileAPI:
logger.info(f"File uploaded successfully: {file_info.get('file', {}).get('name')}")
return file_info
- async def get_file(self, name: str) -> Dict[str, Any]:
+ async def get_file(self, name: str) -> dict[str, Any]:
"""Get metadata for a file.
Args:
@@ -140,8 +138,8 @@ class GeminiFileAPI:
return file_info
async def list_files(
- self, page_size: int = 10, page_token: Optional[str] = None
- ) -> Dict[str, Any]:
+ self, page_size: int = 10, page_token: str | None = None
+ ) -> dict[str, Any]:
"""List uploaded files.
Args:
diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py
index 3e3cbd092..b1674d91e 100644
--- a/src/pipecat/services/google/gemini_live/llm.py
+++ b/src/pipecat/services/google/gemini_live/llm.py
@@ -17,8 +17,8 @@ import io
import time
import uuid
from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, Dict, List, Optional, Union
+from enum import StrEnum
+from typing import Any
from loguru import logger
from PIL import Image
@@ -109,7 +109,7 @@ MAX_CONSECUTIVE_FAILURES = 3
CONNECTION_ESTABLISHED_THRESHOLD = 10.0 # seconds
-def language_to_gemini_language(language: Language) -> Optional[str]:
+def language_to_gemini_language(language: Language) -> str | None:
"""Maps a Language enum value to a Gemini Live supported language code.
Source:
@@ -206,7 +206,7 @@ def language_to_gemini_language(language: Language) -> Optional[str]:
return resolve_language(language, LANGUAGE_MAP, use_base_code=False)
-class GeminiModalities(Enum):
+class GeminiModalities(StrEnum):
"""Supported modalities for Gemini Live.
Parameters:
@@ -218,7 +218,7 @@ class GeminiModalities(Enum):
AUDIO = "AUDIO"
-class GeminiMediaResolution(str, Enum):
+class GeminiMediaResolution(StrEnum):
"""Media resolution options for Gemini Live.
Parameters:
@@ -245,11 +245,11 @@ class GeminiVADParams(BaseModel):
silence_duration_ms: Silence duration threshold in milliseconds. Defaults to None.
"""
- disabled: Optional[bool] = Field(default=None)
- start_sensitivity: Optional[StartSensitivity] = Field(default=None)
- end_sensitivity: Optional[EndSensitivity] = Field(default=None)
- prefix_padding_ms: Optional[int] = Field(default=None)
- silence_duration_ms: Optional[int] = Field(default=None)
+ disabled: bool | None = Field(default=None)
+ start_sensitivity: StartSensitivity | None = Field(default=None)
+ end_sensitivity: EndSensitivity | None = Field(default=None)
+ prefix_padding_ms: int | None = Field(default=None)
+ silence_duration_ms: int | None = Field(default=None)
class ContextWindowCompressionParams(BaseModel):
@@ -261,9 +261,7 @@ class ContextWindowCompressionParams(BaseModel):
"""
enabled: bool = Field(default=False)
- trigger_tokens: Optional[int] = Field(
- default=None
- ) # None = use default (80% of context window)
+ trigger_tokens: int | None = Field(default=None) # None = use default (80% of context window)
class InputParams(BaseModel):
@@ -303,23 +301,23 @@ class InputParams(BaseModel):
extra: Additional parameters. Defaults to empty dict.
"""
- frequency_penalty: Optional[float] = Field(default=None, ge=0.0, le=2.0)
- max_tokens: Optional[int] = Field(default=4096, ge=1)
- presence_penalty: Optional[float] = Field(default=None, ge=0.0, le=2.0)
- temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
- top_k: Optional[int] = Field(default=None, ge=0)
- top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0)
- modalities: Optional[GeminiModalities] = Field(default=GeminiModalities.AUDIO)
- language: Optional[Language] = Field(default=Language.EN_US)
- media_resolution: Optional[GeminiMediaResolution] = Field(
+ frequency_penalty: float | None = Field(default=None, ge=0.0, le=2.0)
+ max_tokens: int | None = Field(default=4096, ge=1)
+ presence_penalty: float | None = Field(default=None, ge=0.0, le=2.0)
+ temperature: float | None = Field(default=None, ge=0.0, le=2.0)
+ top_k: int | None = Field(default=None, ge=0)
+ top_p: float | None = Field(default=None, ge=0.0, le=1.0)
+ modalities: GeminiModalities | None = Field(default=GeminiModalities.AUDIO)
+ language: Language | None = Field(default=Language.EN_US)
+ media_resolution: GeminiMediaResolution | None = Field(
default=GeminiMediaResolution.UNSPECIFIED
)
- vad: Optional[GeminiVADParams] = Field(default=None)
- context_window_compression: Optional[ContextWindowCompressionParams] = Field(default=None)
- thinking: Optional[ThinkingConfig] = Field(default=None)
- enable_affective_dialog: Optional[bool] = Field(default=None)
- proactivity: Optional[ProactivityConfig] = Field(default=None)
- extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
+ vad: GeminiVADParams | None = Field(default=None)
+ context_window_compression: ContextWindowCompressionParams | None = Field(default=None)
+ thinking: ThinkingConfig | None = Field(default=None)
+ enable_affective_dialog: bool | None = Field(default=None)
+ proactivity: ProactivityConfig | None = Field(default=None)
+ extra: dict[str, Any] | None = Field(default_factory=dict)
@dataclass
@@ -374,17 +372,17 @@ class GeminiLiveLLMService(LLMService):
self,
*,
api_key: str,
- model: Optional[str] = None,
+ model: str | None = None,
voice_id: str = "Charon",
start_audio_paused: bool = False,
start_video_paused: bool = False,
- system_instruction: Optional[str] = None,
- tools: Optional[Union[List[dict], ToolsSchema]] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ system_instruction: str | None = None,
+ tools: list[dict] | ToolsSchema | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
inference_on_context_initialization: bool = True,
file_api_base_url: str = "https://generativelanguage.googleapis.com/v1beta/files",
- http_options: Optional[HttpOptions] = None,
+ http_options: HttpOptions | None = None,
**kwargs,
):
"""Initialize the Gemini Live LLM service.
@@ -537,18 +535,18 @@ class GeminiLiveLLMService(LLMService):
self._connection_start_time = None
self._file_api_base_url = file_api_base_url
- self._file_api: Optional[GeminiFileAPI] = None
+ self._file_api: GeminiFileAPI | None = None
# Grounding metadata tracking
self._search_result_buffer = ""
self._accumulated_grounding_metadata = None
# Session resumption
- self._session_resumption_handle: Optional[str] = None
+ self._session_resumption_handle: str | None = None
# Bookkeeping for ending gracefully (i.e. after the bot is finished)
- self._end_frame_pending_bot_turn_finished: Optional[EndFrame] = None
- self._end_frame_deferral_timeout_task: Optional[asyncio.Task] = None
+ self._end_frame_pending_bot_turn_finished: EndFrame | None = None
+ self._end_frame_deferral_timeout_task: asyncio.Task | None = None
# Initialize the API client. Subclasses can override this if needed.
self.create_client()
@@ -908,7 +906,7 @@ class GeminiLiveLLMService(LLMService):
self._end_frame_deferral_timeout_task.cancel()
self._end_frame_deferral_timeout_task = None
- def _get_history_config(self) -> Optional[HistoryConfig]:
+ def _get_history_config(self) -> HistoryConfig | None:
"""Return the history config for the Live API connection.
Subclasses can override this to disable history config (e.g. Vertex AI
@@ -916,7 +914,7 @@ class GeminiLiveLLMService(LLMService):
"""
return HistoryConfig(initial_history_in_client_content=True)
- async def _connect(self, session_resumption_handle: Optional[str] = None):
+ async def _connect(self, session_resumption_handle: str | None = None):
"""Establish client connection to Gemini Live API."""
if self._session:
# Here we assume that if we have a client, we are connected. We
@@ -1336,7 +1334,7 @@ class GeminiLiveLLMService(LLMService):
@traced_gemini_live(operation="llm_tool_result")
async def _tool_result(
- self, tool_call_id: str, tool_name: str, tool_result_message: Dict[str, Any]
+ self, tool_call_id: str, tool_name: str, tool_result_message: dict[str, Any]
):
"""Send tool result back to the API."""
if self._disconnecting or not self._session:
@@ -1513,12 +1511,12 @@ class GeminiLiveLLMService(LLMService):
@traced_stt
async def _handle_user_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
- async def _push_user_transcription(self, text: str, result: Optional[LiveServerMessage] = None):
+ async def _push_user_transcription(self, text: str, result: LiveServerMessage | None = None):
"""Push a user transcription frame upstream.
Helper method to ensure consistent handling of user transcriptions
@@ -1697,7 +1695,7 @@ class GeminiLiveLLMService(LLMService):
if grounding_metadata.grounding_chunks and grounding_metadata.grounding_supports:
# Create a mapping of chunk indices to origins
- chunk_to_origin: Dict[int, LLMSearchOrigin] = {}
+ chunk_to_origin: dict[int, LLMSearchOrigin] = {}
for index, chunk in enumerate(grounding_metadata.grounding_chunks):
if chunk.web:
diff --git a/src/pipecat/services/google/gemini_live/vertex/llm.py b/src/pipecat/services/google/gemini_live/vertex/llm.py
index 8466c7f21..44ded852f 100644
--- a/src/pipecat/services/google/gemini_live/vertex/llm.py
+++ b/src/pipecat/services/google/gemini_live/vertex/llm.py
@@ -13,7 +13,6 @@ streaming responses, and tool usage.
import json
from dataclasses import dataclass
-from typing import List, Optional, Union
from loguru import logger
@@ -61,21 +60,21 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
def __init__(
self,
*,
- credentials: Optional[str] = None,
- credentials_path: Optional[str] = None,
+ credentials: str | None = None,
+ credentials_path: str | None = None,
location: str,
project_id: str,
- model: Optional[str] = None,
+ model: str | None = None,
voice_id: str = "Charon",
start_audio_paused: bool = False,
start_video_paused: bool = False,
- system_instruction: Optional[str] = None,
- tools: Optional[Union[List[dict], ToolsSchema]] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ system_instruction: str | None = None,
+ tools: list[dict] | ToolsSchema | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
inference_on_context_initialization: bool = True,
file_api_base_url: str = "https://generativelanguage.googleapis.com/v1beta/files",
- http_options: Optional[HttpOptions] = None,
+ http_options: HttpOptions | None = None,
**kwargs,
):
"""Initialize the service for accessing Gemini Live via Google Vertex AI.
@@ -234,7 +233,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
)
@staticmethod
- def _get_credentials(credentials: Optional[str], credentials_path: Optional[str]) -> str:
+ def _get_credentials(credentials: str | None, credentials_path: str | None) -> str:
"""Retrieve Credentials using Google service account credentials JSON.
Supports multiple authentication methods:
@@ -252,7 +251,7 @@ class GeminiLiveVertexLLMService(GeminiLiveLLMService):
Raises:
ValueError: If no valid credentials are provided or found.
"""
- creds: Optional[service_account.Credentials] = None
+ creds: service_account.Credentials | None = None
if credentials:
# Parse and load credentials from JSON string
diff --git a/src/pipecat/services/google/image.py b/src/pipecat/services/google/image.py
index 9e4ec1b59..c8c33a68d 100644
--- a/src/pipecat/services/google/image.py
+++ b/src/pipecat/services/google/image.py
@@ -16,8 +16,9 @@ import os
# Suppress gRPC fork warnings
os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false"
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
from PIL import Image
@@ -76,15 +77,15 @@ class GoogleImageGenService(ImageGenService):
number_of_images: int = Field(default=1, ge=1, le=8)
model: str = Field(default="imagen-4.0-generate-001")
- negative_prompt: Optional[str] = Field(default=None)
+ negative_prompt: str | None = Field(default=None)
def __init__(
self,
*,
api_key: str,
- params: Optional[InputParams] = None,
- http_options: Optional[Any] = None,
- settings: Optional[Settings] = None,
+ params: InputParams | None = None,
+ http_options: Any | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the GoogleImageGenService with API key and parameters.
diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py
index 5e95b99da..6268009ed 100644
--- a/src/pipecat/services/google/llm.py
+++ b/src/pipecat/services/google/llm.py
@@ -13,8 +13,9 @@ including LLM services, context management, and message aggregation.
import io
import os
import uuid
+from collections.abc import AsyncIterator
from dataclasses import dataclass, field
-from typing import Any, AsyncIterator, Dict, List, Literal, Optional, Union
+from typing import Any, Literal, Optional, Union
from loguru import logger
from PIL import Image
@@ -88,15 +89,13 @@ class GoogleThinkingConfig(BaseModel):
Today's models default to not including thoughts (False).
"""
- thinking_budget: Optional[int] = Field(default=None)
+ thinking_budget: int | None = Field(default=None)
# Why `| str` here? To not break compatibility in case Google adds more
# levels in the future.
- thinking_level: Optional[Literal["low", "high", "medium", "minimal"] | str] = Field(
- default=None
- )
+ thinking_level: Literal["low", "high", "medium", "minimal"] | str | None = Field(default=None)
- include_thoughts: Optional[bool] = Field(default=None)
+ include_thoughts: bool | None = Field(default=None)
@dataclass
@@ -160,24 +159,24 @@ class GoogleLLMService(LLMService):
extra: Additional parameters as a dictionary.
"""
- max_tokens: Optional[int] = Field(default=4096, ge=1)
- temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0)
- top_k: Optional[int] = Field(default=None, ge=0)
- top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0)
+ max_tokens: int | None = Field(default=4096, ge=1)
+ temperature: float | None = Field(default=None, ge=0.0, le=2.0)
+ top_k: int | None = Field(default=None, ge=0)
+ top_p: float | None = Field(default=None, ge=0.0, le=1.0)
thinking: Optional["GoogleLLMService.ThinkingConfig"] = Field(default=None)
- extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
+ extra: dict[str, Any] | None = Field(default_factory=dict)
def __init__(
self,
*,
api_key: str,
- model: Optional[str] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- system_instruction: Optional[str] = None,
- tools: Optional[List[Dict[str, Any]]] = None,
- tool_config: Optional[Dict[str, Any]] = None,
- http_options: Optional[HttpOptions] = None,
+ model: str | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ system_instruction: str | None = None,
+ tools: list[dict[str, Any]] | None = None,
+ tool_config: dict[str, Any] | None = None,
+ http_options: HttpOptions | None = None,
**kwargs,
):
"""Initialize the Google LLM service.
@@ -272,9 +271,9 @@ class GoogleLLMService(LLMService):
async def run_inference(
self,
context: LLMContext,
- max_tokens: Optional[int] = None,
- system_instruction: Optional[str] = None,
- ) -> Optional[str]:
+ max_tokens: int | None = None,
+ system_instruction: str | None = None,
+ ) -> str | None:
"""Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
Args:
@@ -327,10 +326,10 @@ class GoogleLLMService(LLMService):
def _build_generation_params(
self,
- system_instruction: Optional[str] = None,
- tools: Optional[List] = None,
- tool_config: Optional[Dict[str, Any]] = None,
- ) -> Dict[str, Any]:
+ system_instruction: str | None = None,
+ tools: list | None = None,
+ tool_config: dict[str, Any] | None = None,
+ ) -> dict[str, Any]:
"""Build generation parameters for Google AI API.
Args:
@@ -367,7 +366,7 @@ class GoogleLLMService(LLMService):
return generation_params
- def _maybe_unset_thinking_budget(self, generation_params: Dict[str, Any]):
+ def _maybe_unset_thinking_budget(self, generation_params: dict[str, Any]):
try:
# If we have an image model, we don't apply a thinking default.
if "image" in self._settings.model:
diff --git a/src/pipecat/services/google/rtvi.py b/src/pipecat/services/google/rtvi.py
index 738b0ab9d..2a829ba7d 100644
--- a/src/pipecat/services/google/rtvi.py
+++ b/src/pipecat/services/google/rtvi.py
@@ -11,7 +11,7 @@ including models for search responses and an observer for handling Google-specif
frame types.
"""
-from typing import List, Literal, Optional
+from typing import Literal
from pydantic import BaseModel
@@ -29,9 +29,9 @@ class RTVISearchResponseMessageData(BaseModel):
origins: List of search result origins with metadata.
"""
- search_result: Optional[str]
- rendered_content: Optional[str]
- origins: List[LLMSearchOrigin]
+ search_result: str | None
+ rendered_content: str | None
+ origins: list[LLMSearchOrigin]
class RTVIBotLLMSearchResponseMessage(BaseModel):
@@ -95,7 +95,7 @@ class GoogleRTVIProcessor(RTVIProcessor):
Creates a specific Google RTVI Observer.
"""
- def create_rtvi_observer(self, *, params: Optional[RTVIObserverParams] = None, **kwargs):
+ def create_rtvi_observer(self, *, params: RTVIObserverParams | None = None, **kwargs):
"""Creates a new RTVI Observer.
Args:
diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py
index 282389a3a..4665d6309 100644
--- a/src/pipecat/services/google/stt.py
+++ b/src/pipecat/services/google/stt.py
@@ -23,7 +23,8 @@ from pipecat.utils.tracing.service_decorators import traced_stt
# Suppress gRPC fork warnings
os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false"
-from typing import Any, AsyncGenerator, List, Optional, Union
+from collections.abc import AsyncGenerator
+from typing import Any
from loguru import logger
from pydantic import BaseModel, Field, field_validator
@@ -59,7 +60,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_google_stt_language(language: Language) -> Optional[str]:
+def language_to_google_stt_language(language: Language) -> str | None:
"""Maps Language enum to Google Speech-to-Text V2 language codes.
Args:
@@ -383,8 +384,8 @@ class GoogleSTTSettings(STTSettings):
enable_voice_activity_events: Detect voice activity in audio.
"""
- languages: List[Language] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- language_codes: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ languages: list[Language] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ language_codes: list[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
use_separate_recognition_per_channel: bool | _NotGiven = field(
default_factory=lambda: NOT_GIVEN
)
@@ -443,21 +444,21 @@ class GoogleSTTService(STTService):
enable_voice_activity_events: Detect voice activity in audio.
"""
- languages: Union[Language, List[Language]] = Field(default_factory=lambda: [Language.EN_US])
- model: Optional[str] = "latest_long"
- use_separate_recognition_per_channel: Optional[bool] = False
- enable_automatic_punctuation: Optional[bool] = True
- enable_spoken_punctuation: Optional[bool] = False
- enable_spoken_emojis: Optional[bool] = False
- profanity_filter: Optional[bool] = False
- enable_word_time_offsets: Optional[bool] = False
- enable_word_confidence: Optional[bool] = False
- enable_interim_results: Optional[bool] = True
- enable_voice_activity_events: Optional[bool] = False
+ languages: Language | list[Language] = Field(default_factory=lambda: [Language.EN_US])
+ model: str | None = "latest_long"
+ use_separate_recognition_per_channel: bool | None = False
+ enable_automatic_punctuation: bool | None = True
+ enable_spoken_punctuation: bool | None = False
+ enable_spoken_emojis: bool | None = False
+ profanity_filter: bool | None = False
+ enable_word_time_offsets: bool | None = False
+ enable_word_confidence: bool | None = False
+ enable_interim_results: bool | None = True
+ enable_voice_activity_events: bool | None = False
@field_validator("languages", mode="before")
@classmethod
- def validate_languages(cls, v) -> List[Language]:
+ def validate_languages(cls, v) -> list[Language]:
"""Ensure languages is always a list.
Args:
@@ -471,7 +472,7 @@ class GoogleSTTService(STTService):
return v
@property
- def language_list(self) -> List[Language]:
+ def language_list(self) -> list[Language]:
"""Get languages as a guaranteed list.
Returns:
@@ -483,13 +484,13 @@ class GoogleSTTService(STTService):
def __init__(
self,
*,
- credentials: Optional[str] = None,
- credentials_path: Optional[str] = None,
+ credentials: str | None = None,
+ credentials_path: str | None = None,
location: str = "global",
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = GOOGLE_TTFS_P99,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = GOOGLE_TTFS_P99,
**kwargs,
):
"""Initialize the Google STT service.
@@ -581,7 +582,7 @@ class GoogleSTTService(STTService):
client_options = ClientOptions(api_endpoint=f"{self._location}-speech.googleapis.com")
# Extract project ID and create client
- creds: Optional[service_account.Credentials] = None
+ creds: service_account.Credentials | None = None
if credentials:
json_account_info = json.loads(credentials)
self._project_id = json_account_info.get("project_id")
@@ -616,7 +617,7 @@ class GoogleSTTService(STTService):
"""
return True
- def language_to_service_language(self, language: Language | List[Language]) -> str | List[str]:
+ def language_to_service_language(self, language: Language | list[Language]) -> str | list[str]:
"""Convert Language enum(s) to Google STT language code(s).
Args:
@@ -629,7 +630,7 @@ class GoogleSTTService(STTService):
return [language_to_google_stt_language(lang) or "en-US" for lang in language]
return language_to_google_stt_language(language) or "en-US"
- def _get_language_codes(self) -> List[str]:
+ def _get_language_codes(self) -> list[str]:
"""Resolve the current language settings to Google STT language code strings.
Prefers ``languages`` (``Language`` enums) over the deprecated
@@ -651,7 +652,7 @@ class GoogleSTTService(STTService):
await self._disconnect()
await self._connect()
- async def set_languages(self, languages: List[Language]):
+ async def set_languages(self, languages: list[Language]):
"""Update the service's recognition languages.
.. deprecated:: 0.0.104
@@ -741,17 +742,17 @@ class GoogleSTTService(STTService):
async def update_options(
self,
*,
- languages: Optional[List[Language]] = None,
- model: Optional[str] = None,
- enable_automatic_punctuation: Optional[bool] = None,
- enable_spoken_punctuation: Optional[bool] = None,
- enable_spoken_emojis: Optional[bool] = None,
- profanity_filter: Optional[bool] = None,
- enable_word_time_offsets: Optional[bool] = None,
- enable_word_confidence: Optional[bool] = None,
- enable_interim_results: Optional[bool] = None,
- enable_voice_activity_events: Optional[bool] = None,
- location: Optional[str] = None,
+ languages: list[Language] | None = None,
+ model: str | None = None,
+ enable_automatic_punctuation: bool | None = None,
+ enable_spoken_punctuation: bool | None = None,
+ enable_spoken_emojis: bool | None = None,
+ profanity_filter: bool | None = None,
+ enable_word_time_offsets: bool | None = None,
+ enable_word_confidence: bool | None = None,
+ enable_interim_results: bool | None = None,
+ enable_voice_activity_events: bool | None = None,
+ location: str | None = None,
) -> None:
"""Update service options dynamically.
@@ -947,7 +948,7 @@ class GoogleSTTService(STTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[str] = None
+ self, transcript: str, is_final: bool, language: str | None = None
):
pass
diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py
index 81ede9ef8..f9fe1a87d 100644
--- a/src/pipecat/services/google/tts.py
+++ b/src/pipecat/services/google/tts.py
@@ -22,8 +22,9 @@ from pipecat.utils.tracing.service_decorators import traced_tts
# Suppress gRPC fork warnings
os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false"
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, List, Literal, Optional
+from typing import Any, Literal
from loguru import logger
from pydantic import BaseModel
@@ -58,7 +59,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_google_tts_language(language: Language) -> Optional[str]:
+def language_to_google_tts_language(language: Language) -> str | None:
"""Convert a Language enum to Google TTS language code.
Source:
@@ -217,7 +218,7 @@ def language_to_google_tts_language(language: Language) -> Optional[str]:
return resolve_language(language, LANGUAGE_MAP, use_base_code=False)
-def language_to_gemini_tts_language(language: Language) -> Optional[str]:
+def language_to_gemini_tts_language(language: Language) -> str | None:
"""Convert a Language enum to Gemini TTS language code.
Source:
@@ -575,25 +576,25 @@ class GoogleHttpTTSService(TTSService):
google_style: Google-specific voice style.
"""
- pitch: Optional[str] = None
- rate: Optional[str] = None
- speaking_rate: Optional[float] = None
- volume: Optional[str] = None
- emphasis: Optional[Literal["strong", "moderate", "reduced", "none"]] = None
- language: Optional[Language] = Language.EN
- gender: Optional[Literal["male", "female", "neutral"]] = None
- google_style: Optional[Literal["apologetic", "calm", "empathetic", "firm", "lively"]] = None
+ pitch: str | None = None
+ rate: str | None = None
+ speaking_rate: float | None = None
+ volume: str | None = None
+ emphasis: Literal["strong", "moderate", "reduced", "none"] | None = None
+ language: Language | None = Language.EN
+ gender: Literal["male", "female", "neutral"] | None = None
+ google_style: Literal["apologetic", "calm", "empathetic", "firm", "lively"] | None = None
def __init__(
self,
*,
- credentials: Optional[str] = None,
- credentials_path: Optional[str] = None,
- location: Optional[str] = None,
- voice_id: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ credentials: str | None = None,
+ credentials_path: str | None = None,
+ location: str | None = None,
+ voice_id: str | None = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initializes the Google HTTP TTS service.
@@ -675,7 +676,7 @@ class GoogleHttpTTSService(TTSService):
)
def _create_client(
- self, credentials: Optional[str], credentials_path: Optional[str]
+ self, credentials: str | None, credentials_path: str | None
) -> texttospeech_v1.TextToSpeechAsyncClient:
"""Create authenticated Google Text-to-Speech client.
@@ -689,7 +690,7 @@ class GoogleHttpTTSService(TTSService):
Raises:
ValueError: If no valid credentials are provided.
"""
- creds: Optional[service_account.Credentials] = None
+ creds: service_account.Credentials | None = None
if credentials:
# Use provided credentials JSON string
@@ -727,7 +728,7 @@ class GoogleHttpTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Google TTS language format.
Args:
@@ -874,7 +875,7 @@ class GoogleBaseTTSService(TTSService):
"""
def _create_client(
- self, credentials: Optional[str], credentials_path: Optional[str]
+ self, credentials: str | None, credentials_path: str | None
) -> texttospeech_v1.TextToSpeechAsyncClient:
"""Create authenticated Google Text-to-Speech client.
@@ -888,7 +889,7 @@ class GoogleBaseTTSService(TTSService):
Raises:
ValueError: If no valid credentials are provided.
"""
- creds: Optional[service_account.Credentials] = None
+ creds: service_account.Credentials | None = None
if credentials:
# Use provided credentials JSON string
@@ -926,7 +927,7 @@ class GoogleBaseTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Google TTS language format.
Args:
@@ -942,7 +943,7 @@ class GoogleBaseTTSService(TTSService):
streaming_config: texttospeech_v1.StreamingSynthesizeConfig,
text: str,
context_id: str,
- prompt: Optional[str] = None,
+ prompt: str | None = None,
) -> AsyncGenerator[Frame, None]:
"""Shared streaming synthesis logic.
@@ -1032,20 +1033,20 @@ class GoogleTTSService(GoogleBaseTTSService):
speaking_rate: The speaking rate, in the range [0.25, 2.0].
"""
- language: Optional[Language] = Language.EN
- speaking_rate: Optional[float] = None
+ language: Language | None = Language.EN
+ speaking_rate: float | None = None
def __init__(
self,
*,
- credentials: Optional[str] = None,
- credentials_path: Optional[str] = None,
- location: Optional[str] = None,
- voice_id: Optional[str] = None,
- voice_cloning_key: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ credentials: str | None = None,
+ credentials_path: str | None = None,
+ location: str | None = None,
+ voice_id: str | None = None,
+ voice_cloning_key: str | None = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initializes the Google streaming TTS service.
@@ -1249,22 +1250,22 @@ class GeminiTTSService(GoogleBaseTTSService):
speaker_configs: List of speaker configurations for multi-speaker mode.
"""
- language: Optional[Language] = Language.EN
- prompt: Optional[str] = None
+ language: Language | None = Language.EN
+ prompt: str | None = None
multi_speaker: bool = False
- speaker_configs: Optional[List[dict]] = None
+ speaker_configs: list[dict] | None = None
def __init__(
self,
*,
- model: Optional[str] = None,
- credentials: Optional[str] = None,
- credentials_path: Optional[str] = None,
- location: Optional[str] = None,
- voice_id: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ credentials: str | None = None,
+ credentials_path: str | None = None,
+ location: str | None = None,
+ voice_id: str | None = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initializes the Gemini TTS service.
@@ -1353,7 +1354,7 @@ class GeminiTTSService(GoogleBaseTTSService):
credentials, credentials_path
)
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Gemini TTS language format.
Args:
diff --git a/src/pipecat/services/google/utils.py b/src/pipecat/services/google/utils.py
index 9f712a5ad..3707fbc28 100644
--- a/src/pipecat/services/google/utils.py
+++ b/src/pipecat/services/google/utils.py
@@ -6,12 +6,12 @@
"""Utility functions for Google services."""
-from typing import Any, Dict, Optional, Union
+from typing import Any
from pipecat import version as pipecat_version
-def update_google_client_http_options(http_options: Optional[Union[Dict[str, Any], Any]]) -> Any:
+def update_google_client_http_options(http_options: dict[str, Any] | Any | None) -> Any:
"""Updates http_options with the x-goog-api-client header.
Args:
diff --git a/src/pipecat/services/google/vertex/llm.py b/src/pipecat/services/google/vertex/llm.py
index b8b83cb24..c5954d6d8 100644
--- a/src/pipecat/services/google/vertex/llm.py
+++ b/src/pipecat/services/google/vertex/llm.py
@@ -17,7 +17,6 @@ from dataclasses import dataclass
# Suppress gRPC fork warnings
os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false"
-from typing import Optional
from loguru import logger
@@ -64,17 +63,17 @@ class GoogleVertexLLMService(GoogleLLMService):
def __init__(
self,
*,
- credentials: Optional[str] = None,
- credentials_path: Optional[str] = None,
- model: Optional[str] = None,
+ credentials: str | None = None,
+ credentials_path: str | None = None,
+ model: str | None = None,
location: str = "us-east4",
project_id: str,
- params: Optional[GoogleLLMService.InputParams] = None,
- settings: Optional[Settings] = None,
- system_instruction: Optional[str] = None,
- tools: Optional[list] = None,
- tool_config: Optional[dict] = None,
- http_options: Optional[HttpOptions] = None,
+ params: GoogleLLMService.InputParams | None = None,
+ settings: Settings | None = None,
+ system_instruction: str | None = None,
+ tools: list | None = None,
+ tool_config: dict | None = None,
+ http_options: HttpOptions | None = None,
**kwargs,
):
"""Initializes the VertexLLMService.
@@ -186,7 +185,7 @@ class GoogleVertexLLMService(GoogleLLMService):
)
@staticmethod
- def _get_credentials(credentials: Optional[str], credentials_path: Optional[str]):
+ def _get_credentials(credentials: str | None, credentials_path: str | None):
"""Retrieve Credentials using Google service account credentials.
Supports multiple authentication methods:
@@ -204,7 +203,7 @@ class GoogleVertexLLMService(GoogleLLMService):
Raises:
ValueError: If no valid credentials are provided or found.
"""
- creds: Optional[service_account.Credentials] = None
+ creds: service_account.Credentials | None = None
if credentials:
# Parse and load credentials from JSON string
diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py
index 5dea2c824..223941bd0 100644
--- a/src/pipecat/services/gradium/stt.py
+++ b/src/pipecat/services/gradium/stt.py
@@ -13,8 +13,9 @@ WebSocket API for streaming audio transcription.
import asyncio
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
from pydantic import BaseModel
@@ -78,7 +79,7 @@ def _input_format_from_encoding(encoding: str, sample_rate: int) -> str:
return encoding
-def language_to_gradium_language(language: Language) -> Optional[str]:
+def language_to_gradium_language(language: Language) -> str | None:
"""Convert a Language enum to Gradium's language code format.
Args:
@@ -109,7 +110,7 @@ class GradiumSTTSettings(STTSettings):
Default is 10 (800ms). Lower values like 7-8 give faster response.
"""
- delay_in_frames: Optional[int] | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ delay_in_frames: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
class GradiumSTTService(WebsocketSTTService):
@@ -139,8 +140,8 @@ class GradiumSTTService(WebsocketSTTService):
Default is 10 (800ms). Lower values like 7-8 give faster response.
"""
- language: Optional[Language] = None
- delay_in_frames: Optional[int] = None
+ language: Language | None = None
+ delay_in_frames: int | None = None
def __init__(
self,
@@ -148,11 +149,11 @@ class GradiumSTTService(WebsocketSTTService):
api_key: str,
api_endpoint_base_url: str = "wss://eu.api.gradium.ai/api/speech/asr",
encoding: str = "pcm",
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- json_config: Optional[str] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = GRADIUM_TTFS_P99,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ json_config: str | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = GRADIUM_TTFS_P99,
**kwargs,
):
"""Initialize the Gradium STT service.
@@ -239,7 +240,7 @@ class GradiumSTTService(WebsocketSTTService):
# and pushed as a TranscriptionFrame.
self._accumulated_text: list[str] = []
self._flush_counter = 0
- self._transcript_aggregation_task: Optional[asyncio.Task] = None
+ self._transcript_aggregation_task: asyncio.Task | None = None
def can_generate_metrics(self) -> bool:
"""Check if the service can generate metrics.
diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py
index e8d69a7da..12cc3ddc1 100644
--- a/src/pipecat/services/gradium/tts.py
+++ b/src/pipecat/services/gradium/tts.py
@@ -6,8 +6,9 @@
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
from pydantic import BaseModel
@@ -60,18 +61,18 @@ class GradiumTTSService(WebsocketTTSService):
temp: Temperature to be used for generation, defaults to 0.6.
"""
- temp: Optional[float] = 0.6
+ temp: float | None = 0.6
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
url: str = "wss://eu.api.gradium.ai/api/speech/tts",
- model: Optional[str] = None,
- json_config: Optional[str] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ json_config: str | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Gradium TTS service.
@@ -280,7 +281,7 @@ class GradiumTTSService(WebsocketTTSService):
return self._websocket
raise Exception("Websocket not connected")
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis."""
flush_id = context_id or self.get_active_audio_context_id()
if not flush_id or not self._websocket:
diff --git a/src/pipecat/services/groq/llm.py b/src/pipecat/services/groq/llm.py
index d36b52ab8..af6dc193a 100644
--- a/src/pipecat/services/groq/llm.py
+++ b/src/pipecat/services/groq/llm.py
@@ -7,7 +7,6 @@
"""Groq LLM Service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -37,8 +36,8 @@ class GroqLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.groq.com/openai/v1",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize Groq LLM service.
diff --git a/src/pipecat/services/groq/stt.py b/src/pipecat/services/groq/stt.py
index 3f6c23774..444fb5094 100644
--- a/src/pipecat/services/groq/stt.py
+++ b/src/pipecat/services/groq/stt.py
@@ -7,7 +7,6 @@
"""Groq speech-to-text service implementation using Whisper models."""
from dataclasses import dataclass
-from typing import Optional
from pipecat.services.stt_latency import GROQ_TTFS_P99
from pipecat.services.whisper.base_stt import (
@@ -41,14 +40,14 @@ class GroqSTTService(BaseWhisperSTTService):
def __init__(
self,
*,
- model: Optional[str] = None,
- api_key: Optional[str] = None,
+ model: str | None = None,
+ api_key: str | None = None,
base_url: str = "https://api.groq.com/openai/v1",
- language: Optional[Language] = None,
- prompt: Optional[str] = None,
- temperature: Optional[float] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = GROQ_TTFS_P99,
+ language: Language | None = None,
+ prompt: str | None = None,
+ temperature: float | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = GROQ_TTFS_P99,
**kwargs,
):
"""Initialize Groq STT service.
diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py
index 00ff3ef84..49a4d7930 100644
--- a/src/pipecat/services/groq/tts.py
+++ b/src/pipecat/services/groq/tts.py
@@ -8,8 +8,8 @@
import io
import wave
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import AsyncGenerator, Optional
from loguru import logger
from pydantic import BaseModel
@@ -65,8 +65,8 @@ class GroqTTSService(TTSService):
speed: Speech speed multiplier. Defaults to 1.0.
"""
- language: Optional[Language] = Language.EN
- speed: Optional[float] = 1.0
+ language: Language | None = Language.EN
+ speed: float | None = 1.0
GROQ_SAMPLE_RATE = 48000 # Groq TTS only supports 48kHz sample rate
@@ -75,11 +75,11 @@ class GroqTTSService(TTSService):
*,
api_key: str,
output_format: str = "wav",
- params: Optional[InputParams] = None,
- model_name: Optional[str] = None,
- voice_id: Optional[str] = None,
- sample_rate: Optional[int] = GROQ_SAMPLE_RATE,
- settings: Optional[Settings] = None,
+ params: InputParams | None = None,
+ model_name: str | None = None,
+ voice_id: str | None = None,
+ sample_rate: int | None = GROQ_SAMPLE_RATE,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize Groq TTS service.
diff --git a/src/pipecat/services/heygen/api_interactive_avatar.py b/src/pipecat/services/heygen/api_interactive_avatar.py
index 26d2553ce..c0cbfb1cf 100644
--- a/src/pipecat/services/heygen/api_interactive_avatar.py
+++ b/src/pipecat/services/heygen/api_interactive_avatar.py
@@ -9,8 +9,8 @@
API to communicate with HeyGen Streaming API.
"""
-from enum import Enum
-from typing import Any, Dict, Literal, Optional
+from enum import StrEnum
+from typing import Any, Literal
import aiohttp
from loguru import logger
@@ -19,7 +19,7 @@ from pydantic import BaseModel, Field
from pipecat.services.heygen.base_api import BaseAvatarApi, StandardSessionResponse
-class AvatarQuality(str, Enum):
+class AvatarQuality(StrEnum):
"""Enum representing different avatar quality levels."""
low = "low"
@@ -27,14 +27,14 @@ class AvatarQuality(str, Enum):
high = "high"
-class VideoEncoding(str, Enum):
+class VideoEncoding(StrEnum):
"""Enum representing the video encoding."""
H264 = "H264"
VP8 = "VP8"
-class VoiceEmotion(str, Enum):
+class VoiceEmotion(StrEnum):
"""Enum representing different voice emotion types."""
EXCITED = "excited"
@@ -55,11 +55,11 @@ class ElevenLabsSettings(BaseModel):
use_speaker_boost (Optional[bool]): Flag to enable speaker boost.
"""
- stability: Optional[float] = None
- similarity_boost: Optional[float] = None
- model_id: Optional[str] = None
- style: Optional[int] = None
- use_speaker_boost: Optional[bool] = None
+ stability: float | None = None
+ similarity_boost: float | None = None
+ model_id: str | None = None
+ style: int | None = None
+ use_speaker_boost: bool | None = None
class VoiceSettings(BaseModel):
@@ -72,10 +72,10 @@ class VoiceSettings(BaseModel):
elevenlabs_settings (Optional[ElevenLabsSettings]): Details for ElevenLabs configuration.
"""
- voice_id: Optional[str] = Field(None, alias="voiceId")
- rate: Optional[float] = None
- emotion: Optional[VoiceEmotion] = None
- elevenlabs_settings: Optional[ElevenLabsSettings] = Field(None, alias="elevenlabsSettings")
+ voice_id: str | None = Field(None, alias="voiceId")
+ rate: float | None = None
+ emotion: VoiceEmotion | None = None
+ elevenlabs_settings: ElevenLabsSettings | None = Field(None, alias="elevenlabsSettings")
class NewSessionRequest(BaseModel):
@@ -93,15 +93,15 @@ class NewSessionRequest(BaseModel):
activity_idle_timeout (Optional[int]): Timeout in seconds for activity-based idle detection.
"""
- quality: Optional[AvatarQuality] = None
- avatar_id: Optional[str] = None
- voice: Optional[VoiceSettings] = None
- video_encoding: Optional[VideoEncoding] = None
- knowledge_id: Optional[str] = None
- knowledge_base: Optional[str] = None
+ quality: AvatarQuality | None = None
+ avatar_id: str | None = None
+ voice: VoiceSettings | None = None
+ video_encoding: VideoEncoding | None = None
+ knowledge_id: str | None = None
+ knowledge_base: str | None = None
version: Literal["v2"] = "v2"
- disable_idle_timeout: Optional[bool] = None
- activity_idle_timeout: Optional[int] = None
+ disable_idle_timeout: bool | None = None
+ activity_idle_timeout: int | None = None
class HeyGenSession(BaseModel):
@@ -153,7 +153,7 @@ class HeyGenApi(BaseAvatarApi):
self.api_key = api_key
self.session = session
- async def _request(self, path: str, params: Dict[str, Any], expect_data: bool = True) -> Any:
+ async def _request(self, path: str, params: dict[str, Any], expect_data: bool = True) -> Any:
"""Make a POST request to the HeyGen API.
Args:
diff --git a/src/pipecat/services/heygen/api_liveavatar.py b/src/pipecat/services/heygen/api_liveavatar.py
index 7b9119542..14e941852 100644
--- a/src/pipecat/services/heygen/api_liveavatar.py
+++ b/src/pipecat/services/heygen/api_liveavatar.py
@@ -9,8 +9,8 @@
API to communicate with LiveAvatar Streaming API.
"""
-from enum import Enum
-from typing import Any, Dict, Optional
+from enum import StrEnum
+from typing import Any
import aiohttp
from loguru import logger
@@ -28,8 +28,8 @@ class AvatarPersona(BaseModel):
language (str): Language code for the avatar (default: "en").
"""
- voice_id: Optional[str] = None
- context_id: Optional[str] = None
+ voice_id: str | None = None
+ context_id: str | None = None
language: str = "en"
@@ -47,14 +47,14 @@ class CustomSDKLiveKitConfig(BaseModel):
livekit_client_token: str
-class VideoEncoding(str, Enum):
+class VideoEncoding(StrEnum):
"""Enum representing the video encoding."""
H264 = "H264"
VP8 = "VP8"
-class VideoQuality(str, Enum):
+class VideoQuality(StrEnum):
"""Enum representing different avatar quality levels."""
low = "low"
@@ -84,10 +84,10 @@ class LiveAvatarNewSessionRequest(BaseModel):
mode: str = "LITE"
avatar_id: str
- video_settings: Optional[VideoSettings] = VideoSettings(encoding=VideoEncoding.VP8)
- is_sandbox: Optional[bool] = False
- avatar_persona: Optional[AvatarPersona] = None
- livekit_config: Optional[CustomSDKLiveKitConfig] = None
+ video_settings: VideoSettings | None = VideoSettings(encoding=VideoEncoding.VP8)
+ is_sandbox: bool | None = False
+ avatar_persona: AvatarPersona | None = None
+ livekit_config: CustomSDKLiveKitConfig | None = None
class SessionTokenData(BaseModel):
@@ -186,8 +186,8 @@ class LiveAvatarApi(BaseAvatarApi):
self,
method: str,
path: str,
- params: Optional[Dict[str, Any]] = None,
- bearer_token: Optional[str] = None,
+ params: dict[str, Any] | None = None,
+ bearer_token: str | None = None,
) -> Any:
"""Make a request to the LiveAvatar API.
diff --git a/src/pipecat/services/heygen/client.py b/src/pipecat/services/heygen/client.py
index 7f99502b6..cc6451a25 100644
--- a/src/pipecat/services/heygen/client.py
+++ b/src/pipecat/services/heygen/client.py
@@ -16,8 +16,8 @@ import base64
import json
import time
import uuid
+from collections.abc import Awaitable, Callable
from enum import Enum
-from typing import Awaitable, Callable, Optional, Union
import aiohttp
from loguru import logger
@@ -93,8 +93,8 @@ class HeyGenClient:
api_key: str,
session: aiohttp.ClientSession,
params: TransportParams,
- session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None,
- service_type: Optional[ServiceType] = None,
+ session_request: LiveAvatarNewSessionRequest | NewSessionRequest | None = None,
+ service_type: ServiceType | None = None,
callbacks: HeyGenCallbacks,
connect_as_user: bool = False,
) -> None:
@@ -149,16 +149,16 @@ class HeyGenClient:
else:
self._api = LiveAvatarApi(api_key, session=session)
- self._heyGen_session: Optional[StandardSessionResponse] = None
+ self._heyGen_session: StandardSessionResponse | None = None
self._websocket = None
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
self._params = params
self._in_sample_rate = 0
self._out_sample_rate = 0
self._connected = False
self._session_request = session_request
self._callbacks = callbacks
- self._event_queue: Optional[asyncio.Queue] = None
+ self._event_queue: asyncio.Queue | None = None
self._event_task = None
# Currently supporting to capture the audio and video from a single participant
self._video_task = None
diff --git a/src/pipecat/services/heygen/video.py b/src/pipecat/services/heygen/video.py
index 9a20f35ef..11bc06ca2 100644
--- a/src/pipecat/services/heygen/video.py
+++ b/src/pipecat/services/heygen/video.py
@@ -13,7 +13,6 @@ audio/video streaming capabilities through the HeyGen API.
import asyncio
from dataclasses import dataclass
-from typing import Optional, Union
import aiohttp
from loguru import logger
@@ -90,9 +89,9 @@ class HeyGenVideoService(AIService):
*,
api_key: str,
session: aiohttp.ClientSession,
- session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None,
- service_type: Optional[ServiceType] = None,
- settings: Optional[Settings] = None,
+ session_request: LiveAvatarNewSessionRequest | NewSessionRequest | None = None,
+ service_type: ServiceType | None = None,
+ settings: Settings | None = None,
**kwargs,
) -> None:
"""Initialize the HeyGen video service.
@@ -113,8 +112,8 @@ class HeyGenVideoService(AIService):
super().__init__(settings=default_settings, **kwargs)
self._api_key = api_key
self._session = session
- self._client: Optional[HeyGenClient] = None
- self._send_task: Optional[asyncio.Task] = None
+ self._client: HeyGenClient | None = None
+ self._send_task: asyncio.Task | None = None
self._resampler = create_stream_resampler()
self._is_interrupting = False
self._session_request = session_request
@@ -367,7 +366,7 @@ class HeyGenVideoService(AIService):
await self._client.agent_speak(bytes(chunk), self._event_id)
self._queue.task_done()
- except asyncio.TimeoutError:
+ except TimeoutError:
# Bot has stopped speaking
if self._event_id is not None:
await self._client.agent_speak_end(self._event_id)
diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py
index ea43a530d..ff82a9fc2 100644
--- a/src/pipecat/services/hume/tts.py
+++ b/src/pipecat/services/hume/tts.py
@@ -7,8 +7,9 @@
import base64
import os
import warnings
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
import httpx
from loguru import logger
@@ -93,18 +94,18 @@ class HumeTTSService(TTSService):
trailing_silence: Seconds of silence to append at the end (0-5).
"""
- description: Optional[str] = None
- speed: Optional[float] = None
- trailing_silence: Optional[float] = None
+ description: str | None = None
+ speed: float | None = None
+ trailing_silence: float | None = None
def __init__(
self,
*,
- api_key: Optional[str] = None,
- voice_id: Optional[str] = None,
- params: Optional[InputParams] = None,
- sample_rate: Optional[int] = HUME_SAMPLE_RATE,
- settings: Optional[Settings] = None,
+ api_key: str | None = None,
+ voice_id: str | None = None,
+ params: InputParams | None = None,
+ sample_rate: int | None = HUME_SAMPLE_RATE,
+ settings: Settings | None = None,
**kwargs,
) -> None:
"""Initialize the HumeTTSService.
diff --git a/src/pipecat/services/image_service.py b/src/pipecat/services/image_service.py
index f99909444..df8ef66fe 100644
--- a/src/pipecat/services/image_service.py
+++ b/src/pipecat/services/image_service.py
@@ -11,7 +11,7 @@ text prompts into images.
"""
from abc import abstractmethod
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
from pipecat.frames.frames import Frame, TextFrame
from pipecat.processors.frame_processor import FrameDirection
@@ -27,7 +27,7 @@ class ImageGenService(AIService):
generation functionality using their specific AI service.
"""
- def __init__(self, *, settings: Optional[ImageGenSettings] = None, **kwargs):
+ def __init__(self, *, settings: ImageGenSettings | None = None, **kwargs):
"""Initialize the image generation service.
Args:
diff --git a/src/pipecat/services/inworld/realtime/events.py b/src/pipecat/services/inworld/realtime/events.py
index 55f7f28bd..6ee3f05b3 100644
--- a/src/pipecat/services/inworld/realtime/events.py
+++ b/src/pipecat/services/inworld/realtime/events.py
@@ -12,7 +12,7 @@ https://docs.inworld.ai/api-reference/realtimeAPI/realtime/realtime-websocket
import json
import uuid
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
@@ -84,10 +84,10 @@ class TurnDetection(BaseModel):
interrupt_response: Whether user speech interrupts the current response.
"""
- type: Optional[Literal["server_vad", "semantic_vad"]] = "semantic_vad"
- eagerness: Optional[str] = None
- create_response: Optional[bool] = None
- interrupt_response: Optional[bool] = None
+ type: Literal["server_vad", "semantic_vad"] | None = "semantic_vad"
+ eagerness: str | None = None
+ create_response: bool | None = None
+ interrupt_response: bool | None = None
class InputTranscription(BaseModel):
@@ -97,7 +97,7 @@ class InputTranscription(BaseModel):
model: The STT model to use for transcription.
"""
- model: Optional[str] = None
+ model: str | None = None
#
@@ -114,9 +114,9 @@ class AudioInput(BaseModel):
turn_detection: Configuration for turn detection.
"""
- format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None
- transcription: Optional[InputTranscription] = None
- turn_detection: Optional[TurnDetection] = None
+ format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None
+ transcription: InputTranscription | None = None
+ turn_detection: TurnDetection | None = None
class AudioOutput(BaseModel):
@@ -128,9 +128,9 @@ class AudioOutput(BaseModel):
voice: The voice ID to use (e.g. "Sarah", "Clive").
"""
- format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None
- model: Optional[str] = None
- voice: Optional[str] = None
+ format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None
+ model: str | None = None
+ voice: str | None = None
class AudioConfiguration(BaseModel):
@@ -141,8 +141,8 @@ class AudioConfiguration(BaseModel):
output: Configuration for output audio.
"""
- input: Optional[AudioInput] = None
- output: Optional[AudioOutput] = None
+ input: AudioInput | None = None
+ output: AudioOutput | None = None
#
@@ -163,11 +163,11 @@ class FunctionTool(BaseModel):
type: Literal["function"] = "function"
name: str
description: str
- parameters: Dict[str, Any]
+ parameters: dict[str, Any]
# Union type for Inworld tools
-InworldTool = Union[FunctionTool, Dict[str, Any]]
+InworldTool = FunctionTool | dict[str, Any]
#
@@ -191,15 +191,15 @@ class SessionProperties(BaseModel):
# Needed to support ToolSchema in tools field.
model_config = ConfigDict(arbitrary_types_allowed=True)
- type: Optional[str] = "realtime"
- model: Optional[str] = None
- instructions: Optional[str] = None
- temperature: Optional[float] = None
- output_modalities: Optional[List[str]] = None
- audio: Optional[AudioConfiguration] = None
+ type: str | None = "realtime"
+ model: str | None = None
+ instructions: str | None = None
+ temperature: float | None = None
+ output_modalities: list[str] | None = None
+ audio: AudioConfiguration | None = None
# Tools can be ToolsSchema when provided by user, or list of dicts for API
- tools: Optional[ToolsSchema | List[InworldTool]] = None
- provider_data: Optional[Dict[str, Any]] = None
+ tools: ToolsSchema | list[InworldTool] | None = None
+ provider_data: dict[str, Any] | None = None
#
@@ -218,9 +218,9 @@ class ItemContent(BaseModel):
"""
type: Literal["text", "audio", "input_text", "input_audio", "output_text", "output_audio"]
- text: Optional[str] = None
- audio: Optional[str] = None # base64-encoded audio
- transcript: Optional[str] = None
+ text: str | None = None
+ audio: str | None = None # base64-encoded audio
+ transcript: str | None = None
class ConversationItem(BaseModel):
@@ -240,15 +240,15 @@ class ConversationItem(BaseModel):
"""
id: str = Field(default_factory=lambda: str(uuid.uuid4().hex))
- object: Optional[Literal["realtime.item"]] = None
+ object: Literal["realtime.item"] | None = None
type: Literal["message", "function_call", "function_call_output"]
- status: Optional[Literal["completed", "in_progress", "incomplete"]] = None
- role: Optional[Literal["user", "assistant", "system", "tool"]] = None
- content: Optional[List[ItemContent]] = None
- call_id: Optional[str] = None
- name: Optional[str] = None
- arguments: Optional[str] = None
- output: Optional[str] = None
+ status: Literal["completed", "in_progress", "incomplete"] | None = None
+ role: Literal["user", "assistant", "system", "tool"] | None = None
+ content: list[ItemContent] | None = None
+ call_id: str | None = None
+ name: str | None = None
+ arguments: str | None = None
+ output: str | None = None
class RealtimeConversation(BaseModel):
@@ -270,7 +270,7 @@ class ResponseProperties(BaseModel):
modalities: Output modalities for the response (text, audio, or both).
"""
- modalities: Optional[List[Literal["text", "audio"]]] = ["text", "audio"]
+ modalities: list[Literal["text", "audio"]] | None = ["text", "audio"]
#
@@ -289,11 +289,11 @@ class RealtimeError(BaseModel):
event_id: Event ID associated with the error, if applicable.
"""
- type: Optional[str] = None
- code: Optional[str] = ""
+ type: str | None = None
+ code: str | None = ""
message: str
- param: Optional[str] = None
- event_id: Optional[str] = None
+ param: str | None = None
+ event_id: str | None = None
#
@@ -367,7 +367,7 @@ class ConversationItemCreateEvent(ClientEvent):
"""
type: Literal["conversation.item.create"] = "conversation.item.create"
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item: ConversationItem
@@ -380,7 +380,7 @@ class ResponseCreateEvent(ClientEvent):
"""
type: Literal["response.create"] = "response.create"
- response: Optional[ResponseProperties] = None
+ response: ResponseProperties | None = None
class ResponseCancelEvent(ClientEvent):
@@ -423,7 +423,7 @@ class SessionCreatedEvent(ServerEvent):
"""
type: Literal["session.created"]
- session: Optional[SessionProperties] = None
+ session: SessionProperties | None = None
class SessionUpdatedEvent(ServerEvent):
@@ -462,7 +462,7 @@ class ConversationItemAdded(ServerEvent):
"""
type: Literal["conversation.item.added"]
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item: ConversationItem
@@ -492,7 +492,7 @@ class ConversationItemInputAudioTranscriptionDelta(ServerEvent):
type: Literal["conversation.item.input_audio_transcription.delta"]
item_id: str
- content_index: Optional[int] = None
+ content_index: int | None = None
delta: str
@@ -534,7 +534,7 @@ class InputAudioBufferCommitted(ServerEvent):
"""
type: Literal["input_audio_buffer.committed"]
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item_id: str
@@ -653,11 +653,11 @@ class ResponseFunctionCallArgumentsDelta(ServerEvent):
"""
type: Literal["response.function_call_arguments.delta"]
- response_id: Optional[str] = None
- item_id: Optional[str] = None
+ response_id: str | None = None
+ item_id: str | None = None
call_id: str
delta: str
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
class ResponseFunctionCallArgumentsDone(ServerEvent):
@@ -673,7 +673,7 @@ class ResponseFunctionCallArgumentsDone(ServerEvent):
type: Literal["response.function_call_arguments.done"]
call_id: str
- name: Optional[str] = None
+ name: str | None = None
arguments: str
@@ -686,9 +686,9 @@ class Usage(BaseModel):
output_tokens: Number of output tokens used.
"""
- total_tokens: Optional[int] = None
- input_tokens: Optional[int] = None
- output_tokens: Optional[int] = None
+ total_tokens: int | None = None
+ input_tokens: int | None = None
+ output_tokens: int | None = None
class Response(BaseModel):
@@ -705,9 +705,9 @@ class Response(BaseModel):
id: str
object: Literal["realtime.response"]
status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"]
- status_details: Optional[Any] = None
- output: List[ConversationItem]
- usage: Optional[Usage] = None
+ status_details: Any | None = None
+ output: list[ConversationItem]
+ usage: Usage | None = None
class ResponseDone(ServerEvent):
@@ -721,7 +721,7 @@ class ResponseDone(ServerEvent):
type: Literal["response.done"]
response: Response
- usage: Optional[Usage] = None
+ usage: Usage | None = None
class ResponseOutputItemDone(ServerEvent):
@@ -749,7 +749,7 @@ class ContentPart(BaseModel):
"""
type: str
- transcript: Optional[str] = None
+ transcript: str | None = None
class ResponseContentPartAdded(ServerEvent):
diff --git a/src/pipecat/services/inworld/realtime/llm.py b/src/pipecat/services/inworld/realtime/llm.py
index 79ee186ca..0c9ea623d 100644
--- a/src/pipecat/services/inworld/realtime/llm.py
+++ b/src/pipecat/services/inworld/realtime/llm.py
@@ -14,9 +14,10 @@ import base64
import json
import time
import urllib.parse
+from collections.abc import Mapping
from dataclasses import dataclass, field
from dataclasses import fields as dataclass_fields
-from typing import Any, Dict, Literal, Mapping, Optional, Type
+from typing import Any, Literal
from loguru import logger
@@ -117,7 +118,7 @@ class InworldRealtimeLLMSettings(LLMSettings):
# -- apply_update override -----------------------------------------------
- def apply_update(self, delta: "InworldRealtimeLLMService.Settings") -> Dict[str, Any]:
+ def apply_update(self, delta: "InworldRealtimeLLMService.Settings") -> dict[str, Any]:
"""Merge a delta, keeping ``model``/``system_instruction`` in sync with SP.
When the delta contains ``session_properties``, it **replaces** the
@@ -147,7 +148,7 @@ class InworldRealtimeLLMSettings(LLMSettings):
@classmethod
def from_mapping(
- cls: Type["InworldRealtimeLLMService.Settings"], settings: Mapping[str, Any]
+ cls: type["InworldRealtimeLLMService.Settings"], settings: Mapping[str, Any]
) -> "InworldRealtimeLLMService.Settings":
"""Build a delta from a plain dict, routing SP keys into ``session_properties``.
@@ -157,9 +158,9 @@ class InworldRealtimeLLMSettings(LLMSettings):
"""
own_field_names = {f.name for f in dataclass_fields(cls)} - {"extra"}
- top: Dict[str, Any] = {}
- sp_dict: Dict[str, Any] = {}
- extra: Dict[str, Any] = {}
+ top: dict[str, Any] = {}
+ sp_dict: dict[str, Any] = {}
+ extra: dict[str, Any] = {}
sp_keys = set(events.SessionProperties.model_fields.keys()) - {"model"}
@@ -249,13 +250,13 @@ class InworldRealtimeLLMService(LLMService):
self,
*,
api_key: str,
- llm_model: Optional[str] = None,
- voice: Optional[str] = None,
- tts_model: Optional[str] = None,
- stt_model: Optional[str] = None,
+ llm_model: str | None = None,
+ voice: str | None = None,
+ tts_model: str | None = None,
+ stt_model: str | None = None,
base_url: str = "wss://api.inworld.ai/api/v1/realtime/session",
auth_type: Literal["basic", "bearer"] = "basic",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
start_audio_paused: bool = False,
**kwargs,
):
@@ -375,7 +376,7 @@ class InworldRealtimeLLMService(LLMService):
"""
self._audio_input_paused = paused
- def _get_configured_sample_rate(self, direction: str) -> Optional[int]:
+ def _get_configured_sample_rate(self, direction: str) -> int | None:
"""Get manually configured sample rate for input or output.
Args:
diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py
index 215c0f747..65922e3a1 100644
--- a/src/pipecat/services/inworld/tts.py
+++ b/src/pipecat/services/inworld/tts.py
@@ -17,19 +17,13 @@ import asyncio
import base64
import json
import uuid
+from collections.abc import AsyncGenerator, Mapping
from dataclasses import dataclass, field
from typing import (
Any,
- AsyncGenerator,
ClassVar,
- Dict,
- List,
Literal,
- Mapping,
- Optional,
Self,
- Set,
- Tuple,
)
import aiohttp
@@ -81,7 +75,7 @@ class InworldTTSSettings(TTSSettings):
speaking_rate: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- _aliases: ClassVar[Dict[str, str]] = {
+ _aliases: ClassVar[dict[str, str]] = {
"voiceId": "voice",
"modelId": "model",
}
@@ -118,23 +112,23 @@ class InworldHttpTTSService(TTSService):
timestamp_transport_strategy: The strategy to use for timestamp transport.
"""
- temperature: Optional[float] = None
- speaking_rate: Optional[float] = None
- timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC"
+ temperature: float | None = None
+ speaking_rate: float | None = None
+ timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC"
def __init__(
self,
*,
api_key: str,
aiohttp_session: aiohttp.ClientSession,
- voice_id: Optional[str] = None,
- model: Optional[str] = None,
+ voice_id: str | None = None,
+ model: str | None = None,
streaming: bool = True,
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
encoding: str = "LINEAR16",
- timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC",
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Inworld TTS service.
@@ -255,8 +249,8 @@ class InworldHttpTTSService(TTSService):
def _calculate_word_times(
self,
- timestamp_info: Dict[str, Any],
- ) -> Tuple[List[Tuple[str, float]], float]:
+ timestamp_info: dict[str, Any],
+ ) -> tuple[list[tuple[str, float]], float]:
"""Calculate word timestamps from Inworld HTTP API word-level response.
Note: Inworld HTTP provides timestamps that reset for each request.
@@ -269,7 +263,7 @@ class InworldHttpTTSService(TTSService):
Tuple of (word_times, chunk_end_time) where chunk_end_time is the
end time of the last word in this chunk (not cumulative).
"""
- word_times: List[Tuple[str, float]] = []
+ word_times: list[tuple[str, float]] = []
chunk_end_time = 0.0
alignment = timestamp_info.get("wordAlignment", {})
@@ -534,30 +528,30 @@ class InworldTTSService(WebsocketTTSService):
timestamp_transport_strategy: The strategy to use for timestamp transport.
"""
- temperature: Optional[float] = None
- speaking_rate: Optional[float] = None
- apply_text_normalization: Optional[str] = None
- max_buffer_delay_ms: Optional[int] = None
- buffer_char_threshold: Optional[int] = None
- auto_mode: Optional[bool] = True
- timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC"
+ temperature: float | None = None
+ speaking_rate: float | None = None
+ apply_text_normalization: str | None = None
+ max_buffer_delay_ms: int | None = None
+ buffer_char_threshold: int | None = None
+ auto_mode: bool | None = True
+ timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC"
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
- model: Optional[str] = None,
+ voice_id: str | None = None,
+ model: str | None = None,
url: str = "wss://api.inworld.ai/tts/v1/voice:streamBidirectional",
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
encoding: str = "LINEAR16",
- auto_mode: Optional[bool] = None,
- apply_text_normalization: Optional[str] = None,
- timestamp_transport_strategy: Optional[Literal["ASYNC", "SYNC"]] = "ASYNC",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- aggregate_sentences: Optional[bool] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
+ auto_mode: bool | None = None,
+ apply_text_normalization: str | None = None,
+ timestamp_transport_strategy: Literal["ASYNC", "SYNC"] | None = "ASYNC",
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ aggregate_sentences: bool | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
append_trailing_space: bool = True,
**kwargs: Any,
):
@@ -684,8 +678,8 @@ class InworldTTSService(WebsocketTTSService):
# Fallback tracking for when timestamps are not received. Without
# timestamps, interruptions commit the full text rather than only the
# portion that was spoken.
- self._context_texts: Dict[str, str] = {}
- self._contexts_with_timestamps: Set[str] = set()
+ self._context_texts: dict[str, str] = {}
+ self._contexts_with_timestamps: set[str] = set()
# Init-only config (not runtime-updatable).
self._audio_encoding = encoding
@@ -730,7 +724,7 @@ class InworldTTSService(WebsocketTTSService):
await super().cancel(frame)
await self._disconnect()
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio without closing the context.
This triggers synthesis of all accumulated text in the buffer while
@@ -758,7 +752,7 @@ class InworldTTSService(WebsocketTTSService):
except Exception as e:
logger.warning(f"{self}: Failed to pre-open context: {e}")
- def _calculate_word_times(self, timestamp_info: Dict[str, Any]) -> List[Tuple[str, float]]:
+ def _calculate_word_times(self, timestamp_info: dict[str, Any]) -> list[tuple[str, float]]:
"""Calculate word timestamps from Inworld WebSocket API response.
Adds cumulative time offset to maintain monotonically increasing timestamps
@@ -771,7 +765,7 @@ class InworldTTSService(WebsocketTTSService):
Returns:
List of (word, timestamp) tuples with cumulative offset applied.
"""
- word_times: List[Tuple[str, float]] = []
+ word_times: list[tuple[str, float]] = []
alignment = timestamp_info.get("wordAlignment", {})
words = alignment.get("words", [])
@@ -1079,7 +1073,7 @@ class InworldTTSService(WebsocketTTSService):
if self._settings.speaking_rate is not None:
audio_config["speakingRate"] = self._settings.speaking_rate
- create_config: Dict[str, Any] = {
+ create_config: dict[str, Any] = {
"voiceId": self._settings.voice,
"modelId": self._settings.model,
"audioConfig": audio_config,
diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py
index 4756d4e74..9e4dc1869 100644
--- a/src/pipecat/services/kokoro/tts.py
+++ b/src/pipecat/services/kokoro/tts.py
@@ -7,9 +7,9 @@
"""Kokoro TTS service implementation using kokoro-onnx."""
import os
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
from pathlib import Path
-from typing import AsyncGenerator, Optional
import numpy as np
from loguru import logger
@@ -119,11 +119,11 @@ class KokoroTTSService(TTSService):
def __init__(
self,
*,
- voice_id: Optional[str] = None,
- model_path: Optional[str] = None,
- voices_path: Optional[str] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ voice_id: str | None = None,
+ model_path: str | None = None,
+ voices_path: str | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Kokoro TTS service.
diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py
index 036818370..979d5c430 100644
--- a/src/pipecat/services/llm_service.py
+++ b/src/pipecat/services/llm_service.py
@@ -6,22 +6,17 @@
"""Base classes for Large Language Model services with function calling support."""
+from __future__ import annotations
+
import asyncio
import json
import uuid
import warnings
+from collections.abc import Awaitable, Callable, Mapping, Sequence
from dataclasses import dataclass
from typing import (
Any,
- Awaitable,
- Callable,
- Dict,
- List,
- Mapping,
- Optional,
Protocol,
- Sequence,
- Type,
)
from loguru import logger
@@ -86,7 +81,7 @@ class FunctionCallResultCallback(Protocol):
"""
async def __call__(
- self, result: Any, *, properties: Optional[FunctionCallResultProperties] = None
+ self, result: Any, *, properties: FunctionCallResultProperties | None = None
) -> None:
"""Call the result callback.
@@ -117,7 +112,7 @@ class FunctionCallParams:
function_name: str
tool_call_id: str
arguments: Mapping[str, Any]
- llm: "LLMService"
+ llm: LLMService
context: LLMContext
result_callback: FunctionCallResultCallback
@@ -139,10 +134,10 @@ class FunctionCallRegistryItem:
``function_call_timeout_secs`` for this specific function.
"""
- function_name: Optional[str]
- handler: FunctionCallHandler | "DirectFunctionWrapper"
+ function_name: str | None
+ handler: FunctionCallHandler | DirectFunctionWrapper
cancel_on_interruption: bool
- timeout_secs: Optional[float] = None
+ timeout_secs: float | None = None
@dataclass
@@ -168,8 +163,8 @@ class FunctionCallRunnerItem:
tool_call_id: str
arguments: Mapping[str, Any]
context: LLMContext
- run_llm: Optional[bool] = None
- group_id: Optional[str] = None
+ run_llm: bool | None = None
+ group_id: str | None = None
class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
@@ -207,15 +202,15 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
# OpenAILLMAdapter is used as the default adapter since it aligns with most LLM implementations.
# However, subclasses should override this with a more specific adapter when necessary.
- adapter_class: Type[BaseLLMAdapter] = OpenAILLMAdapter
+ adapter_class: type[BaseLLMAdapter] = OpenAILLMAdapter
def __init__(
self,
run_in_parallel: bool = True,
group_parallel_tools: bool = True,
- function_call_timeout_secs: Optional[float] = None,
+ function_call_timeout_secs: float | None = None,
enable_async_tool_cancellation: bool = False,
- settings: Optional[LLMSettings] = None,
+ settings: LLMSettings | None = None,
**kwargs,
):
"""Initialize the LLM service.
@@ -250,13 +245,13 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
self._enable_async_tool_cancellation: bool = enable_async_tool_cancellation
self._filter_incomplete_user_turns: bool = False
self._async_tool_cancellation_enabled: bool = False
- self._base_system_instruction: Optional[str] = None
+ self._base_system_instruction: str | None = None
self._adapter = self.adapter_class()
- self._functions: Dict[Optional[str], FunctionCallRegistryItem] = {}
- self._function_call_tasks: Dict[Optional[asyncio.Task], FunctionCallRunnerItem] = {}
- self._sequential_runner_task: Optional[asyncio.Task] = None
- self._skip_tts: Optional[bool] = None
- self._summary_task: Optional[asyncio.Task] = None
+ self._functions: dict[str | None, FunctionCallRegistryItem] = {}
+ self._function_call_tasks: dict[asyncio.Task | None, FunctionCallRunnerItem] = {}
+ self._sequential_runner_task: asyncio.Task | None = None
+ self._skip_tts: bool | None = None
+ self._summary_task: asyncio.Task | None = None
self._register_event_handler("on_function_calls_started")
self._register_event_handler("on_function_calls_cancelled")
@@ -284,9 +279,9 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
async def run_inference(
self,
context: LLMContext,
- max_tokens: Optional[int] = None,
- system_instruction: Optional[str] = None,
- ) -> Optional[str]:
+ max_tokens: int | None = None,
+ system_instruction: str | None = None,
+ ) -> str | None:
"""Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
Must be implemented by subclasses.
@@ -495,7 +490,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
self._generate_summary(frame),
timeout=timeout,
)
- except asyncio.TimeoutError:
+ except TimeoutError:
await self.push_error(error_msg=f"Context summarization timed out after {timeout}s")
except Exception as e:
error = f"Error generating context summary: {e}"
@@ -580,11 +575,11 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
def register_function(
self,
- function_name: Optional[str],
+ function_name: str | None,
handler: Any,
*,
cancel_on_interruption: bool = True,
- timeout_secs: Optional[float] = None,
+ timeout_secs: float | None = None,
):
"""Register a function handler for LLM function calls.
@@ -621,7 +616,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
handler: DirectFunction,
*,
cancel_on_interruption: bool = True,
- timeout_secs: Optional[float] = None,
+ timeout_secs: float | None = None,
):
"""Register a direct function handler for LLM function calls.
@@ -653,7 +648,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
timeout_secs=timeout_secs,
)
- def unregister_function(self, function_name: Optional[str]):
+ def unregister_function(self, function_name: str | None):
"""Remove a registered function handler.
Args:
@@ -806,12 +801,12 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
group_id=runner_item.group_id,
)
- timeout_task: Optional[asyncio.Task] = None
+ timeout_task: asyncio.Task | None = None
# Single callback for both intermediate updates and final results.
# Pass properties=FunctionCallResultProperties(is_final=False) for updates.
async def function_call_result_callback(
- result: Any, *, properties: Optional[FunctionCallResultProperties] = None
+ result: Any, *, properties: FunctionCallResultProperties | None = None
):
is_final = properties.is_final if properties else True
if not is_final and item.cancel_on_interruption:
@@ -940,7 +935,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
self._functions.pop(CANCEL_ASYNC_TOOL_NAME, None)
self._compose_system_instruction()
- async def _cancel_async_tool_call_handler(self, params: "FunctionCallParams"):
+ async def _cancel_async_tool_call_handler(self, params: FunctionCallParams):
"""Handle a ``cancel_async_tool_call`` invocation from the LLM.
Args:
@@ -948,7 +943,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
"""
logger.debug(f"{self}: cancel_async_tool_call invoked")
- tool_call_id: Optional[str] = params.arguments.get("tool_call_id")
+ tool_call_id: str | None = params.arguments.get("tool_call_id")
if not tool_call_id:
logger.warning(f"{self} cancel_async_tool_call called with no tool_call_id")
await params.result_callback({"cancelled": None})
@@ -1003,7 +998,7 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService):
if cancelled_items:
await self._call_event_handler("on_function_calls_cancelled", cancelled_items)
- async def _cancel_function_call(self, function_name: Optional[str]):
+ async def _cancel_function_call(self, function_name: str | None):
cancelled_tasks = set()
cancelled_items = []
for task, runner_item in self._function_call_tasks.items():
diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py
index 0df70fd19..8daa1902a 100644
--- a/src/pipecat/services/lmnt/tts.py
+++ b/src/pipecat/services/lmnt/tts.py
@@ -7,8 +7,9 @@
"""LMNT text-to-speech service implementation."""
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
@@ -36,7 +37,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_lmnt_language(language: Language) -> Optional[str]:
+def language_to_lmnt_language(language: Language) -> str | None:
"""Convert a Language enum to LMNT language code.
Args:
@@ -94,12 +95,12 @@ class LmntTTSService(InterruptibleTTSService):
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
- sample_rate: Optional[int] = None,
+ voice_id: str | None = None,
+ sample_rate: int | None = None,
language: Language = Language.EN,
output_format: str = "pcm_s16le",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the LMNT TTS service.
@@ -173,7 +174,7 @@ class LmntTTSService(InterruptibleTTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to LMNT service language format.
Args:
@@ -300,7 +301,7 @@ class LmntTTSService(InterruptibleTTSService):
return self._websocket
raise Exception("Websocket not connected")
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis."""
if not self._websocket or self._websocket.state is State.CLOSED:
return
diff --git a/src/pipecat/services/mcp_service.py b/src/pipecat/services/mcp_service.py
index 275569714..ee962cb86 100644
--- a/src/pipecat/services/mcp_service.py
+++ b/src/pipecat/services/mcp_service.py
@@ -7,8 +7,9 @@
"""MCP (Model Context Protocol) client for integrating external tools with LLMs."""
import json
+from collections.abc import Callable
from contextlib import AsyncExitStack
-from typing import Any, Callable, Dict, List, Optional, TypeAlias
+from typing import Any, TypeAlias
from loguru import logger
@@ -53,8 +54,8 @@ class MCPClient(BaseObject):
def __init__(
self,
server_params: ServerParameters,
- tools_filter: Optional[List[str]] = None,
- tools_output_filters: Optional[Dict[str, Callable[[Any], Any]]] = None,
+ tools_filter: list[str] | None = None,
+ tools_output_filters: dict[str, Callable[[Any], Any]] | None = None,
**kwargs,
):
"""Initialize the MCP client with server parameters.
@@ -70,8 +71,8 @@ class MCPClient(BaseObject):
self._server_params = server_params
self._tools_filter = tools_filter
self._tools_output_filters = tools_output_filters or {}
- self._exit_stack: Optional[AsyncExitStack] = None
- self._active_session: Optional[ClientSession] = None
+ self._exit_stack: AsyncExitStack | None = None
+ self._active_session: ClientSession | None = None
if not isinstance(
server_params,
@@ -195,7 +196,7 @@ class MCPClient(BaseObject):
llm.register_function(function_schema.name, self._tool_wrapper)
def _convert_mcp_schema_to_pipecat(
- self, tool_name: str, tool_schema: Dict[str, Any]
+ self, tool_name: str, tool_schema: dict[str, Any]
) -> FunctionSchema:
"""Convert an mcp tool schema to Pipecat's FunctionSchema format.
@@ -276,7 +277,7 @@ class MCPClient(BaseObject):
async def _list_tools_helper(self, session):
available_tools = await session.list_tools()
- tool_schemas: List[FunctionSchema] = []
+ tool_schemas: list[FunctionSchema] = []
logger.debug(f"Found {len(available_tools.tools)} available tools")
diff --git a/src/pipecat/services/mem0/memory.py b/src/pipecat/services/mem0/memory.py
index 91396cab4..5359284f5 100644
--- a/src/pipecat/services/mem0/memory.py
+++ b/src/pipecat/services/mem0/memory.py
@@ -12,7 +12,7 @@ historical information.
"""
import asyncio
-from typing import Any, Dict, List, Optional
+from typing import Any
from loguru import logger
from pydantic import BaseModel, Field
@@ -61,13 +61,13 @@ class Mem0MemoryService(FrameProcessor):
def __init__(
self,
*,
- api_key: Optional[str] = None,
- local_config: Optional[Dict[str, Any]] = None,
- user_id: Optional[str] = None,
- agent_id: Optional[str] = None,
- run_id: Optional[str] = None,
- params: Optional[InputParams] = None,
- host: Optional[str] = None,
+ api_key: str | None = None,
+ local_config: dict[str, Any] | None = None,
+ user_id: str | None = None,
+ agent_id: str | None = None,
+ run_id: str | None = None,
+ params: InputParams | None = None,
+ host: str | None = None,
):
"""Initialize the Mem0 memory service.
@@ -109,7 +109,7 @@ class Mem0MemoryService(FrameProcessor):
self.last_query = None
logger.info(f"Initialized Mem0MemoryService with {user_id=}, {agent_id=}, {run_id=}")
- async def get_memories(self) -> List[Dict[str, Any]]:
+ async def get_memories(self) -> list[dict[str, Any]]:
"""Retrieve all stored memories for the configured user/agent/run IDs.
This is a convenience method for accessing memories outside the pipeline,
@@ -148,7 +148,7 @@ class Mem0MemoryService(FrameProcessor):
logger.error(f"Error retrieving memories from Mem0: {e}")
return []
- async def _store_messages(self, messages: List[Dict[str, Any]]):
+ async def _store_messages(self, messages: list[dict[str, Any]]):
"""Store messages in Mem0.
Runs the blocking Mem0 API call in a background thread to avoid
@@ -174,7 +174,7 @@ class Mem0MemoryService(FrameProcessor):
except Exception as e:
logger.error(f"Error storing messages in Mem0: {e}")
- async def _retrieve_memories(self, query: str) -> List[Dict[str, Any]]:
+ async def _retrieve_memories(self, query: str) -> list[dict[str, Any]]:
"""Retrieve relevant memories from Mem0.
Runs the blocking Mem0 API call in a background thread to avoid
diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py
index a197bdcbe..25b7feade 100644
--- a/src/pipecat/services/minimax/tts.py
+++ b/src/pipecat/services/minimax/tts.py
@@ -11,8 +11,9 @@ for streaming text-to-speech synthesis.
"""
import json
+from collections.abc import AsyncGenerator, Mapping
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Mapping, Optional, Self
+from typing import Any, Self
import aiohttp
from loguru import logger
@@ -30,7 +31,7 @@ from pipecat.transcriptions.language import Language, resolve_language
from pipecat.utils.tracing.service_decorators import traced_tts
-def language_to_minimax_language(language: Language) -> Optional[str]:
+def language_to_minimax_language(language: Language) -> str | None:
"""Convert a Language enum to MiniMax language format.
Args:
@@ -162,14 +163,14 @@ class MiniMaxHttpTTSService(TTSService):
exclude_aggregated_audio: Whether to exclude aggregated audio in final chunk.
"""
- language: Optional[Language] = Language.EN
- speed: Optional[float] = 1.0
- volume: Optional[float] = 1.0
- pitch: Optional[int] = 0
- emotion: Optional[str] = None
- text_normalization: Optional[bool] = None
- latex_read: Optional[bool] = None
- exclude_aggregated_audio: Optional[bool] = None
+ language: Language | None = Language.EN
+ speed: float | None = 1.0
+ volume: float | None = 1.0
+ pitch: int | None = 0
+ emotion: str | None = None
+ text_normalization: bool | None = None
+ latex_read: bool | None = None
+ exclude_aggregated_audio: bool | None = None
def __init__(
self,
@@ -177,13 +178,13 @@ class MiniMaxHttpTTSService(TTSService):
api_key: str,
base_url: str = "https://api.minimax.io/v1/t2a_v2",
group_id: str,
- model: Optional[str] = None,
- voice_id: Optional[str] = None,
+ model: str | None = None,
+ voice_id: str | None = None,
aiohttp_session: aiohttp.ClientSession,
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
stream: bool = True,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the MiniMax TTS service.
@@ -312,7 +313,7 @@ class MiniMaxHttpTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to MiniMax service language format.
Args:
diff --git a/src/pipecat/services/mistral/llm.py b/src/pipecat/services/mistral/llm.py
index d280aaada..c1ac8b652 100644
--- a/src/pipecat/services/mistral/llm.py
+++ b/src/pipecat/services/mistral/llm.py
@@ -6,8 +6,8 @@
"""Mistral LLM service implementation using OpenAI-compatible interface."""
+from collections.abc import Sequence
from dataclasses import dataclass
-from typing import List, Optional, Sequence
from loguru import logger
from openai.types.chat import ChatCompletionMessageParam
@@ -44,8 +44,8 @@ class MistralLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.mistral.ai/v1",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Mistral LLM service.
@@ -93,8 +93,8 @@ class MistralLLMService(OpenAILLMService):
return super().create_client(api_key, base_url, **kwargs)
def _apply_mistral_fixups(
- self, messages: List[ChatCompletionMessageParam]
- ) -> List[ChatCompletionMessageParam]:
+ self, messages: list[ChatCompletionMessageParam]
+ ) -> list[ChatCompletionMessageParam]:
"""Apply fixups to messages to meet Mistral-specific requirements.
1. A "tool"-role message must be followed by an assistant message.
diff --git a/src/pipecat/services/mistral/stt.py b/src/pipecat/services/mistral/stt.py
index c41768d15..3200bc76e 100644
--- a/src/pipecat/services/mistral/stt.py
+++ b/src/pipecat/services/mistral/stt.py
@@ -10,8 +10,9 @@ This module provides a real-time STT service that integrates with Mistral's
Voxtral Realtime transcription API using the Mistral SDK's RealtimeConnection.
"""
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
from loguru import logger
@@ -88,12 +89,12 @@ class MistralSTTService(STTService):
def __init__(
self,
*,
- api_key: Optional[str] = None,
- base_url: Optional[str] = None,
- sample_rate: Optional[int] = None,
- target_streaming_delay_ms: Optional[int] = None,
- ttfs_p99_latency: Optional[float] = MISTRAL_TTFS_P99,
- settings: Optional[Settings] = None,
+ api_key: str | None = None,
+ base_url: str | None = None,
+ sample_rate: int | None = None,
+ target_streaming_delay_ms: int | None = None,
+ ttfs_p99_latency: float | None = MISTRAL_TTFS_P99,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize Mistral STT service.
@@ -128,10 +129,10 @@ class MistralSTTService(STTService):
self._client = Mistral(api_key=api_key, server_url=base_url)
self._target_streaming_delay_ms = target_streaming_delay_ms
- self._connection: Optional[RealtimeConnection] = None
+ self._connection: RealtimeConnection | None = None
self._receive_task = None
self._accumulated_text = ""
- self._detected_language: Optional[str] = None
+ self._detected_language: str | None = None
def can_generate_metrics(self) -> bool:
"""Check if the service can generate processing metrics.
@@ -292,7 +293,7 @@ class MistralSTTService(STTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[str] = None
+ self, transcript: str, is_final: bool, language: str | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/mistral/tts.py b/src/pipecat/services/mistral/tts.py
index d00b98a95..6e660e6e2 100644
--- a/src/pipecat/services/mistral/tts.py
+++ b/src/pipecat/services/mistral/tts.py
@@ -12,8 +12,8 @@ generating speech from text input using HTTP streaming with Server-Sent Events.
import base64
import struct
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import AsyncGenerator, Optional
from loguru import logger
@@ -63,9 +63,9 @@ class MistralTTSService(TTSService):
def __init__(
self,
*,
- api_key: Optional[str] = None,
- sample_rate: Optional[int] = None,
- settings: Optional[Settings] = None,
+ api_key: str | None = None,
+ sample_rate: int | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize Mistral TTS service.
diff --git a/src/pipecat/services/moondream/vision.py b/src/pipecat/services/moondream/vision.py
index 6eeff19cd..70cbe89f5 100644
--- a/src/pipecat/services/moondream/vision.py
+++ b/src/pipecat/services/moondream/vision.py
@@ -11,8 +11,8 @@ for image analysis and description generation.
"""
import asyncio
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import AsyncGenerator, Optional
from loguru import logger
from PIL import Image
@@ -85,10 +85,10 @@ class MoondreamService(VisionService):
def __init__(
self,
*,
- model: Optional[str] = None,
+ model: str | None = None,
revision="2025-01-09",
use_cpu=False,
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Moondream service.
@@ -149,7 +149,7 @@ class MoondreamService(VisionService):
logger.debug(f"Analyzing image (bytes length: {len(frame.image)})")
- def get_image_description(image_bytes: bytes, text: Optional[str]) -> str:
+ def get_image_description(image_bytes: bytes, text: str | None) -> str:
image = Image.frombytes(frame.format, frame.size, image_bytes)
image_embeds = self._model.encode_image(image)
description = self._model.query(image_embeds, text)["answer"]
diff --git a/src/pipecat/services/nebius/llm.py b/src/pipecat/services/nebius/llm.py
index aa26e776f..f2473b719 100644
--- a/src/pipecat/services/nebius/llm.py
+++ b/src/pipecat/services/nebius/llm.py
@@ -7,7 +7,6 @@
"""Nebius LLM service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -41,7 +40,7 @@ class NebiusLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.tokenfactory.nebius.com/v1/",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Nebius LLM service.
diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py
index 927a8b7b2..5ee19fb3b 100644
--- a/src/pipecat/services/neuphonic/tts.py
+++ b/src/pipecat/services/neuphonic/tts.py
@@ -13,8 +13,9 @@ text-to-speech API for real-time audio synthesis.
import asyncio
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Optional
+from typing import Any
import aiohttp
from loguru import logger
@@ -43,7 +44,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_neuphonic_lang_code(language: Language) -> Optional[str]:
+def language_to_neuphonic_lang_code(language: Language) -> str | None:
"""Convert a Language enum to Neuphonic language code.
Args:
@@ -101,21 +102,21 @@ class NeuphonicTTSService(InterruptibleTTSService):
speed: Speech speed multiplier. Defaults to 1.0.
"""
- language: Optional[Language] = Language.EN
- speed: Optional[float] = 1.0
+ language: Language | None = Language.EN
+ speed: float | None = 1.0
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
url: str = "wss://api.neuphonic.com",
- sample_rate: Optional[int] = 22050,
+ sample_rate: int | None = 22050,
encoding: str = "pcm_linear",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- aggregate_sentences: Optional[bool] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ aggregate_sentences: bool | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
**kwargs,
):
"""Initialize the Neuphonic TTS service.
@@ -197,7 +198,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Neuphonic service language format.
Args:
@@ -244,7 +245,7 @@ class NeuphonicTTSService(InterruptibleTTSService):
await super().cancel(frame)
await self._disconnect()
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis by sending stop command."""
if self._websocket:
msg = {"text": ""}
@@ -417,20 +418,20 @@ class NeuphonicHttpTTSService(TTSService):
speed: Speech speed multiplier. Defaults to 1.0.
"""
- language: Optional[Language] = Language.EN
- speed: Optional[float] = 1.0
+ language: Language | None = Language.EN
+ speed: float | None = 1.0
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
aiohttp_session: aiohttp.ClientSession,
url: str = "https://api.neuphonic.com",
- sample_rate: Optional[int] = 22050,
- encoding: Optional[str] = "pcm_linear",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ sample_rate: int | None = 22050,
+ encoding: str | None = "pcm_linear",
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Neuphonic HTTP TTS service.
@@ -502,7 +503,7 @@ class NeuphonicHttpTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Neuphonic service language format.
Args:
@@ -521,7 +522,7 @@ class NeuphonicHttpTTSService(TTSService):
"""
await super().start(frame)
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis.
Note:
diff --git a/src/pipecat/services/novita/llm.py b/src/pipecat/services/novita/llm.py
index dbe6ff23e..e28b563da 100644
--- a/src/pipecat/services/novita/llm.py
+++ b/src/pipecat/services/novita/llm.py
@@ -7,7 +7,6 @@
"""Novita AI LLM service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -37,7 +36,7 @@ class NovitaLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.novita.ai/openai",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize Novita AI LLM service.
diff --git a/src/pipecat/services/nvidia/llm.py b/src/pipecat/services/nvidia/llm.py
index a06dfd4da..28b635a62 100644
--- a/src/pipecat/services/nvidia/llm.py
+++ b/src/pipecat/services/nvidia/llm.py
@@ -11,7 +11,6 @@ Microservice) API while maintaining compatibility with the OpenAI-style interfac
"""
from dataclasses import dataclass
-from typing import Optional
from pipecat.metrics.metrics import LLMTokenUsage
from pipecat.processors.aggregators.llm_context import LLMContext
@@ -42,8 +41,8 @@ class NvidiaLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://integrate.api.nvidia.com/v1",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the NvidiaLLMService.
diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py
index e8ef3dc08..3181b53ba 100644
--- a/src/pipecat/services/nvidia/stt.py
+++ b/src/pipecat/services/nvidia/stt.py
@@ -7,9 +7,10 @@
"""NVIDIA Riva Speech-to-Text service implementations for real-time and batch transcription."""
import asyncio
+from collections.abc import AsyncGenerator, Mapping
from concurrent.futures import CancelledError as FuturesCancelledError
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, List, Mapping, Optional
+from typing import Any
from loguru import logger
from pydantic import BaseModel
@@ -39,7 +40,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-def language_to_nvidia_riva_language(language: Language) -> Optional[str]:
+def language_to_nvidia_riva_language(language: Language) -> str | None:
"""Maps Language enum to NVIDIA Riva ASR language codes.
Source:
@@ -113,7 +114,7 @@ class NvidiaSegmentedSTTSettings(STTSettings):
profanity_filter: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
automatic_punctuation: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
verbatim_transcripts: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- boosted_lm_words: List[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ boosted_lm_words: list[str] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
boosted_lm_score: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -138,7 +139,7 @@ class NvidiaSTTService(STTService):
language: Target language for transcription. Defaults to EN_US.
"""
- language: Optional[Language] = Language.EN_US
+ language: Language | None = Language.EN_US
def __init__(
self,
@@ -149,11 +150,11 @@ class NvidiaSTTService(STTService):
"function_id": "1598d209-5e27-4d3c-8079-4751568b1081",
"model_name": "parakeet-ctc-1.1b-asr",
},
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
use_ssl: bool = True,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = NVIDIA_TTFS_P99,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = NVIDIA_TTFS_P99,
**kwargs,
):
"""Initialize the NVIDIA Riva STT service.
@@ -355,7 +356,7 @@ class NvidiaSTTService(STTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
@@ -460,11 +461,11 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
boosted_lm_score: Score boost for specified words.
"""
- language: Optional[Language] = Language.EN_US
+ language: Language | None = Language.EN_US
profanity_filter: bool = False
automatic_punctuation: bool = True
verbatim_transcripts: bool = False
- boosted_lm_words: Optional[List[str]] = None
+ boosted_lm_words: list[str] | None = None
boosted_lm_score: float = 4.0
def __init__(
@@ -476,11 +477,11 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
"function_id": "ee8dc628-76de-4acc-8595-1836e7e857bd",
"model_name": "canary-1b-asr",
},
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
use_ssl: bool = True,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = NVIDIA_TTFS_P99,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = NVIDIA_TTFS_P99,
**kwargs,
):
"""Initialize the NVIDIA Riva segmented STT service.
@@ -555,7 +556,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
self._config = None
self._asr_service = None
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert pipecat Language enum to NVIDIA Riva's language code.
Args:
@@ -655,7 +656,7 @@ class NvidiaSegmentedSTTService(SegmentedSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py
index daa4e0bd7..0edc0c5f0 100644
--- a/src/pipecat/services/nvidia/tts.py
+++ b/src/pipecat/services/nvidia/tts.py
@@ -20,8 +20,9 @@ import os
import queue
import textwrap
import threading
+from collections.abc import AsyncGenerator, Mapping
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Mapping, Optional
+from typing import Any
from pipecat.utils.tracing.service_decorators import traced_tts
@@ -77,8 +78,8 @@ class _SynthesisStreamState:
response_queue: asyncio.Queue
stop_event: threading.Event
rpc_call: Any = None
- synth_task: Optional[asyncio.Task] = None
- response_task: Optional[asyncio.Task] = None
+ synth_task: asyncio.Task | None = None
+ response_task: asyncio.Task | None = None
class NvidiaTTSService(TTSService):
@@ -104,25 +105,25 @@ class NvidiaTTSService(TTSService):
quality: Audio quality setting (0-100). Defaults to 20.
"""
- language: Optional[Language] = Language.EN_US
- quality: Optional[int] = 20
+ language: Language | None = Language.EN_US
+ quality: int | None = 20
def __init__(
self,
*,
- api_key: Optional[str] = None,
+ api_key: str | None = None,
server: str = "grpc.nvcf.nvidia.com:443",
- voice_id: Optional[str] = None,
- sample_rate: Optional[int] = None,
+ voice_id: str | None = None,
+ sample_rate: int | None = None,
model_function_map: Mapping[str, str] = {
"function_id": "877104f7-e885-42b9-8de8-f6e4c6303969",
"model_name": "magpie-tts-multilingual",
},
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
use_ssl: bool = True,
- custom_dictionary: Optional[dict] = None,
- encoding: Optional[AudioEncoding] = AudioEncoding.LINEAR_PCM,
+ custom_dictionary: dict | None = None,
+ encoding: AudioEncoding | None = AudioEncoding.LINEAR_PCM,
**kwargs,
):
"""Initialize the NVIDIA Nemotron Speech TTS service.
@@ -195,7 +196,7 @@ class NvidiaTTSService(TTSService):
self._function_id = model_function_map.get("function_id")
self._use_ssl = use_ssl
- self._custom_dictionary: Optional[str] = None
+ self._custom_dictionary: str | None = None
if custom_dictionary:
entries = [f"{k} {v}" for k, v in custom_dictionary.items()]
self._custom_dictionary = ",".join(entries)
@@ -205,7 +206,7 @@ class NvidiaTTSService(TTSService):
self._config = None
# Runtime state for the active streaming turn.
- self._stream_state: Optional[_SynthesisStreamState] = None
+ self._stream_state: _SynthesisStreamState | None = None
def can_generate_metrics(self) -> bool:
"""Check if this service can generate metrics.
@@ -478,7 +479,7 @@ class NvidiaTTSService(TTSService):
if self._stream_state is state:
self._stream_state = None
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio and finalize the current context.
Args:
diff --git a/src/pipecat/services/ollama/llm.py b/src/pipecat/services/ollama/llm.py
index 89488787e..12967272b 100644
--- a/src/pipecat/services/ollama/llm.py
+++ b/src/pipecat/services/ollama/llm.py
@@ -7,7 +7,6 @@
"""OLLama LLM service implementation for Pipecat AI framework."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -40,9 +39,9 @@ class OLLamaLLMService(OpenAILLMService):
def __init__(
self,
*,
- model: Optional[str] = None,
+ model: str | None = None,
base_url: str = "http://localhost:11434/v1",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize OLLama LLM service.
diff --git a/src/pipecat/services/openai/base_llm.py b/src/pipecat/services/openai/base_llm.py
index f8b131511..2933c528d 100644
--- a/src/pipecat/services/openai/base_llm.py
+++ b/src/pipecat/services/openai/base_llm.py
@@ -8,9 +8,10 @@
import asyncio
import json
+from collections.abc import Mapping
from contextlib import asynccontextmanager
from dataclasses import dataclass, field
-from typing import Any, Dict, Mapping, Optional
+from typing import Any
import httpx
from loguru import logger
@@ -93,37 +94,33 @@ class BaseOpenAILLMService(LLMService):
extra: Additional model-specific parameters.
"""
- frequency_penalty: Optional[float] = Field(
- default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0
- )
- presence_penalty: Optional[float] = Field(
- default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0
- )
- seed: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0)
- temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=2.0)
+ frequency_penalty: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0)
+ presence_penalty: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=-2.0, le=2.0)
+ seed: int | None = Field(default_factory=lambda: NOT_GIVEN, ge=0)
+ temperature: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=2.0)
# Note: top_k is currently not supported by the OpenAI client library,
# so top_k is ignored right now.
- top_k: Optional[int] = Field(default=None, ge=0)
- top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
- max_tokens: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=1)
- max_completion_tokens: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=1)
- service_tier: Optional[str] = Field(default_factory=lambda: NOT_GIVEN)
- extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
+ top_k: int | None = Field(default=None, ge=0)
+ top_p: float | None = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0)
+ max_tokens: int | None = Field(default_factory=lambda: NOT_GIVEN, ge=1)
+ max_completion_tokens: int | None = Field(default_factory=lambda: NOT_GIVEN, ge=1)
+ service_tier: str | None = Field(default_factory=lambda: NOT_GIVEN)
+ extra: dict[str, Any] | None = Field(default_factory=dict)
def __init__(
self,
*,
- model: Optional[str] = None,
+ model: str | None = None,
api_key=None,
base_url=None,
organization=None,
project=None,
- default_headers: Optional[Mapping[str, str]] = None,
- service_tier: Optional[str] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- retry_timeout_secs: Optional[float] = 5.0,
- retry_on_timeout: Optional[bool] = False,
+ default_headers: Mapping[str, str] | None = None,
+ service_tier: str | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ retry_timeout_secs: float | None = 5.0,
+ retry_on_timeout: bool | None = False,
**kwargs,
):
"""Initialize the BaseOpenAILLMService.
@@ -296,7 +293,7 @@ class BaseOpenAILLMService(LLMService):
self._client.chat.completions.create(**params), timeout=self._retry_timeout_secs
)
return chunks
- except (APITimeoutError, asyncio.TimeoutError):
+ except (TimeoutError, APITimeoutError):
# Retry, this time without a timeout so we get a response
logger.debug(f"{self}: Retrying chat completion due to timeout")
chunks = await self._client.chat.completions.create(**params)
@@ -342,9 +339,9 @@ class BaseOpenAILLMService(LLMService):
async def run_inference(
self,
context: LLMContext,
- max_tokens: Optional[int] = None,
- system_instruction: Optional[str] = None,
- ) -> Optional[str]:
+ max_tokens: int | None = None,
+ system_instruction: str | None = None,
+ ) -> str | None:
"""Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.
Args:
diff --git a/src/pipecat/services/openai/image.py b/src/pipecat/services/openai/image.py
index de010247b..eb3cd429d 100644
--- a/src/pipecat/services/openai/image.py
+++ b/src/pipecat/services/openai/image.py
@@ -11,8 +11,9 @@ for creating images from text prompts.
"""
import io
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import AsyncGenerator, Literal, Optional
+from typing import Literal
import aiohttp
from loguru import logger
@@ -55,13 +56,12 @@ class OpenAIImageGenService(ImageGenService):
self,
*,
api_key: str,
- base_url: Optional[str] = None,
+ base_url: str | None = None,
aiohttp_session: aiohttp.ClientSession,
- image_size: Optional[
- Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]
- ] = None,
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ image_size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]
+ | None = None,
+ model: str | None = None,
+ settings: Settings | None = None,
):
"""Initialize the OpenAI image generation service.
diff --git a/src/pipecat/services/openai/llm.py b/src/pipecat/services/openai/llm.py
index 7f5bb7ca3..21894b93c 100644
--- a/src/pipecat/services/openai/llm.py
+++ b/src/pipecat/services/openai/llm.py
@@ -6,8 +6,6 @@
"""OpenAI LLM service implementation with context aggregators."""
-from typing import Optional
-
from openai import NOT_GIVEN
from pipecat.services.openai.base_llm import BaseOpenAILLMService
@@ -26,10 +24,10 @@ class OpenAILLMService(BaseOpenAILLMService):
def __init__(
self,
*,
- model: Optional[str] = None,
- service_tier: Optional[str] = None,
- params: Optional[BaseOpenAILLMService.InputParams] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ service_tier: str | None = None,
+ params: BaseOpenAILLMService.InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize OpenAI LLM service.
diff --git a/src/pipecat/services/openai/realtime/events.py b/src/pipecat/services/openai/realtime/events.py
index 0aa1355e6..57b53c565 100644
--- a/src/pipecat/services/openai/realtime/events.py
+++ b/src/pipecat/services/openai/realtime/events.py
@@ -8,7 +8,7 @@
import json
import uuid
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
@@ -61,14 +61,14 @@ class InputAudioTranscription(BaseModel):
"""Configuration for audio transcription settings."""
model: str = "gpt-4o-transcribe"
- language: Optional[str]
- prompt: Optional[str]
+ language: str | None
+ prompt: str | None
def __init__(
self,
- model: Optional[str] = "gpt-4o-transcribe",
- language: Optional[str] = None,
- prompt: Optional[str] = None,
+ model: str | None = "gpt-4o-transcribe",
+ language: str | None = None,
+ prompt: str | None = None,
):
"""Initialize InputAudioTranscription.
@@ -90,10 +90,10 @@ class TurnDetection(BaseModel):
silence_duration_ms: Silence duration to detect speech end in milliseconds. Defaults to 500.
"""
- type: Optional[Literal["server_vad"]] = "server_vad"
- threshold: Optional[float] = 0.5
- prefix_padding_ms: Optional[int] = 300
- silence_duration_ms: Optional[int] = 500
+ type: Literal["server_vad"] | None = "server_vad"
+ threshold: float | None = 0.5
+ prefix_padding_ms: int | None = 300
+ silence_duration_ms: int | None = 500
class SemanticTurnDetection(BaseModel):
@@ -106,10 +106,10 @@ class SemanticTurnDetection(BaseModel):
interrupt_response: Whether to interrupt ongoing responses on turn detection.
"""
- type: Optional[Literal["semantic_vad"]] = "semantic_vad"
- eagerness: Optional[Literal["low", "medium", "high", "auto"]] = None
- create_response: Optional[bool] = None
- interrupt_response: Optional[bool] = None
+ type: Literal["semantic_vad"] | None = "semantic_vad"
+ eagerness: Literal["low", "medium", "high", "auto"] | None = None
+ create_response: bool | None = None
+ interrupt_response: bool | None = None
class InputAudioNoiseReduction(BaseModel):
@@ -119,7 +119,7 @@ class InputAudioNoiseReduction(BaseModel):
type: Noise reduction type for different microphone scenarios.
"""
- type: Optional[Literal["near_field", "far_field"]]
+ type: Literal["near_field", "far_field"] | None
class AudioInput(BaseModel):
@@ -132,10 +132,10 @@ class AudioInput(BaseModel):
turn_detection: Configuration for turn detection, or False to disable.
"""
- format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None
- transcription: Optional[InputAudioTranscription] = None
- noise_reduction: Optional[InputAudioNoiseReduction] = None
- turn_detection: Optional[Union[TurnDetection, SemanticTurnDetection, bool]] = None
+ format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None
+ transcription: InputAudioTranscription | None = None
+ noise_reduction: InputAudioNoiseReduction | None = None
+ turn_detection: TurnDetection | SemanticTurnDetection | bool | None = None
class AudioOutput(BaseModel):
@@ -147,9 +147,9 @@ class AudioOutput(BaseModel):
speed: The speed of the model's spoken response.
"""
- format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None
- voice: Optional[str] = None
- speed: Optional[float] = None
+ format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None
+ voice: str | None = None
+ speed: float | None = None
class AudioConfiguration(BaseModel):
@@ -160,8 +160,8 @@ class AudioConfiguration(BaseModel):
output: Configuration for output audio.
"""
- input: Optional[AudioInput] = None
- output: Optional[AudioOutput] = None
+ input: AudioInput | None = None
+ output: AudioOutput | None = None
class SessionProperties(BaseModel):
@@ -189,23 +189,23 @@ class SessionProperties(BaseModel):
# Needed to support ToolSchema in tools field.
model_config = ConfigDict(arbitrary_types_allowed=True)
- type: Optional[Literal["realtime"]] = "realtime"
- object: Optional[Literal["realtime.session"]] = None
- id: Optional[str] = None
- model: Optional[str] = None
- output_modalities: Optional[List[Literal["text", "audio"]]] = None
- instructions: Optional[str] = None
- audio: Optional[AudioConfiguration] = None
+ type: Literal["realtime"] | None = "realtime"
+ object: Literal["realtime.session"] | None = None
+ id: str | None = None
+ model: str | None = None
+ output_modalities: list[Literal["text", "audio"]] | None = None
+ instructions: str | None = None
+ audio: AudioConfiguration | None = None
# Tools can only be ToolsSchema when provided by the user, in either the
# OpenAIRealtimeLLMService constructor or through LLMUpdateSettingsFrame.
# We'll never serialize/deserialize ToolsSchema when talking to the server.
- tools: Optional[ToolsSchema | List[Dict]] = None
- tool_choice: Optional[Literal["auto", "none", "required"]] = None
- max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
- tracing: Optional[Union[Literal["auto"], Dict]] = None
- prompt: Optional[Dict] = None
- expires_at: Optional[int] = None
- include: Optional[List[str]] = None
+ tools: ToolsSchema | list[dict] | None = None
+ tool_choice: Literal["auto", "none", "required"] | None = None
+ max_output_tokens: int | Literal["inf"] | None = None
+ tracing: Literal["auto"] | dict | None = None
+ prompt: dict | None = None
+ expires_at: int | None = None
+ include: list[str] | None = None
#
@@ -228,11 +228,11 @@ class ItemContent(BaseModel):
type: Literal[
"text", "audio", "input_text", "input_audio", "input_image", "output_text", "output_audio"
]
- text: Optional[str] = None
- audio: Optional[str] = None # base64-encoded audio
- transcript: Optional[str] = None
- image_url: Optional[str] = None # base64-encoded image as data URI
- detail: Optional[Literal["auto", "low", "high"]] = None
+ text: str | None = None
+ audio: str | None = None # base64-encoded audio
+ transcript: str | None = None
+ image_url: str | None = None # base64-encoded image as data URI
+ detail: Literal["auto", "low", "high"] | None = None
class ConversationItem(BaseModel):
@@ -252,17 +252,17 @@ class ConversationItem(BaseModel):
"""
id: str = Field(default_factory=lambda: str(uuid.uuid4().hex))
- object: Optional[Literal["realtime.item"]] = None
+ object: Literal["realtime.item"] | None = None
type: Literal["message", "function_call", "function_call_output"]
- status: Optional[Literal["completed", "in_progress", "incomplete"]] = None
+ status: Literal["completed", "in_progress", "incomplete"] | None = None
# role and content are present for message items
- role: Optional[Literal["user", "assistant", "system"]] = None
- content: Optional[List[ItemContent]] = None
+ role: Literal["user", "assistant", "system"] | None = None
+ content: list[ItemContent] | None = None
# these four fields are present for function_call items
- call_id: Optional[str] = None
- name: Optional[str] = None
- arguments: Optional[str] = None
- output: Optional[str] = None
+ call_id: str | None = None
+ name: str | None = None
+ arguments: str | None = None
+ output: str | None = None
class RealtimeConversation(BaseModel):
@@ -290,13 +290,13 @@ class ResponseProperties(BaseModel):
max_output_tokens: Maximum tokens for this response.
"""
- output_modalities: Optional[List[Literal["text", "audio"]]] = ["audio"]
- instructions: Optional[str] = None
- audio: Optional[AudioConfiguration] = None
- tools: Optional[List[Dict]] = None
- tool_choice: Optional[Literal["auto", "none", "required"]] = None
- temperature: Optional[float] = None
- max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
+ output_modalities: list[Literal["text", "audio"]] | None = ["audio"]
+ instructions: str | None = None
+ audio: AudioConfiguration | None = None
+ tools: list[dict] | None = None
+ tool_choice: Literal["auto", "none", "required"] | None = None
+ temperature: float | None = None
+ max_output_tokens: int | Literal["inf"] | None = None
#
@@ -314,10 +314,10 @@ class RealtimeError(BaseModel):
"""
type: str
- code: Optional[str] = ""
+ code: str | None = ""
message: str
- param: Optional[str] = None
- event_id: Optional[str] = None
+ param: str | None = None
+ event_id: str | None = None
#
@@ -346,7 +346,7 @@ class SessionUpdateEvent(ClientEvent):
type: Literal["session.update"] = "session.update"
session: SessionProperties
- def model_dump(self, *args, **kwargs) -> Dict[str, Any]:
+ def model_dump(self, *args, **kwargs) -> dict[str, Any]:
"""Serialize the event to a dictionary.
Handles special serialization for turn_detection where False becomes null.
@@ -412,7 +412,7 @@ class ConversationItemCreateEvent(ClientEvent):
"""
type: Literal["conversation.item.create"] = "conversation.item.create"
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item: ConversationItem
@@ -465,7 +465,7 @@ class ResponseCreateEvent(ClientEvent):
"""
type: Literal["response.create"] = "response.create"
- response: Optional[ResponseProperties] = None
+ response: ResponseProperties | None = None
class ResponseCancelEvent(ClientEvent):
@@ -543,7 +543,7 @@ class ConversationItemAdded(ServerEvent):
"""
type: Literal["conversation.item.added"]
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item: ConversationItem
@@ -557,7 +557,7 @@ class ConversationItemDone(ServerEvent):
"""
type: Literal["conversation.item.done"]
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item: ConversationItem
@@ -941,7 +941,7 @@ class InputAudioBufferCommitted(ServerEvent):
"""
type: Literal["input_audio_buffer.committed"]
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item_id: str
@@ -976,7 +976,7 @@ class RateLimitsUpdated(ServerEvent):
"""
type: Literal["rate_limits.updated"]
- rate_limits: List[Dict[str, Any]]
+ rate_limits: list[dict[str, Any]]
class CachedTokensDetails(BaseModel):
@@ -987,8 +987,8 @@ class CachedTokensDetails(BaseModel):
audio_tokens: Number of cached audio tokens.
"""
- text_tokens: Optional[int] = 0
- audio_tokens: Optional[int] = 0
+ text_tokens: int | None = 0
+ audio_tokens: int | None = 0
class TokenDetails(BaseModel):
@@ -1004,11 +1004,11 @@ class TokenDetails(BaseModel):
model_config = ConfigDict(extra="allow")
- cached_tokens: Optional[int] = 0
- text_tokens: Optional[int] = 0
- audio_tokens: Optional[int] = 0
- cached_tokens_details: Optional[CachedTokensDetails] = None
- image_tokens: Optional[int] = 0
+ cached_tokens: int | None = 0
+ text_tokens: int | None = 0
+ audio_tokens: int | None = 0
+ cached_tokens_details: CachedTokensDetails | None = None
+ image_tokens: int | None = 0
class Usage(BaseModel):
@@ -1052,14 +1052,14 @@ class Response(BaseModel):
object: Literal["realtime.response"]
status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"]
status_details: Any
- output: List[ConversationItem]
- output_modalities: Optional[List[Literal["text", "audio"]]] = None
- max_output_tokens: Optional[Union[int, Literal["inf"]]] = None
- audio: Optional[AudioConfiguration] = None
- usage: Optional[Usage] = None
- voice: Optional[str] = None
- temperature: Optional[float] = None
- output_audio_format: Optional[str] = None
+ output: list[ConversationItem]
+ output_modalities: list[Literal["text", "audio"]] | None = None
+ max_output_tokens: int | Literal["inf"] | None = None
+ audio: AudioConfiguration | None = None
+ usage: Usage | None = None
+ voice: str | None = None
+ temperature: float | None = None
+ output_audio_format: str | None = None
_server_event_types = {
diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py
index 524a0fe73..c34345ae3 100644
--- a/src/pipecat/services/openai/realtime/llm.py
+++ b/src/pipecat/services/openai/realtime/llm.py
@@ -10,9 +10,10 @@ import base64
import io
import json
import time
+from collections.abc import Mapping
from dataclasses import dataclass, field
from dataclasses import fields as dataclass_fields
-from typing import Any, Dict, Mapping, Optional, Type
+from typing import Any
from loguru import logger
from PIL import Image
@@ -117,7 +118,7 @@ class OpenAIRealtimeLLMSettings(LLMSettings):
# -- apply_update override -----------------------------------------------
- def apply_update(self, delta: "OpenAIRealtimeLLMService.Settings") -> Dict[str, Any]:
+ def apply_update(self, delta: "OpenAIRealtimeLLMService.Settings") -> dict[str, Any]:
"""Merge a delta, keeping ``model``/``system_instruction`` in sync with SP.
When the delta contains ``session_properties``, it **replaces** the
@@ -155,7 +156,7 @@ class OpenAIRealtimeLLMSettings(LLMSettings):
@classmethod
def from_mapping(
- cls: Type["OpenAIRealtimeLLMService.Settings"], settings: Mapping[str, Any]
+ cls: type["OpenAIRealtimeLLMService.Settings"], settings: Mapping[str, Any]
) -> "OpenAIRealtimeLLMService.Settings":
"""Build a delta from a plain dict, routing SP keys into ``session_properties``.
@@ -166,9 +167,9 @@ class OpenAIRealtimeLLMSettings(LLMSettings):
# Determine which keys belong to our own dataclass fields.
own_field_names = {f.name for f in dataclass_fields(cls)} - {"extra"}
- top: Dict[str, Any] = {}
- sp_dict: Dict[str, Any] = {}
- extra: Dict[str, Any] = {}
+ top: dict[str, Any] = {}
+ sp_dict: dict[str, Any] = {}
+ extra: dict[str, Any] = {}
# Build the SP field set without instantiating (avoid __post_init__
# cost for every from_mapping call).
@@ -210,10 +211,10 @@ class OpenAIRealtimeLLMService(LLMService):
self,
*,
api_key: str,
- model: Optional[str] = None,
+ model: str | None = None,
base_url: str = "wss://api.openai.com/v1/realtime",
- session_properties: Optional[events.SessionProperties] = None,
- settings: Optional[Settings] = None,
+ session_properties: events.SessionProperties | None = None,
+ settings: Settings | None = None,
start_audio_paused: bool = False,
start_video_paused: bool = False,
video_frame_detail: str = "auto",
@@ -807,7 +808,7 @@ class OpenAIRealtimeLLMService(LLMService):
@traced_stt
async def _handle_user_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/openai/responses/llm.py b/src/pipecat/services/openai/responses/llm.py
index e1b4ace78..c7959f05a 100644
--- a/src/pipecat/services/openai/responses/llm.py
+++ b/src/pipecat/services/openai/responses/llm.py
@@ -10,9 +10,10 @@ import asyncio
import hashlib
import json
import os
+from collections.abc import Mapping
from contextlib import asynccontextmanager
from dataclasses import dataclass, field
-from typing import Any, Dict, List, Mapping, Optional
+from typing import Any
import httpx
from loguru import logger
@@ -124,9 +125,9 @@ class _BaseOpenAIResponsesLLMService(LLMService):
base_url=None,
organization=None,
project=None,
- default_headers: Optional[Mapping[str, str]] = None,
- service_tier: Optional[str] = None,
- settings: Optional[Settings] = None,
+ default_headers: Mapping[str, str] | None = None,
+ service_tier: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the OpenAI Responses API LLM service.
@@ -227,7 +228,7 @@ class _BaseOpenAIResponsesLLMService(LLMService):
Returns:
Dictionary of parameters for the Responses API call.
"""
- params: Dict[str, Any] = {
+ params: dict[str, Any] = {
"model": self._settings.model,
"stream": True,
# store=False avoids OpenAI-side 30-day conversation storage.
@@ -268,9 +269,9 @@ class _BaseOpenAIResponsesLLMService(LLMService):
async def run_inference(
self,
context: LLMContext,
- max_tokens: Optional[int] = None,
- system_instruction: Optional[str] = None,
- ) -> Optional[str]:
+ max_tokens: int | None = None,
+ system_instruction: str | None = None,
+ ) -> str | None:
"""Run a one-shot, out-of-band inference with the given LLM context.
Always uses the HTTP client regardless of transport variant.
@@ -304,8 +305,8 @@ class _BaseOpenAIResponsesLLMService(LLMService):
def _process_function_calls(
self,
context: LLMContext,
- function_calls: Dict[str, Dict[str, str]],
- ) -> List[FunctionCallFromLLM]:
+ function_calls: dict[str, dict[str, str]],
+ ) -> list[FunctionCallFromLLM]:
"""Convert accumulated function call data into FunctionCallFromLLM list.
Args:
@@ -315,7 +316,7 @@ class _BaseOpenAIResponsesLLMService(LLMService):
Returns:
List of parsed function call objects.
"""
- fc_list: List[FunctionCallFromLLM] = []
+ fc_list: list[FunctionCallFromLLM] = []
for item_id, fc in function_calls.items():
try:
arguments = json.loads(fc["arguments"]) if fc["arguments"] else {}
@@ -388,13 +389,13 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService, WebsocketLLMServ
self._ws_url = ws_url
# State for previous_response_id optimization
- self._previous_response_id: Optional[str] = None
- self._previous_input_hash: Optional[str] = None
- self._previous_input_length: Optional[int] = None
- self._previous_response_output: Optional[list] = None
+ self._previous_response_id: str | None = None
+ self._previous_input_hash: str | None = None
+ self._previous_input_length: int | None = None
+ self._previous_response_output: list | None = None
# Response cancellation state
- self._current_response_id: Optional[str] = None # ID of current non-cancelled response
+ self._current_response_id: str | None = None # ID of current non-cancelled response
self._cancel_pending_response: bool = False
self._needs_drain: bool = False
@@ -659,7 +660,7 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService, WebsocketLLMServ
)
self._clear_cancellation_state()
return
- except (asyncio.TimeoutError, WebsocketReconnectedError, ConnectionClosed) as e:
+ except (TimeoutError, WebsocketReconnectedError, ConnectionClosed) as e:
logger.warning(f"{self}: Error draining cancelled response: {e}")
self._clear_cancellation_state()
@@ -815,8 +816,8 @@ class OpenAIResponsesLLMService(_BaseOpenAIResponsesLLMService, WebsocketLLMServ
WebsocketReconnectedError: Connection was lost and auto-recovered.
ConnectionClosed: Connection was lost and could not be recovered.
"""
- function_calls: Dict[str, Dict[str, str]] = {}
- current_arguments: Dict[str, str] = {}
+ function_calls: dict[str, dict[str, str]] = {}
+ current_arguments: dict[str, str] = {}
while True:
event = await self._ws_recv()
@@ -991,8 +992,8 @@ class OpenAIResponsesHttpLLMService(_BaseOpenAIResponsesLLMService):
stream: AsyncStream[ResponseStreamEvent] = await self._client.responses.create(**params)
# Track function calls across stream events
- function_calls: Dict[str, Dict[str, str]] = {} # item_id -> {name, call_id, arguments}
- current_arguments: Dict[str, str] = {} # item_id -> accumulated arguments
+ function_calls: dict[str, dict[str, str]] = {} # item_id -> {name, call_id, arguments}
+ current_arguments: dict[str, str] = {} # item_id -> accumulated arguments
# Ensure stream and its async iterator are closed on cancellation/exception
# to prevent socket leaks and uvloop crashes. Closing the iterator first
diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py
index fd98dbf49..ca0537fef 100644
--- a/src/pipecat/services/openai/stt.py
+++ b/src/pipecat/services/openai/stt.py
@@ -16,8 +16,9 @@ Provides two STT services:
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Literal, Optional, Union
+from typing import Any, Literal
from loguru import logger
@@ -74,14 +75,14 @@ class OpenAISTTService(BaseWhisperSTTService):
def __init__(
self,
*,
- model: Optional[str] = None,
- api_key: Optional[str] = None,
- base_url: Optional[str] = None,
- language: Optional[Language] = Language.EN,
- prompt: Optional[str] = None,
- temperature: Optional[float] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = OPENAI_TTFS_P99,
+ model: str | None = None,
+ api_key: str | None = None,
+ base_url: str | None = None,
+ language: Language | None = Language.EN,
+ prompt: str | None = None,
+ temperature: float | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = OPENAI_TTFS_P99,
**kwargs,
):
"""Initialize OpenAI STT service.
@@ -239,15 +240,15 @@ class OpenAIRealtimeSTTService(WebsocketSTTService):
self,
*,
api_key: str,
- model: Optional[str] = None,
+ model: str | None = None,
base_url: str = "wss://api.openai.com/v1/realtime",
- language: Optional[Language] = Language.EN,
- prompt: Optional[str] = None,
- turn_detection: Optional[Union[dict, Literal[False]]] = False,
- noise_reduction: Optional[Literal["near_field", "far_field"]] = None,
+ language: Language | None = Language.EN,
+ prompt: str | None = None,
+ turn_detection: dict | Literal[False] | None = False,
+ noise_reduction: Literal["near_field", "far_field"] | None = None,
should_interrupt: bool = True,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = OPENAI_REALTIME_TTFS_P99,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = OPENAI_REALTIME_TTFS_P99,
**kwargs,
):
"""Initialize the OpenAI Realtime STT service.
@@ -712,7 +713,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService):
self,
transcript: str,
is_final: bool,
- language: Optional[Language] = None,
+ language: Language | None = None,
):
"""Record transcription result for tracing.
diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py
index 074792b33..6825d6968 100644
--- a/src/pipecat/services/openai/tts.py
+++ b/src/pipecat/services/openai/tts.py
@@ -10,8 +10,9 @@ This module provides integration with OpenAI's text-to-speech API for
generating high-quality synthetic speech from text input.
"""
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import AsyncGenerator, Dict, Literal, Optional
+from typing import Literal
from loguru import logger
from openai import AsyncOpenAI, BadRequestError
@@ -43,7 +44,7 @@ ValidVoice = Literal[
"verse",
]
-VALID_VOICES: Dict[str, ValidVoice] = {
+VALID_VOICES: dict[str, ValidVoice] = {
"alloy": "alloy",
"ash": "ash",
"ballad": "ballad",
@@ -97,21 +98,21 @@ class OpenAITTSService(TTSService):
speed: Voice speed control (0.25 to 4.0, default 1.0).
"""
- instructions: Optional[str] = None
- speed: Optional[float] = None
+ instructions: str | None = None
+ speed: float | None = None
def __init__(
self,
*,
- api_key: Optional[str] = None,
- base_url: Optional[str] = None,
- voice: Optional[str] = None,
- model: Optional[str] = None,
- sample_rate: Optional[int] = None,
- instructions: Optional[str] = None,
- speed: Optional[float] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ api_key: str | None = None,
+ base_url: str | None = None,
+ voice: str | None = None,
+ model: str | None = None,
+ sample_rate: int | None = None,
+ instructions: str | None = None,
+ speed: float | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize OpenAI TTS service.
diff --git a/src/pipecat/services/openrouter/llm.py b/src/pipecat/services/openrouter/llm.py
index f92fb5e3b..d257e4cc9 100644
--- a/src/pipecat/services/openrouter/llm.py
+++ b/src/pipecat/services/openrouter/llm.py
@@ -11,7 +11,7 @@ extending the base OpenAI LLM service functionality.
"""
from dataclasses import dataclass
-from typing import Any, Dict, Optional
+from typing import Any
from loguru import logger
@@ -39,10 +39,10 @@ class OpenRouterLLMService(OpenAILLMService):
def __init__(
self,
*,
- api_key: Optional[str] = None,
- model: Optional[str] = None,
+ api_key: str | None = None,
+ model: str | None = None,
base_url: str = "https://openrouter.ai/api/v1",
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the OpenRouter LLM service.
@@ -95,7 +95,7 @@ class OpenRouterLLMService(OpenAILLMService):
logger.debug(f"Creating OpenRouter client with api {base_url}")
return super().create_client(api_key, base_url, **kwargs)
- def build_chat_completion_params(self, params_from_context: Dict[str, Any]) -> Dict[str, Any]:
+ def build_chat_completion_params(self, params_from_context: dict[str, Any]) -> dict[str, Any]:
"""Builds chat parameters, handling model-specific constraints.
Args:
diff --git a/src/pipecat/services/perplexity/llm.py b/src/pipecat/services/perplexity/llm.py
index 453db7f2d..9a2852fbf 100644
--- a/src/pipecat/services/perplexity/llm.py
+++ b/src/pipecat/services/perplexity/llm.py
@@ -12,7 +12,6 @@ reporting patterns while maintaining compatibility with the Pipecat framework.
"""
from dataclasses import dataclass
-from typing import Optional
from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams
from pipecat.adapters.services.perplexity_adapter import PerplexityLLMAdapter
@@ -50,8 +49,8 @@ class PerplexityLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.perplexity.ai",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Perplexity LLM service.
diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py
index 1b0037abb..ab055c76d 100644
--- a/src/pipecat/services/piper/tts.py
+++ b/src/pipecat/services/piper/tts.py
@@ -7,9 +7,10 @@
"""Piper TTS service implementation."""
import asyncio
+from collections.abc import AsyncGenerator, AsyncIterator
from dataclasses import dataclass
from pathlib import Path
-from typing import Any, AsyncGenerator, AsyncIterator, Optional
+from typing import Any
import aiohttp
from loguru import logger
@@ -53,11 +54,11 @@ class PiperTTSService(TTSService):
def __init__(
self,
*,
- voice_id: Optional[str] = None,
- download_dir: Optional[Path] = None,
+ voice_id: str | None = None,
+ download_dir: Path | None = None,
force_redownload: bool = False,
use_cuda: bool = False,
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Piper TTS service.
@@ -209,8 +210,8 @@ class PiperHttpTTSService(TTSService):
*,
base_url: str,
aiohttp_session: aiohttp.ClientSession,
- voice_id: Optional[str] = None,
- settings: Optional[Settings] = None,
+ voice_id: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Piper TTS service.
diff --git a/src/pipecat/services/qwen/llm.py b/src/pipecat/services/qwen/llm.py
index df07467ba..5d6ecbb1b 100644
--- a/src/pipecat/services/qwen/llm.py
+++ b/src/pipecat/services/qwen/llm.py
@@ -7,7 +7,6 @@
"""Qwen LLM service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -41,8 +40,8 @@ class QwenLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Qwen LLM service.
diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py
index d4bb77fb0..595b295e3 100644
--- a/src/pipecat/services/resembleai/tts.py
+++ b/src/pipecat/services/resembleai/tts.py
@@ -8,8 +8,8 @@
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import AsyncGenerator, Optional
from loguru import logger
@@ -58,12 +58,12 @@ class ResembleAITTSService(WebsocketTTSService):
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
url: str = "wss://websocket.cluster.resemble.ai/stream",
- precision: Optional[str] = "PCM_16",
- output_format: Optional[str] = "wav",
- sample_rate: Optional[int] = 22050,
- settings: Optional[Settings] = None,
+ precision: str | None = "PCM_16",
+ output_format: str | None = "wav",
+ sample_rate: int | None = 22050,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Resemble AI TTS service.
@@ -269,7 +269,7 @@ class ResembleAITTSService(WebsocketTTSService):
"""
await super().on_audio_context_completed(context_id)
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio and finalize the current context."""
logger.trace(f"{self}: flushing audio")
# For Resemble AI, we just wait for the audio_end message
diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py
index 41045688c..2745f2cf4 100644
--- a/src/pipecat/services/rime/tts.py
+++ b/src/pipecat/services/rime/tts.py
@@ -12,8 +12,9 @@ using Rime's API for streaming and batch audio synthesis.
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, ClassVar, Dict, Optional
+from typing import Any, ClassVar
import aiohttp
from loguru import logger
@@ -98,7 +99,7 @@ class RimeTTSSettings(TTSSettings):
temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"}
+ _aliases: ClassVar[dict[str, str]] = {"speaker": "voice"}
@dataclass
@@ -117,7 +118,7 @@ class RimeNonJsonTTSSettings(TTSSettings):
temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"}
+ _aliases: ClassVar[dict[str, str]] = {"speaker": "voice"}
class RimeTTSService(WebsocketTTSService):
@@ -151,32 +152,32 @@ class RimeTTSService(WebsocketTTSService):
save_oovs: Whether to save out-of-vocabulary words (mistv2 only).
"""
- language: Optional[Language] = Language.EN
- segment: Optional[str] = None
- speed_alpha: Optional[float] = None
+ language: Language | None = Language.EN
+ segment: str | None = None
+ speed_alpha: float | None = None
# Arcana params
- repetition_penalty: Optional[float] = None
- temperature: Optional[float] = None
- top_p: Optional[float] = None
+ repetition_penalty: float | None = None
+ temperature: float | None = None
+ top_p: float | None = None
# Mistv2 params
- reduce_latency: Optional[bool] = None
- pause_between_brackets: Optional[bool] = None
- phonemize_between_brackets: Optional[bool] = None
- no_text_normalization: Optional[bool] = None
- save_oovs: Optional[bool] = None
+ reduce_latency: bool | None = None
+ pause_between_brackets: bool | None = None
+ phonemize_between_brackets: bool | None = None
+ no_text_normalization: bool | None = None
+ save_oovs: bool | None = None
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
url: str = "wss://users-ws.rime.ai/ws3",
- model: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
- aggregate_sentences: Optional[bool] = None,
+ model: str | None = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
+ aggregate_sentences: bool | None = None,
**kwargs,
):
"""Initialize Rime TTS service.
@@ -545,7 +546,7 @@ class RimeTTSService(WebsocketTTSService):
return word_pairs
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis."""
flush_id = context_id or self.get_active_audio_context_id()
if not flush_id or not self._websocket:
@@ -663,23 +664,23 @@ class RimeHttpTTSService(TTSService):
reduce_latency: Whether to reduce latency at potential quality cost.
"""
- language: Optional[Language] = Language.EN
- pause_between_brackets: Optional[bool] = False
- phonemize_between_brackets: Optional[bool] = False
- inline_speed_alpha: Optional[str] = None
- speed_alpha: Optional[float] = 1.0
- reduce_latency: Optional[bool] = False
+ language: Language | None = Language.EN
+ pause_between_brackets: bool | None = False
+ phonemize_between_brackets: bool | None = False
+ inline_speed_alpha: str | None = None
+ speed_alpha: float | None = 1.0
+ reduce_latency: bool | None = False
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
aiohttp_session: aiohttp.ClientSession,
- model: Optional[str] = None,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize Rime HTTP TTS service.
@@ -886,26 +887,26 @@ class RimeNonJsonTTSService(InterruptibleTTSService):
extra: Additional parameters to pass to the API (for future compatibility).
"""
- language: Optional[Language] = None
- segment: Optional[str] = None
- repetition_penalty: Optional[float] = None
- temperature: Optional[float] = None
- top_p: Optional[float] = None
- extra: Optional[dict[str, Any]] = None
+ language: Language | None = None
+ segment: str | None = None
+ repetition_penalty: float | None = None
+ temperature: float | None = None
+ top_p: float | None = None
+ extra: dict[str, Any] | None = None
def __init__(
self,
*,
api_key: str,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
url: str = "wss://users.rime.ai/ws",
- model: Optional[str] = None,
+ model: str | None = None,
audio_format: str = "pcm",
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- aggregate_sentences: Optional[bool] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ aggregate_sentences: bool | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
**kwargs,
):
"""Initialize Rime Non-JSON WebSocket TTS service.
@@ -1113,7 +1114,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService):
return self._websocket
raise Exception("Websocket not connected")
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis."""
if not self._websocket:
return
diff --git a/src/pipecat/services/sambanova/llm.py b/src/pipecat/services/sambanova/llm.py
index 66f3d52bf..4efc5e7ed 100644
--- a/src/pipecat/services/sambanova/llm.py
+++ b/src/pipecat/services/sambanova/llm.py
@@ -8,7 +8,7 @@
import json
from dataclasses import dataclass
-from typing import Any, Dict, Optional
+from typing import Any
from loguru import logger
from openai import AsyncStream
@@ -51,10 +51,10 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
self,
*,
api_key: str,
- model: Optional[str] = None,
+ model: str | None = None,
base_url: str = "https://api.sambanova.ai/v1",
- settings: Optional[Settings] = None,
- **kwargs: Dict[Any, Any],
+ settings: Settings | None = None,
+ **kwargs: dict[Any, Any],
) -> None:
"""Initialize SambaNova LLM service.
@@ -88,9 +88,9 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore
def create_client(
self,
- api_key: Optional[str] = None,
- base_url: Optional[str] = None,
- **kwargs: Dict[Any, Any],
+ api_key: str | None = None,
+ base_url: str | None = None,
+ **kwargs: dict[Any, Any],
) -> Any:
"""Create OpenAI-compatible client for SambaNova API endpoint.
diff --git a/src/pipecat/services/sarvam/_sdk.py b/src/pipecat/services/sarvam/_sdk.py
index 31085d289..45988c49d 100644
--- a/src/pipecat/services/sarvam/_sdk.py
+++ b/src/pipecat/services/sarvam/_sdk.py
@@ -5,12 +5,11 @@
#
import platform
-from typing import Dict
from pipecat import version as pipecat_version
-def sdk_headers() -> Dict[str, str]:
+def sdk_headers() -> dict[str, str]:
"""SDK identification headers for upstream providers."""
return {
"User-Agent": f"Pipecat/{pipecat_version()} Python/{platform.python_version()}",
diff --git a/src/pipecat/services/sarvam/llm.py b/src/pipecat/services/sarvam/llm.py
index ce353ff2b..d86ba1874 100644
--- a/src/pipecat/services/sarvam/llm.py
+++ b/src/pipecat/services/sarvam/llm.py
@@ -6,8 +6,9 @@
"""Sarvam LLM service implementation using OpenAI-compatible interface."""
+from collections.abc import Mapping
from dataclasses import dataclass, field
-from typing import Literal, Mapping, Optional
+from typing import Literal
from loguru import logger
from openai import NOT_GIVEN
@@ -57,8 +58,8 @@ class SarvamLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.sarvam.ai/v1",
- settings: Optional[Settings] = None,
- default_headers: Optional[Mapping[str, str]] = None,
+ settings: Settings | None = None,
+ default_headers: Mapping[str, str] | None = None,
**kwargs,
):
"""Initialize Sarvam LLM service.
diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py
index 7271a3e5c..faadd914b 100644
--- a/src/pipecat/services/sarvam/stt.py
+++ b/src/pipecat/services/sarvam/stt.py
@@ -12,8 +12,9 @@ can handle multiple audio formats for Indian language speech recognition.
"""
import base64
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, Dict, Literal, Optional
+from typing import Any, Literal
from loguru import logger
from pydantic import BaseModel
@@ -99,13 +100,13 @@ class ModelConfig:
supports_prompt: bool
supports_mode: bool
supports_language: bool
- default_language: Optional[str]
- default_mode: Optional[str]
+ default_language: str | None
+ default_mode: str | None
use_translate_endpoint: bool
use_translate_method: bool
-MODEL_CONFIGS: Dict[str, ModelConfig] = {
+MODEL_CONFIGS: dict[str, ModelConfig] = {
"saarika:v2.5": ModelConfig(
supports_prompt=False,
supports_mode=False,
@@ -192,26 +193,24 @@ class SarvamSTTService(STTService):
high_vad_sensitivity: Enable high VAD (Voice Activity Detection) sensitivity. Defaults to None.
"""
- language: Optional[Language] = None
- prompt: Optional[str] = None
- mode: Optional[Literal["transcribe", "translate", "verbatim", "translit", "codemix"]] = None
- vad_signals: Optional[bool] = None
- high_vad_sensitivity: Optional[bool] = None
+ language: Language | None = None
+ prompt: str | None = None
+ mode: Literal["transcribe", "translate", "verbatim", "translit", "codemix"] | None = None
+ vad_signals: bool | None = None
+ high_vad_sensitivity: bool | None = None
def __init__(
self,
*,
api_key: str,
- model: Optional[str] = None,
- mode: Optional[
- Literal["transcribe", "translate", "verbatim", "translit", "codemix"]
- ] = None,
- sample_rate: Optional[int] = None,
+ model: str | None = None,
+ mode: Literal["transcribe", "translate", "verbatim", "translit", "codemix"] | None = None,
+ sample_rate: int | None = None,
input_audio_codec: str = "wav",
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = SARVAM_TTFS_P99,
- keepalive_timeout: Optional[float] = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = SARVAM_TTFS_P99,
+ keepalive_timeout: float | None = None,
keepalive_interval: float = 5.0,
**kwargs,
):
@@ -339,7 +338,7 @@ class SarvamSTTService(STTService):
"""
return language_to_sarvam_language(language)
- def _get_language_string(self) -> Optional[str]:
+ def _get_language_string(self) -> str | None:
"""Resolve the current language setting to a Sarvam language code string."""
if self._settings.language:
return language_to_sarvam_language(self._settings.language)
@@ -408,7 +407,7 @@ class SarvamSTTService(STTService):
return changed
- async def set_prompt(self, prompt: Optional[str]):
+ async def set_prompt(self, prompt: str | None):
"""Set the transcription/translation prompt and reconnect.
.. deprecated:: 0.0.104
@@ -731,7 +730,7 @@ class SarvamSTTService(STTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing.
diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py
index ca9a38223..01b62b87d 100644
--- a/src/pipecat/services/sarvam/tts.py
+++ b/src/pipecat/services/sarvam/tts.py
@@ -40,9 +40,10 @@ See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for full API
import asyncio
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, AsyncGenerator, ClassVar, Dict, List, Optional, Tuple
+from enum import StrEnum
+from typing import Any, ClassVar
import aiohttp
from loguru import logger
@@ -72,7 +73,7 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-class SarvamTTSModel(str, Enum):
+class SarvamTTSModel(StrEnum):
"""Available Sarvam TTS models.
Parameters:
@@ -92,7 +93,7 @@ class SarvamTTSModel(str, Enum):
BULBUL_V3 = "bulbul:v3"
-class SarvamTTSSpeakerV2(str, Enum):
+class SarvamTTSSpeakerV2(StrEnum):
"""Available speakers for bulbul:v2 model.
Female voices: anushka, manisha, vidya, arya
@@ -108,7 +109,7 @@ class SarvamTTSSpeakerV2(str, Enum):
HITESH = "hitesh"
-class SarvamTTSSpeakerV3(str, Enum):
+class SarvamTTSSpeakerV3(StrEnum):
"""Available speakers for bulbul:v3-beta model.
Includes a wider variety of voices with different characteristics.
@@ -161,12 +162,12 @@ class TTSModelConfig:
supports_temperature: bool
default_sample_rate: int
default_speaker: str
- pace_range: Tuple[float, float]
+ pace_range: tuple[float, float]
preprocessing_always_enabled: bool
- speakers: Tuple[str, ...]
+ speakers: tuple[str, ...]
-TTS_MODEL_CONFIGS: Dict[str, TTSModelConfig] = {
+TTS_MODEL_CONFIGS: dict[str, TTSModelConfig] = {
"bulbul:v2": TTSModelConfig(
supports_pitch=True,
supports_loudness=True,
@@ -200,7 +201,7 @@ TTS_MODEL_CONFIGS: Dict[str, TTSModelConfig] = {
}
-def get_speakers_for_model(model: str) -> List[str]:
+def get_speakers_for_model(model: str) -> list[str]:
"""Get the list of available speakers for a given model.
Args:
@@ -215,7 +216,7 @@ def get_speakers_for_model(model: str) -> List[str]:
return list(TTS_MODEL_CONFIGS["bulbul:v2"].speakers)
-def language_to_sarvam_language(language: Language) -> Optional[str]:
+def language_to_sarvam_language(language: Language) -> str | None:
"""Convert Pipecat Language enum to Sarvam AI language codes.
Args:
@@ -291,7 +292,7 @@ class SarvamTTSSettings(SarvamHttpTTSSettings):
Controls memory usage and processing efficiency. Defaults to 150.
"""
- _aliases: ClassVar[Dict[str, str]] = {"target_language_code": "language"}
+ _aliases: ClassVar[dict[str, str]] = {"target_language_code": "language"}
min_buffer_size: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
max_chunk_length: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -374,30 +375,30 @@ class SarvamHttpTTSService(TTSService):
**Note:** Only supported for bulbul:v3-beta. Ignored for v2.
"""
- language: Optional[Language] = Language.EN
- pitch: Optional[float] = Field(
+ language: Language | None = Language.EN
+ pitch: float | None = Field(
default=0.0,
ge=-0.75,
le=0.75,
description="Voice pitch adjustment. Only for bulbul:v2.",
)
- pace: Optional[float] = Field(
+ pace: float | None = Field(
default=1.0,
ge=0.3,
le=3.0,
description="Speech pace. v2: 0.3-3.0, v3: 0.5-2.0.",
)
- loudness: Optional[float] = Field(
+ loudness: float | None = Field(
default=1.0,
ge=0.3,
le=3.0,
description="Volume multiplier. Only for bulbul:v2.",
)
- enable_preprocessing: Optional[bool] = Field(
+ enable_preprocessing: bool | None = Field(
default=False,
description="Enable text preprocessing. Always enabled for v3-beta model.",
)
- temperature: Optional[float] = Field(
+ temperature: float | None = Field(
default=0.6,
ge=0.01,
le=1.0,
@@ -409,12 +410,12 @@ class SarvamHttpTTSService(TTSService):
*,
api_key: str,
aiohttp_session: aiohttp.ClientSession,
- voice_id: Optional[str] = None,
- model: Optional[str] = None,
+ voice_id: str | None = None,
+ model: str | None = None,
base_url: str = "https://api.sarvam.ai",
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Sarvam TTS service.
@@ -548,7 +549,7 @@ class SarvamHttpTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Sarvam AI language format.
Args:
@@ -754,46 +755,46 @@ class SarvamTTSService(InterruptibleTTSService):
roopa, kabir, aayan, shubh, ashutosh, advait, amelia, sophia
"""
- pitch: Optional[float] = Field(
+ pitch: float | None = Field(
default=0.0,
ge=-0.75,
le=0.75,
description="Voice pitch adjustment. Only for bulbul:v2.",
)
- pace: Optional[float] = Field(
+ pace: float | None = Field(
default=1.0,
ge=0.3,
le=3.0,
description="Speech pace. v2: 0.3-3.0, v3: 0.5-2.0.",
)
- loudness: Optional[float] = Field(
+ loudness: float | None = Field(
default=1.0,
ge=0.3,
le=3.0,
description="Volume multiplier. Only for bulbul:v2.",
)
- enable_preprocessing: Optional[bool] = Field(
+ enable_preprocessing: bool | None = Field(
default=False,
description="Enable text preprocessing. Always enabled for v3 models.",
)
- min_buffer_size: Optional[int] = Field(
+ min_buffer_size: int | None = Field(
default=50,
description="Minimum characters to buffer before TTS processing.",
)
- max_chunk_length: Optional[int] = Field(
+ max_chunk_length: int | None = Field(
default=150,
description="Maximum length for sentence splitting.",
)
- output_audio_codec: Optional[str] = Field(
+ output_audio_codec: str | None = Field(
default="linear16",
description="Audio codec: linear16, mulaw, alaw, opus, flac, aac, wav, mp3.",
)
- output_audio_bitrate: Optional[str] = Field(
+ output_audio_bitrate: str | None = Field(
default="128k",
description="Audio bitrate: 32k, 64k, 96k, 128k, 192k.",
)
- language: Optional[Language] = Language.EN
- temperature: Optional[float] = Field(
+ language: Language | None = Language.EN
+ temperature: float | None = Field(
default=0.6,
ge=0.01,
le=1.0,
@@ -804,14 +805,14 @@ class SarvamTTSService(InterruptibleTTSService):
self,
*,
api_key: str,
- model: Optional[str] = None,
- voice_id: Optional[str] = None,
+ model: str | None = None,
+ voice_id: str | None = None,
url: str = "wss://api.sarvam.ai/text-to-speech/ws",
- aggregate_sentences: Optional[bool] = None,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
- sample_rate: Optional[int] = None,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ aggregate_sentences: bool | None = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
+ sample_rate: int | None = None,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Sarvam TTS service with voice and transport configuration.
@@ -979,7 +980,7 @@ class SarvamTTSService(InterruptibleTTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Sarvam AI language format.
Args:
@@ -1020,7 +1021,7 @@ class SarvamTTSService(InterruptibleTTSService):
await super().cancel(frame)
await self._disconnect()
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis by sending flush command."""
try:
if self._websocket:
diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py
index 54465b041..585053277 100644
--- a/src/pipecat/services/settings.py
+++ b/src/pipecat/services/settings.py
@@ -37,8 +37,9 @@ Key helpers:
from __future__ import annotations
import copy
+from collections.abc import Mapping
from dataclasses import dataclass, field, fields
-from typing import TYPE_CHECKING, Any, ClassVar, Dict, Mapping, Optional, Type, TypeVar
+from typing import TYPE_CHECKING, Any, ClassVar, TypeVar
from loguru import logger
@@ -65,7 +66,7 @@ class _NotGiven:
``validate_complete()``.
"""
- _instance: Optional[_NotGiven] = None
+ _instance: _NotGiven | None = None
def __new__(cls) -> _NotGiven:
if cls._instance is None:
@@ -153,12 +154,12 @@ class ServiceSettings:
model string or ``None`` if the service has no model concept.
"""
- extra: Dict[str, Any] = field(default_factory=dict)
+ extra: dict[str, Any] = field(default_factory=dict)
"""Catch-all for service-specific keys that have no declared field."""
# -- class-level configuration -------------------------------------------
- _aliases: ClassVar[Dict[str, str]] = {}
+ _aliases: ClassVar[dict[str, str]] = {}
"""Map of alternative key names to canonical field names.
For example ``{"voice_id": "voice"}`` lets callers use either spelling.
@@ -167,7 +168,7 @@ class ServiceSettings:
# -- public API ----------------------------------------------------------
- def given_fields(self) -> Dict[str, Any]:
+ def given_fields(self) -> dict[str, Any]:
"""Return a dict of only the fields that are not ``NOT_GIVEN``.
Primarily useful for delta-mode objects to inspect which fields were
@@ -180,7 +181,7 @@ class ServiceSettings:
Returns:
Dictionary mapping field names to their provided values.
"""
- result: Dict[str, Any] = {}
+ result: dict[str, Any] = {}
for f in fields(self):
if f.name == "extra":
continue
@@ -190,7 +191,7 @@ class ServiceSettings:
result.update(self.extra)
return result
- def apply_update(self: _S, delta: _S) -> Dict[str, Any]:
+ def apply_update(self: _S, delta: _S) -> dict[str, Any]:
"""Merge a delta-mode object into this store-mode object.
Only fields in *delta* that are **given** (i.e. not ``NOT_GIVEN``)
@@ -218,7 +219,7 @@ class ServiceSettings:
# changed == {"voice": "alice"}
# current.voice == "bob", current.language == "en"
"""
- changed: Dict[str, Any] = {}
+ changed: dict[str, Any] = {}
for f in fields(self):
if f.name == "extra":
continue
@@ -240,7 +241,7 @@ class ServiceSettings:
return changed
@classmethod
- def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S:
+ def from_mapping(cls: type[_S], settings: Mapping[str, Any]) -> _S:
"""Build a **delta-mode** settings object from a plain dictionary.
This exists for backward compatibility with code that passes plain
@@ -266,8 +267,8 @@ class ServiceSettings:
# delta.extra == {"speed": 1.2}
"""
field_names = {f.name for f in fields(cls)} - {"extra"}
- kwargs: Dict[str, Any] = {}
- extra: Dict[str, Any] = {}
+ kwargs: dict[str, Any] = {}
+ extra: dict[str, Any] = {}
for key, value in settings.items():
# Resolve aliases first
@@ -410,7 +411,7 @@ class TTSSettings(ServiceSettings):
voice: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
language: Language | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
- _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"}
+ _aliases: ClassVar[dict[str, str]] = {"voice_id": "voice"}
@dataclass
diff --git a/src/pipecat/services/simli/video.py b/src/pipecat/services/simli/video.py
index ce681d6e8..d6ca15dc0 100644
--- a/src/pipecat/services/simli/video.py
+++ b/src/pipecat/services/simli/video.py
@@ -8,7 +8,6 @@
import asyncio
from dataclasses import dataclass
-from typing import Optional
import numpy as np
from loguru import logger
@@ -71,9 +70,9 @@ class SimliVideoService(AIService):
before the avatar disconnects.
"""
- enable_logging: Optional[bool] = None
- max_session_length: Optional[int] = None
- max_idle_time: Optional[int] = None
+ enable_logging: bool | None = None
+ max_session_length: int | None = None
+ max_idle_time: int | None = None
def __init__(
self,
@@ -82,11 +81,11 @@ class SimliVideoService(AIService):
face_id: str,
simli_url: str = "https://api.simli.ai",
is_trinity_avatar: bool = False,
- params: Optional[InputParams] = None,
- max_session_length: Optional[int] = None,
- max_idle_time: Optional[int] = None,
- enable_logging: Optional[bool] = None,
- settings: Optional[Settings] = None,
+ params: InputParams | None = None,
+ max_session_length: int | None = None,
+ max_idle_time: int | None = None,
+ enable_logging: bool | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Simli video service.
diff --git a/src/pipecat/services/smallest/stt.py b/src/pipecat/services/smallest/stt.py
index 78fa0e44d..8673bd040 100644
--- a/src/pipecat/services/smallest/stt.py
+++ b/src/pipecat/services/smallest/stt.py
@@ -14,9 +14,10 @@ This module provides a STT service using Smallest AI's Waves API:
import asyncio
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, AsyncGenerator, Optional
+from enum import StrEnum
+from typing import Any
from urllib.parse import urlencode
from loguru import logger
@@ -97,7 +98,7 @@ def language_to_smallest_stt_language(language: Language) -> str:
return resolve_language(language, LANGUAGE_MAP)
-class SmallestSTTModel(str, Enum):
+class SmallestSTTModel(StrEnum):
"""Available Smallest AI STT models."""
PULSE = "pulse"
@@ -156,9 +157,9 @@ class SmallestSTTService(WebsocketSTTService):
api_key: str,
base_url: str = "wss://api.smallest.ai",
encoding: str = "linear16",
- sample_rate: Optional[int] = None,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = SMALLEST_TTFS_P99,
+ sample_rate: int | None = None,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = SMALLEST_TTFS_P99,
**kwargs,
):
"""Initialize the Smallest AI STT service.
@@ -207,7 +208,7 @@ class SmallestSTTService(WebsocketSTTService):
"""Check if this service can generate processing metrics."""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Smallest service language format.
Args:
@@ -406,7 +407,7 @@ class SmallestSTTService(WebsocketSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[str] = None
+ self, transcript: str, is_final: bool, language: str | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/smallest/tts.py b/src/pipecat/services/smallest/tts.py
index 5ef4aaa49..33c492c01 100644
--- a/src/pipecat/services/smallest/tts.py
+++ b/src/pipecat/services/smallest/tts.py
@@ -13,9 +13,10 @@ Waves API for real-time text-to-speech synthesis.
import asyncio
import base64
import json
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, AsyncGenerator, Optional
+from enum import StrEnum
+from typing import Any
from loguru import logger
@@ -43,14 +44,14 @@ except ModuleNotFoundError as e:
raise Exception(f"Missing module: {e}")
-class SmallestTTSModel(str, Enum):
+class SmallestTTSModel(StrEnum):
"""Available Smallest AI TTS models."""
LIGHTNING_V2 = "lightning-v2"
LIGHTNING_V3_1 = "lightning-v3.1"
-def language_to_smallest_tts_language(language: Language) -> Optional[str]:
+def language_to_smallest_tts_language(language: Language) -> str | None:
"""Convert a Language enum to a Smallest TTS language string.
Args:
@@ -125,8 +126,8 @@ class SmallestTTSService(InterruptibleTTSService):
*,
api_key: str,
base_url: str = "wss://waves-api.smallest.ai",
- sample_rate: Optional[int] = None,
- settings: Optional[Settings] = None,
+ sample_rate: int | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Smallest AI WebSocket TTS service.
@@ -173,7 +174,7 @@ class SmallestTTSService(InterruptibleTTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to Smallest service language format.
Args:
@@ -354,7 +355,7 @@ class SmallestTTSService(InterruptibleTTSService):
msg = {"flush": True}
await self._websocket.send(json.dumps(msg))
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any pending audio synthesis."""
if not self._websocket or self._websocket.state is State.CLOSED:
return
diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py
index 5163ef113..66f27d40f 100644
--- a/src/pipecat/services/soniox/stt.py
+++ b/src/pipecat/services/soniox/stt.py
@@ -8,8 +8,9 @@
import json
import time
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import Any, AsyncGenerator, List, Optional
+from typing import Any
from loguru import logger
from pydantic import BaseModel
@@ -70,10 +71,10 @@ class SonioxContextObject(BaseModel):
https://soniox.com/docs/stt/concepts/context
"""
- general: Optional[List[SonioxContextGeneralItem]] = None
- text: Optional[str] = None
- terms: Optional[List[str]] = None
- translation_terms: Optional[List[SonioxContextTranslationTerm]] = None
+ general: list[SonioxContextGeneralItem] | None = None
+ text: str | None = None
+ terms: list[str] | None = None
+ translation_terms: list[SonioxContextTranslationTerm] | None = None
class SonioxInputParams(BaseModel):
@@ -99,17 +100,17 @@ class SonioxInputParams(BaseModel):
model: str = "stt-rt-v4"
- audio_format: Optional[str] = "pcm_s16le"
- num_channels: Optional[int] = 1
+ audio_format: str | None = "pcm_s16le"
+ num_channels: int | None = 1
- language_hints: Optional[List[Language]] = None
- language_hints_strict: Optional[bool] = None
- context: Optional[SonioxContextObject | str] = None
+ language_hints: list[Language] | None = None
+ language_hints_strict: bool | None = None
+ context: SonioxContextObject | str | None = None
- enable_speaker_diarization: Optional[bool] = False
- enable_language_identification: Optional[bool] = False
+ enable_speaker_diarization: bool | None = False
+ enable_language_identification: bool | None = False
- client_reference_id: Optional[str] = None
+ client_reference_id: str | None = None
def is_end_token(token: dict) -> bool:
@@ -190,8 +191,8 @@ def language_to_soniox_language(language: Language) -> str:
def _prepare_language_hints(
- language_hints: Optional[List[Language]],
-) -> Optional[List[str]]:
+ language_hints: list[Language] | None,
+) -> list[str] | None:
if language_hints is None:
return None
@@ -215,7 +216,7 @@ class SonioxSTTSettings(STTSettings):
client_reference_id: Client reference ID to use for transcription.
"""
- language_hints: List[Language] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
+ language_hints: list[Language] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
language_hints_strict: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
context: SonioxContextObject | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
enable_speaker_diarization: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
@@ -243,14 +244,14 @@ class SonioxSTTService(WebsocketSTTService):
*,
api_key: str,
url: str = "wss://stt-rt.soniox.com/transcribe-websocket",
- sample_rate: Optional[int] = None,
- model: Optional[str] = None,
+ sample_rate: int | None = None,
+ model: str | None = None,
audio_format: str = "pcm_s16le",
num_channels: int = 1,
- params: Optional[SonioxInputParams] = None,
+ params: SonioxInputParams | None = None,
vad_force_turn_endpoint: bool = True,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = SONIOX_TTFS_P99,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = SONIOX_TTFS_P99,
**kwargs,
):
"""Initialize the Soniox STT service.
@@ -337,7 +338,7 @@ class SonioxSTTService(WebsocketSTTService):
self._num_channels = num_channels
self._final_transcription_buffer = []
- self._last_tokens_received: Optional[float] = None
+ self._last_tokens_received: float | None = None
self._receive_task = None
@@ -417,7 +418,7 @@ class SonioxSTTService(WebsocketSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py
index ae8e35850..6f049b29f 100644
--- a/src/pipecat/services/speechmatics/stt.py
+++ b/src/pipecat/services/speechmatics/stt.py
@@ -9,9 +9,10 @@
import asyncio
import os
import warnings
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from enum import Enum
-from typing import Any, AsyncGenerator, ClassVar
+from enum import StrEnum
+from typing import Any, ClassVar
from dotenv import load_dotenv
from loguru import logger
@@ -66,7 +67,7 @@ except ModuleNotFoundError as e:
load_dotenv()
-class TurnDetectionMode(str, Enum):
+class TurnDetectionMode(StrEnum):
"""Endpoint and turn detection handling mode.
How the STT engine handles the endpointing of speech. If using Pipecat's built-in endpointing,
@@ -680,7 +681,7 @@ class SpeechmaticsSTTService(STTService):
try:
if self._client:
await self._client.disconnect()
- except asyncio.TimeoutError:
+ except TimeoutError:
logger.warning(f"{self} timeout while closing Speechmatics client connection")
except Exception as e:
await self.push_error(error_msg=f"Error closing Speechmatics client: {e}", exception=e)
diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py
index 64f64378a..69db9e2c3 100644
--- a/src/pipecat/services/speechmatics/tts.py
+++ b/src/pipecat/services/speechmatics/tts.py
@@ -7,8 +7,8 @@
"""Speechmatics TTS service integration."""
import asyncio
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import AsyncGenerator, Optional
from urllib.parse import urlencode
import aiohttp
@@ -75,11 +75,11 @@ class SpeechmaticsTTSService(TTSService):
*,
api_key: str,
base_url: str = "https://preview.tts.speechmatics.com",
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
aiohttp_session: aiohttp.ClientSession,
- sample_rate: Optional[int] = SPEECHMATICS_SAMPLE_RATE,
- params: Optional[InputParams] = None,
- settings: Optional[Settings] = None,
+ sample_rate: int | None = SPEECHMATICS_SAMPLE_RATE,
+ params: InputParams | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Speechmatics TTS service.
diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py
index 8fe83be33..74d2f90ea 100644
--- a/src/pipecat/services/stt_service.py
+++ b/src/pipecat/services/stt_service.py
@@ -12,7 +12,8 @@ import time
import warnings
import wave
from abc import abstractmethod
-from typing import Any, AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
+from typing import Any
from loguru import logger
from websockets.protocol import State
@@ -82,12 +83,12 @@ class STTService(AIService):
self,
*,
audio_passthrough=True,
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
stt_ttfb_timeout: float = 2.0,
- ttfs_p99_latency: Optional[float] = None,
- keepalive_timeout: Optional[float] = None,
+ ttfs_p99_latency: float | None = None,
+ keepalive_timeout: float | None = None,
keepalive_interval: float = 5.0,
- settings: Optional[STTSettings] = None,
+ settings: STTSettings | None = None,
**kwargs,
):
"""Initialize the STT service.
@@ -152,7 +153,7 @@ class STTService(AIService):
# STT TTFB tracking state
self._stt_ttfb_timeout = stt_ttfb_timeout
- self._ttfb_timeout_task: Optional[asyncio.Task] = None
+ self._ttfb_timeout_task: asyncio.Task | None = None
self._user_speaking: bool = False
self._finalize_pending: bool = False
self._finalize_requested: bool = False
@@ -161,7 +162,7 @@ class STTService(AIService):
# Keepalive state
self._keepalive_timeout = keepalive_timeout
self._keepalive_interval = keepalive_interval
- self._keepalive_task: Optional[asyncio.Task] = None
+ self._keepalive_task: asyncio.Task | None = None
self._last_audio_time: float = 0
# VAD-aware reconnect state
@@ -261,7 +262,7 @@ class STTService(AIService):
settings_cls = type(self._settings)
await self._update_settings(settings_cls(language=language))
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a language to the service-specific language format.
Args:
@@ -690,7 +691,7 @@ class SegmentedSTTService(STTService):
VAD detection.
"""
- def __init__(self, *, sample_rate: Optional[int] = None, **kwargs):
+ def __init__(self, *, sample_rate: int | None = None, **kwargs):
"""Initialize the segmented STT service.
Args:
diff --git a/src/pipecat/services/tavus/video.py b/src/pipecat/services/tavus/video.py
index a41bee5b4..a12b91f3b 100644
--- a/src/pipecat/services/tavus/video.py
+++ b/src/pipecat/services/tavus/video.py
@@ -12,7 +12,6 @@ avatar functionality through Tavus's streaming API.
import asyncio
from dataclasses import dataclass
-from typing import Optional
import aiohttp
from daily.daily import AudioData, VideoFrame
@@ -69,7 +68,7 @@ class TavusVideoService(AIService):
replica_id: str,
persona_id: str = "pipecat-stream",
session: aiohttp.ClientSession,
- settings: Optional[Settings] = None,
+ settings: Settings | None = None,
**kwargs,
) -> None:
"""Initialize the Tavus video service.
@@ -94,15 +93,15 @@ class TavusVideoService(AIService):
self._persona_id = persona_id
self._other_participant_has_joined = False
- self._client: Optional[TavusTransportClient] = None
+ self._client: TavusTransportClient | None = None
self._conversation_id: str
self._resampler = create_stream_resampler()
self._audio_buffer = bytearray()
- self._send_task: Optional[asyncio.Task] = None
+ self._send_task: asyncio.Task | None = None
# This is the custom track destination expected by Tavus
- self._transport_destination: Optional[str] = "stream"
+ self._transport_destination: str | None = "stream"
self._transport_ready = False
async def setup(self, setup: FrameProcessorSetup):
diff --git a/src/pipecat/services/together/llm.py b/src/pipecat/services/together/llm.py
index 3711daa72..9626d1a8b 100644
--- a/src/pipecat/services/together/llm.py
+++ b/src/pipecat/services/together/llm.py
@@ -7,7 +7,6 @@
"""Together.ai LLM service implementation using OpenAI-compatible interface."""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -42,8 +41,8 @@ class TogetherLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.together.xyz/v1",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize Together.ai LLM service.
diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py
index 86a3b91a6..fe4790cbb 100644
--- a/src/pipecat/services/tts_service.py
+++ b/src/pipecat/services/tts_service.py
@@ -10,19 +10,11 @@ import asyncio
import uuid
import warnings
from abc import abstractmethod
+from collections.abc import AsyncGenerator, AsyncIterator, Awaitable, Callable, Sequence
from dataclasses import dataclass
-from enum import Enum
+from enum import StrEnum
from typing import (
Any,
- AsyncGenerator,
- AsyncIterator,
- Awaitable,
- Callable,
- Dict,
- List,
- Optional,
- Sequence,
- Tuple,
)
from loguru import logger
@@ -77,10 +69,10 @@ class TTSContext:
"""
append_to_context: bool = True
- push_assistant_aggregation: Optional[bool] = False
+ push_assistant_aggregation: bool | None = False
-class TextAggregationMode(str, Enum):
+class TextAggregationMode(StrEnum):
"""Controls how incoming text is aggregated before TTS synthesis.
Parameters:
@@ -145,8 +137,8 @@ class TTSService(AIService):
def __init__(
self,
*,
- text_aggregation_mode: Optional[TextAggregationMode] = None,
- aggregate_sentences: Optional[bool] = None,
+ text_aggregation_mode: TextAggregationMode | None = None,
+ aggregate_sentences: bool | None = None,
# if True, TTSService will push TextFrames and LLMFullResponseEndFrames,
# otherwise subclass must do it
push_text_frames: bool = True,
@@ -166,22 +158,21 @@ class TTSService(AIService):
# (helps prevent some TTS services from vocalizing trailing punctuation)
append_trailing_space: bool = False,
# TTS output sample rate
- sample_rate: Optional[int] = None,
+ sample_rate: int | None = None,
# Types of text aggregations that should not be spoken.
- skip_aggregator_types: Optional[List[str]] = [],
+ skip_aggregator_types: list[str] | None = [],
# A list of callables to transform text before just before sending it to TTS.
# Each callable takes the aggregated text and its type, and returns the transformed text.
# To register, provide a list of tuples of (aggregation_type | '*', transform_function).
- text_transforms: Optional[
- List[
- Tuple[AggregationType | str, Callable[[str, str | AggregationType], Awaitable[str]]]
- ]
- ] = None,
+ text_transforms: list[
+ tuple[AggregationType | str, Callable[[str, str | AggregationType], Awaitable[str]]]
+ ]
+ | None = None,
# Text filter executed after text has been aggregated.
- text_filters: Optional[Sequence[BaseTextFilter]] = None,
+ text_filters: Sequence[BaseTextFilter] | None = None,
# Audio transport destination of the generated frames.
- transport_destination: Optional[str] = None,
- settings: Optional[TTSSettings] = None,
+ transport_destination: str | None = None,
+ settings: TTSSettings | None = None,
# if True, the context ID is reused within an LLM turn
reuse_context_id_within_turn: bool = True,
**kwargs,
@@ -288,24 +279,24 @@ class TTSService(AIService):
self._sample_rate = 0
self._text_aggregator = SimpleTextAggregator(aggregation_type=self._text_aggregation_mode)
- self._skip_aggregator_types: List[str] = skip_aggregator_types or []
- self._text_transforms: List[
- Tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]]
+ self._skip_aggregator_types: list[str] = skip_aggregator_types or []
+ self._text_transforms: list[
+ tuple[AggregationType | str, Callable[[str, AggregationType | str], Awaitable[str]]]
] = text_transforms or []
# TODO: Deprecate _text_filters when added to LLMTextProcessor
self._text_filters: Sequence[BaseTextFilter] = text_filters or []
- self._transport_destination: Optional[str] = transport_destination
+ self._transport_destination: str | None = transport_destination
self._resampler = create_stream_resampler()
self._processing_text: bool = False
- self._tts_contexts: Dict[str, TTSContext] = {}
+ self._tts_contexts: dict[str, TTSContext] = {}
self._streamed_text: str = ""
self._text_aggregation_metrics_started: bool = False
# Word timestamp state
self._initial_word_timestamp: int = -1
- self._initial_word_times: List[Tuple[str, float, Optional[str]]] = []
+ self._initial_word_times: list[tuple[str, float, str | None]] = []
# PTS of the last word frame pushed via _add_word_timestamps, used to assign
# correct PTS to TTSStoppedFrame and LLMFullResponseEndFrame.
self._word_last_pts: int = 0
@@ -327,10 +318,10 @@ class TTSService(AIService):
# they clear at different times: _turn_context_id is cleared when the LLM turn
# ends (synthesis done) while _playing_context_id remains set until the audio
# finishes playing. Merging them would null out the playback cursor prematurely.
- self._playing_context_id: Optional[str] = None
- self._turn_context_id: Optional[str] = None
- self._audio_contexts: Dict[str, asyncio.Queue] = {}
- self._audio_context_task: Optional[asyncio.Task] = None
+ self._playing_context_id: str | None = None
+ self._turn_context_id: str | None = None
+ self._audio_contexts: dict[str, asyncio.Queue] = {}
+ self._audio_context_task: asyncio.Task | None = None
self._register_event_handler("on_connected")
self._register_event_handler("on_disconnected")
@@ -467,7 +458,7 @@ class TTSService(AIService):
"""
pass
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a language to the service-specific language format.
Args:
@@ -491,7 +482,7 @@ class TTSService(AIService):
return text + " "
return text
- async def flush_audio(self, context_id: Optional[str] = None):
+ async def flush_audio(self, context_id: str | None = None):
"""Flush any buffered audio data.
Args:
@@ -793,8 +784,8 @@ class TTSService(AIService):
iterator: AsyncIterator[bytes],
*,
strip_wav_header: bool = False,
- in_sample_rate: Optional[int] = None,
- context_id: Optional[str] = None,
+ in_sample_rate: int | None = None,
+ context_id: str | None = None,
) -> AsyncGenerator[Frame, None]:
"""Stream audio frames from an async byte iterator with optional resampling.
@@ -896,9 +887,9 @@ class TTSService(AIService):
async def _push_tts_frames(
self,
src_frame: AggregatedTextFrame,
- includes_inter_frame_spaces: Optional[bool] = False,
- append_tts_text_to_context: Optional[bool] = True,
- push_assistant_aggregation: Optional[bool] = False,
+ includes_inter_frame_spaces: bool | None = False,
+ append_tts_text_to_context: bool | None = True,
+ push_assistant_aggregation: bool | None = False,
):
type = src_frame.aggregated_by
text = src_frame.text
@@ -1069,7 +1060,7 @@ class TTSService(AIService):
self._initial_word_times = []
async def add_word_timestamps(
- self, word_times: List[Tuple[str, float]], context_id: Optional[str] = None
+ self, word_times: list[tuple[str, float]], context_id: str | None = None
):
"""Add word timestamps for processing.
@@ -1096,7 +1087,7 @@ class TTSService(AIService):
await self._add_word_timestamps(word_times=word_times, context_id=context_id)
async def _add_word_timestamps(
- self, word_times: List[Tuple[str, float]], context_id: Optional[str] = None
+ self, word_times: list[tuple[str, float]], context_id: str | None = None
):
"""Process word timestamps directly, building and pushing TTSTextFrames inline.
@@ -1194,11 +1185,11 @@ class TTSService(AIService):
self._playing_context_id
)
- def get_audio_contexts(self) -> List[str]:
+ def get_audio_contexts(self) -> list[str]:
"""Get a list of all available audio contexts."""
return list(self._audio_contexts.keys())
- def get_active_audio_context_id(self) -> Optional[str]:
+ def get_active_audio_context_id(self) -> str | None:
"""Get the active audio context ID.
Returns:
@@ -1242,7 +1233,7 @@ class TTSService(AIService):
# must be emitted in-order relative to surrounding audio contexts.
# None – shutdown sentinel (sent by stop()).
self._serialization_queue: asyncio.Queue = asyncio.Queue()
- self._audio_contexts: Dict[str, asyncio.Queue] = {}
+ self._audio_contexts: dict[str, asyncio.Queue] = {}
self._audio_context_task = self.create_task(self._audio_context_task_handler())
async def _stop_audio_context_task(self):
@@ -1342,7 +1333,7 @@ class TTSService(AIService):
await self.push_error_frame(frame)
else:
await self.push_frame(frame)
- except asyncio.TimeoutError:
+ except TimeoutError:
# We didn't get audio, so let's consider this context finished.
logger.trace(f"{self} time out on audio context {context_id}")
if should_push_stop_frame and self._push_stop_frames:
diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py
index 264241ca7..935562dc1 100644
--- a/src/pipecat/services/ultravox/llm.py
+++ b/src/pipecat/services/ultravox/llm.py
@@ -16,7 +16,7 @@ import datetime
import json
import uuid
from dataclasses import dataclass, field
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Literal
import aiohttp
from loguru import logger
@@ -94,13 +94,13 @@ class AgentInputParams(BaseModel):
api_key: str
agent_id: uuid.UUID
- template_context: Dict[str, Any] = Field(default_factory=dict)
- metadata: Dict[str, str] = Field(default_factory=dict)
- output_medium: Optional[Literal["text", "voice"]] = None
- max_duration: Optional[datetime.timedelta] = Field(
+ template_context: dict[str, Any] = Field(default_factory=dict)
+ metadata: dict[str, str] = Field(default_factory=dict)
+ output_medium: Literal["text", "voice"] | None = None
+ max_duration: datetime.timedelta | None = Field(
default=None, ge=datetime.timedelta(seconds=10), le=datetime.timedelta(hours=1)
)
- extra: Dict[str, Any] = Field(default_factory=dict)
+ extra: dict[str, Any] = Field(default_factory=dict)
class OneShotInputParams(BaseModel):
@@ -122,18 +122,18 @@ class OneShotInputParams(BaseModel):
"""
api_key: str
- system_prompt: Optional[str] = None
+ system_prompt: str | None = None
temperature: float = Field(default=0.0, ge=0.0, le=1.0)
- model: Optional[str] = None
- voice: Optional[uuid.UUID] = None
- metadata: Dict[str, str] = Field(default_factory=dict)
- output_medium: Optional[Literal["text", "voice"]] = None
+ model: str | None = None
+ voice: uuid.UUID | None = None
+ metadata: dict[str, str] = Field(default_factory=dict)
+ output_medium: Literal["text", "voice"] | None = None
max_duration: datetime.timedelta = Field(
default=datetime.timedelta(hours=1),
ge=datetime.timedelta(seconds=10),
le=datetime.timedelta(hours=1),
)
- extra: Dict[str, Any] = Field(default_factory=dict)
+ extra: dict[str, Any] = Field(default_factory=dict)
class JoinUrlInputParams(BaseModel):
@@ -163,9 +163,9 @@ class UltravoxRealtimeLLMService(LLMService):
def __init__(
self,
*,
- params: Union[AgentInputParams, OneShotInputParams, JoinUrlInputParams],
- settings: Optional[Settings] = None,
- one_shot_selected_tools: Optional[ToolsSchema] = None,
+ params: AgentInputParams | OneShotInputParams | JoinUrlInputParams,
+ settings: Settings | None = None,
+ one_shot_selected_tools: ToolsSchema | None = None,
**kwargs,
):
"""Initialize the Ultravox Realtime LLM service.
@@ -213,11 +213,11 @@ class UltravoxRealtimeLLMService(LLMService):
else:
self._selected_tools = one_shot_selected_tools
- self._socket: Optional[websocket_client.ClientConnection] = None
- self._receive_task: Optional[asyncio.Task] = None
+ self._socket: websocket_client.ClientConnection | None = None
+ self._receive_task: asyncio.Task | None = None
self._disconnecting = False
self._bot_responding: Literal[None, "text", "voice"] = None
- self._last_user_id: Optional[str] = None
+ self._last_user_id: str | None = None
self._sample_rate = 48000
self._resampler = create_stream_resampler()
@@ -258,7 +258,7 @@ class UltravoxRealtimeLLMService(LLMService):
await self.push_error("Failed to connect to Ultravox", e, fatal=True)
@staticmethod
- def _output_medium_to_api(medium: Optional[Literal["text", "voice"]]) -> Optional[str]:
+ def _output_medium_to_api(medium: Literal["text", "voice"] | None) -> str | None:
if medium == "text":
return "MESSAGE_MEDIUM_TEXT"
elif medium == "voice":
@@ -324,8 +324,8 @@ class UltravoxRealtimeLLMService(LLMService):
raise Exception(f"Ultravox API error {response.status}: {error_text}")
return (await response.json())["joinUrl"]
- def _to_selected_tools(self, tool: ToolsSchema) -> List[Dict[str, Any]]:
- result: List[Dict[str, Any]] = []
+ def _to_selected_tools(self, tool: ToolsSchema) -> list[dict[str, Any]]:
+ result: list[dict[str, Any]] = []
for standard_tool in tool.standard_tools:
result.append(
{
@@ -476,7 +476,7 @@ class UltravoxRealtimeLLMService(LLMService):
return
await self._send({"type": "set_output_medium", "medium": output_medium})
- async def _send(self, content: Union[bytes, Dict[str, Any]]):
+ async def _send(self, content: bytes | dict[str, Any]):
"""Send content via the WebSocket connection.
Args:
@@ -565,7 +565,7 @@ class UltravoxRealtimeLLMService(LLMService):
self._bot_responding = None
async def _handle_tool_invocation(
- self, tool_name: str, invocation_id: str, parameters: Dict[str, Any]
+ self, tool_name: str, invocation_id: str, parameters: dict[str, Any]
):
await self.run_function_calls(
[
@@ -590,7 +590,7 @@ class UltravoxRealtimeLLMService(LLMService):
)
async def _handle_agent_transcript(
- self, medium: str, text: Optional[str], delta: Optional[str], final: bool
+ self, medium: str, text: str | None, delta: str | None, final: bool
):
if medium == "voice":
# In voice mode, audio is handled by _handle_audio(). Here we push
diff --git a/src/pipecat/services/vision_service.py b/src/pipecat/services/vision_service.py
index 572f3b423..74d70f1d4 100644
--- a/src/pipecat/services/vision_service.py
+++ b/src/pipecat/services/vision_service.py
@@ -12,7 +12,7 @@ visual content.
"""
from abc import abstractmethod
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
from pipecat.frames.frames import Frame, UserImageRawFrame
from pipecat.processors.frame_processor import FrameDirection
@@ -28,7 +28,7 @@ class VisionService(AIService):
with the AI service infrastructure for metrics and lifecycle management.
"""
- def __init__(self, *, settings: Optional[VisionSettings] = None, **kwargs):
+ def __init__(self, *, settings: VisionSettings | None = None, **kwargs):
"""Initialize the vision service.
Args:
diff --git a/src/pipecat/services/websocket_service.py b/src/pipecat/services/websocket_service.py
index 9258aa90c..83ddb3746 100644
--- a/src/pipecat/services/websocket_service.py
+++ b/src/pipecat/services/websocket_service.py
@@ -9,7 +9,7 @@
import asyncio
import time
from abc import ABC, abstractmethod
-from typing import Awaitable, Callable, Optional
+from collections.abc import Awaitable, Callable
import websockets
from loguru import logger
@@ -42,7 +42,7 @@ class WebsocketService(ABC):
reconnect_on_error: Whether to automatically reconnect on connection errors.
**kwargs: Additional arguments (unused, for compatibility).
"""
- self._websocket: Optional[websockets.WebSocketClientProtocol] = None
+ self._websocket: websockets.WebSocketClientProtocol | None = None
self._reconnect_on_error = reconnect_on_error
self._reconnect_in_progress: bool = False
self._disconnecting: bool = False
@@ -81,7 +81,7 @@ class WebsocketService(ABC):
async def _try_reconnect(
self,
max_retries: int = 3,
- report_error: Optional[Callable[[ErrorFrame], Awaitable[None]]] = None,
+ report_error: Callable[[ErrorFrame], Awaitable[None]] | None = None,
) -> bool:
# Prevent concurrent reconnection attempts
if self._reconnect_in_progress:
@@ -89,7 +89,7 @@ class WebsocketService(ABC):
return False
self._reconnect_in_progress = True
- last_exception: Optional[Exception] = None
+ last_exception: Exception | None = None
try:
for attempt in range(1, max_retries + 1):
try:
@@ -136,7 +136,7 @@ class WebsocketService(ABC):
self,
error_message: str,
report_error: Callable[[ErrorFrame], Awaitable[None]],
- error: Optional[Exception] = None,
+ error: Exception | None = None,
) -> bool:
"""Check if reconnection should be attempted and try if appropriate.
diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py
index a891253ce..9ac84c41c 100644
--- a/src/pipecat/services/whisper/base_stt.py
+++ b/src/pipecat/services/whisper/base_stt.py
@@ -10,8 +10,8 @@ This module provides common functionality for services implementing the Whisper
interface, including language mapping, metrics generation, and error handling.
"""
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
-from typing import AsyncGenerator, Optional
from loguru import logger
from openai import AsyncOpenAI
@@ -40,7 +40,7 @@ class BaseWhisperSTTSettings(STTSettings):
temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN)
-def language_to_whisper_language(language: Language) -> Optional[str]:
+def language_to_whisper_language(language: Language) -> str | None:
"""Maps pipecat Language enum to Whisper API language codes.
Language support for Whisper API.
@@ -128,16 +128,16 @@ class BaseWhisperSTTService(SegmentedSTTService):
def __init__(
self,
*,
- model: Optional[str] = None,
- api_key: Optional[str] = None,
- base_url: Optional[str] = None,
- language: Optional[Language] = None,
- prompt: Optional[str] = None,
- temperature: Optional[float] = None,
+ model: str | None = None,
+ api_key: str | None = None,
+ base_url: str | None = None,
+ language: Language | None = None,
+ prompt: str | None = None,
+ temperature: float | None = None,
include_prob_metrics: bool = False,
push_empty_transcripts: bool = False,
- settings: Optional[Settings] = None,
- ttfs_p99_latency: Optional[float] = WHISPER_TTFS_P99,
+ settings: Settings | None = None,
+ ttfs_p99_latency: float | None = WHISPER_TTFS_P99,
**kwargs,
):
"""Initialize the Whisper STT service.
@@ -217,7 +217,7 @@ class BaseWhisperSTTService(SegmentedSTTService):
self._include_prob_metrics = include_prob_metrics
self._push_empty_transcripts = push_empty_transcripts
- def _create_client(self, api_key: Optional[str], base_url: Optional[str]):
+ def _create_client(self, api_key: str | None, base_url: str | None):
return AsyncOpenAI(api_key=api_key, base_url=base_url)
def can_generate_metrics(self) -> bool:
@@ -228,7 +228,7 @@ class BaseWhisperSTTService(SegmentedSTTService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert from pipecat Language to service language code.
Args:
@@ -241,7 +241,7 @@ class BaseWhisperSTTService(SegmentedSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py
index ac5d90c30..7d92009d1 100644
--- a/src/pipecat/services/whisper/stt.py
+++ b/src/pipecat/services/whisper/stt.py
@@ -11,13 +11,14 @@ supporting both Faster Whisper and MLX Whisper backends for efficient inference.
"""
import asyncio
+from collections.abc import AsyncGenerator
from dataclasses import dataclass, field
from enum import Enum
-from typing import AsyncGenerator, Optional
+from typing import TYPE_CHECKING
import numpy as np
from loguru import logger
-from typing_extensions import TYPE_CHECKING, override
+from typing_extensions import override
from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame
from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven
@@ -96,7 +97,7 @@ class MLXModel(Enum):
LARGE_V3_TURBO_Q4 = "mlx-community/whisper-large-v3-turbo-q4"
-def language_to_whisper_language(language: Language) -> Optional[str]:
+def language_to_whisper_language(language: Language) -> str | None:
"""Maps pipecat Language enum to Whisper language codes.
Args:
@@ -213,12 +214,12 @@ class WhisperSTTService(SegmentedSTTService):
def __init__(
self,
*,
- model: Optional[str | Model] = None,
+ model: str | Model | None = None,
device: str = "auto",
compute_type: str = "default",
- no_speech_prob: Optional[float] = None,
- language: Optional[Language] = None,
- settings: Optional[Settings] = None,
+ no_speech_prob: float | None = None,
+ language: Language | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the Whisper STT service.
@@ -280,7 +281,7 @@ class WhisperSTTService(SegmentedSTTService):
self._device = device
self._compute_type = compute_type
- self._model: Optional[WhisperModel] = None
+ self._model: WhisperModel | None = None
self._load()
@@ -292,7 +293,7 @@ class WhisperSTTService(SegmentedSTTService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert from pipecat Language to Whisper language code.
Args:
@@ -325,7 +326,7 @@ class WhisperSTTService(SegmentedSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
@@ -387,11 +388,11 @@ class WhisperSTTServiceMLX(WhisperSTTService):
def __init__(
self,
*,
- model: Optional[str | MLXModel] = None,
- no_speech_prob: Optional[float] = None,
- language: Optional[Language] = None,
- temperature: Optional[float] = None,
- settings: Optional[Settings] = None,
+ model: str | MLXModel | None = None,
+ no_speech_prob: float | None = None,
+ language: Language | None = None,
+ temperature: float | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the MLX Whisper STT service.
@@ -466,7 +467,7 @@ class WhisperSTTServiceMLX(WhisperSTTService):
@traced_stt
async def _handle_transcription(
- self, transcript: str, is_final: bool, language: Optional[Language] = None
+ self, transcript: str, is_final: bool, language: Language | None = None
):
"""Handle a transcription result with tracing."""
pass
diff --git a/src/pipecat/services/whisper/utils.py b/src/pipecat/services/whisper/utils.py
index ac7e0cca5..182418e3c 100644
--- a/src/pipecat/services/whisper/utils.py
+++ b/src/pipecat/services/whisper/utils.py
@@ -7,12 +7,11 @@
"""Utility functions for extracting probability metrics from STT services."""
import math
-from typing import Optional
from pipecat.frames.frames import TranscriptionFrame
-def extract_whisper_probability(frame: TranscriptionFrame) -> Optional[float]:
+def extract_whisper_probability(frame: TranscriptionFrame) -> float | None:
"""Extract probability from Whisper-based TranscriptionFrame result.
Works with Groq, OpenAI Whisper, or other Whisper-based services that use
@@ -53,7 +52,7 @@ def extract_whisper_probability(frame: TranscriptionFrame) -> Optional[float]:
return None
-def extract_openai_gpt4o_probability(frame: TranscriptionFrame) -> Optional[float]:
+def extract_openai_gpt4o_probability(frame: TranscriptionFrame) -> float | None:
"""Extract probability from OpenAI GPT-4o-transcribe TranscriptionFrame result.
Args:
@@ -90,7 +89,7 @@ def extract_openai_gpt4o_probability(frame: TranscriptionFrame) -> Optional[floa
return None
-def extract_deepgram_probability(frame: TranscriptionFrame) -> Optional[float]:
+def extract_deepgram_probability(frame: TranscriptionFrame) -> float | None:
"""Extract probability from Deepgram TranscriptionFrame result.
Args:
diff --git a/src/pipecat/services/xai/llm.py b/src/pipecat/services/xai/llm.py
index 0bbfb62b3..e0d84373f 100644
--- a/src/pipecat/services/xai/llm.py
+++ b/src/pipecat/services/xai/llm.py
@@ -12,7 +12,6 @@ and context aggregation functionality.
"""
from dataclasses import dataclass
-from typing import Optional
from loguru import logger
@@ -48,8 +47,8 @@ class GrokLLMService(OpenAILLMService):
*,
api_key: str,
base_url: str = "https://api.x.ai/v1",
- model: Optional[str] = None,
- settings: Optional[Settings] = None,
+ model: str | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the GrokLLMService with API key and model.
diff --git a/src/pipecat/services/xai/realtime/events.py b/src/pipecat/services/xai/realtime/events.py
index 1f89a92f7..c5e4ab755 100644
--- a/src/pipecat/services/xai/realtime/events.py
+++ b/src/pipecat/services/xai/realtime/events.py
@@ -12,7 +12,7 @@ https://docs.x.ai/docs/guides/voice/agent
import json
import uuid
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
@@ -82,7 +82,7 @@ class TurnDetection(BaseModel):
type: Detection type, must be "server_vad" or None for manual.
"""
- type: Optional[Literal["server_vad"]] = "server_vad"
+ type: Literal["server_vad"] | None = "server_vad"
#
@@ -97,7 +97,7 @@ class AudioInput(BaseModel):
format: The format configuration for input audio.
"""
- format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None
+ format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None
class AudioOutput(BaseModel):
@@ -107,7 +107,7 @@ class AudioOutput(BaseModel):
format: The format configuration for output audio.
"""
- format: Optional[Union[PCMAudioFormat, PCMUAudioFormat, PCMAAudioFormat]] = None
+ format: PCMAudioFormat | PCMUAudioFormat | PCMAAudioFormat | None = None
class AudioConfiguration(BaseModel):
@@ -118,8 +118,8 @@ class AudioConfiguration(BaseModel):
output: Configuration for output audio.
"""
- input: Optional[AudioInput] = None
- output: Optional[AudioOutput] = None
+ input: AudioInput | None = None
+ output: AudioOutput | None = None
#
@@ -147,7 +147,7 @@ class XSearchTool(BaseModel):
"""
type: Literal["x_search"] = "x_search"
- allowed_x_handles: Optional[List[str]] = None
+ allowed_x_handles: list[str] | None = None
class FileSearchTool(BaseModel):
@@ -162,8 +162,8 @@ class FileSearchTool(BaseModel):
"""
type: Literal["file_search"] = "file_search"
- vector_store_ids: List[str]
- max_num_results: Optional[int] = 10
+ vector_store_ids: list[str]
+ max_num_results: int | None = 10
class FunctionTool(BaseModel):
@@ -179,11 +179,11 @@ class FunctionTool(BaseModel):
type: Literal["function"] = "function"
name: str
description: str
- parameters: Dict[str, Any]
+ parameters: dict[str, Any]
# Union type for all Grok tools
-GrokTool = Union[WebSearchTool, XSearchTool, FileSearchTool, FunctionTool, Dict[str, Any]]
+GrokTool = WebSearchTool | XSearchTool | FileSearchTool | FunctionTool | dict[str, Any]
#
@@ -215,14 +215,14 @@ class SessionProperties(BaseModel):
# Needed to support ToolSchema in tools field.
model_config = ConfigDict(arbitrary_types_allowed=True)
- instructions: Optional[str] = None
- voice: Optional[GrokVoice | str] = "Ara"
- turn_detection: Optional[TurnDetection] = Field(
+ instructions: str | None = None
+ voice: GrokVoice | str | None = "Ara"
+ turn_detection: TurnDetection | None = Field(
default_factory=lambda: TurnDetection(type="server_vad")
)
- audio: Optional[AudioConfiguration] = None
+ audio: AudioConfiguration | None = None
# Tools can be ToolsSchema when provided by user, or list of dicts for API
- tools: Optional[ToolsSchema | List[GrokTool]] = None
+ tools: ToolsSchema | list[GrokTool] | None = None
#
@@ -241,9 +241,9 @@ class ItemContent(BaseModel):
"""
type: Literal["text", "audio", "input_text", "input_audio", "output_text", "output_audio"]
- text: Optional[str] = None
- audio: Optional[str] = None # base64-encoded audio
- transcript: Optional[str] = None
+ text: str | None = None
+ audio: str | None = None # base64-encoded audio
+ transcript: str | None = None
class ConversationItem(BaseModel):
@@ -263,15 +263,15 @@ class ConversationItem(BaseModel):
"""
id: str = Field(default_factory=lambda: str(uuid.uuid4().hex))
- object: Optional[Literal["realtime.item"]] = None
+ object: Literal["realtime.item"] | None = None
type: Literal["message", "function_call", "function_call_output"]
- status: Optional[Literal["completed", "in_progress", "incomplete"]] = None
- role: Optional[Literal["user", "assistant", "system", "tool"]] = None
- content: Optional[List[ItemContent]] = None
- call_id: Optional[str] = None
- name: Optional[str] = None
- arguments: Optional[str] = None
- output: Optional[str] = None
+ status: Literal["completed", "in_progress", "incomplete"] | None = None
+ role: Literal["user", "assistant", "system", "tool"] | None = None
+ content: list[ItemContent] | None = None
+ call_id: str | None = None
+ name: str | None = None
+ arguments: str | None = None
+ output: str | None = None
class RealtimeConversation(BaseModel):
@@ -293,7 +293,7 @@ class ResponseProperties(BaseModel):
modalities: Output modalities for the response (text, audio, or both).
"""
- modalities: Optional[List[Literal["text", "audio"]]] = ["text", "audio"]
+ modalities: list[Literal["text", "audio"]] | None = ["text", "audio"]
#
@@ -312,11 +312,11 @@ class RealtimeError(BaseModel):
event_id: Event ID associated with the error, if applicable.
"""
- type: Optional[str] = None
- code: Optional[str] = ""
+ type: str | None = None
+ code: str | None = ""
message: str
- param: Optional[str] = None
- event_id: Optional[str] = None
+ param: str | None = None
+ event_id: str | None = None
#
@@ -390,7 +390,7 @@ class ConversationItemCreateEvent(ClientEvent):
"""
type: Literal["conversation.item.create"] = "conversation.item.create"
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item: ConversationItem
@@ -403,7 +403,7 @@ class ResponseCreateEvent(ClientEvent):
"""
type: Literal["response.create"] = "response.create"
- response: Optional[ResponseProperties] = None
+ response: ResponseProperties | None = None
class ResponseCancelEvent(ClientEvent):
@@ -471,7 +471,7 @@ class ConversationItemAdded(ServerEvent):
"""
type: Literal["conversation.item.added"]
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item: ConversationItem
@@ -527,7 +527,7 @@ class InputAudioBufferCommitted(ServerEvent):
"""
type: Literal["input_audio_buffer.committed"]
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
item_id: str
@@ -646,11 +646,11 @@ class ResponseFunctionCallArgumentsDelta(ServerEvent):
"""
type: Literal["response.function_call_arguments.delta"]
- response_id: Optional[str] = None
- item_id: Optional[str] = None
+ response_id: str | None = None
+ item_id: str | None = None
call_id: str
delta: str
- previous_item_id: Optional[str] = None
+ previous_item_id: str | None = None
class ResponseFunctionCallArgumentsDone(ServerEvent):
@@ -680,9 +680,9 @@ class Usage(BaseModel):
output_tokens: Number of output tokens used.
"""
- total_tokens: Optional[int] = None
- input_tokens: Optional[int] = None
- output_tokens: Optional[int] = None
+ total_tokens: int | None = None
+ input_tokens: int | None = None
+ output_tokens: int | None = None
class Response(BaseModel):
@@ -699,9 +699,9 @@ class Response(BaseModel):
id: str
object: Literal["realtime.response"]
status: Literal["completed", "in_progress", "incomplete", "cancelled", "failed"]
- status_details: Optional[Any] = None
- output: List[ConversationItem]
- usage: Optional[Usage] = None
+ status_details: Any | None = None
+ output: list[ConversationItem]
+ usage: Usage | None = None
class ResponseCreated(ServerEvent):
@@ -727,7 +727,7 @@ class ResponseDone(ServerEvent):
type: Literal["response.done"]
response: Response
- usage: Optional[Usage] = None
+ usage: Usage | None = None
class ResponseOutputItemDone(ServerEvent):
@@ -755,7 +755,7 @@ class ContentPart(BaseModel):
"""
type: str
- transcript: Optional[str] = None
+ transcript: str | None = None
class ResponseContentPartAdded(ServerEvent):
diff --git a/src/pipecat/services/xai/realtime/llm.py b/src/pipecat/services/xai/realtime/llm.py
index 8c7175b0a..3b2620a1f 100644
--- a/src/pipecat/services/xai/realtime/llm.py
+++ b/src/pipecat/services/xai/realtime/llm.py
@@ -13,9 +13,10 @@ https://docs.x.ai/docs/guides/voice/agent
import base64
import json
import time
+from collections.abc import Mapping
from dataclasses import dataclass, field
from dataclasses import fields as dataclass_fields
-from typing import Any, Dict, Mapping, Optional, Type
+from typing import Any
from loguru import logger
@@ -110,7 +111,7 @@ class GrokRealtimeLLMSettings(LLMSettings):
# -- apply_update override -----------------------------------------------
- def apply_update(self, delta: "GrokRealtimeLLMService.Settings") -> Dict[str, Any]:
+ def apply_update(self, delta: "GrokRealtimeLLMService.Settings") -> dict[str, Any]:
"""Merge a delta, keeping ``system_instruction`` in sync with SP.
When the delta contains ``session_properties``, it **replaces** the
@@ -142,7 +143,7 @@ class GrokRealtimeLLMSettings(LLMSettings):
@classmethod
def from_mapping(
- cls: Type["GrokRealtimeLLMService.Settings"], settings: Mapping[str, Any]
+ cls: type["GrokRealtimeLLMService.Settings"], settings: Mapping[str, Any]
) -> "GrokRealtimeLLMService.Settings":
"""Build a delta from a plain dict, routing SP keys into ``session_properties``.
@@ -153,9 +154,9 @@ class GrokRealtimeLLMSettings(LLMSettings):
# Determine which keys belong to our own dataclass fields.
own_field_names = {f.name for f in dataclass_fields(cls)} - {"extra"}
- top: Dict[str, Any] = {}
- sp_dict: Dict[str, Any] = {}
- extra: Dict[str, Any] = {}
+ top: dict[str, Any] = {}
+ sp_dict: dict[str, Any] = {}
+ extra: dict[str, Any] = {}
sp_keys = set(events.SessionProperties.model_fields.keys())
@@ -204,8 +205,8 @@ class GrokRealtimeLLMService(LLMService):
*,
api_key: str,
base_url: str = "wss://api.x.ai/v1/realtime",
- session_properties: Optional[events.SessionProperties] = None,
- settings: Optional[Settings] = None,
+ session_properties: events.SessionProperties | None = None,
+ settings: Settings | None = None,
start_audio_paused: bool = False,
**kwargs,
):
@@ -308,7 +309,7 @@ class GrokRealtimeLLMService(LLMService):
"""
self._audio_input_paused = paused
- def _get_configured_sample_rate(self, direction: str) -> Optional[int]:
+ def _get_configured_sample_rate(self, direction: str) -> int | None:
"""Get manually configured sample rate for input or output.
Args:
diff --git a/src/pipecat/services/xai/tts.py b/src/pipecat/services/xai/tts.py
index c580ce912..17f67cf9a 100644
--- a/src/pipecat/services/xai/tts.py
+++ b/src/pipecat/services/xai/tts.py
@@ -10,8 +10,8 @@ Uses xAI's HTTP TTS endpoint documented at:
https://docs.x.ai/developers/model-capabilities/audio/text-to-speech
"""
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import AsyncGenerator, Optional
import aiohttp
from loguru import logger
@@ -23,7 +23,7 @@ from pipecat.transcriptions.language import Language, resolve_language
from pipecat.utils.tracing.service_decorators import traced_tts
-def language_to_xai_language(language: Language) -> Optional[str]:
+def language_to_xai_language(language: Language) -> str | None:
"""Convert a Language enum to xAI language code.
Args:
@@ -83,10 +83,10 @@ class XAIHttpTTSService(TTSService):
*,
api_key: str,
base_url: str = "https://api.x.ai/v1/tts",
- sample_rate: Optional[int] = None,
- encoding: Optional[str] = "pcm",
- aiohttp_session: Optional[aiohttp.ClientSession] = None,
- settings: Optional[Settings] = None,
+ sample_rate: int | None = None,
+ encoding: str | None = "pcm",
+ aiohttp_session: aiohttp.ClientSession | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the xAI TTS service.
@@ -127,7 +127,7 @@ class XAIHttpTTSService(TTSService):
"""Check if this service can generate processing metrics."""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to xAI language format.
Args:
diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py
index b164f8945..c39d79b0b 100644
--- a/src/pipecat/services/xtts/tts.py
+++ b/src/pipecat/services/xtts/tts.py
@@ -10,8 +10,9 @@ This module provides integration with Coqui XTTS streaming server for
text-to-speech synthesis using local Docker deployment.
"""
+from collections.abc import AsyncGenerator
from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Dict, Optional
+from typing import Any
import aiohttp
from loguru import logger
@@ -36,7 +37,7 @@ from pipecat.utils.tracing.service_decorators import traced_tts
# https://github.com/coqui-ai/xtts-streaming-server
-def language_to_xtts_language(language: Language) -> Optional[str]:
+def language_to_xtts_language(language: Language) -> str | None:
"""Convert a Language enum to XTTS language code.
Args:
@@ -89,12 +90,12 @@ class XTTSService(TTSService):
def __init__(
self,
*,
- voice_id: Optional[str] = None,
+ voice_id: str | None = None,
base_url: str,
aiohttp_session: aiohttp.ClientSession,
language: Language = Language.EN,
- sample_rate: Optional[int] = None,
- settings: Optional[Settings] = None,
+ sample_rate: int | None = None,
+ settings: Settings | None = None,
**kwargs,
):
"""Initialize the XTTS service.
@@ -149,7 +150,7 @@ class XTTSService(TTSService):
# Init-only fields (not runtime-updatable)
self._base_url = base_url
- self._studio_speakers: Optional[Dict[str, Any]] = None
+ self._studio_speakers: dict[str, Any] | None = None
self._aiohttp_session = aiohttp_session
self._resampler = create_stream_resampler()
@@ -162,7 +163,7 @@ class XTTSService(TTSService):
"""
return True
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
"""Convert a Language enum to XTTS service language format.
Args:
diff --git a/src/pipecat/tests/utils.py b/src/pipecat/tests/utils.py
index c837a58b7..23df0ff81 100644
--- a/src/pipecat/tests/utils.py
+++ b/src/pipecat/tests/utils.py
@@ -7,8 +7,8 @@
"""Testing utilities for Pipecat pipeline components."""
import asyncio
+from collections.abc import Awaitable, Callable, Sequence
from dataclasses import dataclass
-from typing import Awaitable, Callable, List, Optional, Sequence, Tuple
from pipecat.frames.frames import (
EndFrame,
@@ -124,15 +124,15 @@ async def run_test(
processor: FrameProcessor,
*,
enable_rtvi: bool = False,
- expected_down_frames: Optional[Sequence[type]] = None,
- expected_up_frames: Optional[Sequence[type]] = None,
+ expected_down_frames: Sequence[type] | None = None,
+ expected_up_frames: Sequence[type] | None = None,
frames_to_send: Sequence[Frame],
frames_to_send_direction: FrameDirection = FrameDirection.DOWNSTREAM,
ignore_start: bool = True,
- observers: Optional[List[BaseObserver]] = None,
- pipeline_params: Optional[PipelineParams] = None,
+ observers: list[BaseObserver] | None = None,
+ pipeline_params: PipelineParams | None = None,
send_end_frame: bool = True,
-) -> Tuple[Sequence[Frame], Sequence[Frame]]:
+) -> tuple[Sequence[Frame], Sequence[Frame]]:
"""Run a test pipeline with the specified processor and validate frame flow.
This function creates a test pipeline with the given processor, sends the
diff --git a/src/pipecat/transcriptions/language.py b/src/pipecat/transcriptions/language.py
index 1980590e3..1606bfeac 100644
--- a/src/pipecat/transcriptions/language.py
+++ b/src/pipecat/transcriptions/language.py
@@ -11,24 +11,10 @@ and BCP 47 standards, supporting both language-only and language-region
combinations for various speech and text processing services.
"""
-import sys
-from enum import Enum
+from enum import StrEnum
from loguru import logger
-if sys.version_info < (3, 11):
-
- class StrEnum(str, Enum):
- """String enumeration base class for Python < 3.11 compatibility."""
-
- def __new__(cls, value):
- """Create a new instance of the StrEnum."""
- obj = str.__new__(cls, value)
- obj._value_ = value
- return obj
-else:
- from enum import StrEnum
-
class Language(StrEnum):
"""Language codes for speech and text processing services.
diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py
index 4fb4a1736..02b090181 100644
--- a/src/pipecat/transports/base_input.py
+++ b/src/pipecat/transports/base_input.py
@@ -260,6 +260,6 @@ class BaseInputTransport(FrameProcessor):
await self.push_frame(frame)
self._audio_in_queue.task_done()
- except asyncio.TimeoutError:
+ except TimeoutError:
if not audio_received:
continue
diff --git a/src/pipecat/transports/base_output.py b/src/pipecat/transports/base_output.py
index fe1044f51..4e946bbb1 100644
--- a/src/pipecat/transports/base_output.py
+++ b/src/pipecat/transports/base_output.py
@@ -13,8 +13,9 @@ output processing, including frame buffering, mixing, timing, and media streamin
import asyncio
import itertools
import time
+from collections.abc import AsyncGenerator, Mapping
from concurrent.futures import ThreadPoolExecutor
-from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
+from typing import Any
from loguru import logger
from PIL import Image
@@ -87,7 +88,7 @@ class BaseOutputTransport(FrameProcessor):
# We will have one media sender per output frame destination. This allow
# us to send multiple streams at the same time if the transport allows
# it.
- self._media_senders: Dict[Any, "BaseOutputTransport.MediaSender"] = {}
+ self._media_senders: dict[Any, BaseOutputTransport.MediaSender] = {}
@property
def sample_rate(self) -> int:
@@ -383,7 +384,7 @@ class BaseOutputTransport(FrameProcessor):
self,
transport: "BaseOutputTransport",
*,
- destination: Optional[str],
+ destination: str | None,
sample_rate: int,
audio_chunk_size: int,
params: TransportParams,
@@ -414,7 +415,7 @@ class BaseOutputTransport(FrameProcessor):
# The user can provide a single mixer, to be used by the default
# destination, or a destination/mixer mapping.
- self._mixer: Optional[BaseAudioMixer] = None
+ self._mixer: BaseAudioMixer | None = None
# These are the images that we should send at our desired framerate.
self._video_images = None
@@ -431,9 +432,9 @@ class BaseOutputTransport(FrameProcessor):
# Last time the bot actually spoke.
self._bot_speech_last_time = 0
- self._audio_task: Optional[asyncio.Task] = None
- self._video_task: Optional[asyncio.Task] = None
- self._clock_task: Optional[asyncio.Task] = None
+ self._audio_task: asyncio.Task | None = None
+ self._video_task: asyncio.Task | None = None
+ self._clock_task: asyncio.Task | None = None
@property
def sample_rate(self) -> int:
@@ -753,7 +754,7 @@ class BaseOutputTransport(FrameProcessor):
)
yield frame
self._audio_queue.task_done()
- except asyncio.TimeoutError:
+ except TimeoutError:
# Fallback: notify the bot stopped speaking upstream if necessary based on timeout.
await self._bot_stopped_speaking()
@@ -856,7 +857,7 @@ class BaseOutputTransport(FrameProcessor):
"""
self._video_images = itertools.cycle([image])
- async def _set_video_images(self, images: List[OutputImageRawFrame]):
+ async def _set_video_images(self, images: list[OutputImageRawFrame]):
"""Set multiple video images for cycling output.
Args:
diff --git a/src/pipecat/transports/base_transport.py b/src/pipecat/transports/base_transport.py
index ed14f6eb9..829fc6fe1 100644
--- a/src/pipecat/transports/base_transport.py
+++ b/src/pipecat/transports/base_transport.py
@@ -12,7 +12,7 @@ functionality.
"""
from abc import abstractmethod
-from typing import List, Mapping, Optional
+from collections.abc import Mapping
from pydantic import BaseModel, ConfigDict, Field
@@ -57,18 +57,18 @@ class TransportParams(BaseModel):
model_config = ConfigDict(arbitrary_types_allowed=True)
audio_out_enabled: bool = False
- audio_out_sample_rate: Optional[int] = None
+ audio_out_sample_rate: int | None = None
audio_out_channels: int = 1
audio_out_bitrate: int = 96000
audio_out_10ms_chunks: int = 4
- audio_out_mixer: Optional[BaseAudioMixer | Mapping[Optional[str], BaseAudioMixer]] = None
- audio_out_destinations: List[str] = Field(default_factory=list)
+ audio_out_mixer: BaseAudioMixer | Mapping[str | None, BaseAudioMixer] | None = None
+ audio_out_destinations: list[str] = Field(default_factory=list)
audio_out_end_silence_secs: int = 2
audio_out_auto_silence: bool = True
audio_in_enabled: bool = False
- audio_in_sample_rate: Optional[int] = None
+ audio_in_sample_rate: int | None = None
audio_in_channels: int = 1
- audio_in_filter: Optional[BaseAudioFilter] = None
+ audio_in_filter: BaseAudioFilter | None = None
audio_in_stream_on_start: bool = True
audio_in_passthrough: bool = True
video_in_enabled: bool = False
@@ -79,8 +79,8 @@ class TransportParams(BaseModel):
video_out_bitrate: int = 800000
video_out_framerate: int = 30
video_out_color_format: str = "RGB"
- video_out_codec: Optional[str] = None
- video_out_destinations: List[str] = Field(default_factory=list)
+ video_out_codec: str | None = None
+ video_out_destinations: list[str] = Field(default_factory=list)
class BaseTransport(BaseObject):
@@ -93,9 +93,9 @@ class BaseTransport(BaseObject):
def __init__(
self,
*,
- name: Optional[str] = None,
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
+ name: str | None = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
):
"""Initialize the base transport.
diff --git a/src/pipecat/transports/daily/transport.py b/src/pipecat/transports/daily/transport.py
index b58701a0b..7084f1d15 100644
--- a/src/pipecat/transports/daily/transport.py
+++ b/src/pipecat/transports/daily/transport.py
@@ -13,10 +13,11 @@ real-time communication features.
import asyncio
import time
+from collections.abc import Awaitable, Callable, Mapping
from concurrent.futures import CancelledError as FuturesCancelledError
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
-from typing import Any, Awaitable, Callable, Dict, Mapping, Optional, Tuple
+from typing import Any
import aiohttp
from loguru import logger
@@ -87,7 +88,7 @@ class DailyOutputTransportMessageFrame(OutputTransportMessageFrame):
participant_id: Optional ID of the participant this message is for/from.
"""
- participant_id: Optional[str] = None
+ participant_id: str | None = None
@dataclass
@@ -98,7 +99,7 @@ class DailyOutputTransportMessageUrgentFrame(OutputTransportMessageUrgentFrame):
participant_id: Optional ID of the participant this message is for/from.
"""
- participant_id: Optional[str] = None
+ participant_id: str | None = None
@dataclass
@@ -109,7 +110,7 @@ class DailyInputTransportMessageFrame(InputTransportMessageFrame):
participant_id: Optional ID of the participant this message is for/from.
"""
- participant_id: Optional[str] = None
+ participant_id: str | None = None
@dataclass
@@ -171,9 +172,9 @@ class DailyOutputDTMFFrame(OutputDTMFFrame):
or ``auto``). When ``None``, Daily's default method is used.
"""
- session_id: Optional[str] = None
- digit_duration_ms: Optional[int] = None
- method: Optional[str] = None
+ session_id: str | None = None
+ digit_duration_ms: int | None = None
+ method: str | None = None
@dataclass
@@ -194,9 +195,9 @@ class DailyOutputDTMFUrgentFrame(OutputDTMFUrgentFrame):
or ``auto``). When ``None``, Daily's default method is used.
"""
- session_id: Optional[str] = None
- digit_duration_ms: Optional[int] = None
- method: Optional[str] = None
+ session_id: str | None = None
+ digit_duration_ms: int | None = None
+ method: str | None = None
class WebRTCVADAnalyzer(VADAnalyzer):
@@ -205,7 +206,7 @@ class WebRTCVADAnalyzer(VADAnalyzer):
Implements voice activity detection using Daily's native WebRTC VAD.
"""
- def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None):
+ def __init__(self, *, sample_rate: int | None = None, params: VADParams | None = None):
"""Initialize the WebRTC VAD analyzer.
Args:
@@ -295,7 +296,7 @@ class DailyCustomVideoTrackParams(BaseModel):
width: int = 1024
height: int = 768
color_format: str = "RGB"
- send_settings: Optional[Dict[str, Any]] = None
+ send_settings: dict[str, Any] | None = None
class DailyCustomAudioTrackParams(BaseModel):
@@ -311,9 +312,9 @@ class DailyCustomAudioTrackParams(BaseModel):
See https://reference-python.daily.co/types.html#audiopublishingsettings
"""
- sample_rate: Optional[int] = None
+ sample_rate: int | None = None
channels: int = 1
- send_settings: Optional[Dict[str, Any]] = None
+ send_settings: dict[str, Any] | None = None
class DailyParams(TransportParams):
@@ -336,9 +337,9 @@ class DailyParams(TransportParams):
api_key: str = ""
audio_in_user_tracks: bool = True
camera_out_enabled: bool = True
- custom_audio_track_params: Optional[Mapping[str, DailyCustomAudioTrackParams]] = None
- custom_video_track_params: Optional[Mapping[str, DailyCustomVideoTrackParams]] = None
- dialin_settings: Optional[DailyDialinSettings] = None
+ custom_audio_track_params: Mapping[str, DailyCustomAudioTrackParams] | None = None
+ custom_video_track_params: Mapping[str, DailyCustomVideoTrackParams] | None = None
+ dialin_settings: DailyDialinSettings | None = None
microphone_out_enabled: bool = True
transcription_enabled: bool = False
transcription_settings: DailyTranscriptionSettings = DailyTranscriptionSettings()
@@ -482,7 +483,7 @@ class DailyTransportClient(EventHandler):
def __init__(
self,
room_url: str,
- token: Optional[str],
+ token: str | None,
bot_name: str,
params: DailyParams,
callbacks: DailyCallbacks,
@@ -505,7 +506,7 @@ class DailyTransportClient(EventHandler):
Daily.init()
self._room_url: str = room_url
- self._token: Optional[str] = token
+ self._token: str | None = token
self._bot_name: str = bot_name
self._params: DailyParams = params
self._callbacks = callbacks
@@ -524,7 +525,7 @@ class DailyTransportClient(EventHandler):
self._joined_event = asyncio.Event()
self._leave_counter = 0
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
# We use the executor to cleanup the client. We just do it from one
# place, so only one thread is really needed.
@@ -550,11 +551,11 @@ class DailyTransportClient(EventHandler):
self._in_sample_rate = 0
self._out_sample_rate = 0
- self._speaker: Optional[VirtualSpeakerDevice] = None
- self._camera_track: Optional[DailyVideoTrack] = None
- self._microphone_track: Optional[DailyAudioTrack] = None
- self._custom_audio_tracks: Dict[str, DailyAudioTrack] = {}
- self._custom_video_tracks: Dict[str, DailyVideoTrack] = {}
+ self._speaker: VirtualSpeakerDevice | None = None
+ self._camera_track: DailyVideoTrack | None = None
+ self._microphone_track: DailyAudioTrack | None = None
+ self._custom_audio_tracks: dict[str, DailyAudioTrack] = {}
+ self._custom_video_tracks: dict[str, DailyVideoTrack] = {}
def _speaker_name(self):
"""Generate a unique virtual speaker name for this client instance."""
@@ -598,7 +599,7 @@ class DailyTransportClient(EventHandler):
async def send_message(
self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
- ) -> Optional[CallClientError]:
+ ) -> CallClientError | None:
"""Send an application message to participants.
Args:
@@ -623,7 +624,7 @@ class DailyTransportClient(EventHandler):
)
return await future
- async def read_next_audio_frame(self) -> Optional[InputAudioRawFrame]:
+ async def read_next_audio_frame(self) -> InputAudioRawFrame | None:
"""Reads the next 20ms audio frame from the virtual speaker."""
if not self._speaker:
return None
@@ -647,9 +648,7 @@ class DailyTransportClient(EventHandler):
await asyncio.sleep(0.01)
return None
- async def register_audio_destination(
- self, destination: str, auto_silence: Optional[bool] = True
- ):
+ async def register_audio_destination(self, destination: str, auto_silence: bool | None = True):
"""Register a custom audio destination for multi-track output.
Args:
@@ -661,7 +660,7 @@ class DailyTransportClient(EventHandler):
self._custom_audio_tracks[destination] = await self.add_custom_audio_track(
destination, params=params, auto_silence=auto_silence
)
- publishing: Dict[str, Any] = {"customAudio": {destination: True}}
+ publishing: dict[str, Any] = {"customAudio": {destination: True}}
if params and params.send_settings:
publishing["customAudio"][destination] = {"sendSettings": params.send_settings}
self._client.update_publishing(publishing)
@@ -676,7 +675,7 @@ class DailyTransportClient(EventHandler):
self._custom_video_tracks[destination] = await self.add_custom_video_track(
destination, params=params
)
- publishing: Dict[str, Any] = {"customVideo": {destination: True}}
+ publishing: dict[str, Any] = {"customVideo": {destination: True}}
if params and params.send_settings:
publishing["customVideo"][destination] = {"sendSettings": params.send_settings}
self._client.update_publishing(publishing)
@@ -693,7 +692,7 @@ class DailyTransportClient(EventHandler):
future = self._get_event_loop().create_future()
destination = frame.transport_destination
- audio_source: Optional[CustomAudioSource] = None
+ audio_source: CustomAudioSource | None = None
if not destination and self._microphone_track:
audio_source = self._microphone_track.source
elif destination and destination in self._custom_audio_tracks:
@@ -719,7 +718,7 @@ class DailyTransportClient(EventHandler):
True if the video frame was written successfully, False otherwise.
"""
destination = frame.transport_destination
- video_source: Optional[CustomVideoSource] = None
+ video_source: CustomVideoSource | None = None
if not destination and self._camera_track:
video_source = self._camera_track.source
elif destination and destination in self._custom_video_tracks:
@@ -992,7 +991,7 @@ class DailyTransportClient(EventHandler):
"""
return self._client.participant_counts()
- async def start_dialout(self, settings) -> Tuple[str, Optional[CallClientError]]:
+ async def start_dialout(self, settings) -> tuple[str, CallClientError | None]:
"""Start a dial-out call to a phone number.
Args:
@@ -1006,7 +1005,7 @@ class DailyTransportClient(EventHandler):
self._client.start_dialout(settings, completion=completion_callback(future))
return await future
- async def stop_dialout(self, participant_id) -> Optional[CallClientError]:
+ async def stop_dialout(self, participant_id) -> CallClientError | None:
"""Stop a dial-out call for a specific participant.
Args:
@@ -1019,7 +1018,7 @@ class DailyTransportClient(EventHandler):
self._client.stop_dialout(participant_id, completion=completion_callback(future))
return await future
- async def send_dtmf(self, settings) -> Optional[CallClientError]:
+ async def send_dtmf(self, settings) -> CallClientError | None:
"""Send DTMF tones during a call.
Args:
@@ -1039,7 +1038,7 @@ class DailyTransportClient(EventHandler):
self._client.send_dtmf(settings, completion=completion_callback(future))
return await future
- async def sip_call_transfer(self, settings) -> Optional[CallClientError]:
+ async def sip_call_transfer(self, settings) -> CallClientError | None:
"""Transfer a SIP call to another destination.
Args:
@@ -1061,7 +1060,7 @@ class DailyTransportClient(EventHandler):
self._client.sip_call_transfer(settings, completion=completion_callback(future))
return await future
- async def sip_refer(self, settings) -> Optional[CallClientError]:
+ async def sip_refer(self, settings) -> CallClientError | None:
"""Send a SIP REFER request.
Args:
@@ -1076,7 +1075,7 @@ class DailyTransportClient(EventHandler):
async def start_recording(
self, streaming_settings, stream_id, force_new
- ) -> Tuple[str, Optional[CallClientError]]:
+ ) -> tuple[str, CallClientError | None]:
"""Start recording the call.
Args:
@@ -1094,7 +1093,7 @@ class DailyTransportClient(EventHandler):
)
return await future
- async def stop_recording(self, stream_id) -> Optional[CallClientError]:
+ async def stop_recording(self, stream_id) -> CallClientError | None:
"""Stop recording the call.
Args:
@@ -1107,7 +1106,7 @@ class DailyTransportClient(EventHandler):
self._client.stop_recording(stream_id, completion=completion_callback(future))
return await future
- async def start_transcription(self, settings) -> Optional[CallClientError]:
+ async def start_transcription(self, settings) -> CallClientError | None:
"""Start transcription for the call.
Args:
@@ -1123,7 +1122,7 @@ class DailyTransportClient(EventHandler):
self._client.start_transcription(settings=settings, completion=completion_callback(future))
return await future
- async def stop_transcription(self) -> Optional[CallClientError]:
+ async def stop_transcription(self) -> CallClientError | None:
"""Stop transcription for the call.
Returns:
@@ -1137,8 +1136,8 @@ class DailyTransportClient(EventHandler):
return await future
async def send_prebuilt_chat_message(
- self, message: str, user_name: Optional[str] = None
- ) -> Optional[CallClientError]:
+ self, message: str, user_name: str | None = None
+ ) -> CallClientError | None:
"""Send a chat message to Daily's Prebuilt main room.
Args:
@@ -1250,8 +1249,8 @@ class DailyTransportClient(EventHandler):
async def add_custom_audio_track(
self,
track_name: str,
- params: Optional[DailyCustomAudioTrackParams] = None,
- auto_silence: Optional[bool] = True,
+ params: DailyCustomAudioTrackParams | None = None,
+ auto_silence: bool | None = True,
) -> DailyAudioTrack:
"""Add a custom audio track for multi-stream output.
@@ -1286,7 +1285,7 @@ class DailyTransportClient(EventHandler):
return track
- async def remove_custom_audio_track(self, track_name: str) -> Optional[CallClientError]:
+ async def remove_custom_audio_track(self, track_name: str) -> CallClientError | None:
"""Remove a custom audio track.
Args:
@@ -1305,7 +1304,7 @@ class DailyTransportClient(EventHandler):
async def add_custom_video_track(
self,
track_name: str,
- params: Optional[DailyCustomVideoTrackParams] = None,
+ params: DailyCustomVideoTrackParams | None = None,
) -> DailyVideoTrack:
"""Add a custom video track for multi-stream output.
@@ -1336,7 +1335,7 @@ class DailyTransportClient(EventHandler):
return DailyVideoTrack(source=video_source, track=video_track)
- async def remove_custom_video_track(self, track_name: str) -> Optional[CallClientError]:
+ async def remove_custom_video_track(self, track_name: str) -> CallClientError | None:
"""Remove a custom video track.
Args:
@@ -1354,7 +1353,7 @@ class DailyTransportClient(EventHandler):
async def update_transcription(
self, participants=None, instance_id=None
- ) -> Optional[CallClientError]:
+ ) -> CallClientError | None:
"""Update transcription settings for specific participants.
Args:
@@ -1372,7 +1371,7 @@ class DailyTransportClient(EventHandler):
async def update_subscriptions(
self, participant_settings=None, profile_settings=None
- ) -> Optional[CallClientError]:
+ ) -> CallClientError | None:
"""Update media subscription settings.
Args:
@@ -1392,7 +1391,7 @@ class DailyTransportClient(EventHandler):
async def update_publishing(
self, publishing_settings: Mapping[str, Any]
- ) -> Optional[CallClientError]:
+ ) -> CallClientError | None:
"""Update media publishing settings.
Args:
@@ -1410,7 +1409,7 @@ class DailyTransportClient(EventHandler):
async def update_remote_participants(
self, remote_participants: Mapping[str, Any]
- ) -> Optional[CallClientError]:
+ ) -> CallClientError | None:
"""Update settings for remote participants.
Args:
@@ -1757,7 +1756,7 @@ class DailyInputTransport(BaseInputTransport):
self._capture_participant_audio = []
# Audio task when using a virtual speaker (i.e. no user tracks).
- self._audio_in_task: Optional[asyncio.Task] = None
+ self._audio_in_task: asyncio.Task | None = None
async def start_audio_in_streaming(self):
"""Start receiving audio from participants."""
@@ -2192,7 +2191,7 @@ class DailyOutputTransport(BaseOutputTransport):
if not frame.buttons:
return
- settings: Dict[str, Any] = {"tones": frame.to_string()}
+ settings: dict[str, Any] = {"tones": frame.to_string()}
if isinstance(frame, (DailyOutputDTMFFrame, DailyOutputDTMFUrgentFrame)):
if frame.session_id is not None:
settings["sessionId"] = frame.session_id
@@ -2272,11 +2271,11 @@ class DailyTransport(BaseTransport):
def __init__(
self,
room_url: str,
- token: Optional[str],
+ token: str | None,
bot_name: str,
- params: Optional[DailyParams] = None,
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
+ params: DailyParams | None = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
):
"""Initialize the Daily transport.
@@ -2326,8 +2325,8 @@ class DailyTransport(BaseTransport):
self._client = DailyTransportClient(
room_url, token, bot_name, self._params, callbacks, self.name
)
- self._input: Optional[DailyInputTransport] = None
- self._output: Optional[DailyOutputTransport] = None
+ self._input: DailyInputTransport | None = None
+ self._output: DailyOutputTransport | None = None
self._other_participant_has_joined = False
@@ -2459,7 +2458,7 @@ class DailyTransport(BaseTransport):
"""
return self._client.participant_counts()
- async def send_dtmf(self, settings) -> Optional[CallClientError]:
+ async def send_dtmf(self, settings) -> CallClientError | None:
"""Send DTMF tones during a call.
Args:
@@ -2475,7 +2474,7 @@ class DailyTransport(BaseTransport):
logger.error(f"Unable to send DTMF: {error}")
return error
- async def start_dialout(self, settings=None) -> Tuple[str, Optional[CallClientError]]:
+ async def start_dialout(self, settings=None) -> tuple[str, CallClientError | None]:
"""Start a dial-out call to a phone number.
Args:
@@ -2492,7 +2491,7 @@ class DailyTransport(BaseTransport):
logger.error(f"Unable to start dialout: {error}")
return session_id, error
- async def stop_dialout(self, participant_id) -> Optional[CallClientError]:
+ async def stop_dialout(self, participant_id) -> CallClientError | None:
"""Stop a dial-out call for a specific participant.
Args:
@@ -2508,7 +2507,7 @@ class DailyTransport(BaseTransport):
logger.error(f"Unable to stop dialout: {error}")
return error
- async def sip_call_transfer(self, settings) -> Optional[CallClientError]:
+ async def sip_call_transfer(self, settings) -> CallClientError | None:
"""Transfer a SIP call to another destination.
Args:
@@ -2524,7 +2523,7 @@ class DailyTransport(BaseTransport):
logger.error(f"Unable to transfer SIP call: {error}")
return error
- async def sip_refer(self, settings) -> Optional[CallClientError]:
+ async def sip_refer(self, settings) -> CallClientError | None:
"""Send a SIP REFER request.
Args:
@@ -2542,7 +2541,7 @@ class DailyTransport(BaseTransport):
async def start_recording(
self, streaming_settings=None, stream_id=None, force_new=None
- ) -> Tuple[str, Optional[CallClientError]]:
+ ) -> tuple[str, CallClientError | None]:
"""Start recording the call.
Args:
@@ -2563,7 +2562,7 @@ class DailyTransport(BaseTransport):
logger.error(f"Unable to start recording: {error}")
return r_id, error
- async def stop_recording(self, stream_id=None) -> Optional[CallClientError]:
+ async def stop_recording(self, stream_id=None) -> CallClientError | None:
"""Stop recording the call.
Args:
@@ -2579,7 +2578,7 @@ class DailyTransport(BaseTransport):
logger.error(f"Unable to stop recording: {error}")
return error
- async def start_transcription(self, settings=None) -> Optional[CallClientError]:
+ async def start_transcription(self, settings=None) -> CallClientError | None:
"""Start transcription for the call.
Args:
@@ -2595,7 +2594,7 @@ class DailyTransport(BaseTransport):
logger.error(f"Unable to start transcription: {error}")
return error
- async def stop_transcription(self) -> Optional[CallClientError]:
+ async def stop_transcription(self) -> CallClientError | None:
"""Stop transcription for the call.
Returns:
@@ -2609,8 +2608,8 @@ class DailyTransport(BaseTransport):
return error
async def send_prebuilt_chat_message(
- self, message: str, user_name: Optional[str] = None
- ) -> Optional[CallClientError]:
+ self, message: str, user_name: str | None = None
+ ) -> CallClientError | None:
"""Send a chat message to Daily's Prebuilt main room.
Args:
@@ -2671,7 +2670,7 @@ class DailyTransport(BaseTransport):
async def update_publishing(
self, publishing_settings: Mapping[str, Any]
- ) -> Optional[CallClientError]:
+ ) -> CallClientError | None:
"""Update media publishing settings.
Args:
@@ -2689,7 +2688,7 @@ class DailyTransport(BaseTransport):
async def update_subscriptions(
self, participant_settings=None, profile_settings=None
- ) -> Optional[CallClientError]:
+ ) -> CallClientError | None:
"""Update media subscription settings.
Args:
@@ -2712,7 +2711,7 @@ class DailyTransport(BaseTransport):
async def update_remote_participants(
self, remote_participants: Mapping[str, Any]
- ) -> Optional[CallClientError]:
+ ) -> CallClientError | None:
"""Update settings for remote participants.
Args:
@@ -2822,7 +2821,7 @@ class DailyTransport(BaseTransport):
return
logger.debug("Event dialin-ready was handled successfully")
- except asyncio.TimeoutError:
+ except TimeoutError:
logger.error(f"Timeout handling dialin-ready event ({url})")
except Exception as e:
logger.error(f"Error handling dialin-ready event ({url}): {e}")
diff --git a/src/pipecat/transports/daily/utils.py b/src/pipecat/transports/daily/utils.py
index 8c7526357..8926c406e 100644
--- a/src/pipecat/transports/daily/utils.py
+++ b/src/pipecat/transports/daily/utils.py
@@ -10,7 +10,7 @@ Methods that wrap the Daily API to create rooms, check room URLs, and get meetin
"""
import time
-from typing import Any, Dict, List, Literal, Optional
+from typing import Any, Literal
from urllib.parse import urlparse
import aiohttp
@@ -35,8 +35,8 @@ class DailyRoomSipParams(BaseModel):
video: bool = False
sip_mode: str = "dial-in"
num_endpoints: int = 1
- codecs: Optional[Dict[str, List[str]]] = None
- provider: Optional[str] = None
+ codecs: dict[str, list[str]] | None = None
+ provider: str | None = None
class RecordingsBucketConfig(BaseModel):
@@ -102,20 +102,20 @@ class DailyRoomProperties(BaseModel):
model_config = ConfigDict(extra="allow")
- exp: Optional[float] = None
+ exp: float | None = None
enable_chat: bool = False
enable_prejoin_ui: bool = False
enable_emoji_reactions: bool = False
eject_at_room_exp: bool = False
- enable_dialout: Optional[bool] = None
- enable_recording: Optional[Literal["cloud", "cloud-audio-only", "local", "raw-tracks"]] = None
- enable_transcription_storage: Optional[bool] = None
- geo: Optional[str] = None
- max_participants: Optional[int] = None
- recordings_bucket: Optional[RecordingsBucketConfig] = None
- transcription_bucket: Optional[TranscriptionBucketConfig] = None
- sip: Optional[DailyRoomSipParams] = None
- sip_uri: Optional[Dict[str, Any]] = None
+ enable_dialout: bool | None = None
+ enable_recording: Literal["cloud", "cloud-audio-only", "local", "raw-tracks"] | None = None
+ enable_transcription_storage: bool | None = None
+ geo: str | None = None
+ max_participants: int | None = None
+ recordings_bucket: RecordingsBucketConfig | None = None
+ transcription_bucket: TranscriptionBucketConfig | None = None
+ sip: DailyRoomSipParams | None = None
+ sip_uri: dict[str, Any] | None = None
start_video_off: bool = False
@property
@@ -140,7 +140,7 @@ class DailyRoomParams(BaseModel):
properties: Room configuration properties.
"""
- name: Optional[str] = None
+ name: str | None = None
privacy: Literal["private", "public"] = "public"
properties: DailyRoomProperties = Field(default_factory=DailyRoomProperties)
@@ -191,21 +191,21 @@ class DailyMeetingTokenProperties(BaseModel):
permissions: Specifies the initial default permissions for a non-meeting-owner participant.
"""
- room_name: Optional[str] = None
- eject_at_token_exp: Optional[bool] = None
- eject_after_elapsed: Optional[int] = None
- nbf: Optional[int] = None
- exp: Optional[int] = None
- is_owner: Optional[bool] = None
- user_name: Optional[str] = None
- user_id: Optional[str] = None
- enable_screenshare: Optional[bool] = None
- start_video_off: Optional[bool] = None
- start_audio_off: Optional[bool] = None
- enable_recording: Optional[Literal["cloud", "cloud-audio-only", "local", "raw-tracks"]] = None
- enable_prejoin_ui: Optional[bool] = None
- start_cloud_recording: Optional[bool] = None
- permissions: Optional[Dict[str, Any]] = None
+ room_name: str | None = None
+ eject_at_token_exp: bool | None = None
+ eject_after_elapsed: int | None = None
+ nbf: int | None = None
+ exp: int | None = None
+ is_owner: bool | None = None
+ user_name: str | None = None
+ user_id: str | None = None
+ enable_screenshare: bool | None = None
+ start_video_off: bool | None = None
+ start_audio_off: bool | None = None
+ enable_recording: Literal["cloud", "cloud-audio-only", "local", "raw-tracks"] | None = None
+ enable_prejoin_ui: bool | None = None
+ start_cloud_recording: bool | None = None
+ permissions: dict[str, Any] | None = None
class DailyMeetingTokenParams(BaseModel):
@@ -304,7 +304,7 @@ class DailyRESTHelper:
expiry_time: float = 60 * 60,
eject_at_token_exp: bool = False,
owner: bool = True,
- params: Optional[DailyMeetingTokenParams] = None,
+ params: DailyMeetingTokenParams | None = None,
) -> str:
"""Generate a meeting token for user to join a Daily room.
diff --git a/src/pipecat/transports/heygen/transport.py b/src/pipecat/transports/heygen/transport.py
index d79d0080e..ac13c2fe9 100644
--- a/src/pipecat/transports/heygen/transport.py
+++ b/src/pipecat/transports/heygen/transport.py
@@ -16,7 +16,7 @@ The module consists of three main components:
- HeyGenTransport: Main transport implementation that coordinates input/output transports
"""
-from typing import Any, Optional, Union
+from typing import Any
import aiohttp
from loguru import logger
@@ -309,10 +309,10 @@ class HeyGenTransport(BaseTransport):
session: aiohttp.ClientSession,
api_key: str,
params: HeyGenParams = HeyGenParams(),
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
- session_request: Optional[Union[LiveAvatarNewSessionRequest, NewSessionRequest]] = None,
- service_type: Optional[ServiceType] = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
+ session_request: LiveAvatarNewSessionRequest | NewSessionRequest | None = None,
+ service_type: ServiceType | None = None,
):
"""Initialize the HeyGen transport.
@@ -346,8 +346,8 @@ class HeyGenTransport(BaseTransport):
on_participant_disconnected=self._on_participant_disconnected,
),
)
- self._input: Optional[HeyGenInputTransport] = None
- self._output: Optional[HeyGenOutputTransport] = None
+ self._input: HeyGenInputTransport | None = None
+ self._output: HeyGenOutputTransport | None = None
self._HeyGen_participant_id = None
# Register supported handlers. The user will only be able to register
diff --git a/src/pipecat/transports/lemonslice/api.py b/src/pipecat/transports/lemonslice/api.py
index 8ced216f0..f8114c5f4 100644
--- a/src/pipecat/transports/lemonslice/api.py
+++ b/src/pipecat/transports/lemonslice/api.py
@@ -10,7 +10,7 @@ This module provides helper classes for interacting with the LemonSlice API,
including session creation and termination.
"""
-from typing import Any, Optional
+from typing import Any
import aiohttp
from loguru import logger
@@ -38,15 +38,15 @@ class LemonSliceApi:
async def create_session(
self,
*,
- agent_image_url: Optional[str] = None,
- agent_id: Optional[str] = None,
- agent_prompt: Optional[str] = None,
- idle_timeout: Optional[int] = None,
- daily_room_url: Optional[str] = None,
- daily_token: Optional[str] = None,
- connection_properties: Optional[dict[str, Any]] = None,
- extra_properties: Optional[dict[str, Any]] = None,
- api_url: Optional[str] = None,
+ agent_image_url: str | None = None,
+ agent_id: str | None = None,
+ agent_prompt: str | None = None,
+ idle_timeout: int | None = None,
+ daily_room_url: str | None = None,
+ daily_token: str | None = None,
+ connection_properties: dict[str, Any] | None = None,
+ extra_properties: dict[str, Any] | None = None,
+ api_url: str | None = None,
) -> dict:
"""Create a new session with the specified agent_id or agent_image_url.
diff --git a/src/pipecat/transports/lemonslice/transport.py b/src/pipecat/transports/lemonslice/transport.py
index 18e53a4c6..90966ade2 100644
--- a/src/pipecat/transports/lemonslice/transport.py
+++ b/src/pipecat/transports/lemonslice/transport.py
@@ -10,8 +10,9 @@ This module adds LemonSlice avatars to Daily rooms, enabling
real-time voice conversations with synchronized avatars.
"""
+from collections.abc import Awaitable, Callable, Mapping
from functools import partial
-from typing import Any, Awaitable, Callable, Mapping, Optional
+from typing import Any
import aiohttp
from daily.daily import AudioData
@@ -60,14 +61,14 @@ class LemonSliceNewSessionRequest(BaseModel):
model_config = ConfigDict(extra="allow")
- agent_image_url: Optional[str] = None
- agent_id: Optional[str] = None
- agent_prompt: Optional[str] = None
- idle_timeout: Optional[int] = None
- daily_room_url: Optional[str] = None
- daily_token: Optional[str] = None
- lemonslice_properties: Optional[dict] = None
- api_url: Optional[str] = None
+ agent_image_url: str | None = None
+ agent_id: str | None = None
+ agent_prompt: str | None = None
+ idle_timeout: int | None = None
+ daily_room_url: str | None = None
+ daily_token: str | None = None
+ lemonslice_properties: dict | None = None
+ api_url: str | None = None
class LemonSliceCallbacks(BaseModel):
@@ -114,7 +115,7 @@ class LemonSliceTransportClient:
params: LemonSliceParams = LemonSliceParams(),
callbacks: LemonSliceCallbacks,
api_key: str,
- session_request: Optional[LemonSliceNewSessionRequest] = None,
+ session_request: LemonSliceNewSessionRequest | None = None,
session: aiohttp.ClientSession,
) -> None:
"""Initialize the LemonSlice transport client.
@@ -131,9 +132,9 @@ class LemonSliceTransportClient:
self._bot_name = bot_name
self._api = LemonSliceApi(api_key, session)
self._session_request = session_request or LemonSliceNewSessionRequest()
- self._session_id: Optional[str] = None
- self._control_url: Optional[str] = None
- self._daily_transport_client: Optional[DailyTransportClient] = None
+ self._session_id: str | None = None
+ self._control_url: str | None = None
+ self._daily_transport_client: DailyTransportClient | None = None
self._callbacks = callbacks
self._params = params
@@ -540,7 +541,7 @@ class LemonSliceOutputTransport(BaseOutputTransport):
# Whether we have seen a StartFrame already.
self._initialized = False
# This is the custom track destination expected by LemonSlice
- self._transport_destination: Optional[str] = "stream"
+ self._transport_destination: str | None = "stream"
async def setup(self, setup: FrameProcessorSetup):
"""Setup the output transport.
@@ -692,10 +693,10 @@ class LemonSliceTransport(BaseTransport):
bot_name: str,
session: aiohttp.ClientSession,
api_key: str,
- session_request: Optional[LemonSliceNewSessionRequest] = None,
+ session_request: LemonSliceNewSessionRequest | None = None,
params: LemonSliceParams = LemonSliceParams(),
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
):
"""Initialize the LemonSlice transport.
@@ -724,8 +725,8 @@ class LemonSliceTransport(BaseTransport):
session=session,
params=params,
)
- self._input: Optional[LemonSliceInputTransport] = None
- self._output: Optional[LemonSliceOutputTransport] = None
+ self._input: LemonSliceInputTransport | None = None
+ self._output: LemonSliceOutputTransport | None = None
self._lemonslice_participant_id = None
# Register supported handlers. The user will only be able to register
diff --git a/src/pipecat/transports/livekit/transport.py b/src/pipecat/transports/livekit/transport.py
index 04e99f4c0..550c1947e 100644
--- a/src/pipecat/transports/livekit/transport.py
+++ b/src/pipecat/transports/livekit/transport.py
@@ -13,8 +13,9 @@ event handling for conversational AI applications.
import asyncio
import json
+from collections.abc import Awaitable, Callable
from dataclasses import dataclass
-from typing import Any, Awaitable, Callable, List, Optional
+from typing import Any
from loguru import logger
from pydantic import BaseModel
@@ -78,7 +79,7 @@ class LiveKitOutputTransportMessageFrame(OutputTransportMessageFrame):
participant_id: Optional ID of the participant this message is for/from.
"""
- participant_id: Optional[str] = None
+ participant_id: str | None = None
@dataclass
@@ -89,7 +90,7 @@ class LiveKitOutputTransportMessageUrgentFrame(OutputTransportMessageUrgentFrame
participant_id: Optional ID of the participant this message is for/from.
"""
- participant_id: Optional[str] = None
+ participant_id: str | None = None
class LiveKitParams(TransportParams):
@@ -160,18 +161,18 @@ class LiveKitTransportClient:
self._params = params
self._callbacks = callbacks
self._transport_name = transport_name
- self._room: Optional[rtc.Room] = None
+ self._room: rtc.Room | None = None
self._participant_id: str = ""
self._connected = False
self._disconnect_counter = 0
- self._audio_source: Optional[rtc.AudioSource] = None
- self._audio_track: Optional[rtc.LocalAudioTrack] = None
+ self._audio_source: rtc.AudioSource | None = None
+ self._audio_track: rtc.LocalAudioTrack | None = None
self._audio_tracks = {}
self._audio_queue = asyncio.Queue()
self._video_tracks = {}
self._video_queue = asyncio.Queue()
self._other_participant_has_joined = False
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
self._async_lock = asyncio.Lock()
@property
@@ -292,7 +293,7 @@ class LiveKitTransportClient:
logger.info(f"Disconnected from {self._room_name}")
await self._callbacks.on_disconnected()
- async def send_data(self, data: bytes, participant_id: Optional[str] = None):
+ async def send_data(self, data: bytes, participant_id: str | None = None):
"""Send data to participants in the room.
Args:
@@ -357,7 +358,7 @@ class LiveKitTransportClient:
logger.error(f"Error publishing audio: {e}")
return False
- def get_participants(self) -> List[str]:
+ def get_participants(self) -> list[str]:
"""Get list of participant IDs in the room.
Returns:
@@ -971,9 +972,9 @@ class LiveKitTransport(BaseTransport):
url: str,
token: str,
room_name: str,
- params: Optional[LiveKitParams] = None,
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
+ params: LiveKitParams | None = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
):
"""Initialize the LiveKit transport.
@@ -1005,8 +1006,8 @@ class LiveKitTransport(BaseTransport):
self._client = LiveKitTransportClient(
url, token, room_name, self._params, callbacks, self.name
)
- self._input: Optional[LiveKitInputTransport] = None
- self._output: Optional[LiveKitOutputTransport] = None
+ self._input: LiveKitInputTransport | None = None
+ self._output: LiveKitOutputTransport | None = None
self._register_event_handler("on_connected")
self._register_event_handler("on_disconnected")
@@ -1064,7 +1065,7 @@ class LiveKitTransport(BaseTransport):
if self._output:
await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM)
- def get_participants(self) -> List[str]:
+ def get_participants(self) -> list[str]:
"""Get list of participant IDs in the room.
Returns:
@@ -1166,7 +1167,7 @@ class LiveKitTransport(BaseTransport):
await self._input.push_app_message(data.decode(), participant_id)
await self._call_event_handler("on_data_received", data, participant_id)
- async def send_message(self, message: str, participant_id: Optional[str] = None):
+ async def send_message(self, message: str, participant_id: str | None = None):
"""Send a message to participants in the room.
Args:
@@ -1179,7 +1180,7 @@ class LiveKitTransport(BaseTransport):
)
await self._output.send_message(frame)
- async def send_message_urgent(self, message: str, participant_id: Optional[str] = None):
+ async def send_message_urgent(self, message: str, participant_id: str | None = None):
"""Send an urgent message to participants in the room.
Args:
diff --git a/src/pipecat/transports/local/audio.py b/src/pipecat/transports/local/audio.py
index 94ee94019..db12e2f0d 100644
--- a/src/pipecat/transports/local/audio.py
+++ b/src/pipecat/transports/local/audio.py
@@ -12,7 +12,6 @@ audio input and output through the system's default audio devices.
import asyncio
from concurrent.futures import ThreadPoolExecutor
-from typing import Optional
from loguru import logger
@@ -40,8 +39,8 @@ class LocalAudioTransportParams(TransportParams):
output_device_index: PyAudio device index for audio output. If None, uses default.
"""
- input_device_index: Optional[int] = None
- output_device_index: Optional[int] = None
+ input_device_index: int | None = None
+ output_device_index: int | None = None
class LocalAudioInputTransport(BaseInputTransport):
@@ -206,8 +205,8 @@ class LocalAudioTransport(BaseTransport):
self._params = params
self._pyaudio = pyaudio.PyAudio()
- self._input: Optional[LocalAudioInputTransport] = None
- self._output: Optional[LocalAudioOutputTransport] = None
+ self._input: LocalAudioInputTransport | None = None
+ self._output: LocalAudioOutputTransport | None = None
#
# BaseTransport
diff --git a/src/pipecat/transports/local/tk.py b/src/pipecat/transports/local/tk.py
index 658b45afe..a11f4f277 100644
--- a/src/pipecat/transports/local/tk.py
+++ b/src/pipecat/transports/local/tk.py
@@ -13,7 +13,6 @@ PyAudio for audio I/O, suitable for desktop applications and testing.
import asyncio
import tkinter as tk
from concurrent.futures import ThreadPoolExecutor
-from typing import Optional
import numpy as np
from loguru import logger
@@ -53,8 +52,8 @@ class TkTransportParams(TransportParams):
audio_output_device_index: PyAudio device index for audio output. If None, uses default.
"""
- audio_input_device_index: Optional[int] = None
- audio_output_device_index: Optional[int] = None
+ audio_input_device_index: int | None = None
+ audio_output_device_index: int | None = None
class TkInputTransport(BaseInputTransport):
@@ -251,8 +250,8 @@ class TkLocalTransport(BaseTransport):
self._params = params
self._pyaudio = pyaudio.PyAudio()
- self._input: Optional[TkInputTransport] = None
- self._output: Optional[TkOutputTransport] = None
+ self._input: TkInputTransport | None = None
+ self._output: TkOutputTransport | None = None
#
# BaseTransport
diff --git a/src/pipecat/transports/smallwebrtc/connection.py b/src/pipecat/transports/smallwebrtc/connection.py
index 3f0d6a9ee..efb9a6923 100644
--- a/src/pipecat/transports/smallwebrtc/connection.py
+++ b/src/pipecat/transports/smallwebrtc/connection.py
@@ -15,7 +15,7 @@ import asyncio
import json
import time
import uuid
-from typing import Any, List, Literal, Optional, Union
+from typing import Any, Literal
from loguru import logger
from pydantic import BaseModel, TypeAdapter
@@ -89,8 +89,8 @@ class SignallingMessage:
outbound: Types of messages that can be sent to peers.
"""
- Inbound = Union[TrackStatusMessage] # in case we need to add new messages in the future
- outbound = Union[RenegotiateMessage]
+ Inbound = TrackStatusMessage # in case we need to add new messages in the future
+ outbound = RenegotiateMessage
class SmallWebRTCTrack:
@@ -112,7 +112,7 @@ class SmallWebRTCTrack:
self._track = receiver.track
self._enabled = True
self._last_recv_time: float = 0.0
- self._idle_task: Optional[asyncio.Task] = None
+ self._idle_task: asyncio.Task | None = None
self._idle_timeout: float = 2.0 # seconds before discarding old frames
def set_enabled(self, enabled: bool) -> None:
@@ -145,7 +145,7 @@ class SmallWebRTCTrack:
remote_track._queue.get_nowait() # Remove the oldest frame
remote_track._queue.task_done()
- async def recv(self) -> Optional[Frame]:
+ async def recv(self) -> Frame | None:
"""Receive the next frame from the track.
Enables the internal receiving state and starts idle watcher.
@@ -213,7 +213,7 @@ class SmallWebRTCConnection(BaseObject):
def __init__(
self,
- ice_servers: Optional[Union[List[str], List[IceServer]]] = None,
+ ice_servers: list[str] | list[IceServer] | None = None,
connection_timeout_secs: int = 60,
):
"""Initialize the WebRTC connection.
@@ -227,7 +227,7 @@ class SmallWebRTCConnection(BaseObject):
"""
super().__init__()
if not ice_servers:
- self.ice_servers: List[IceServer] = []
+ self.ice_servers: list[IceServer] = []
elif all(isinstance(s, IceServer) for s in ice_servers):
self.ice_servers = ice_servers
elif all(isinstance(s, str) for s in ice_servers):
@@ -281,7 +281,7 @@ class SmallWebRTCConnection(BaseObject):
logger.debug("Initializing new peer connection")
rtc_config = RTCConfiguration(iceServers=self.ice_servers)
- self._answer: Optional[RTCSessionDescription] = None
+ self._answer: RTCSessionDescription | None = None
self._pc = RTCPeerConnection(rtc_config)
self._pc_id = f"{self.name}-{uuid.uuid4().hex}"
self._setup_listeners()
diff --git a/src/pipecat/transports/smallwebrtc/request_handler.py b/src/pipecat/transports/smallwebrtc/request_handler.py
index 351a11af0..63c9bea14 100644
--- a/src/pipecat/transports/smallwebrtc/request_handler.py
+++ b/src/pipecat/transports/smallwebrtc/request_handler.py
@@ -10,9 +10,10 @@ This module provides a client for handling web requests and managing WebRTC conn
"""
import asyncio
+from collections.abc import Awaitable, Callable
from dataclasses import dataclass
from enum import Enum
-from typing import Any, Awaitable, Callable, Dict, List, Optional
+from typing import Any
from aiortc.sdp import candidate_from_sdp
from fastapi import HTTPException
@@ -35,9 +36,9 @@ class SmallWebRTCRequest:
sdp: str
type: str
- pc_id: Optional[str] = None
- restart_pc: Optional[bool] = None
- request_data: Optional[Any] = None
+ pc_id: str | None = None
+ restart_pc: bool | None = None
+ request_data: Any | None = None
@classmethod
def from_dict(cls, data: dict):
@@ -72,7 +73,7 @@ class SmallWebRTCPatchRequest:
"""
pc_id: str
- candidates: List[IceCandidate]
+ candidates: list[IceCandidate]
class ConnectionMode(Enum):
@@ -95,9 +96,9 @@ class SmallWebRTCRequestHandler:
def __init__(
self,
- ice_servers: Optional[List[IceServer]] = None,
+ ice_servers: list[IceServer] | None = None,
esp32_mode: bool = False,
- host: Optional[str] = None,
+ host: str | None = None,
connection_mode: ConnectionMode = ConnectionMode.MULTIPLE,
) -> None:
"""Initialize a SmallWebRTC request handler.
@@ -117,9 +118,9 @@ class SmallWebRTCRequestHandler:
self._connection_mode = connection_mode
# Store connections by pc_id
- self._pcs_map: Dict[str, SmallWebRTCConnection] = {}
+ self._pcs_map: dict[str, SmallWebRTCConnection] = {}
- def _check_single_connection_constraints(self, pc_id: Optional[str]) -> None:
+ def _check_single_connection_constraints(self, pc_id: str | None) -> None:
"""Check if the connection request satisfies single connection mode constraints.
Args:
@@ -152,7 +153,7 @@ class SmallWebRTCRequestHandler:
detail="Cannot create new connection with existing connection active",
)
- def update_ice_servers(self, ice_servers: Optional[List[IceServer]] = None):
+ def update_ice_servers(self, ice_servers: list[IceServer] | None = None):
"""Update the list of ICE servers used for WebRTC connections."""
self._ice_servers = ice_servers
@@ -160,7 +161,7 @@ class SmallWebRTCRequestHandler:
self,
request: SmallWebRTCRequest,
webrtc_connection_callback: Callable[[Any], Awaitable[None]],
- ) -> Optional[Dict[str, str]]:
+ ) -> dict[str, str] | None:
"""Handle a SmallWebRTC request and resolve the pending answer.
This method will:
diff --git a/src/pipecat/transports/smallwebrtc/transport.py b/src/pipecat/transports/smallwebrtc/transport.py
index 76ea34464..0d96d4aa6 100644
--- a/src/pipecat/transports/smallwebrtc/transport.py
+++ b/src/pipecat/transports/smallwebrtc/transport.py
@@ -15,7 +15,8 @@ import asyncio
import fractions
import time
from collections import deque
-from typing import Any, Awaitable, Callable, List, Optional
+from collections.abc import Awaitable, Callable
+from typing import Any
import numpy as np
from loguru import logger
@@ -235,9 +236,9 @@ class SmallWebRTCClient:
self._audio_output_track = None
self._video_output_track = None
- self._audio_input_track: Optional[AudioStreamTrack] = None
- self._video_input_track: Optional[VideoStreamTrack] = None
- self._screen_video_track: Optional[VideoStreamTrack] = None
+ self._audio_input_track: AudioStreamTrack | None = None
+ self._video_input_track: VideoStreamTrack | None = None
+ self._screen_video_track: VideoStreamTrack | None = None
self._params = None
self._audio_in_channels = None
@@ -314,7 +315,7 @@ class SmallWebRTCClient:
try:
frame = await asyncio.wait_for(video_track.recv(), timeout=2.0)
- except asyncio.TimeoutError:
+ except TimeoutError:
if (
self._webrtc_connection.is_connected()
and video_track
@@ -369,7 +370,7 @@ class SmallWebRTCClient:
try:
frame = await asyncio.wait_for(self._audio_input_track.recv(), timeout=2.0)
- except asyncio.TimeoutError:
+ except TimeoutError:
if (
self._webrtc_connection.is_connected()
and self._audio_input_track
@@ -583,7 +584,7 @@ class SmallWebRTCInputTransport(BaseInputTransport):
self._receive_audio_task = None
self._receive_video_task = None
self._receive_screen_video_task = None
- self._image_requests: List[UserImageRequestFrame] = []
+ self._image_requests: list[UserImageRequestFrame] = []
# Whether we have seen a StartFrame already.
self._initialized = False
@@ -897,8 +898,8 @@ class SmallWebRTCTransport(BaseTransport):
self,
webrtc_connection: SmallWebRTCConnection,
params: TransportParams,
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
):
"""Initialize the WebRTC transport.
@@ -919,8 +920,8 @@ class SmallWebRTCTransport(BaseTransport):
self._client = SmallWebRTCClient(webrtc_connection, self._callbacks)
- self._input: Optional[SmallWebRTCInputTransport] = None
- self._output: Optional[SmallWebRTCOutputTransport] = None
+ self._input: SmallWebRTCInputTransport | None = None
+ self._output: SmallWebRTCOutputTransport | None = None
# Register supported handlers. The user will only be able to register
# these handlers.
diff --git a/src/pipecat/transports/tavus/transport.py b/src/pipecat/transports/tavus/transport.py
index 426b7f72c..9ad28979d 100644
--- a/src/pipecat/transports/tavus/transport.py
+++ b/src/pipecat/transports/tavus/transport.py
@@ -12,8 +12,9 @@ audio/video streaming capabilities through the Tavus API.
"""
import os
+from collections.abc import Awaitable, Callable, Mapping
from functools import partial
-from typing import Any, Awaitable, Callable, Mapping, Optional
+from typing import Any
import aiohttp
from daily.daily import AudioData
@@ -197,8 +198,8 @@ class TavusTransportClient:
self._api = TavusApi(api_key, session)
self._replica_id = replica_id
self._persona_id = persona_id
- self._conversation_id: Optional[str] = None
- self._client: Optional[DailyTransportClient] = None
+ self._conversation_id: str | None = None
+ self._client: DailyTransportClient | None = None
self._callbacks = callbacks
self._params = params
@@ -417,9 +418,7 @@ class TavusTransportClient:
return False
return await self._client.write_audio_frame(frame)
- async def register_audio_destination(
- self, destination: str, auto_silence: Optional[bool] = True
- ):
+ async def register_audio_destination(self, destination: str, auto_silence: bool | None = True):
"""Register an audio destination for output.
Args:
@@ -563,7 +562,7 @@ class TavusOutputTransport(BaseOutputTransport):
# Whether we have seen a StartFrame already.
self._initialized = False
# This is the custom track destination expected by Tavus
- self._transport_destination: Optional[str] = "stream"
+ self._transport_destination: str | None = "stream"
async def setup(self, setup: FrameProcessorSetup):
"""Setup the output transport.
@@ -693,8 +692,8 @@ class TavusTransport(BaseTransport):
replica_id: str,
persona_id: str = "pipecat-stream",
params: TavusParams = TavusParams(),
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
):
"""Initialize the Tavus transport.
@@ -726,8 +725,8 @@ class TavusTransport(BaseTransport):
session=session,
params=params,
)
- self._input: Optional[TavusInputTransport] = None
- self._output: Optional[TavusOutputTransport] = None
+ self._input: TavusInputTransport | None = None
+ self._output: TavusOutputTransport | None = None
self._tavus_participant_id = None
# Register supported handlers. The user will only be able to register
diff --git a/src/pipecat/transports/websocket/client.py b/src/pipecat/transports/websocket/client.py
index b5b99ee97..5665dfd23 100644
--- a/src/pipecat/transports/websocket/client.py
+++ b/src/pipecat/transports/websocket/client.py
@@ -15,7 +15,7 @@ import asyncio
import io
import time
import wave
-from typing import Awaitable, Callable, Optional
+from collections.abc import Awaitable, Callable
import websockets
from loguru import logger
@@ -51,8 +51,8 @@ class WebsocketClientParams(TransportParams):
"""
add_wav_header: bool = True
- additional_headers: Optional[dict[str, str]] = None
- serializer: Optional[FrameSerializer] = None
+ additional_headers: dict[str, str] | None = None
+ serializer: FrameSerializer | None = None
class WebsocketClientCallbacks(BaseModel):
@@ -97,8 +97,8 @@ class WebsocketClientSession:
self._transport_name = transport_name
self._leave_counter = 0
- self._task_manager: Optional[BaseTaskManager] = None
- self._websocket: Optional[websockets.WebSocketClientProtocol] = None
+ self._task_manager: BaseTaskManager | None = None
+ self._websocket: websockets.WebSocketClientProtocol | None = None
@property
def task_manager(self) -> BaseTaskManager:
@@ -487,7 +487,7 @@ class WebsocketClientTransport(BaseTransport):
def __init__(
self,
uri: str,
- params: Optional[WebsocketClientParams] = None,
+ params: WebsocketClientParams | None = None,
):
"""Initialize the WebSocket client transport.
@@ -507,8 +507,8 @@ class WebsocketClientTransport(BaseTransport):
)
self._session = WebsocketClientSession(uri, self._params, callbacks, self.name)
- self._input: Optional[WebsocketClientInputTransport] = None
- self._output: Optional[WebsocketClientOutputTransport] = None
+ self._input: WebsocketClientInputTransport | None = None
+ self._output: WebsocketClientOutputTransport | None = None
# Register supported handlers. The user will only be able to register
# these handlers.
diff --git a/src/pipecat/transports/websocket/fastapi.py b/src/pipecat/transports/websocket/fastapi.py
index d9b7d7ae1..1e449b01e 100644
--- a/src/pipecat/transports/websocket/fastapi.py
+++ b/src/pipecat/transports/websocket/fastapi.py
@@ -16,7 +16,7 @@ import io
import time
import typing
import wave
-from typing import Awaitable, Callable, Optional
+from collections.abc import Awaitable, Callable
from loguru import logger
from pydantic import BaseModel
@@ -63,9 +63,9 @@ class FastAPIWebsocketParams(TransportParams):
"""
add_wav_header: bool = False
- serializer: Optional[FrameSerializer] = None
- session_timeout: Optional[int] = None
- fixed_audio_packet_size: Optional[int] = None
+ serializer: FrameSerializer | None = None
+ session_timeout: int | None = None
+ fixed_audio_packet_size: int | None = None
class FastAPIWebsocketCallbacks(BaseModel):
@@ -550,8 +550,8 @@ class FastAPIWebsocketTransport(BaseTransport):
self,
websocket: WebSocket,
params: FastAPIWebsocketParams,
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
):
"""Initialize the FastAPI WebSocket transport.
diff --git a/src/pipecat/transports/websocket/server.py b/src/pipecat/transports/websocket/server.py
index fa3645d37..82b899f71 100644
--- a/src/pipecat/transports/websocket/server.py
+++ b/src/pipecat/transports/websocket/server.py
@@ -15,7 +15,7 @@ import asyncio
import io
import time
import wave
-from typing import Awaitable, Callable, Optional
+from collections.abc import Awaitable, Callable
from loguru import logger
from pydantic import BaseModel
@@ -59,8 +59,8 @@ class WebsocketServerParams(TransportParams):
"""
add_wav_header: bool = False
- serializer: Optional[FrameSerializer] = None
- session_timeout: Optional[int] = None
+ serializer: FrameSerializer | None = None
+ session_timeout: int | None = None
class WebsocketServerCallbacks(BaseModel):
@@ -113,7 +113,7 @@ class WebsocketServerInputTransport(BaseInputTransport):
self._params = params
self._callbacks = callbacks
- self._websocket: Optional[websockets.WebSocketServerProtocol] = None
+ self._websocket: websockets.WebSocketServerProtocol | None = None
self._server_task = None
@@ -264,7 +264,7 @@ class WebsocketServerOutputTransport(BaseOutputTransport):
self._transport = transport
self._params = params
- self._websocket: Optional[websockets.WebSocketServerProtocol] = None
+ self._websocket: websockets.WebSocketServerProtocol | None = None
# write_audio_frame() is called quickly, as soon as we get audio
# (e.g. from the TTS), and since this is just a network connection we
@@ -277,7 +277,7 @@ class WebsocketServerOutputTransport(BaseOutputTransport):
# Whether we have seen a StartFrame already.
self._initialized = False
- async def set_client_connection(self, websocket: Optional[websockets.WebSocketServerProtocol]):
+ async def set_client_connection(self, websocket: websockets.WebSocketServerProtocol | None):
"""Set the active client WebSocket connection.
Args:
@@ -441,8 +441,8 @@ class WebsocketServerTransport(BaseTransport):
params: WebsocketServerParams,
host: str = "localhost",
port: int = 8765,
- input_name: Optional[str] = None,
- output_name: Optional[str] = None,
+ input_name: str | None = None,
+ output_name: str | None = None,
):
"""Initialize the WebSocket server transport.
@@ -464,9 +464,9 @@ class WebsocketServerTransport(BaseTransport):
on_session_timeout=self._on_session_timeout,
on_websocket_ready=self._on_websocket_ready,
)
- self._input: Optional[WebsocketServerInputTransport] = None
- self._output: Optional[WebsocketServerOutputTransport] = None
- self._websocket: Optional[websockets.WebSocketServerProtocol] = None
+ self._input: WebsocketServerInputTransport | None = None
+ self._output: WebsocketServerOutputTransport | None = None
+ self._websocket: websockets.WebSocketServerProtocol | None = None
# Register supported handlers. The user will only be able to register
# these handlers.
diff --git a/src/pipecat/transports/whatsapp/api.py b/src/pipecat/transports/whatsapp/api.py
index 5944cc8b8..250377619 100644
--- a/src/pipecat/transports/whatsapp/api.py
+++ b/src/pipecat/transports/whatsapp/api.py
@@ -9,7 +9,7 @@
API to communicate with WhatsApp Cloud API.
"""
-from typing import Any, Dict, List, Optional, Union
+from typing import Any
import aiohttp
from loguru import logger
@@ -44,7 +44,7 @@ class WhatsAppError(BaseModel):
code: int
message: str
href: str
- error_data: Dict[str, Any]
+ error_data: dict[str, Any]
class WhatsAppConnectCall(BaseModel):
@@ -68,7 +68,7 @@ class WhatsAppConnectCall(BaseModel):
to: str
event: str # "connect"
timestamp: str
- direction: Optional[str]
+ direction: str | None
session: WhatsAppSession
@@ -97,12 +97,12 @@ class WhatsAppTerminateCall(BaseModel):
to: str
event: str # "terminate"
timestamp: str
- direction: Optional[str]
- biz_opaque_callback_data: Optional[str] = None
- status: Optional[str] = None # "FAILED" or "COMPLETED" or "REJECTED"
- start_time: Optional[str] = None
- end_time: Optional[str] = None
- duration: Optional[int] = None
+ direction: str | None
+ biz_opaque_callback_data: str | None = None
+ status: str | None = None # "FAILED" or "COMPLETED" or "REJECTED"
+ start_time: str | None = None
+ end_time: str | None = None
+ duration: int | None = None
class WhatsAppProfile(BaseModel):
@@ -151,8 +151,8 @@ class WhatsAppConnectCallValue(BaseModel):
messaging_product: str
metadata: WhatsAppMetadata
- contacts: List[WhatsAppContact]
- calls: List[WhatsAppConnectCall]
+ contacts: list[WhatsAppContact]
+ calls: list[WhatsAppConnectCall]
class WhatsAppTerminateCallValue(BaseModel):
@@ -167,8 +167,8 @@ class WhatsAppTerminateCallValue(BaseModel):
messaging_product: str
metadata: WhatsAppMetadata
- calls: List[WhatsAppTerminateCall]
- errors: Optional[List[WhatsAppError]] = None
+ calls: list[WhatsAppTerminateCall]
+ errors: list[WhatsAppError] | None = None
class WhatsAppChange(BaseModel):
@@ -179,7 +179,7 @@ class WhatsAppChange(BaseModel):
field: Always "calls" for calling webhooks
"""
- value: Union[WhatsAppConnectCallValue, WhatsAppTerminateCallValue]
+ value: WhatsAppConnectCallValue | WhatsAppTerminateCallValue
field: str
@@ -192,7 +192,7 @@ class WhatsAppEntry(BaseModel):
"""
id: str
- changes: List[WhatsAppChange]
+ changes: list[WhatsAppChange]
class WhatsAppWebhookRequest(BaseModel):
@@ -207,7 +207,7 @@ class WhatsAppWebhookRequest(BaseModel):
"""
object: str
- entry: List[WhatsAppEntry]
+ entry: list[WhatsAppEntry]
class WhatsAppApi:
diff --git a/src/pipecat/transports/whatsapp/client.py b/src/pipecat/transports/whatsapp/client.py
index f2ed1f00d..8f479520f 100644
--- a/src/pipecat/transports/whatsapp/client.py
+++ b/src/pipecat/transports/whatsapp/client.py
@@ -14,7 +14,7 @@ WhatsApp call events.
import asyncio
import hashlib
import hmac
-from typing import Awaitable, Callable, Dict, List, Optional
+from collections.abc import Awaitable, Callable
import aiohttp
from loguru import logger
@@ -48,8 +48,8 @@ class WhatsAppClient:
whatsapp_token: str,
phone_number_id: str,
session: aiohttp.ClientSession,
- ice_servers: Optional[List[IceServer]] = None,
- whatsapp_secret: Optional[str] = None,
+ ice_servers: list[IceServer] | None = None,
+ whatsapp_secret: str | None = None,
) -> None:
"""Initialize the WhatsApp client.
@@ -65,7 +65,7 @@ class WhatsAppClient:
whatsapp_token=whatsapp_token, phone_number_id=phone_number_id, session=session
)
self._whatsapp_secret = whatsapp_secret
- self._ongoing_calls_map: Dict[str, SmallWebRTCConnection] = {}
+ self._ongoing_calls_map: dict[str, SmallWebRTCConnection] = {}
# Set default ICE servers if none provided
if ice_servers is None:
@@ -73,11 +73,11 @@ class WhatsAppClient:
else:
self._ice_servers = ice_servers
- def update_ice_servers(self, ice_servers: Optional[List[IceServer]] = None):
+ def update_ice_servers(self, ice_servers: list[IceServer] | None = None):
"""Update the list of ICE servers used for WebRTC connections."""
self._ice_servers = ice_servers
- def update_whatsapp_secret(self, whatsapp_secret: Optional[str] = None):
+ def update_whatsapp_secret(self, whatsapp_secret: str | None = None):
"""Update the WhatsApp APP secret for validating that the webhook request came from WhatsApp."""
self._whatsapp_secret = whatsapp_secret
@@ -125,7 +125,7 @@ class WhatsAppClient:
logger.debug("All calls terminated successfully")
async def handle_verify_webhook_request(
- self, params: Dict[str, str], expected_verification_token: str
+ self, params: dict[str, str], expected_verification_token: str
) -> int:
"""Handle a verify webhook request from WhatsApp.
@@ -177,9 +177,9 @@ class WhatsAppClient:
async def handle_webhook_request(
self,
request: WhatsAppWebhookRequest,
- connection_callback: Optional[Callable[[SmallWebRTCConnection], Awaitable[None]]] = None,
- raw_body: Optional[bytes] = None,
- sha256_signature: Optional[str] = None,
+ connection_callback: Callable[[SmallWebRTCConnection], Awaitable[None]] | None = None,
+ raw_body: bytes | None = None,
+ sha256_signature: str | None = None,
) -> bool:
"""Handle a webhook request from WhatsApp.
diff --git a/src/pipecat/turns/user_idle_controller.py b/src/pipecat/turns/user_idle_controller.py
index 188daa327..0fa0053bb 100644
--- a/src/pipecat/turns/user_idle_controller.py
+++ b/src/pipecat/turns/user_idle_controller.py
@@ -7,7 +7,6 @@
"""This module defines a controller for managing user idle detection."""
import asyncio
-from typing import Optional
from pipecat.frames.frames import (
BotStartedSpeakingFrame,
@@ -64,11 +63,11 @@ class UserIdleController(BaseObject):
self._user_idle_timeout = user_idle_timeout
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
self._user_turn_in_progress: bool = False
self._function_calls_in_progress: int = 0
- self._idle_timer_task: Optional[asyncio.Task] = None
+ self._idle_timer_task: asyncio.Task | None = None
self._register_event_handler("on_user_turn_idle", sync=True)
diff --git a/src/pipecat/turns/user_mute/base_user_mute_strategy.py b/src/pipecat/turns/user_mute/base_user_mute_strategy.py
index 91ecb37d5..67aaa2867 100644
--- a/src/pipecat/turns/user_mute/base_user_mute_strategy.py
+++ b/src/pipecat/turns/user_mute/base_user_mute_strategy.py
@@ -6,8 +6,6 @@
"""Base strategy for deciding whether user frames should be muted."""
-from typing import Optional
-
from pipecat.frames.frames import Frame
from pipecat.utils.asyncio.task_manager import BaseTaskManager
from pipecat.utils.base_object import BaseObject
@@ -32,7 +30,7 @@ class BaseUserMuteStrategy(BaseObject):
def __init__(self, **kwargs):
"""Initialize the base user mute strategy."""
super().__init__(**kwargs)
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
@property
def task_manager(self) -> BaseTaskManager:
diff --git a/src/pipecat/turns/user_mute/function_call_user_mute_strategy.py b/src/pipecat/turns/user_mute/function_call_user_mute_strategy.py
index f50f31bd4..fa2aa6b71 100644
--- a/src/pipecat/turns/user_mute/function_call_user_mute_strategy.py
+++ b/src/pipecat/turns/user_mute/function_call_user_mute_strategy.py
@@ -6,8 +6,6 @@
"""User mute strategy that mutes the user while a function call is executing."""
-from typing import Set
-
from pipecat.frames.frames import (
Frame,
FunctionCallCancelFrame,
@@ -30,7 +28,7 @@ class FunctionCallUserMuteStrategy(BaseUserMuteStrategy):
def __init__(self):
"""Initialize the function call user mute strategy."""
super().__init__()
- self._function_call_in_progress: Set[str] = set()
+ self._function_call_in_progress: set[str] = set()
async def reset(self):
"""Reset the strategy to its initial state."""
diff --git a/src/pipecat/turns/user_start/base_user_turn_start_strategy.py b/src/pipecat/turns/user_start/base_user_turn_start_strategy.py
index 94b4f635d..401178cc7 100644
--- a/src/pipecat/turns/user_start/base_user_turn_start_strategy.py
+++ b/src/pipecat/turns/user_start/base_user_turn_start_strategy.py
@@ -7,7 +7,6 @@
"""Base turn start strategy for determining when the user starts speaking."""
from dataclasses import dataclass
-from typing import Optional, Type
from pipecat.frames.frames import Frame
from pipecat.processors.frame_processor import FrameDirection
@@ -73,7 +72,7 @@ class BaseUserTurnStartStrategy(BaseObject):
super().__init__(**kwargs)
self._enable_interruptions = enable_interruptions
self._enable_user_speaking_frames = enable_user_speaking_frames
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
self._register_event_handler("on_push_frame", sync=True)
self._register_event_handler("on_broadcast_frame", sync=True)
self._register_event_handler("on_user_turn_started", sync=True)
@@ -126,7 +125,7 @@ class BaseUserTurnStartStrategy(BaseObject):
"""
await self._call_event_handler("on_push_frame", frame, direction)
- async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs):
+ async def broadcast_frame(self, frame_cls: type[Frame], **kwargs):
"""Emit on_broadcast_frame to broadcast a frame using the user aggreagtor.
Args:
diff --git a/src/pipecat/turns/user_start/wake_phrase_user_turn_start_strategy.py b/src/pipecat/turns/user_start/wake_phrase_user_turn_start_strategy.py
index bcc069ad7..14a045826 100644
--- a/src/pipecat/turns/user_start/wake_phrase_user_turn_start_strategy.py
+++ b/src/pipecat/turns/user_start/wake_phrase_user_turn_start_strategy.py
@@ -9,7 +9,6 @@
import asyncio
import enum
import re
-from typing import List, Optional
from loguru import logger
@@ -85,7 +84,7 @@ class WakePhraseUserTurnStartStrategy(BaseUserTurnStartStrategy):
def __init__(
self,
*,
- phrases: List[str],
+ phrases: list[str],
timeout: float = 10.0,
single_activation: bool = False,
**kwargs,
@@ -106,7 +105,7 @@ class WakePhraseUserTurnStartStrategy(BaseUserTurnStartStrategy):
self._timeout = timeout
self._single_activation = single_activation
- self._patterns: List[re.Pattern] = []
+ self._patterns: list[re.Pattern] = []
for phrase in phrases:
pattern = re.compile(
r"\b" + r"\s*".join(re.escape(word) for word in phrase.split()) + r"\b",
@@ -118,7 +117,7 @@ class WakePhraseUserTurnStartStrategy(BaseUserTurnStartStrategy):
self._accumulated_text = ""
self._timeout_event = asyncio.Event()
- self._timeout_task: Optional[asyncio.Task] = None
+ self._timeout_task: asyncio.Task | None = None
self._register_event_handler("on_wake_phrase_detected")
self._register_event_handler("on_wake_phrase_timeout")
@@ -276,6 +275,6 @@ class WakePhraseUserTurnStartStrategy(BaseUserTurnStartStrategy):
timeout=self._timeout,
)
self._timeout_event.clear()
- except asyncio.TimeoutError:
+ except TimeoutError:
if self._state == _WakeState.AWAKE:
self._transition_to_idle()
diff --git a/src/pipecat/turns/user_stop/base_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/base_user_turn_stop_strategy.py
index a6f89ae4b..1f8497359 100644
--- a/src/pipecat/turns/user_stop/base_user_turn_stop_strategy.py
+++ b/src/pipecat/turns/user_stop/base_user_turn_stop_strategy.py
@@ -7,7 +7,6 @@
"""Base user turn stop strategy for determining when the user stopped speaking."""
from dataclasses import dataclass
-from typing import Optional, Type
from pipecat.frames.frames import Frame
from pipecat.processors.frame_processor import FrameDirection
@@ -62,7 +61,7 @@ class BaseUserTurnStopStrategy(BaseObject):
"""
super().__init__(**kwargs)
self._enable_user_speaking_frames = enable_user_speaking_frames
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
self._register_event_handler("on_push_frame", sync=True)
self._register_event_handler("on_broadcast_frame", sync=True)
self._register_event_handler("on_user_turn_stopped", sync=True)
@@ -114,7 +113,7 @@ class BaseUserTurnStopStrategy(BaseObject):
"""
await self._call_event_handler("on_push_frame", frame, direction)
- async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs):
+ async def broadcast_frame(self, frame_cls: type[Frame], **kwargs):
"""Emit on_broadcast_frame to broadcast a frame using the user aggreagtor.
Args:
diff --git a/src/pipecat/turns/user_stop/external_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/external_user_turn_stop_strategy.py
index 4ffa3360b..b1cf0ca9b 100644
--- a/src/pipecat/turns/user_stop/external_user_turn_stop_strategy.py
+++ b/src/pipecat/turns/user_stop/external_user_turn_stop_strategy.py
@@ -7,7 +7,6 @@
"""User turn stop strategy triggered by externally emitted frames."""
import asyncio
-from typing import Optional
from pipecat.frames.frames import (
Frame,
@@ -44,7 +43,7 @@ class ExternalUserTurnStopStrategy(BaseUserTurnStopStrategy):
self._user_speaking = False
self._seen_interim_results = False
self._event = asyncio.Event()
- self._task: Optional[asyncio.Task] = None
+ self._task: asyncio.Task | None = None
async def reset(self):
"""Reset the strategy to its initial state."""
@@ -125,7 +124,7 @@ class ExternalUserTurnStopStrategy(BaseUserTurnStopStrategy):
try:
await asyncio.wait_for(self._event.wait(), timeout=self._timeout)
self._event.clear()
- except asyncio.TimeoutError:
+ except TimeoutError:
await self._maybe_trigger_user_turn_stopped()
async def _maybe_trigger_user_turn_stopped(self):
diff --git a/src/pipecat/turns/user_stop/speech_timeout_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/speech_timeout_user_turn_stop_strategy.py
index 0a8feeb2e..e37b55756 100644
--- a/src/pipecat/turns/user_stop/speech_timeout_user_turn_stop_strategy.py
+++ b/src/pipecat/turns/user_stop/speech_timeout_user_turn_stop_strategy.py
@@ -8,7 +8,6 @@
import asyncio
import time
-from typing import Optional
from loguru import logger
@@ -59,8 +58,8 @@ class SpeechTimeoutUserTurnStopStrategy(BaseUserTurnStopStrategy):
self._text = ""
self._vad_user_speaking = False
self._transcript_finalized = False
- self._vad_stopped_time: Optional[float] = None
- self._timeout_task: Optional[asyncio.Task] = None
+ self._vad_stopped_time: float | None = None
+ self._timeout_task: asyncio.Task | None = None
self._timeout_expired: bool = False
async def reset(self):
diff --git a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py
index adc987583..667e89961 100644
--- a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py
+++ b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py
@@ -7,7 +7,6 @@
"""User turn stop strategy based on turn detection analyzers."""
import asyncio
-from typing import Optional
from loguru import logger
@@ -62,9 +61,9 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy):
self._text = ""
self._turn_complete = False
self._vad_user_speaking = False
- self._vad_stopped_time: Optional[float] = None # Track when VAD stopped was received
+ self._vad_stopped_time: float | None = None # Track when VAD stopped was received
self._transcript_finalized = False
- self._timeout_task: Optional[asyncio.Task] = None
+ self._timeout_task: asyncio.Task | None = None
self._timeout_expired: bool = False
async def reset(self):
@@ -232,7 +231,7 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy):
# Make sure the task is scheduled.
await asyncio.sleep(0)
- async def _handle_prediction_result(self, result: Optional[MetricsData]):
+ async def _handle_prediction_result(self, result: MetricsData | None):
"""Handle a prediction result event from the turn analyzer."""
if result:
await self.push_frame(MetricsFrame(data=[result]))
diff --git a/src/pipecat/turns/user_turn_completion_mixin.py b/src/pipecat/turns/user_turn_completion_mixin.py
index c99355de7..1d8da1e07 100644
--- a/src/pipecat/turns/user_turn_completion_mixin.py
+++ b/src/pipecat/turns/user_turn_completion_mixin.py
@@ -14,7 +14,7 @@ were interrupted mid-thought.
import asyncio
from dataclasses import dataclass
-from typing import Literal, Optional
+from typing import Literal
from loguru import logger
@@ -156,11 +156,11 @@ class UserTurnCompletionConfig:
incomplete_long_prompt: Custom prompt when long timeout expires.
"""
- instructions: Optional[str] = None
+ instructions: str | None = None
incomplete_short_timeout: float = 5.0
incomplete_long_timeout: float = 10.0
- incomplete_short_prompt: Optional[str] = None
- incomplete_long_prompt: Optional[str] = None
+ incomplete_short_prompt: str | None = None
+ incomplete_long_prompt: str | None = None
@property
def completion_instructions(self) -> str:
@@ -223,8 +223,8 @@ class UserTurnCompletionLLMServiceMixin:
# Timeout handling
self._user_turn_completion_config = UserTurnCompletionConfig()
- self._incomplete_timeout_task: Optional[asyncio.Task] = None
- self._incomplete_type: Optional[Literal["short", "long"]] = None
+ self._incomplete_timeout_task: asyncio.Task | None = None
+ self._incomplete_type: Literal["short", "long"] | None = None
def set_user_turn_completion_config(self, config: UserTurnCompletionConfig):
"""Set the turn completion configuration.
@@ -385,7 +385,7 @@ class UserTurnCompletionLLMServiceMixin:
# Check for incomplete markers (○ short, ◐ long)
# These indicate the user was cut off or needs time - we suppress the bot's
# response and start a timeout to re-prompt later.
- incomplete_type: Optional[Literal["short", "long"]] = None
+ incomplete_type: Literal["short", "long"] | None = None
if USER_TURN_INCOMPLETE_SHORT_MARKER in self._turn_text_buffer:
incomplete_type = "short"
elif USER_TURN_INCOMPLETE_LONG_MARKER in self._turn_text_buffer:
diff --git a/src/pipecat/turns/user_turn_controller.py b/src/pipecat/turns/user_turn_controller.py
index 9abed3932..2ba4e304b 100644
--- a/src/pipecat/turns/user_turn_controller.py
+++ b/src/pipecat/turns/user_turn_controller.py
@@ -7,7 +7,6 @@
"""This module defines a controller for managing user turn lifecycle."""
import asyncio
-from typing import Optional, Type
from pipecat.frames.frames import (
Frame,
@@ -85,13 +84,13 @@ class UserTurnController(BaseObject):
self._user_turn_strategies = user_turn_strategies
self._user_turn_stop_timeout = user_turn_stop_timeout
- self._task_manager: Optional[BaseTaskManager] = None
+ self._task_manager: BaseTaskManager | None = None
self._user_speaking = False
self._user_turn = False
self._user_turn_stop_timeout_event = asyncio.Event()
- self._user_turn_stop_timeout_task: Optional[asyncio.Task] = None
+ self._user_turn_stop_timeout_task: asyncio.Task | None = None
self._register_event_handler("on_push_frame", sync=True)
self._register_event_handler("on_broadcast_frame", sync=True)
@@ -235,7 +234,7 @@ class UserTurnController(BaseObject):
async def _on_broadcast_frame(
self,
strategy: BaseUserTurnStartStrategy | BaseUserTurnStopStrategy,
- frame_cls: Type[Frame],
+ frame_cls: type[Frame],
**kwargs,
):
await self._call_event_handler("on_broadcast_frame", frame_cls, **kwargs)
@@ -256,7 +255,7 @@ class UserTurnController(BaseObject):
await self._call_event_handler("on_reset_aggregation", strategy)
async def _trigger_user_turn_start(
- self, strategy: Optional[BaseUserTurnStartStrategy], params: UserTurnStartedParams
+ self, strategy: BaseUserTurnStartStrategy | None, params: UserTurnStartedParams
):
# Prevent two consecutive user turn starts.
if self._user_turn:
@@ -276,7 +275,7 @@ class UserTurnController(BaseObject):
await self._call_event_handler("on_user_turn_started", strategy, params)
async def _trigger_user_turn_stop(
- self, strategy: Optional[BaseUserTurnStopStrategy], params: UserTurnStoppedParams
+ self, strategy: BaseUserTurnStopStrategy | None, params: UserTurnStoppedParams
):
# Prevent two consecutive user turn stops.
if not self._user_turn:
@@ -299,7 +298,7 @@ class UserTurnController(BaseObject):
timeout=self._user_turn_stop_timeout,
)
self._user_turn_stop_timeout_event.clear()
- except asyncio.TimeoutError:
+ except TimeoutError:
if self._user_turn and not self._user_speaking:
await self._call_event_handler("on_user_turn_stop_timeout")
await self._trigger_user_turn_stop(
diff --git a/src/pipecat/turns/user_turn_processor.py b/src/pipecat/turns/user_turn_processor.py
index a3501d2c8..536d297b5 100644
--- a/src/pipecat/turns/user_turn_processor.py
+++ b/src/pipecat/turns/user_turn_processor.py
@@ -6,8 +6,6 @@
"""Frame processor for managing the user turn lifecycle."""
-from typing import Optional, Type
-
from loguru import logger
from pipecat.frames.frames import (
@@ -64,7 +62,7 @@ class UserTurnProcessor(FrameProcessor):
def __init__(
self,
*,
- user_turn_strategies: Optional[UserTurnStrategies] = None,
+ user_turn_strategies: UserTurnStrategies | None = None,
user_turn_stop_timeout: float = 5.0,
user_idle_timeout: float = 0,
**kwargs,
@@ -165,7 +163,7 @@ class UserTurnProcessor(FrameProcessor):
):
await self.push_frame(frame, direction)
- async def _on_broadcast_frame(self, controller, frame_cls: Type[Frame], **kwargs):
+ async def _on_broadcast_frame(self, controller, frame_cls: type[Frame], **kwargs):
await self.broadcast_frame(frame_cls, **kwargs)
async def _on_user_turn_started(
diff --git a/src/pipecat/turns/user_turn_strategies.py b/src/pipecat/turns/user_turn_strategies.py
index d1abfeac5..8a663d3da 100644
--- a/src/pipecat/turns/user_turn_strategies.py
+++ b/src/pipecat/turns/user_turn_strategies.py
@@ -7,7 +7,6 @@
"""Turn start strategy configuration."""
from dataclasses import dataclass
-from typing import List, Optional
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.turns.user_start import (
@@ -23,7 +22,7 @@ from pipecat.turns.user_stop import (
)
-def default_user_turn_start_strategies() -> List[BaseUserTurnStartStrategy]:
+def default_user_turn_start_strategies() -> list[BaseUserTurnStartStrategy]:
"""Return the default user turn start strategies.
Returns ``[VADUserTurnStartStrategy, TranscriptionUserTurnStartStrategy]``.
@@ -39,7 +38,7 @@ def default_user_turn_start_strategies() -> List[BaseUserTurnStartStrategy]:
return [VADUserTurnStartStrategy(), TranscriptionUserTurnStartStrategy()]
-def default_user_turn_stop_strategies() -> List[BaseUserTurnStopStrategy]:
+def default_user_turn_stop_strategies() -> list[BaseUserTurnStopStrategy]:
"""Return the default user turn stop strategies.
Returns ``[TurnAnalyzerUserTurnStopStrategy(LocalSmartTurnAnalyzerV3)]``.
@@ -65,8 +64,8 @@ class UserTurnStrategies:
"""
- start: Optional[List[BaseUserTurnStartStrategy]] = None
- stop: Optional[List[BaseUserTurnStopStrategy]] = None
+ start: list[BaseUserTurnStartStrategy] | None = None
+ stop: list[BaseUserTurnStopStrategy] | None = None
def __post_init__(self):
if not self.start:
diff --git a/src/pipecat/utils/asyncio/task_manager.py b/src/pipecat/utils/asyncio/task_manager.py
index c680e19b1..0b2420788 100644
--- a/src/pipecat/utils/asyncio/task_manager.py
+++ b/src/pipecat/utils/asyncio/task_manager.py
@@ -14,8 +14,8 @@ comprehensive monitoring and cleanup capabilities.
import asyncio
import traceback
from abc import ABC, abstractmethod
+from collections.abc import Coroutine, Sequence
from dataclasses import dataclass
-from typing import Coroutine, Dict, Optional, Sequence
from loguru import logger
@@ -71,7 +71,7 @@ class BaseTaskManager(ABC):
pass
@abstractmethod
- async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = None):
+ async def cancel_task(self, task: asyncio.Task, timeout: float | None = None):
"""Cancels the given asyncio Task and awaits its completion with an optional timeout.
This function removes the task from the set of registered tasks upon
@@ -114,8 +114,8 @@ class TaskManager(BaseTaskManager):
def __init__(self) -> None:
"""Initialize the task manager with empty task registry."""
- self._tasks: Dict[str, TaskData] = {}
- self._params: Optional[TaskManagerParams] = None
+ self._tasks: dict[str, TaskData] = {}
+ self._params: TaskManagerParams | None = None
def setup(self, params: TaskManagerParams):
"""Initialize the task manager with configuration parameters.
@@ -177,7 +177,7 @@ class TaskManager(BaseTaskManager):
logger.trace(f"{name}: task created")
return task
- async def cancel_task(self, task: asyncio.Task, timeout: Optional[float] = None):
+ async def cancel_task(self, task: asyncio.Task, timeout: float | None = None):
"""Cancels the given asyncio Task and awaits its completion with an optional timeout.
This function removes the task from the set of registered tasks upon
@@ -194,7 +194,7 @@ class TaskManager(BaseTaskManager):
await asyncio.wait_for(task, timeout=timeout)
else:
await task
- except asyncio.TimeoutError:
+ except TimeoutError:
logger.warning(f"{name}: timed out waiting for task to cancel")
except asyncio.CancelledError:
# Here are sure the task is cancelled properly.
diff --git a/src/pipecat/utils/base_object.py b/src/pipecat/utils/base_object.py
index f6e4c47de..896c62b6a 100644
--- a/src/pipecat/utils/base_object.py
+++ b/src/pipecat/utils/base_object.py
@@ -16,7 +16,7 @@ import inspect
import traceback
from abc import ABC
from dataclasses import dataclass
-from typing import Any, Dict, List, Optional
+from typing import Any
from loguru import logger
@@ -38,7 +38,7 @@ class EventHandler:
"""
name: str
- handlers: List[Any]
+ handlers: list[Any]
is_sync: bool
@@ -50,7 +50,7 @@ class BaseObject(ABC):
classes in the framework should inherit from this base class.
"""
- def __init__(self, *, name: Optional[str] = None, **kwargs):
+ def __init__(self, *, name: str | None = None, **kwargs):
"""Initialize the base object.
Args:
@@ -62,7 +62,7 @@ class BaseObject(ABC):
self._name = name or f"{self.__class__.__name__}#{obj_count(self)}"
# Registered event handlers.
- self._event_handlers: Dict[str, EventHandler] = {}
+ self._event_handlers: dict[str, EventHandler] = {}
# Set of tasks being executed. When a task finishes running it gets
# automatically removed from the set. When we cleanup we wait for all
diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py
index cfdf8f82f..85a7e6e19 100644
--- a/src/pipecat/utils/context/llm_context_summarization.py
+++ b/src/pipecat/utils/context/llm_context_summarization.py
@@ -13,7 +13,7 @@ context when token limits are reached, enabling efficient long-running conversat
import json
import warnings
from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
from pipecat.services.llm_service import LLMService
@@ -90,7 +90,7 @@ class LLMContextSummaryConfig:
target_context_tokens: int = 6000
min_messages_after_summary: int = 4
- summarization_prompt: Optional[str] = None
+ summarization_prompt: str | None = None
summary_message_template: str = "Conversation summary: {summary}"
llm: Optional["LLMService"] = None
summarization_timeout: float = DEFAULT_SUMMARIZATION_TIMEOUT
@@ -139,8 +139,8 @@ class LLMAutoContextSummarizationConfig:
default ``LLMContextSummaryConfig`` values.
"""
- max_context_tokens: Optional[int] = 8000
- max_unsummarized_messages: Optional[int] = 20
+ max_context_tokens: int | None = 8000
+ max_unsummarized_messages: int | None = 20
summary_config: LLMContextSummaryConfig = field(default_factory=LLMContextSummaryConfig)
def __post_init__(self):
@@ -192,11 +192,11 @@ class LLMContextSummarizationConfig:
summarization_prompt: Custom prompt for summary generation.
"""
- max_context_tokens: Optional[int] = 8000
+ max_context_tokens: int | None = 8000
target_context_tokens: int = 6000
- max_unsummarized_messages: Optional[int] = 20
+ max_unsummarized_messages: int | None = 20
min_messages_after_summary: int = 4
- summarization_prompt: Optional[str] = None
+ summarization_prompt: str | None = None
summary_message_template: str = "Conversation summary: {summary}"
llm: Optional["LLMService"] = None
summarization_timeout: float = DEFAULT_SUMMARIZATION_TIMEOUT
@@ -269,7 +269,7 @@ class LLMMessagesToSummarize:
last_summarized_index: Index of the last message being summarized
"""
- messages: List[dict]
+ messages: list[dict]
last_summarized_index: int
@@ -415,7 +415,7 @@ class LLMContextSummarizationUtil:
@staticmethod
def _get_earliest_function_call_not_resolved_in_range(
- messages: List[dict], start_idx: int, summary_end: int
+ messages: list[dict], start_idx: int, summary_end: int
) -> int:
"""Find the earliest message index with incomplete function calls.
@@ -574,7 +574,7 @@ class LLMContextSummarizationUtil:
)
@staticmethod
- def format_messages_for_summary(messages: List[dict]) -> str:
+ def format_messages_for_summary(messages: list[dict]) -> str:
"""Format messages as a transcript for summarization.
Args:
diff --git a/src/pipecat/utils/frame_queue.py b/src/pipecat/utils/frame_queue.py
index 64617f770..1b787362d 100644
--- a/src/pipecat/utils/frame_queue.py
+++ b/src/pipecat/utils/frame_queue.py
@@ -7,7 +7,8 @@
"""Frame queue utilities for Pipecat pipeline processors."""
import asyncio
-from typing import Any, Callable, Type, Union
+from collections.abc import Callable
+from typing import Any
from pipecat.frames.frames import Frame, UninterruptibleFrame
@@ -41,7 +42,7 @@ class FrameQueue(asyncio.Queue):
self._frame_getter = frame_getter
self._uninterruptible_count: int = 0
- def has_frame(self, frame_type: Union[Type[Frame], Type[UninterruptibleFrame]]) -> bool:
+ def has_frame(self, frame_type: type[Frame] | type[UninterruptibleFrame]) -> bool:
"""Return True if any frame of the given type is in the queue.
``frame_type`` may be ``Frame``, ``UninterruptibleFrame`` (a mixin, not a
diff --git a/src/pipecat/utils/string.py b/src/pipecat/utils/string.py
index 20fcdb2e0..55b1c2d53 100644
--- a/src/pipecat/utils/string.py
+++ b/src/pipecat/utils/string.py
@@ -18,8 +18,8 @@ Dependencies:
"""
import re
+from collections.abc import Sequence
from dataclasses import dataclass
-from typing import FrozenSet, List, Optional, Sequence, Tuple
import nltk
from loguru import logger
@@ -41,7 +41,7 @@ except LookupError:
"See https://www.nltk.org/data.html for more information."
)
-SENTENCE_ENDING_PUNCTUATION: FrozenSet[str] = frozenset(
+SENTENCE_ENDING_PUNCTUATION: frozenset[str] = frozenset(
{
# Latin script punctuation (most European languages, Filipino, etc.)
".",
@@ -91,16 +91,16 @@ SENTENCE_ENDING_PUNCTUATION: FrozenSet[str] = frozenset(
# Latin punctuation that NLTK handles well — these need NLTK's disambiguation
# because "." can appear in abbreviations, decimals, etc.
-_LATIN_SENTENCE_ENDING_PUNCTUATION: FrozenSet[str] = frozenset({".", "!", "?", ";", "…"})
+_LATIN_SENTENCE_ENDING_PUNCTUATION: frozenset[str] = frozenset({".", "!", "?", ";", "…"})
# Non-Latin sentence-ending punctuation that is always unambiguous and never needs
# NLTK's disambiguation logic. Used as a fallback when NLTK doesn't support the
# language (e.g., Japanese, Chinese, Korean, Hindi, Arabic).
-UNAMBIGUOUS_SENTENCE_ENDING_PUNCTUATION: FrozenSet[str] = (
+UNAMBIGUOUS_SENTENCE_ENDING_PUNCTUATION: frozenset[str] = (
SENTENCE_ENDING_PUNCTUATION - _LATIN_SENTENCE_ENDING_PUNCTUATION
)
-StartEndTags = Tuple[str, str]
+StartEndTags = tuple[str, str]
def replace_match(text: str, match: re.Match, old: str, new: str) -> str:
@@ -179,9 +179,9 @@ def match_endofsentence(text: str) -> int:
def parse_start_end_tags(
text: str,
tags: Sequence[StartEndTags],
- current_tag: Optional[StartEndTags],
+ current_tag: StartEndTags | None,
current_tag_index: int,
-) -> Tuple[Optional[StartEndTags], int]:
+) -> tuple[StartEndTags | None, int]:
"""Parse text to identify start and end tag pairs.
If a start tag was previously found (i.e., current_tag is valid), wait for
@@ -237,7 +237,7 @@ class TextPartForConcatenation:
return f"{self.name}(text: [{self.text}], includes_inter_part_spaces: {self.includes_inter_part_spaces})"
-def concatenate_aggregated_text(text_parts: List[TextPartForConcatenation]) -> str:
+def concatenate_aggregated_text(text_parts: list[TextPartForConcatenation]) -> str:
"""Concatenate a list of text parts into a single string.
This function joins the provided list of text parts into a single string,
diff --git a/src/pipecat/utils/text/base_text_aggregator.py b/src/pipecat/utils/text/base_text_aggregator.py
index 2b050fcb7..99ca145b6 100644
--- a/src/pipecat/utils/text/base_text_aggregator.py
+++ b/src/pipecat/utils/text/base_text_aggregator.py
@@ -12,12 +12,12 @@ aggregated text should be sent for speech synthesis.
"""
from abc import ABC, abstractmethod
+from collections.abc import AsyncIterator
from dataclasses import dataclass
-from enum import Enum
-from typing import AsyncIterator, Optional
+from enum import StrEnum
-class AggregationType(str, Enum):
+class AggregationType(StrEnum):
"""Built-in aggregation strings."""
SENTENCE = "sentence"
@@ -128,7 +128,7 @@ class BaseTextAggregator(ABC):
yield # pragma: no cover
@abstractmethod
- async def flush(self) -> Optional[Aggregation]:
+ async def flush(self) -> Aggregation | None:
"""Flush any pending aggregation.
This method is called at the end of a stream (e.g., when receiving
diff --git a/src/pipecat/utils/text/base_text_filter.py b/src/pipecat/utils/text/base_text_filter.py
index ce733ad93..f00bf871d 100644
--- a/src/pipecat/utils/text/base_text_filter.py
+++ b/src/pipecat/utils/text/base_text_filter.py
@@ -12,7 +12,8 @@ and interruption handling.
"""
from abc import ABC, abstractmethod
-from typing import Any, Mapping
+from collections.abc import Mapping
+from typing import Any
class BaseTextFilter(ABC):
diff --git a/src/pipecat/utils/text/markdown_text_filter.py b/src/pipecat/utils/text/markdown_text_filter.py
index 54f8a4770..f5a527ce1 100644
--- a/src/pipecat/utils/text/markdown_text_filter.py
+++ b/src/pipecat/utils/text/markdown_text_filter.py
@@ -11,7 +11,8 @@ while preserving structure and handling special cases like code blocks and table
"""
import re
-from typing import Any, Mapping, Optional
+from collections.abc import Mapping
+from typing import Any
from markdown import Markdown
from pydantic import BaseModel
@@ -37,11 +38,11 @@ class MarkdownTextFilter(BaseTextFilter):
filter_tables: Whether to remove table content from the text. Defaults to False.
"""
- enable_text_filter: Optional[bool] = True
- filter_code: Optional[bool] = False
- filter_tables: Optional[bool] = False
+ enable_text_filter: bool | None = True
+ filter_code: bool | None = False
+ filter_tables: bool | None = False
- def __init__(self, params: Optional[InputParams] = None, **kwargs):
+ def __init__(self, params: InputParams | None = None, **kwargs):
"""Initialize the Markdown text filter.
Args:
diff --git a/src/pipecat/utils/text/pattern_pair_aggregator.py b/src/pipecat/utils/text/pattern_pair_aggregator.py
index c69622e8b..975413c78 100644
--- a/src/pipecat/utils/text/pattern_pair_aggregator.py
+++ b/src/pipecat/utils/text/pattern_pair_aggregator.py
@@ -12,8 +12,8 @@ support for custom handlers and configurable actions for when a pattern is found
"""
import re
+from collections.abc import AsyncIterator, Awaitable, Callable
from enum import Enum
-from typing import AsyncIterator, Awaitable, Callable, List, Optional, Tuple
from loguru import logger
@@ -182,7 +182,7 @@ class PatternPairAggregator(SimpleTextAggregator):
async def _process_complete_patterns(
self, text: str, last_processed_position: int = 0
- ) -> Tuple[List[PatternMatch], str]:
+ ) -> tuple[list[PatternMatch], str]:
"""Process newly complete pattern pairs in the text.
Searches for pattern pairs that have been completed since last_processed_position,
@@ -246,7 +246,7 @@ class PatternPairAggregator(SimpleTextAggregator):
return all_matches, processed_text
- def _match_start_of_pattern(self, text: str) -> Optional[Tuple[int, dict]]:
+ def _match_start_of_pattern(self, text: str) -> tuple[int, dict] | None:
"""Check if text contains incomplete pattern pairs.
Determines whether the text contains any start patterns without
diff --git a/src/pipecat/utils/text/simple_text_aggregator.py b/src/pipecat/utils/text/simple_text_aggregator.py
index b5b179fcf..f1e5d4249 100644
--- a/src/pipecat/utils/text/simple_text_aggregator.py
+++ b/src/pipecat/utils/text/simple_text_aggregator.py
@@ -11,7 +11,7 @@ until it finds an end-of-sentence marker, making it suitable for basic TTS
text processing scenarios.
"""
-from typing import AsyncIterator, Optional
+from collections.abc import AsyncIterator
from pipecat.utils.string import SENTENCE_ENDING_PUNCTUATION, match_endofsentence
from pipecat.utils.text.base_text_aggregator import Aggregation, AggregationType, BaseTextAggregator
@@ -75,7 +75,7 @@ class SimpleTextAggregator(BaseTextAggregator):
if result:
yield result
- async def _check_sentence_with_lookahead(self, char: str) -> Optional[Aggregation]:
+ async def _check_sentence_with_lookahead(self, char: str) -> Aggregation | None:
"""Check for sentence boundaries using lookahead logic.
This method implements the core sentence detection logic with lookahead.
@@ -120,7 +120,7 @@ class SimpleTextAggregator(BaseTextAggregator):
return None
- async def flush(self) -> Optional[Aggregation]:
+ async def flush(self) -> Aggregation | None:
"""Flush any remaining text in the buffer.
Returns any text remaining in the buffer. This is called at the end
diff --git a/src/pipecat/utils/text/skip_tags_aggregator.py b/src/pipecat/utils/text/skip_tags_aggregator.py
index 1b6a7f156..d21c8e050 100644
--- a/src/pipecat/utils/text/skip_tags_aggregator.py
+++ b/src/pipecat/utils/text/skip_tags_aggregator.py
@@ -11,7 +11,7 @@ between specified start/end tag pairs, ensuring that tagged content is processed
as a unit regardless of internal punctuation.
"""
-from typing import AsyncIterator, Optional, Sequence
+from collections.abc import AsyncIterator, Sequence
from pipecat.utils.string import StartEndTags, parse_start_end_tags
from pipecat.utils.text.base_text_aggregator import Aggregation, AggregationType
@@ -41,7 +41,7 @@ class SkipTagsAggregator(SimpleTextAggregator):
"""
super().__init__(**kwargs)
self._tags = tags
- self._current_tag: Optional[StartEndTags] = None
+ self._current_tag: StartEndTags | None = None
self._current_tag_index: int = 0
async def aggregate(self, text: str) -> AsyncIterator[Aggregation]:
diff --git a/src/pipecat/utils/time.py b/src/pipecat/utils/time.py
index d650f7296..98561e7d9 100644
--- a/src/pipecat/utils/time.py
+++ b/src/pipecat/utils/time.py
@@ -20,7 +20,7 @@ def time_now_iso8601() -> str:
Returns:
The current UTC time in ISO8601 format with millisecond precision.
"""
- return datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="milliseconds")
+ return datetime.datetime.now(datetime.UTC).isoformat(timespec="milliseconds")
def seconds_to_nanoseconds(seconds: float) -> int:
diff --git a/src/pipecat/utils/tracing/service_attributes.py b/src/pipecat/utils/tracing/service_attributes.py
index 5be781406..d9b86a9a4 100644
--- a/src/pipecat/utils/tracing/service_attributes.py
+++ b/src/pipecat/utils/tracing/service_attributes.py
@@ -11,7 +11,7 @@ attributes to OpenTelemetry spans, following standard semantic conventions
where applicable and Pipecat-specific conventions for additional context.
"""
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Optional
# Import for type checking only
if TYPE_CHECKING:
@@ -68,11 +68,11 @@ def add_tts_span_attributes(
service_name: str,
model: str,
voice_id: str,
- text: Optional[str] = None,
+ text: str | None = None,
settings: Optional["ServiceSettings"] = None,
- character_count: Optional[int] = None,
+ character_count: int | None = None,
operation_name: str = "tts",
- ttfb: Optional[float] = None,
+ ttfb: float | None = None,
**kwargs,
) -> None:
"""Add TTS-specific attributes to a span.
@@ -123,13 +123,13 @@ def add_stt_span_attributes(
service_name: str,
model: str,
operation_name: str = "stt",
- transcript: Optional[str] = None,
- is_final: Optional[bool] = None,
- language: Optional[str] = None,
- user_id: Optional[str] = None,
+ transcript: str | None = None,
+ is_final: bool | None = None,
+ language: str | None = None,
+ user_id: str | None = None,
settings: Optional["ServiceSettings"] = None,
vad_enabled: bool = False,
- ttfb: Optional[float] = None,
+ ttfb: float | None = None,
**kwargs,
) -> None:
"""Add STT-specific attributes to a span.
@@ -187,15 +187,15 @@ def add_llm_span_attributes(
service_name: str,
model: str,
stream: bool = True,
- messages: Optional[str] = None,
- output: Optional[str] = None,
- tools: Optional[str] = None,
- tool_count: Optional[int] = None,
- tool_choice: Optional[str] = None,
- system_instructions: Optional[str] = None,
- parameters: Optional[Dict[str, Any]] = None,
- extra_parameters: Optional[Dict[str, Any]] = None,
- ttfb: Optional[float] = None,
+ messages: str | None = None,
+ output: str | None = None,
+ tools: str | None = None,
+ tool_count: int | None = None,
+ tool_choice: str | None = None,
+ system_instructions: str | None = None,
+ parameters: dict[str, Any] | None = None,
+ extra_parameters: dict[str, Any] | None = None,
+ ttfb: float | None = None,
**kwargs,
) -> None:
"""Add LLM-specific attributes to a span.
@@ -280,16 +280,16 @@ def add_gemini_live_span_attributes(
service_name: str,
model: str,
operation_name: str,
- voice_id: Optional[str] = None,
- language: Optional[str] = None,
- modalities: Optional[str] = None,
+ voice_id: str | None = None,
+ language: str | None = None,
+ modalities: str | None = None,
settings: Optional["ServiceSettings"] = None,
- tools: Optional[List[Dict]] = None,
- tools_serialized: Optional[str] = None,
- transcript: Optional[str] = None,
- is_input: Optional[bool] = None,
- text_output: Optional[str] = None,
- audio_data_size: Optional[int] = None,
+ tools: list[dict] | None = None,
+ tools_serialized: str | None = None,
+ transcript: str | None = None,
+ is_input: bool | None = None,
+ text_output: str | None = None,
+ audio_data_size: int | None = None,
**kwargs,
) -> None:
"""Add Gemini Live specific attributes to a span.
@@ -385,14 +385,14 @@ def add_openai_realtime_span_attributes(
service_name: str,
model: str,
operation_name: str,
- session_properties: Optional[Dict[str, Any]] = None,
- transcript: Optional[str] = None,
- is_input: Optional[bool] = None,
- context_messages: Optional[str] = None,
- function_calls: Optional[List[Dict]] = None,
- tools: Optional[List[Dict]] = None,
- tools_serialized: Optional[str] = None,
- audio_data_size: Optional[int] = None,
+ session_properties: dict[str, Any] | None = None,
+ transcript: str | None = None,
+ is_input: bool | None = None,
+ context_messages: str | None = None,
+ function_calls: list[dict] | None = None,
+ tools: list[dict] | None = None,
+ tools_serialized: str | None = None,
+ audio_data_size: int | None = None,
**kwargs,
) -> None:
"""Add OpenAI Realtime specific attributes to a span.
diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py
index 1922dd8f8..f4de1dd85 100644
--- a/src/pipecat/utils/tracing/service_decorators.py
+++ b/src/pipecat/utils/tracing/service_decorators.py
@@ -16,7 +16,8 @@ import functools
import inspect
import json
import logging
-from typing import TYPE_CHECKING, Callable, Optional, TypeVar
+from collections.abc import Callable
+from typing import TYPE_CHECKING, TypeVar
# Type imports for type checking only
if TYPE_CHECKING:
@@ -164,7 +165,7 @@ def _add_token_usage_to_span(span, token_usage):
span.set_attribute("gen_ai.usage.reasoning_tokens", reasoning_tokens)
-def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) -> Callable:
+def traced_tts(func: Callable | None = None, *, name: str | None = None) -> Callable:
"""Trace TTS service methods with TTS-specific attributes.
Automatically captures and records:
@@ -236,7 +237,7 @@ def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) -
raise
finally:
# Update TTFB metric at the end
- ttfb: Optional[float] = getattr(getattr(self, "_metrics", None), "ttfb", None)
+ ttfb: float | None = getattr(getattr(self, "_metrics", None), "ttfb", None)
if ttfb is not None:
span.set_attribute("metrics.ttfb", ttfb)
@@ -288,7 +289,7 @@ def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) -
return decorator
-def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) -> Callable:
+def traced_stt(func: Callable | None = None, *, name: str | None = None) -> Callable:
"""Trace STT service methods with transcription attributes.
Automatically captures and records:
@@ -329,9 +330,7 @@ def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) -
) as current_span:
try:
# Get TTFB metric if available
- ttfb: Optional[float] = getattr(
- getattr(self, "_metrics", None), "ttfb", None
- )
+ ttfb: float | None = getattr(getattr(self, "_metrics", None), "ttfb", None)
# Use settings from the service if available
settings = getattr(self, "_settings", None)
@@ -369,7 +368,7 @@ def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) -
return decorator
-def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) -> Callable:
+def traced_llm(func: Callable | None = None, *, name: str | None = None) -> Callable:
"""Trace LLM service methods with LLM-specific attributes.
Automatically captures and records:
@@ -567,9 +566,7 @@ def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) -
self.start_llm_usage_metrics = original_start_llm_usage_metrics
# Update TTFB metric
- ttfb: Optional[float] = getattr(
- getattr(self, "_metrics", None), "ttfb", None
- )
+ ttfb: float | None = getattr(getattr(self, "_metrics", None), "ttfb", None)
if ttfb is not None:
current_span.set_attribute("metrics.ttfb", ttfb)
except Exception as e:
diff --git a/src/pipecat/utils/tracing/tracing_context.py b/src/pipecat/utils/tracing/tracing_context.py
index c84299701..63f0b84ae 100644
--- a/src/pipecat/utils/tracing/tracing_context.py
+++ b/src/pipecat/utils/tracing/tracing_context.py
@@ -11,6 +11,8 @@ conversation and turn span contexts. Each PipelineTask creates its own
TracingContext, ensuring concurrent pipelines do not interfere with each other.
"""
+from __future__ import annotations
+
import uuid
from typing import TYPE_CHECKING, Optional
@@ -35,12 +37,12 @@ class TracingContext:
def __init__(self):
"""Initialize the tracing context with empty state."""
- self._conversation_context: Optional["Context"] = None
- self._turn_context: Optional["Context"] = None
- self._conversation_id: Optional[str] = None
+ self._conversation_context: Context | None = None
+ self._turn_context: Context | None = None
+ self._conversation_id: str | None = None
def set_conversation_context(
- self, span_context: Optional["SpanContext"], conversation_id: Optional[str] = None
+ self, span_context: SpanContext | None, conversation_id: str | None = None
):
"""Set the current conversation context.
@@ -59,7 +61,7 @@ class TracingContext:
else:
self._conversation_context = None
- def get_conversation_context(self) -> Optional["Context"]:
+ def get_conversation_context(self) -> Context | None:
"""Get the OpenTelemetry context for the current conversation.
Returns:
@@ -67,7 +69,7 @@ class TracingContext:
"""
return self._conversation_context
- def set_turn_context(self, span_context: Optional["SpanContext"]):
+ def set_turn_context(self, span_context: SpanContext | None):
"""Set the current turn context.
Args:
@@ -82,7 +84,7 @@ class TracingContext:
else:
self._turn_context = None
- def get_turn_context(self) -> Optional["Context"]:
+ def get_turn_context(self) -> Context | None:
"""Get the OpenTelemetry context for the current turn.
Returns:
@@ -91,7 +93,7 @@ class TracingContext:
return self._turn_context
@property
- def conversation_id(self) -> Optional[str]:
+ def conversation_id(self) -> str | None:
"""Get the ID for the current conversation.
Returns:
diff --git a/src/pipecat/utils/tracing/turn_trace_observer.py b/src/pipecat/utils/tracing/turn_trace_observer.py
index 83c2bcdc2..b5f199e15 100644
--- a/src/pipecat/utils/tracing/turn_trace_observer.py
+++ b/src/pipecat/utils/tracing/turn_trace_observer.py
@@ -11,7 +11,9 @@ turn, integrating with the turn tracking system to provide hierarchical tracing
of conversation flows.
"""
-from typing import TYPE_CHECKING, Dict, Optional
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Optional
from loguru import logger
@@ -46,9 +48,9 @@ class TurnTraceObserver(BaseObserver):
self,
turn_tracker: TurnTrackingObserver,
latency_tracker: UserBotLatencyObserver,
- conversation_id: Optional[str] = None,
- additional_span_attributes: Optional[dict] = None,
- tracing_context: Optional[TracingContext] = None,
+ conversation_id: str | None = None,
+ additional_span_attributes: dict | None = None,
+ tracing_context: TracingContext | None = None,
**kwargs,
):
"""Initialize the turn trace observer.
@@ -65,13 +67,13 @@ class TurnTraceObserver(BaseObserver):
self._turn_tracker = turn_tracker
self._latency_tracker = latency_tracker
self._tracing_context = tracing_context or TracingContext()
- self._current_span: Optional["Span"] = None
+ self._current_span: Span | None = None
self._current_turn_number: int = 0
- self._trace_context_map: Dict[int, "SpanContext"] = {}
+ self._trace_context_map: dict[int, SpanContext] = {}
self._tracer = trace.get_tracer("pipecat.turn") if is_tracing_available() else None
# Conversation tracking properties
- self._conversation_span: Optional["Span"] = None
+ self._conversation_span: Span | None = None
self._conversation_id = conversation_id
self._additional_span_attributes = additional_span_attributes or {}
@@ -115,7 +117,7 @@ class TurnTraceObserver(BaseObserver):
if isinstance(data.frame, StartFrame) and not self._conversation_span:
self.start_conversation_tracing(self._conversation_id)
- def start_conversation_tracing(self, conversation_id: Optional[str] = None):
+ def start_conversation_tracing(self, conversation_id: str | None = None):
"""Start a new conversation span.
Args:
@@ -230,7 +232,7 @@ class TurnTraceObserver(BaseObserver):
logger.debug(f"Ended tracing for Turn {turn_number}")
- def get_current_turn_context(self) -> Optional["SpanContext"]:
+ def get_current_turn_context(self) -> SpanContext | None:
"""Get the span context for the current turn.
This can be used by services to create child spans.
@@ -243,7 +245,7 @@ class TurnTraceObserver(BaseObserver):
return self._current_span.get_span_context()
- def get_turn_context(self, turn_number: int) -> Optional["SpanContext"]:
+ def get_turn_context(self, turn_number: int) -> SpanContext | None:
"""Get the span context for a specific turn.
This can be used by services to create child spans.
diff --git a/tests/test_direct_functions.py b/tests/test_direct_functions.py
index 5c926cf02..d94172e92 100644
--- a/tests/test_direct_functions.py
+++ b/tests/test_direct_functions.py
@@ -52,7 +52,7 @@ class TestDirectFunction(unittest.TestCase):
self.assertEqual(func.properties, {})
async def my_function_simple_params(
- params: FunctionCallParams, name: str, age: int, height: Union[float, None]
+ params: FunctionCallParams, name: str, age: int, height: float | None
):
return {"status": "success"}, None
@@ -70,7 +70,7 @@ class TestDirectFunction(unittest.TestCase):
params: FunctionCallParams,
address_lines: list[str],
nickname: str | int | float,
- extra: Optional[dict[str, str]],
+ extra: dict[str, str] | None,
):
return {"status": "success"}, None
@@ -134,7 +134,7 @@ class TestDirectFunction(unittest.TestCase):
self.assertEqual(func.required, [])
async def my_function_simple_params(
- params: FunctionCallParams, name: str, age: int, height: Union[float, None] = None
+ params: FunctionCallParams, name: str, age: int, height: float | None = None
):
return {"status": "success"}, None
@@ -143,9 +143,9 @@ class TestDirectFunction(unittest.TestCase):
async def my_function_complex_params(
params: FunctionCallParams,
- address_lines: Optional[list[str]],
+ address_lines: list[str] | None,
nickname: str | int = "Bud",
- extra: Optional[dict[str, str]] = None,
+ extra: dict[str, str] | None = None,
):
return {"status": "success"}, None
@@ -154,7 +154,7 @@ class TestDirectFunction(unittest.TestCase):
def test_property_descriptions_are_set_from_function(self):
async def my_function(
- params: FunctionCallParams, name: str, age: int, height: Union[float, None]
+ params: FunctionCallParams, name: str, age: int, height: float | None
):
"""
This is a test function.
diff --git a/tests/test_frame_processor.py b/tests/test_frame_processor.py
index a875741e3..4c39d4d89 100644
--- a/tests/test_frame_processor.py
+++ b/tests/test_frame_processor.py
@@ -7,7 +7,6 @@
import asyncio
import unittest
from dataclasses import dataclass, field
-from typing import List
from pipecat.frames.frames import (
DataFrame,
@@ -35,7 +34,7 @@ class BroadcastTestFrame(DataFrame):
text: str = ""
value: int = 0
- items: List[str] = field(default_factory=list)
+ items: list[str] = field(default_factory=list)
class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
@@ -191,8 +190,8 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
async def test_broadcast_frame(self):
"""Test that broadcast_frame creates two separate frames with fresh IDs."""
- downstream_frames: List[Frame] = []
- upstream_frames: List[Frame] = []
+ downstream_frames: list[Frame] = []
+ upstream_frames: list[Frame] = []
class BroadcastTestProcessor(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -205,7 +204,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
await self.push_frame(frame, direction)
class CaptureProcessor(FrameProcessor):
- def __init__(self, capture_list: List[Frame], direction: FrameDirection):
+ def __init__(self, capture_list: list[Frame], direction: FrameDirection):
super().__init__()
self._capture_list = capture_list
self._capture_direction = direction
@@ -256,9 +255,9 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
async def test_broadcast_frame_instance(self):
"""Test that broadcast_frame_instance shallow-copies all fields except id and name."""
- downstream_frames: List[Frame] = []
- upstream_frames: List[Frame] = []
- original_frame: List[Frame] = []
+ downstream_frames: list[Frame] = []
+ upstream_frames: list[Frame] = []
+ original_frame: list[Frame] = []
class BroadcastInstanceTestProcessor(FrameProcessor):
async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -273,7 +272,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
await self.push_frame(frame, direction)
class CaptureProcessor(FrameProcessor):
- def __init__(self, capture_list: List[Frame], direction: FrameDirection):
+ def __init__(self, capture_list: list[Frame], direction: FrameDirection):
super().__init__()
self._capture_list = capture_list
self._capture_direction = direction
@@ -346,7 +345,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
This test simulates issue #3524 where an InterruptionFrame during slow
processing would cause terminal frames to be lost, freezing the pipeline.
"""
- received_frames: List[Frame] = []
+ received_frames: list[Frame] = []
class DelayAndInterruptProcessor(FrameProcessor):
"""This processor delays processing and then generates an interruption.
@@ -398,7 +397,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase):
Similar to test_terminal_frames_survive_interruption but specifically
for StopFrame.
"""
- received_frames: List[Frame] = []
+ received_frames: list[Frame] = []
class DelayAndInterruptProcessor(FrameProcessor):
"""This processor delays processing and then generates an interruption."""
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index de62d9739..8851071c4 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -289,7 +289,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase):
task.run(PipelineTaskParams(loop=asyncio.get_event_loop())),
timeout=1.0,
)
- except asyncio.TimeoutError:
+ except TimeoutError:
pass
assert upstream_received
@@ -317,7 +317,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase):
task.run(PipelineTaskParams(loop=asyncio.get_event_loop())),
timeout=1.0,
)
- except asyncio.TimeoutError:
+ except TimeoutError:
pass
assert upstream_received
@@ -346,7 +346,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase):
task.run(PipelineTaskParams(loop=asyncio.get_event_loop())),
timeout=1.0,
)
- except asyncio.TimeoutError:
+ except TimeoutError:
pass
assert "First" in upstream_texts
@@ -382,7 +382,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase):
task.run(PipelineTaskParams(loop=asyncio.get_event_loop())),
timeout=1.0,
)
- except asyncio.TimeoutError:
+ except TimeoutError:
pass
assert heartbeats_counter == expected_heartbeats
@@ -417,7 +417,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase):
task.run(PipelineTaskParams(loop=asyncio.get_event_loop())),
timeout=0.6,
)
- except asyncio.TimeoutError:
+ except TimeoutError:
pass
log_text = log_output.getvalue()
@@ -441,7 +441,7 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase):
task.run(PipelineTaskParams(loop=asyncio.get_event_loop())),
timeout=0.3,
)
- except asyncio.TimeoutError:
+ except TimeoutError:
assert True
else:
assert False
diff --git a/tests/test_service_language.py b/tests/test_service_language.py
index b4a3daaa9..7fee19aaf 100644
--- a/tests/test_service_language.py
+++ b/tests/test_service_language.py
@@ -10,7 +10,8 @@ Verifies that Language enums, raw strings (e.g. "de-DE"), and unrecognized
strings are all resolved correctly at both init time and runtime update time.
"""
-from typing import AsyncGenerator, Optional
+from collections.abc import AsyncGenerator
+from typing import Optional
from unittest.mock import patch
import pytest
@@ -45,7 +46,7 @@ class _TestTTSService(TTSService):
async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]:
yield # pragma: no cover
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
return resolve_language(language, _LANGUAGE_MAP, use_base_code=True)
@@ -64,7 +65,7 @@ class _TestSTTService(STTService):
async def process_audio_frame(self, frame, direction):
pass # pragma: no cover
- def language_to_service_language(self, language: Language) -> Optional[str]:
+ def language_to_service_language(self, language: Language) -> str | None:
return resolve_language(language, _LANGUAGE_MAP, use_base_code=True)
diff --git a/tests/test_tts_frame_ordering.py b/tests/test_tts_frame_ordering.py
index 2d7755808..bdcc588be 100644
--- a/tests/test_tts_frame_ordering.py
+++ b/tests/test_tts_frame_ordering.py
@@ -24,8 +24,8 @@ verifies TTSTextFrame ordering relative to LLMFullResponseEndFrame.
import asyncio
import unittest
+from collections.abc import AsyncGenerator, Sequence
from dataclasses import dataclass
-from typing import AsyncGenerator, List, Sequence, Tuple
import pytest
@@ -215,7 +215,7 @@ class MockWebSocketPauseTTSService(TTSService):
def _assert_group_ordering(
down_frames: Sequence[Frame],
- expected_groups: List[Tuple[str, str]],
+ expected_groups: list[tuple[str, str]],
) -> None:
"""Assert two (or more) TTS+FooFrame groups are in strict order.
@@ -240,7 +240,7 @@ def _assert_group_ordering(
)
# Build groups: everything up to and including each FooFrame.
- groups: List[List[Frame]] = []
+ groups: list[list[Frame]] = []
prev = 0
for idx in foo_indices:
groups.append(relevant[prev : idx + 1])
@@ -298,7 +298,7 @@ def _assert_group_ordering(
_GROUPS = [("test 1", "1"), ("test 2", "2")]
-def _make_frames_no_sleep() -> List[Frame]:
+def _make_frames_no_sleep() -> list[Frame]:
"""Return two TTSSpeakFrame+FooFrame pairs sent back-to-back.
Only correct for services that pause downstream processing until the audio
diff --git a/tests/test_vad_controller.py b/tests/test_vad_controller.py
index 30f2fe56c..9ed7e9a1b 100644
--- a/tests/test_vad_controller.py
+++ b/tests/test_vad_controller.py
@@ -6,7 +6,6 @@
import asyncio
import unittest
-from typing import List
from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams, VADState
from pipecat.audio.vad.vad_controller import VADController
@@ -125,7 +124,7 @@ class TestVADController(unittest.IsolatedAsyncioTestCase):
analyzer = MockVADAnalyzer()
controller = VADController(analyzer)
- pushed_frames: List[tuple] = []
+ pushed_frames: list[tuple] = []
@controller.event_handler("on_push_frame")
async def on_push_frame(_controller, frame: Frame, direction: FrameDirection):
@@ -143,7 +142,7 @@ class TestVADController(unittest.IsolatedAsyncioTestCase):
analyzer = MockVADAnalyzer()
controller = VADController(analyzer)
- broadcast_calls: List[tuple] = []
+ broadcast_calls: list[tuple] = []
@controller.event_handler("on_broadcast_frame")
async def on_broadcast_frame(_controller, frame_cls, **kwargs):
@@ -192,7 +191,7 @@ class TestVADController(unittest.IsolatedAsyncioTestCase):
analyzer = MockVADAnalyzer()
controller = VADController(analyzer)
- broadcast_calls: List[tuple] = []
+ broadcast_calls: list[tuple] = []
@controller.event_handler("on_broadcast_frame")
async def on_broadcast_frame(_controller, frame_cls, **kwargs):
diff --git a/tests/test_vad_processor.py b/tests/test_vad_processor.py
index afb6e1482..d6c40e5b4 100644
--- a/tests/test_vad_processor.py
+++ b/tests/test_vad_processor.py
@@ -5,7 +5,6 @@
#
import unittest
-from typing import List
from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADState
from pipecat.frames.frames import (
@@ -22,7 +21,7 @@ from pipecat.tests.utils import run_test
class MockVADAnalyzer(VADAnalyzer):
"""A mock VAD analyzer that returns states from a predefined sequence."""
- def __init__(self, states: List[VADState]):
+ def __init__(self, states: list[VADState]):
super().__init__(sample_rate=16000)
self._states = list(states)
self._call_index = 0