diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 000000000..c628e79e5 --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,27 @@ +{ + "name": "pipecat-dev-skills", + "owner": { + "name": "Pipecat" + }, + "metadata": { + "description": "Development workflow skills for contributing to the Pipecat project", + "version": "1.0.0" + }, + "plugins": [ + { + "name": "pipecat-dev", + "description": "Development workflow skills for contributing to the Pipecat project", + "version": "1.0.0", + "source": "./", + "skills": [ + "./.claude/skills/changelog", + "./.claude/skills/cleanup", + "./.claude/skills/code-review", + "./.claude/skills/docstring", + "./.claude/skills/pr-description", + "./.claude/skills/pr-submit", + "./.claude/skills/update-docs" + ] + } + ] +} diff --git a/.claude/skills/cleanup/SKILL.md b/.claude/skills/cleanup/SKILL.md index f7dd6ea98..91a61db39 100644 --- a/.claude/skills/cleanup/SKILL.md +++ b/.claude/skills/cleanup/SKILL.md @@ -1,6 +1,6 @@ # Code Cleanup Skill -The **Code Cleanup Skill** reviews, refactors, and documents code changes in your current branch, ensuring alignment with **Pipecat’s architecture, coding standards, and example patterns**. +The **Code Cleanup Skill** reviews, refactors, and documents code changes in your current branch, ensuring alignment with **Pipecat's architecture, coding standards, and example patterns**. It focuses on **readability, correctness, performance, and consistency**, while avoiding breaking changes. 
--- @@ -28,9 +28,9 @@ This skill analyzes all changes introduced in your branch and performs the follo Invoke the skill using any of the following commands: -- “Clean up my branch code” -- “Refactor the changes in my branch” -- “Review and improve my branch code” +- "Clean up my branch code" +- "Refactor the changes in my branch" +- "Review and improve my branch code" - `/cleanup` --- diff --git a/.claude/skills/docstring/SKILL.md b/.claude/skills/docstring/SKILL.md index 1c1e3c905..129d83763 100644 --- a/.claude/skills/docstring/SKILL.md +++ b/.claude/skills/docstring/SKILL.md @@ -3,21 +3,20 @@ name: docstring description: Document a Python module and its classes using Google style --- -Document a Python module and its classes using Google-style docstrings following project conventions. The class name is provided as an argument. +Document a Python module or class using Google-style docstrings following project conventions. The argument can be a class name or a module path. ## Instructions -1. First, find the class in the codebase: - ``` - Search for "class ClassName" in src/pipecat/ - ``` +1. Determine what to document based on the argument: -2. If multiple files contain that class name: - - List all matches with their file paths - - Ask the user which one they want to document - - Wait for confirmation before proceeding + **If a module path is provided** (e.g. `src/pipecat/audio/vad/vad_analyzer.py`): + - Use that file directly -3. Once the file is identified, read the module to understand its structure: + **If a class name is provided** (e.g. `VADAnalyzer`): + - Search for `class ClassName` in `src/pipecat/` + - If multiple files contain that class name, list all matches with their file paths, ask the user which one they want to document, and wait for confirmation + +2. 
Once the file is identified, read the module to understand its structure: - Identify all classes, functions, and important type aliases - Understand the purpose of each component diff --git a/.github/workflows/coverage.yaml b/.github/workflows/coverage.yaml index df9c388bf..26d03861b 100644 --- a/.github/workflows/coverage.yaml +++ b/.github/workflows/coverage.yaml @@ -37,11 +37,12 @@ jobs: uv sync --group dev \ --extra anthropic \ --extra aws \ + --extra deepgram \ --extra google \ --extra langchain \ --extra livekit \ - --extra local-smart-turn-v3 \ --extra piper \ + --extra sagemaker \ --extra tracing \ --extra websocket diff --git a/.github/workflows/generate-changelog.yml b/.github/workflows/generate-changelog.yml index 005eb94f1..496b3381c 100644 --- a/.github/workflows/generate-changelog.yml +++ b/.github/workflows/generate-changelog.yml @@ -86,7 +86,7 @@ jobs: fi # Validate fragment types - VALID_TYPES="added changed deprecated removed fixed security other" + VALID_TYPES="added changed deprecated removed fixed performance security other" INVALID_FRAGMENTS="" for file in changelog/*.md; do diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 5941448f3..b22d502c4 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -41,11 +41,12 @@ jobs: uv sync --group dev \ --extra anthropic \ --extra aws \ + --extra deepgram \ --extra google \ --extra langchain \ --extra livekit \ - --extra local-smart-turn-v3 \ --extra piper \ + --extra sagemaker \ --extra tracing \ --extra websocket diff --git a/.github/workflows/update-docs.yml b/.github/workflows/update-docs.yml new file mode 100644 index 000000000..d26862766 --- /dev/null +++ b/.github/workflows/update-docs.yml @@ -0,0 +1,147 @@ +name: Update Documentation on PR Merge + +on: + pull_request_target: + types: [closed] + branches: [main] + paths: + - "src/pipecat/services/**" + - "src/pipecat/transports/**" + - "src/pipecat/serializers/**" + - 
"src/pipecat/processors/**" + - "src/pipecat/audio/**" + - "src/pipecat/turns/**" + - "src/pipecat/observers/**" + - "src/pipecat/pipeline/**" + workflow_dispatch: + inputs: + pr_number: + description: "PR number to generate docs for" + required: true + type: string + +jobs: + update-docs: + if: >- + github.event_name == 'workflow_dispatch' || + github.event.pull_request.merged == true + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: read + pull-requests: read + id-token: write + steps: + - name: Checkout pipecat + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Checkout docs + uses: actions/checkout@v4 + with: + repository: pipecat-ai/docs + token: ${{ secrets.DOCS_SYNC_TOKEN }} + path: _docs + + - name: Resolve PR number + id: pr + run: | + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + echo "number=${{ inputs.pr_number }}" >> "$GITHUB_OUTPUT" + else + echo "number=${{ github.event.pull_request.number }}" >> "$GITHUB_OUTPUT" + fi + + - name: Update documentation + uses: anthropics/claude-code-action@v1 + env: + DOCS_SYNC_TOKEN: ${{ secrets.DOCS_SYNC_TOKEN }} + with: + anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} + github_token: ${{ secrets.GITHUB_TOKEN }} + prompt: | + You are updating documentation for the pipecat-ai/docs repository based on + changes merged in PR #${{ steps.pr.outputs.number }} of pipecat-ai/pipecat. + + ## Setup + + 1. Read the skill instructions at `.claude/skills/update-docs/SKILL.md` + 2. Read the source-to-doc mapping at `.claude/skills/update-docs/SOURCE_DOC_MAPPING.md` + 3. The docs repository is checked out at `./_docs/` + + ## Get the diff + + Run `gh pr diff ${{ steps.pr.outputs.number }}` to see what changed in the PR. + Also run `gh pr diff ${{ steps.pr.outputs.number }} --name-only` to get the list of changed files. + Filter to source files matching the directories listed in SKILL.md Step 3. 
+ + If no relevant source files were changed, exit with "No documentation changes needed." + + ## Follow the skill instructions + + Apply the SKILL.md workflow (Steps 3-9) with these adaptations for automation: + + ### Docs path + Use `./_docs/` — it's already checked out. Do not ask for a path. + + ### Branch management + - Branch name: `docs/pr-${{ steps.pr.outputs.number }}` + - Work inside `./_docs/` for all doc edits and git operations + - Check if the branch already exists on the remote: + ```bash + cd _docs && git fetch origin docs/pr-${{ steps.pr.outputs.number }} 2>/dev/null + ``` + - If it exists: check it out (supports workflow re-runs) + - If not: create it from main + + ### Git config + Before committing in `_docs`, set: + ```bash + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + ``` + + ### No interactive questions + Do not ask questions. If you encounter gaps (unmapped files, missing sections, + ambiguous changes), note them in the PR body under "## Gaps identified". + + ### Creating the docs PR + After committing all changes in `_docs`, push and create a PR: + ```bash + cd _docs + git push -u origin docs/pr-${{ steps.pr.outputs.number }} + GH_TOKEN=$DOCS_SYNC_TOKEN gh pr create \ + --repo pipecat-ai/docs \ + --label auto-docs \ + --title "docs: update for pipecat PR #${{ steps.pr.outputs.number }}" \ + --body "$(cat <<'BODY' + Automated documentation update for [pipecat PR #${{ steps.pr.outputs.number }}](https://github.com/pipecat-ai/pipecat/pull/${{ steps.pr.outputs.number }}). + + ## Changes + + + ## Gaps identified + + BODY + )" + ``` + + ### Re-run handling + If `gh pr create` fails because a PR from that branch already exists, + push the updated commits and use `gh pr edit` to update the body instead. 
+ + ### No-op + If after analyzing the diff you determine no documentation changes are needed + (e.g., only skip-listed files changed, or changes don't affect public API docs), + exit cleanly without creating a branch or PR. Output "No documentation changes needed." + + ## Important rules + - Only modify files inside `./_docs/` — never modify pipecat source code + - Follow the conservative editing rules from SKILL.md Step 6 + - Read each doc page fully before editing (SKILL.md Guidelines) + - Use `GH_TOKEN=$DOCS_SYNC_TOKEN` for all `gh` commands targeting pipecat-ai/docs + claude_args: | + --model claude-sonnet-4-5-20250929 + --max-turns 30 + --allowedTools "Read,Write,Edit,Glob,Grep,Bash" diff --git a/CHANGELOG.md b/CHANGELOG.md index ab41e8163..c917ec992 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,215 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 +## [0.0.103] - 2026-02-20 + +### Added + +- Added `"timestampTransportStrategy": "ASYNC"` to `InworldAITTSService`. This + allows timestamp info to trail the arrival of audio chunks, resulting in + much better first-audio-chunk latency. + (PR [#3625](https://github.com/pipecat-ai/pipecat/pull/3625)) + +- Added model-specific `InputParams` to `RimeTTSService`: arcana params + (`repetition_penalty`, `temperature`, `top_p`) and mistv2 params + (`no_text_normalization`, `save_oovs`, `segment`). Model, voice, and param + changes now trigger WebSocket reconnection. + (PR [#3642](https://github.com/pipecat-ai/pipecat/pull/3642)) + +- Added `write_transport_frame()` hook to `BaseOutputTransport` allowing + transport subclasses to handle custom frame types that flow through the audio + queue. + (PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719)) + +- Added `DailySIPTransferFrame` and `DailySIPReferFrame` to the Daily + transport. These frames queue SIP transfer and SIP REFER operations with + audio, so the operation executes only after the bot finishes its current + utterance. 
+ (PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719)) + +- Added keepalive support to `SarvamSTTService` to prevent idle connection + timeouts (e.g. when used behind a `ServiceSwitcher`). + (PR [#3730](https://github.com/pipecat-ai/pipecat/pull/3730)) + +- Added `UserIdleTimeoutUpdateFrame` to enable or disable user idle detection + at runtime by updating the timeout dynamically. + (PR [#3748](https://github.com/pipecat-ai/pipecat/pull/3748)) + +- Added `broadcast_sibling_id` field to the base `Frame` class. This field is + automatically set by `broadcast_frame()` and `broadcast_frame_instance()` to + the ID of the paired frame pushed in the opposite direction, allowing + receivers to identify broadcast pairs. + (PR [#3774](https://github.com/pipecat-ai/pipecat/pull/3774)) + +- Added `ignored_sources` parameter to `RTVIObserverParams` and + `add_ignored_source()`/`remove_ignored_source()` methods to `RTVIObserver` to + suppress RTVI messages from specific pipeline processors (e.g. a silent + evaluation LLM). + (PR [#3779](https://github.com/pipecat-ai/pipecat/pull/3779)) + +- Added `DeepgramSageMakerTTSService` for running Deepgram TTS models deployed + on AWS SageMaker endpoints via HTTP/2 bidirectional streaming. Supports the + Deepgram TTS protocol (Speak, Flush, Clear, Close), interruption handling, + and per-turn TTFB metrics. + (PR [#3785](https://github.com/pipecat-ai/pipecat/pull/3785)) + +### Changed + +- ⚠️ `RimeTTSService` now defaults to `model="arcana"` and the + `wss://users-ws.rime.ai/ws3` endpoint. `InputParams` defaults changed from + mistv2-specific values to `None` — only explicitly-set params are sent as + query params. + (PR [#3642](https://github.com/pipecat-ai/pipecat/pull/3642)) + +- `AICFilter` now shares read-only AIC models via a singleton `AICModelManager` + in `aic_filter.py`. 
+ - Multiple filters using the same model path or `(model_id, + model_download_dir)` share one loaded model, with reference counting and + concurrent load deduplication. + - Model file I/O runs off the event loop so the filter does not block. + (PR [#3684](https://github.com/pipecat-ai/pipecat/pull/3684)) + +- Added `X-User-Agent` and `X-Request-Id` headers to `InworldTTSService` for + better traceability. + (PR [#3706](https://github.com/pipecat-ai/pipecat/pull/3706)) + +- `DailyUpdateRemoteParticipantsFrame` is no longer deprecated and is now + queued with audio like other transport frames. + (PR [#3719](https://github.com/pipecat-ai/pipecat/pull/3719)) + +- Bumped Pillow dependency upper bound from `<12` to `<13` to allow Pillow + 12.x. + (PR [#3728](https://github.com/pipecat-ai/pipecat/pull/3728)) + +- Moved STT keepalive mechanism from `WebsocketSTTService` to the `STTService` + base class, allowing any STT service (not just websocket-based ones) to use + idle-connection keepalive via the `keepalive_timeout` and + `keepalive_interval` parameters. + (PR [#3730](https://github.com/pipecat-ai/pipecat/pull/3730)) + +- Improved audio context management in `AudioContextTTSService` by moving + context ID tracking to the base class and adding + `reuse_context_id_within_turn` parameter to control concurrent TTS request + handling. + - Added helper methods: `has_active_audio_context()`, + `get_active_audio_context_id()`, `remove_active_audio_context()`, + `reset_active_audio_context()` + - Simplified Cartesia, ElevenLabs, Inworld, Rime, AsyncAI, and Gradium TTS + implementations by removing duplicate context management code + (PR [#3732](https://github.com/pipecat-ai/pipecat/pull/3732)) + +- `UserIdleController` is now always created with a default timeout of 0 + (disabled). The `user_idle_timeout` parameter changed from `Optional[float] = + None` to `float = 0` in `UserTurnProcessor`, `LLMUserAggregatorParams`, and + `UserIdleController`. 
+ (PR [#3748](https://github.com/pipecat-ai/pipecat/pull/3748)) + +- Changed the version specifier from `>=0.2.8` to `~=0.2.8` for the + `speechmatics-voice` package to ensure compatibility with future patch + versions. + (PR [#3761](https://github.com/pipecat-ai/pipecat/pull/3761)) + +- Updated `InworldTTSService` and `InworldHttpTTSService` to use `ASYNC` + timestamp transport strategy by default + (PR [#3765](https://github.com/pipecat-ai/pipecat/pull/3765)) + +- Added `start_time` and `end_time` parameters to `start_ttfb_metrics()`, + `stop_ttfb_metrics()`, `start_processing_metrics()`, and + `stop_processing_metrics()` in `FrameProcessor` and `FrameProcessorMetrics`, + allowing custom timestamps for metrics measurement. `STTService` now uses + these instead of custom TTFB tracking. + (PR [#3776](https://github.com/pipecat-ai/pipecat/pull/3776)) + +- Updated default Anthropic model from `claude-sonnet-4-5-20250929` to + `claude-sonnet-4-6`. + (PR [#3792](https://github.com/pipecat-ai/pipecat/pull/3792)) + +### Deprecated + +- Deprecated unused `Traceable`, `@traceable`, `@traced`, and + `AttachmentStrategy` in `pipecat.utils.tracing.class_decorators`. This module + will be removed in a future release. + (PR [#3733](https://github.com/pipecat-ai/pipecat/pull/3733)) + +### Fixed + +- Fixed race condition where `RTVIObserver` could send messages before + `DailyTransport` join completed. Outbound messages are now queued & delivered + after the transport is ready. + (PR [#3615](https://github.com/pipecat-ai/pipecat/pull/3615)) + +- Fixed async generator cleanup in OpenAI LLM streaming to prevent + `AttributeError` with uvloop on Python 3.12+ (MagicStack/uvloop#699). + (PR [#3698](https://github.com/pipecat-ai/pipecat/pull/3698)) + +- Fixed `SmallWebRTCTransport` input audio resampling to properly handle all + sample rates, including 8kHz audio. 
+ (PR [#3713](https://github.com/pipecat-ai/pipecat/pull/3713)) + +- Fixed a race condition in `RTVIObserver` where bot output messages could be + sent before the bot-started-speaking event. + (PR [#3718](https://github.com/pipecat-ai/pipecat/pull/3718)) + +- Fixed Grok Realtime `session.updated` event parsing failure caused by the API + returning prefixed voice names (e.g. `"human_Ara"` instead of `"Ara"`). + (PR [#3720](https://github.com/pipecat-ai/pipecat/pull/3720)) + +- Fixed context ID reuse issue in `ElevenLabsTTSService`, `InworldTTSService`, + `RimeTTSService`, `CartesiaTTSService`, `AsyncAITTSService`, and + `PlayHTTTSService`. Services now properly reuse the same context ID across + multiple `run_tts()` invocations within a single LLM turn, preventing context + tracking issues and incorrect lifecycle signaling. + (PR [#3729](https://github.com/pipecat-ai/pipecat/pull/3729)) + +- Fixed word timestamp interleaving issue in `ElevenLabsTTSService` when + processing multiple sentences within a single LLM turn. + (PR [#3729](https://github.com/pipecat-ai/pipecat/pull/3729)) + +- Fixed tracing service decorators executing the wrapped function twice when + the function itself raised an exception (e.g., LLM rate limit, TTS timeout). + (PR [#3735](https://github.com/pipecat-ai/pipecat/pull/3735)) + +- Fixed `LLMUserAggregator` broadcasting mute events before `StartFrame` + reaches downstream processors. + (PR [#3737](https://github.com/pipecat-ai/pipecat/pull/3737)) + +- Fixed `UserIdleController` false idle triggers caused by gaps between user + and bot activity frames. The idle timer now starts only after + `BotStoppedSpeakingFrame` and is suppressed during active user turns and + function calls. + (PR [#3744](https://github.com/pipecat-ai/pipecat/pull/3744)) + +- Fixed incorrect `sample_rate` assignment in + `TavusInputTransport._on_participant_audio_data` (was using + `audio.audio_frames` instead of `audio.sample_rate`). 
+ (PR [#3768](https://github.com/pipecat-ai/pipecat/pull/3768)) + +- Fixed `RTVIObserver` not processing upstream-only frames. Previously, all + upstream frames were filtered out to avoid duplicate messages from + broadcasted frames. Now only upstream copies of broadcasted frames are + skipped. + (PR [#3774](https://github.com/pipecat-ai/pipecat/pull/3774)) + +- Fixed mutable default arguments in `LLMContextAggregatorPair.__init__()` that + could cause shared state across instances. + (PR [#3782](https://github.com/pipecat-ai/pipecat/pull/3782)) + +- Fixed `DeepgramSageMakerSTTService` to properly track finalize lifecycle + using `request_finalize()` / `confirm_finalize()` and use `is_final` (instead + of `is_final and speech_final`) for final transcription detection, matching + `DeepgramSTTService` behavior. + (PR [#3784](https://github.com/pipecat-ai/pipecat/pull/3784)) + +- Fixed a race condition in `AudioContextTTSService` where the audio context + could time out between consecutive TTS requests within the same turn, causing + audio to be discarded. + (PR [#3787](https://github.com/pipecat-ai/pipecat/pull/3787)) + +- Fixed `push_interruption_task_frame_and_wait()` hanging indefinitely when the + `InterruptionFrame` does not reach the pipeline sink within the timeout. + Added a `timeout` keyword argument to customize the wait duration. 
+ (PR [#3789](https://github.com/pipecat-ai/pipecat/pull/3789)) + ## [0.0.102] - 2026-02-10 ### Added diff --git a/CLAUDE.md b/CLAUDE.md index 7b79fa168..7727975b3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -25,7 +25,7 @@ uv run pytest tests/test_name.py uv run pytest tests/test_name.py::test_function_name # Preview changelog -towncrier build --draft --version Unreleased +uv run towncrier build --draft --version Unreleased # Lint and format check uv run ruff check @@ -74,7 +74,7 @@ All data flows as **Frame** objects through a pipeline of **FrameProcessors**: - **Context Aggregation**: `LLMContext` accumulates messages for LLM calls; `UserResponse` aggregates user input - **Turn Management**: Turn management is done through `LLMUserAggregator` and -`LLMAssistantAggregator`, created with `LLMContextAggregatorPair` + `LLMAssistantAggregator`, created with `LLMContextAggregatorPair` - **User turn strategies**: Detection of when the user starts and stops speaking is done via user turn start/stop strategies. They push `UserStartedSpeakingFrame` and `UserStoppedSpeakingFrame` respectively. 
@@ -90,23 +90,26 @@ All data flows as **Frame** objects through a pipeline of **FrameProcessors**: ### Key Directories -| Directory | Purpose | -|---------------------------|----------------------------------------------------| -| `src/pipecat/frames/` | Frame definitions (100+ types) | -| `src/pipecat/processors/` | FrameProcessor base + aggregators, filters, audio | -| `src/pipecat/pipeline/` | Pipeline orchestration | -| `src/pipecat/services/` | AI service integrations (60+ providers) | -| `src/pipecat/transports/` | Transport layer (Daily, LiveKit, WebSocket, Local) | -| `src/pipecat/serializers/`| Frame serialization for WebSocket protocols | -| `src/pipecat/observers/` | Pipeline observers for monitoring frame flow | -| `src/pipecat/audio/` | VAD, filters, mixers, turn detection, DTMF | -| `src/pipecat/turns/` | User turn management | +| Directory | Purpose | +| -------------------------- | -------------------------------------------------- | +| `src/pipecat/frames/` | Frame definitions (100+ types) | +| `src/pipecat/processors/` | FrameProcessor base + aggregators, filters, audio | +| `src/pipecat/pipeline/` | Pipeline orchestration | +| `src/pipecat/services/` | AI service integrations (60+ providers) | +| `src/pipecat/transports/` | Transport layer (Daily, LiveKit, WebSocket, Local) | +| `src/pipecat/serializers/` | Frame serialization for WebSocket protocols | +| `src/pipecat/observers/` | Pipeline observers for monitoring frame flow | +| `src/pipecat/audio/` | VAD, filters, mixers, turn detection, DTMF | +| `src/pipecat/turns/` | User turn management | ## Code Style - **Docstrings**: Google-style. Classes describe purpose; `__init__` has `Args:` section; dataclasses use `Parameters:` section. - **Linting**: Ruff (line length 100). Pre-commit hooks enforce formatting. - **Type hints**: Required for complex async code. +- **Dataclass vs Pydantic**: Use `@dataclass` for frames and internal pipeline data (high-frequency, no validation needed). 
Use Pydantic `BaseModel` for configuration, parameters, metrics, and external API data (benefits from validation and serialization). Specifically: + - `@dataclass`: Frame types, context aggregator pairs, internal data containers + - `BaseModel`: Service `InputParams`, transport/VAD/turn params, metrics data, API request/response models, serializer params ### Docstring Example @@ -152,4 +155,3 @@ When adding a new service: ## Testing Test utilities live in `src/pipecat/tests/utils.py`. Use `run_test()` to send frames through a pipeline and assert expected output frames in each direction. Use `SleepFrame(sleep=N)` to add delays between frames. - diff --git a/COMMUNITY_INTEGRATIONS.md b/COMMUNITY_INTEGRATIONS.md index a26836a52..ff8d08ea5 100644 --- a/COMMUNITY_INTEGRATIONS.md +++ b/COMMUNITY_INTEGRATIONS.md @@ -25,7 +25,6 @@ Your repository must contain these components: - **Source code** - Complete implementation following Pipecat patterns - **Foundational example** - Single file example showing basic usage (see [Pipecat examples](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational)) - **README.md** - Must include: - - Introduction and explanation of your integration - Installation instructions - Usage instructions with Pipecat Pipeline @@ -110,7 +109,6 @@ Once your PR is submitted, post in the `#community-integrations` Discord channel #### Key requirements: - **Frame sequence:** Output must follow this frame sequence pattern: - - `LLMFullResponseStartFrame` - Signals the start of an LLM response - `LLMTextFrame` - Contains LLM content, typically streamed as tokens - `LLMFullResponseEndFrame` - Signals the end of an LLM response @@ -235,22 +233,79 @@ def can_generate_metrics(self) -> bool: ### Dynamic Settings Updates -STT, LLM, and TTS services support `ServiceUpdateSettingsFrame` for dynamic configuration changes. 
The base STTService has an `_update_settings()` method that handles settings, and the private `_settings` `Dict` is used to store settings and provide access to the subclass. +STT, LLM, and TTS services support runtime configuration changes via `*UpdateSettingsFrame`s (e.g. `STTUpdateSettingsFrame`, `TTSUpdateSettingsFrame`, `LLMUpdateSettingsFrame`). + +Each service declares a settings dataclass that extends the appropriate base (`STTSettings`, `TTSSettings`, `LLMSettings`). Fields default to `NOT_GIVEN` so that update objects can represent sparse deltas: ```python -async def set_language(self, language: Language): - """Set the recognition language and reconnect. +from dataclasses import dataclass, field - Args: - language: The language to use for speech recognition. +from pipecat.services.settings import STTSettings, NOT_GIVEN + +@dataclass +class MySTTSettings(STTSettings): + """Settings for my STT service. + + Parameters: + region: Cloud region for the service. """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = language - await self._disconnect() - await self._connect() + + region: str = field(default_factory=lambda: NOT_GIVEN) ``` -Note that, in this example, Deepgram requires the websocket connection be disconnected and reconnected to reinitialize the service with the new value. Consider if your service requires reconnection. +The service stores its current settings in `self._settings` and declares the type with a class-level annotation for editor support: + +```python +class MySTTService(STTService): + _settings: MySTTSettings + + def __init__(self, *, model: str, language: str, region: str, **kwargs): + # An initial value should be provided for every settings field. + # This will be validated at service start. + # (If you track sample_rate, it can be a placeholder value like 0; see + # "Sample Rate Handling"). 
+ super().__init__( + settings=MySTTSettings(model=model, language=language, region=region), **kwargs + ) +``` + +To react to runtime setting changes, override `_update_settings`. The base implementation applies the delta to `self._settings` and returns a `dict` mapping each changed field name to its **pre-update** value. Your override should call `super()` first, then act on the changed fields. A common implementation might look like: + +```python +async def _update_settings(self, update: STTSettings) -> dict[str, Any]: + """Apply a settings update, reconfiguring the recognizer if needed.""" + changed = await super()._update_settings(update) + + if not changed: + return changed + + await self._disconnect() + await self._connect() + + return changed +``` + +The dict keys work like a set for membership tests (`"language" in changed`) and truthiness (`if changed`). Use `changed.keys() - {"language"}` for set difference, or `changed["language"]` to inspect the previous value of a field. + +Note that, in this example, the service requires a reconnect to apply the new language. Consider, for each setting, whether your service requires reconnection or can apply changes in-place. + +If your service can't yet apply certain settings at runtime, call `self._warn_unhandled_updated_settings(changed)` with any unhandled field names so users get a clear log message: + +```python +async def _update_settings(self, update: STTSettings) -> dict[str, Any]: + changed = await super()._update_settings(update) + + if not changed: + return changed + + if "language" in changed: + await self._update_language() + else: + # TODO: this should be temporary - handle changes to other settings soon! 
+ self._warn_unhandled_updated_settings(changed.keys() - {"language"}) + + return changed +``` ### Sample Rate Handling @@ -260,7 +315,7 @@ Sample rates are set via PipelineParams and passed to each frame processor at in async def start(self, frame: StartFrame): """Start the service.""" await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate await self._connect() ``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 937532ec9..936a652fa 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -49,12 +49,12 @@ Every pull request that makes a user-facing change should include a changelog en ``` 2. Choose the appropriate type: - - `added.md` - New features - `changed.md` - Changes in existing functionality - `deprecated.md` - Soon-to-be removed features - `removed.md` - Removed features - `fixed.md` - Bug fixes + - `performance.md` - Performance improvements - `security.md` - Security fixes - `other.md` - Other changes (documentation, dependencies, etc.) @@ -80,7 +80,6 @@ Every pull request that makes a user-facing change should include a changelog en ```markdown - Updated service configuration: - - Changed default timeout to 30 seconds - Added retry logic for failed connections ``` @@ -105,7 +104,6 @@ changelog/1234.changed.2.md ```markdown - Updated service configuration: - - Changed default timeout to 30 seconds - Added retry logic for failed connections ``` diff --git a/README.md b/README.md index 38a6aa8b3..6a8c8c1fa 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,16 @@ Looking for help debugging your pipeline and processors? Check out [Whisker](htt Love terminal applications? Check out [Tail](https://github.com/pipecat-ai/tail), a terminal dashboard for Pipecat. +### 🤖 Claude Code Skills + +Use [Pipecat Skills](https://github.com/pipecat-ai/skills) with [Claude Code](https://claude.ai/code) to scaffold projects, deploy to Pipecat Cloud, and more. 
Install the marketplace with: + +``` +claude plugin marketplace add pipecat-ai/skills +``` + +and install any of the available plugins. + ### 📺️ Pipecat TV Channel Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.youtube.com/playlist?list=PLzU2zoMTQIHjqC3v4q2XVSR3hGSzwKFwH) channel. @@ -71,19 +81,19 @@ Catch new features, interviews, and how-tos on our [Pipecat TV](https://www.yout ## 🧩 Available services -| Category | Services | -| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Speech-to-Text | 
[AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | -| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), 
[Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova) [Together AI](https://docs.pipecat.ai/server/services/llm/together) | -| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [PlayHT](https://docs.pipecat.ai/server/services/tts/playht), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), 
[Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | -| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), [Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox), | -| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | -| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) | +| Category | Services | +| ------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Speech-to-Text | [AssemblyAI](https://docs.pipecat.ai/server/services/stt/assemblyai), [AWS](https://docs.pipecat.ai/server/services/stt/aws), [Azure](https://docs.pipecat.ai/server/services/stt/azure), [Cartesia](https://docs.pipecat.ai/server/services/stt/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/stt/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/stt/elevenlabs), [Fal 
Wizper](https://docs.pipecat.ai/server/services/stt/fal), [Gladia](https://docs.pipecat.ai/server/services/stt/gladia), [Google](https://docs.pipecat.ai/server/services/stt/google), [Gradium](https://docs.pipecat.ai/server/services/stt/gradium), [Groq (Whisper)](https://docs.pipecat.ai/server/services/stt/groq), [Hathora](https://docs.pipecat.ai/server/services/stt/hathora), [NVIDIA Riva](https://docs.pipecat.ai/server/services/stt/riva), [OpenAI (Whisper)](https://docs.pipecat.ai/server/services/stt/openai), [SambaNova (Whisper)](https://docs.pipecat.ai/server/services/stt/sambanova), [Sarvam](https://docs.pipecat.ai/server/services/stt/sarvam), [Soniox](https://docs.pipecat.ai/server/services/stt/soniox), [Speechmatics](https://docs.pipecat.ai/server/services/stt/speechmatics), [Whisper](https://docs.pipecat.ai/server/services/stt/whisper) | +| LLMs | [Anthropic](https://docs.pipecat.ai/server/services/llm/anthropic), [AWS](https://docs.pipecat.ai/server/services/llm/aws), [Azure](https://docs.pipecat.ai/server/services/llm/azure), [Cerebras](https://docs.pipecat.ai/server/services/llm/cerebras), [DeepSeek](https://docs.pipecat.ai/server/services/llm/deepseek), [Fireworks AI](https://docs.pipecat.ai/server/services/llm/fireworks), [Gemini](https://docs.pipecat.ai/server/services/llm/gemini), [Grok](https://docs.pipecat.ai/server/services/llm/grok), [Groq](https://docs.pipecat.ai/server/services/llm/groq), [Mistral](https://docs.pipecat.ai/server/services/llm/mistral), [NVIDIA NIM](https://docs.pipecat.ai/server/services/llm/nim), [Ollama](https://docs.pipecat.ai/server/services/llm/ollama), [OpenAI](https://docs.pipecat.ai/server/services/llm/openai), [OpenRouter](https://docs.pipecat.ai/server/services/llm/openrouter), [Perplexity](https://docs.pipecat.ai/server/services/llm/perplexity), [Qwen](https://docs.pipecat.ai/server/services/llm/qwen), [SambaNova](https://docs.pipecat.ai/server/services/llm/sambanova), [Together
AI](https://docs.pipecat.ai/server/services/llm/together) | +| Text-to-Speech | [Async](https://docs.pipecat.ai/server/services/tts/asyncai), [AWS](https://docs.pipecat.ai/server/services/tts/aws), [Azure](https://docs.pipecat.ai/server/services/tts/azure), [Camb AI](https://docs.pipecat.ai/server/services/tts/camb), [Cartesia](https://docs.pipecat.ai/server/services/tts/cartesia), [Deepgram](https://docs.pipecat.ai/server/services/tts/deepgram), [ElevenLabs](https://docs.pipecat.ai/server/services/tts/elevenlabs), [Fish](https://docs.pipecat.ai/server/services/tts/fish), [Google](https://docs.pipecat.ai/server/services/tts/google), [Gradium](https://docs.pipecat.ai/server/services/tts/gradium), [Groq](https://docs.pipecat.ai/server/services/tts/groq), [Hathora](https://docs.pipecat.ai/server/services/tts/hathora), [Hume](https://docs.pipecat.ai/server/services/tts/hume), [Inworld](https://docs.pipecat.ai/server/services/tts/inworld), [LMNT](https://docs.pipecat.ai/server/services/tts/lmnt), [MiniMax](https://docs.pipecat.ai/server/services/tts/minimax), [Neuphonic](https://docs.pipecat.ai/server/services/tts/neuphonic), [NVIDIA Riva](https://docs.pipecat.ai/server/services/tts/riva), [OpenAI](https://docs.pipecat.ai/server/services/tts/openai), [Piper](https://docs.pipecat.ai/server/services/tts/piper), [Resemble](https://docs.pipecat.ai/server/services/tts/resemble), [Rime](https://docs.pipecat.ai/server/services/tts/rime), [Sarvam](https://docs.pipecat.ai/server/services/tts/sarvam), [Speechmatics](https://docs.pipecat.ai/server/services/tts/speechmatics), [XTTS](https://docs.pipecat.ai/server/services/tts/xtts) | +| Speech-to-Speech | [AWS Nova Sonic](https://docs.pipecat.ai/server/services/s2s/aws), [Gemini Multimodal Live](https://docs.pipecat.ai/server/services/s2s/gemini), [Grok Voice Agent](https://docs.pipecat.ai/server/services/s2s/grok), [OpenAI Realtime](https://docs.pipecat.ai/server/services/s2s/openai), 
[Ultravox](https://docs.pipecat.ai/server/services/s2s/ultravox) | +| Transport | [Daily (WebRTC)](https://docs.pipecat.ai/server/services/transport/daily), [FastAPI Websocket](https://docs.pipecat.ai/server/services/transport/fastapi-websocket), [SmallWebRTCTransport](https://docs.pipecat.ai/server/services/transport/small-webrtc), [WebSocket Server](https://docs.pipecat.ai/server/services/transport/websocket-server), Local | +| Serializers | [Exotel](https://docs.pipecat.ai/server/utilities/serializers/exotel), [Plivo](https://docs.pipecat.ai/server/utilities/serializers/plivo), [Twilio](https://docs.pipecat.ai/server/utilities/serializers/twilio), [Telnyx](https://docs.pipecat.ai/server/utilities/serializers/telnyx), [Vonage](https://docs.pipecat.ai/server/utilities/serializers/vonage) | | Video | [HeyGen](https://docs.pipecat.ai/server/services/video/heygen), [LemonSlice](https://lemonslice.com/docs/self-managed/overview), [Tavus](https://docs.pipecat.ai/server/services/video/tavus), [Simli](https://docs.pipecat.ai/server/services/video/simli) | -| Memory | [mem0](https://docs.pipecat.ai/server/services/memory/mem0) | -| Vision & Image | 
[fal](https://docs.pipecat.ai/server/services/image-generation/fal), [Google Imagen](https://docs.pipecat.ai/server/services/image-generation/google-imagen), [Moondream](https://docs.pipecat.ai/server/services/vision/moondream) | +| Audio Processing | [Silero VAD](https://docs.pipecat.ai/server/utilities/audio/silero-vad-analyzer), [Krisp](https://docs.pipecat.ai/server/utilities/audio/krisp-filter), [Koala](https://docs.pipecat.ai/server/utilities/audio/koala-filter), [ai-coustics](https://docs.pipecat.ai/server/utilities/audio/aic-filter) | +| Analytics & Metrics | [OpenTelemetry](https://docs.pipecat.ai/server/utilities/opentelemetry), [Sentry](https://docs.pipecat.ai/server/services/analytics/sentry) | 📚 [View full services documentation →](https://docs.pipecat.ai/server/services/supported-services) @@ -163,6 +173,15 @@ You can get started with Pipecat running on your local machine, then move your a > **Note**: Some extras (local, gstreamer) require system dependencies. See documentation if you encounter build errors. +### Claude Code Skills + +Install development workflow skills for contributing to Pipecat with [Claude Code](https://claude.ai/code): + +``` +claude plugin marketplace add pipecat-ai/pipecat +claude plugin install pipecat-dev@pipecat-dev-skills +``` + ### Running tests To run all tests, from the root directory: diff --git a/changelog/3615.fixed.md b/changelog/3615.fixed.md deleted file mode 100644 index b14dfd70f..000000000 --- a/changelog/3615.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed race condition where `RTVIObserver` could send messages before `DailyTransport` join completed. Outbound messages are now queued & delivered after the transport is ready. diff --git a/changelog/3625.added.md b/changelog/3625.added.md deleted file mode 100644 index ddf787567..000000000 --- a/changelog/3625.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `"timestampTransportStrategy": "ASYNC"` to `InworldAITTSService`. 
This allows timestamps info to trail audio chunks arrival, resulting in much better first audio chunk latency diff --git a/changelog/3642.added.md b/changelog/3642.added.md deleted file mode 100644 index 47668bf59..000000000 --- a/changelog/3642.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added model-specific `InputParams` to `RimeTTSService`: arcana params (`repetition_penalty`, `temperature`, `top_p`) and mistv2 params (`no_text_normalization`, `save_oovs`, `segment`). Model, voice, and param changes now trigger WebSocket reconnection. diff --git a/changelog/3642.changed.md b/changelog/3642.changed.md deleted file mode 100644 index 96a43fbb8..000000000 --- a/changelog/3642.changed.md +++ /dev/null @@ -1 +0,0 @@ -- ⚠️ `RimeTTSService` now defaults to `model="arcana"` and the `wss://users-ws.rime.ai/ws3` endpoint. `InputParams` defaults changed from mistv2-specific values to `None` — only explicitly-set params are sent as query params. diff --git a/changelog/3684.changed.md b/changelog/3684.changed.md deleted file mode 100644 index 1bdb2c89c..000000000 --- a/changelog/3684.changed.md +++ /dev/null @@ -1,3 +0,0 @@ -- `AICFilter` now shares read-only AIC models via a singleton `AICModelManager` in `aic_filter.py`. - - Multiple filters using the same model path or `(model_id, model_download_dir)` share one loaded model, with reference counting and concurrent load deduplication. - - Model file I/O runs off the event loop so the filter does not block. diff --git a/changelog/3696.added.md b/changelog/3696.added.md new file mode 100644 index 000000000..39726d930 --- /dev/null +++ b/changelog/3696.added.md @@ -0,0 +1 @@ +- Added `TextAggregationMetricsData` metric measuring the time from the first LLM token to the first complete sentence, representing the latency cost of sentence aggregation in the TTS pipeline. 
diff --git a/changelog/3696.changed.md b/changelog/3696.changed.md new file mode 100644 index 000000000..a495560ba --- /dev/null +++ b/changelog/3696.changed.md @@ -0,0 +1 @@ +- Added `text_aggregation_mode` parameter to `TTSService` and all TTS subclasses with a new `TextAggregationMode` enum (`SENTENCE`, `TOKEN`). All text now flows through text aggregators regardless of mode, enabling pattern detection and tag handling in TOKEN mode. diff --git a/changelog/3696.deprecated.md b/changelog/3696.deprecated.md new file mode 100644 index 000000000..7b371fc21 --- /dev/null +++ b/changelog/3696.deprecated.md @@ -0,0 +1 @@ +- ⚠️ Deprecated `aggregate_sentences` parameter on `TTSService` and all TTS subclasses. Use `text_aggregation_mode=TextAggregationMode.SENTENCE` or `text_aggregation_mode=TextAggregationMode.TOKEN` instead. diff --git a/changelog/3698.fixed.md b/changelog/3698.fixed.md deleted file mode 100644 index c040e9efb..000000000 --- a/changelog/3698.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed async generator cleanup in OpenAI LLM streaming to prevent `AttributeError` with uvloop on Python 3.12+ (MagicStack/uvloop#699). diff --git a/changelog/3706.changed.md b/changelog/3706.changed.md deleted file mode 100644 index 0c9876bdc..000000000 --- a/changelog/3706.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Added `X-User-Agent` and `X-Request-Id` headers to `InworldTTSService` for better traceability. diff --git a/changelog/3713.fixed.md b/changelog/3713.fixed.md deleted file mode 100644 index 241f0e56a..000000000 --- a/changelog/3713.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `SmallWebRTCTransport` input audio resampling to properly handle all sample rates, including 8kHz audio. diff --git a/changelog/3714.added.md b/changelog/3714.added.md new file mode 100644 index 000000000..efa54b7d5 --- /dev/null +++ b/changelog/3714.added.md @@ -0,0 +1,19 @@ +- Added support for using strongly-typed objects instead of dicts for updating service settings at runtime. 
+ + Instead of, say: + + ```python + await task.queue_frame( + STTUpdateSettingsFrame(settings={"language": Language.ES}) + ) + ``` + + you'd do: + + ```python + await task.queue_frame( + STTUpdateSettingsFrame(delta=DeepgramSTTSettings(language=Language.ES)) + ) + ``` + + Each service now vends strongly-typed classes like `DeepgramSTTSettings` representing the service's runtime-updatable settings. diff --git a/changelog/3714.changed.md b/changelog/3714.changed.md new file mode 100644 index 000000000..bcfb5cbf7 --- /dev/null +++ b/changelog/3714.changed.md @@ -0,0 +1 @@ +- ⚠️ Refactored runtime-updatable service settings to use strongly-typed classes (`TTSSettings`, `STTSettings`, `LLMSettings`, and service-specific subclasses) instead of plain dicts. Each service's `_settings` now holds these strongly-typed objects. For service maintainers, see changes in COMMUNITY_INTEGRATIONS.md. diff --git a/changelog/3714.deprecated.2.md b/changelog/3714.deprecated.2.md new file mode 100644 index 000000000..d386fa5a4 --- /dev/null +++ b/changelog/3714.deprecated.2.md @@ -0,0 +1 @@ +- Dict-based `*UpdateSettingsFrame(settings={...})` is deprecated in favor of passing typed settings delta objects with `*UpdateSettingsFrame(delta=...)`. diff --git a/changelog/3714.deprecated.md b/changelog/3714.deprecated.md new file mode 100644 index 000000000..75337a642 --- /dev/null +++ b/changelog/3714.deprecated.md @@ -0,0 +1,3 @@ +- Deprecated `set_model()`, `set_voice()`, and `set_language()` on AI services in favor of runtime updates via `TTSUpdateSettingsFrame`, `STTUpdateSettingsFrame`, and `LLMUpdateSettingsFrame`. + + ⚠️ Note, too, a subtle behavior change in these deprecated methods. Whereas previously only `set_language()` caused the service to actually react to the update (e.g. by reconnecting to a remote service so it can pick up the change), now all these methods do. This change was made as part of a refactor making them all work the same way under the hood. 
diff --git a/changelog/3718.fixed.md b/changelog/3718.fixed.md deleted file mode 100644 index 68e1d2682..000000000 --- a/changelog/3718.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed a race condition in `RTVIObserver` where bot output messages could be sent before the bot-started-speaking event. diff --git a/changelog/3719.added.2.md b/changelog/3719.added.2.md deleted file mode 100644 index 77d8956d7..000000000 --- a/changelog/3719.added.2.md +++ /dev/null @@ -1 +0,0 @@ -- Added `write_transport_frame()` hook to `BaseOutputTransport` allowing transport subclasses to handle custom frame types that flow through the audio queue. diff --git a/changelog/3719.added.md b/changelog/3719.added.md deleted file mode 100644 index bc1c2d6b1..000000000 --- a/changelog/3719.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `DailySIPTransferFrame` and `DailySIPReferFrame` to the Daily transport. These frames queue SIP transfer and SIP REFER operations with audio, so the operation executes only after the bot finishes its current utterance. diff --git a/changelog/3719.changed.md b/changelog/3719.changed.md deleted file mode 100644 index f42d0303b..000000000 --- a/changelog/3719.changed.md +++ /dev/null @@ -1 +0,0 @@ -- `DailyUpdateRemoteParticipantsFrame` is no longer deprecated and is now queued with audio like other transport frames. diff --git a/changelog/3720.fixed.md b/changelog/3720.fixed.md deleted file mode 100644 index c3cb69d34..000000000 --- a/changelog/3720.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed Grok Realtime `session.updated` event parsing failure caused by the API returning prefixed voice names (e.g. `"human_Ara"` instead of `"Ara"`). diff --git a/changelog/3728.changed.md b/changelog/3728.changed.md deleted file mode 100644 index bc5ccc74d..000000000 --- a/changelog/3728.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Bumped Pillow dependency upper bound from `<12` to `<13` to allow Pillow 12.x. 
diff --git a/changelog/3729.fixed.2.md b/changelog/3729.fixed.2.md deleted file mode 100644 index 6d4f33d93..000000000 --- a/changelog/3729.fixed.2.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed context ID reuse issue in `ElevenLabsTTSService`, `InworldTTSService`, `RimeTTSService`, `CartesiaTTSService`, `AsyncAITTSService`, and `PlayHTTTSService`. Services now properly reuse the same context ID across multiple `run_tts()` invocations within a single LLM turn, preventing context tracking issues and incorrect lifecycle signaling. diff --git a/changelog/3729.fixed.md b/changelog/3729.fixed.md deleted file mode 100644 index b8be759fb..000000000 --- a/changelog/3729.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed word timestamp interleaving issue in `ElevenLabsTTSService` when processing multiple sentences within a single LLM turn. diff --git a/changelog/3730.added.md b/changelog/3730.added.md deleted file mode 100644 index e3ac64278..000000000 --- a/changelog/3730.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added keepalive support to `SarvamSTTService` to prevent idle connection timeouts (e.g. when used behind a `ServiceSwitcher`). diff --git a/changelog/3730.changed.md b/changelog/3730.changed.md deleted file mode 100644 index 697bc863c..000000000 --- a/changelog/3730.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Moved STT keepalive mechanism from `WebsocketSTTService` to the `STTService` base class, allowing any STT service (not just websocket-based ones) to use idle-connection keepalive via the `keepalive_timeout` and `keepalive_interval` parameters. diff --git a/changelog/3732.changed.md b/changelog/3732.changed.md deleted file mode 100644 index 22681cf04..000000000 --- a/changelog/3732.changed.md +++ /dev/null @@ -1,3 +0,0 @@ -- Improved audio context management in `AudioContextTTSService` by moving context ID tracking to the base class and adding `reuse_context_id_within_turn` parameter to control concurrent TTS request handling. 
- - Added helper methods: `has_active_audio_context()`, `get_active_audio_context_id()`, `remove_active_audio_context()`, `reset_active_audio_context()` - - Simplified Cartesia, ElevenLabs, Inworld, Rime, AsyncAI, and Gradium TTS implementations by removing duplicate context management code diff --git a/changelog/3733.deprecated.md b/changelog/3733.deprecated.md deleted file mode 100644 index 8b1fb29bb..000000000 --- a/changelog/3733.deprecated.md +++ /dev/null @@ -1 +0,0 @@ -- Deprecated unused `Traceable`, `@traceable`, `@traced`, and `AttachmentStrategy` in `pipecat.utils.tracing.class_decorators`. This module will be removed in a future release. diff --git a/changelog/3735.fixed.md b/changelog/3735.fixed.md deleted file mode 100644 index 02de936c7..000000000 --- a/changelog/3735.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed tracing service decorators executing the wrapped function twice when the function itself raised an exception (e.g., LLM rate limit, TTS timeout). diff --git a/changelog/3737.fixed.md b/changelog/3737.fixed.md deleted file mode 100644 index 6dee96f82..000000000 --- a/changelog/3737.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `LLMUserAggregator` broadcasting mute events before `StartFrame` reaches downstream processors. diff --git a/changelog/3744.fixed.md b/changelog/3744.fixed.md deleted file mode 100644 index d2b3f665f..000000000 --- a/changelog/3744.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `UserIdleController` false idle triggers caused by gaps between user and bot activity frames. The idle timer now starts only after `BotStoppedSpeakingFrame` and is suppressed during active user turns and function calls. diff --git a/changelog/3748.added.md b/changelog/3748.added.md deleted file mode 100644 index 223f8bf4b..000000000 --- a/changelog/3748.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `UserIdleTimeoutUpdateFrame` to enable or disable user idle detection at runtime by updating the timeout dynamically. 
diff --git a/changelog/3748.changed.md b/changelog/3748.changed.md deleted file mode 100644 index 61be61c6b..000000000 --- a/changelog/3748.changed.md +++ /dev/null @@ -1 +0,0 @@ -- `UserIdleController` is now always created with a default timeout of 0 (disabled). The `user_idle_timeout` parameter changed from `Optional[float] = None` to `float = 0` in `UserTurnProcessor`, `LLMUserAggregatorParams`, and `UserIdleController`. diff --git a/changelog/3761.changed.md b/changelog/3761.changed.md deleted file mode 100644 index 71618502c..000000000 --- a/changelog/3761.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Change the version specifier from `>=0.2.8` to `~=0.2.8` for the `speechmatics-voice` package to ensure compatibility with future patch versions. diff --git a/changelog/3764.added.md b/changelog/3764.added.md new file mode 100644 index 000000000..5da82f0c1 --- /dev/null +++ b/changelog/3764.added.md @@ -0,0 +1 @@ +- Added support for specifying private endpoints for Azure Speech-to-Text, enabling use in private networks behind firewalls. \ No newline at end of file diff --git a/changelog/3765.changed.md b/changelog/3765.changed.md deleted file mode 100644 index 5d3e758d5..000000000 --- a/changelog/3765.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Updated `InworldTTSService` and `InworldHttpTTSService` to use `ASYNC` timestamp transport strategy by default diff --git a/changelog/3768.fixed.md b/changelog/3768.fixed.md deleted file mode 100644 index 4c8d6438e..000000000 --- a/changelog/3768.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed incorrect `sample_rate` assignment in `TavusInputTransport._on_participant_audio_data` (was using `audio.audio_frames` instead of `audio.sample_rate`). diff --git a/changelog/3774.added.md b/changelog/3774.added.md deleted file mode 100644 index e72599e60..000000000 --- a/changelog/3774.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `broadcast_sibling_id` field to the base `Frame` class. 
This field is automatically set by `broadcast_frame()` and `broadcast_frame_instance()` to the ID of the paired frame pushed in the opposite direction, allowing receivers to identify broadcast pairs. diff --git a/changelog/3774.fixed.md b/changelog/3774.fixed.md deleted file mode 100644 index a839f56ed..000000000 --- a/changelog/3774.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `RTVIObserver` not processing upstream-only frames. Previously, all upstream frames were filtered out to avoid duplicate messages from broadcasted frames. Now only upstream copies of broadcasted frames are skipped. diff --git a/changelog/3776.changed.md b/changelog/3776.changed.md deleted file mode 100644 index 87b5d6128..000000000 --- a/changelog/3776.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Added `start_time` and `end_time` parameters to `start_ttfb_metrics()`, `stop_ttfb_metrics()`, `start_processing_metrics()`, and `stop_processing_metrics()` in `FrameProcessor` and `FrameProcessorMetrics`, allowing custom timestamps for metrics measurement. `STTService` now uses these instead of custom TTFB tracking. diff --git a/changelog/3779.added.md b/changelog/3779.added.md deleted file mode 100644 index 8800cfc04..000000000 --- a/changelog/3779.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `ignored_sources` parameter to `RTVIObserverParams` and `add_ignored_source()`/`remove_ignored_source()` methods to `RTVIObserver` to suppress RTVI messages from specific pipeline processors (e.g. a silent evaluation LLM). diff --git a/changelog/3782.fixed.md b/changelog/3782.fixed.md deleted file mode 100644 index 7d21fdeab..000000000 --- a/changelog/3782.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed mutable default arguments in `LLMContextAggregatorPair.__init__()` that could cause shared state across instances. 
diff --git a/changelog/3784.fixed.md b/changelog/3784.fixed.md deleted file mode 100644 index e88431f16..000000000 --- a/changelog/3784.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `DeepgramSageMakerSTTService` to properly track finalize lifecycle using `request_finalize()` / `confirm_finalize()` and use `is_final` (instead of `is_final and speech_final`) for final transcription detection, matching `DeepgramSTTService` behavior. diff --git a/changelog/3785.added.md b/changelog/3785.added.md deleted file mode 100644 index 90a4172d4..000000000 --- a/changelog/3785.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `DeepgramSageMakerTTSService` for running Deepgram TTS models deployed on AWS SageMaker endpoints via HTTP/2 bidirectional streaming. Supports the Deepgram TTS protocol (Speak, Flush, Clear, Close), interruption handling, and per-turn TTFB metrics. diff --git a/changelog/3786.changed.md b/changelog/3786.changed.md new file mode 100644 index 000000000..ed8e7e444 --- /dev/null +++ b/changelog/3786.changed.md @@ -0,0 +1 @@ +- Word timestamp support has been moved from `WordTTSService` into `TTSService` via a new `supports_word_timestamps` parameter. Services that previously extended `WordTTSService`, `AudioContextWordTTSService`, or `WebsocketWordTTSService` now pass `supports_word_timestamps=True` to their parent `__init__` instead. diff --git a/changelog/3786.deprecated.md b/changelog/3786.deprecated.md new file mode 100644 index 000000000..7ac5a5b9c --- /dev/null +++ b/changelog/3786.deprecated.md @@ -0,0 +1,5 @@ +- Deprecated `WordTTSService`, `WebsocketWordTTSService`, `AudioContextWordTTSService`, and `InterruptibleWordTTSService`. 
Use their non-word counterparts with `supports_word_timestamps=True` instead: + - `WordTTSService` → `TTSService(supports_word_timestamps=True)` + - `WebsocketWordTTSService` → `WebsocketTTSService(supports_word_timestamps=True)` + - `AudioContextWordTTSService` → `AudioContextTTSService(supports_word_timestamps=True)` + - `InterruptibleWordTTSService` → `InterruptibleTTSService(supports_word_timestamps=True)` diff --git a/changelog/3787.fixed.md b/changelog/3787.fixed.md deleted file mode 100644 index ff11ada71..000000000 --- a/changelog/3787.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed a race condition in `AudioContextTTSService` where the audio context could time out between consecutive TTS requests within the same turn, causing audio to be discarded. diff --git a/changelog/3789.fixed.md b/changelog/3789.fixed.md deleted file mode 100644 index 1bf2be1a3..000000000 --- a/changelog/3789.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed `push_interruption_task_frame_and_wait()` hanging indefinitely when the `InterruptionFrame` does not reach the pipeline sink within the timeout. Added a `timeout` keyword argument to customize the wait duration. diff --git a/changelog/3794.fixed.md b/changelog/3794.fixed.md new file mode 100644 index 000000000..e2b3c7c00 --- /dev/null +++ b/changelog/3794.fixed.md @@ -0,0 +1 @@ +- Added `LLMSpecificMessage` handling in `LLMContextSummarizationUtil` to skip provider-specific messages during context summarization. diff --git a/changelog/3795.fixed.md b/changelog/3795.fixed.md new file mode 100644 index 000000000..8c231abac --- /dev/null +++ b/changelog/3795.fixed.md @@ -0,0 +1 @@ +- Treated `response_cancel_not_active` as a non-fatal error in realtime services (`OpenAIRealtimeLLMService`, `GrokRealtimeLLMService`, `OpenAIRealtimeBetaLLMService`) to prevent WebSocket disconnection when cancelling an inactive response. 
\ No newline at end of file diff --git a/changelog/3803.fixed.md b/changelog/3803.fixed.md new file mode 100644 index 000000000..73d7c3f19 --- /dev/null +++ b/changelog/3803.fixed.md @@ -0,0 +1 @@ +- Fixed Poetry compatibility by inlining `local-smart-turn-v3` dependencies (`transformers`, `onnxruntime`) into core dependencies instead of using a self-referential extra. diff --git a/changelog/3803.removed.md b/changelog/3803.removed.md new file mode 100644 index 000000000..867c3cfcc --- /dev/null +++ b/changelog/3803.removed.md @@ -0,0 +1 @@ +- Removed `local-smart-turn-v3` optional extra from `pyproject.toml`. The `transformers` and `onnxruntime` packages are now always installed as core dependencies since they are required by the default turn stop strategy, `TurnAnalyzerUserTurnStopStrategy` which uses `LocalSmartTurnAnalyzerV3`. diff --git a/changelog/3806.added.md b/changelog/3806.added.md new file mode 100644 index 000000000..eeddc9825 --- /dev/null +++ b/changelog/3806.added.md @@ -0,0 +1 @@ +- Added `output_medium` parameter to `AgentInputParams` and `OneShotInputParams` in Ultravox service to control initial output medium (text or voice) at call creation time. diff --git a/changelog/3806.changed.2.md b/changelog/3806.changed.2.md new file mode 100644 index 000000000..9d6dfdf76 --- /dev/null +++ b/changelog/3806.changed.2.md @@ -0,0 +1 @@ +- Improved Ultravox TTFB measurement accuracy by using VAD speech end time instead of `UserStoppedSpeakingFrame` timing. diff --git a/changelog/3806.changed.md b/changelog/3806.changed.md new file mode 100644 index 000000000..c8e2fb68c --- /dev/null +++ b/changelog/3806.changed.md @@ -0,0 +1 @@ +- Aligned `UltravoxRealtimeLLMService` frame handling with OpenAI/Gemini realtime services: added `InterruptionFrame` handling with metrics cleanup, processing metrics at response boundaries, and improved agent transcript handling for both voice and text output modalities. 
diff --git a/changelog/3807.changed.md b/changelog/3807.changed.md new file mode 100644 index 000000000..cc99f29fb --- /dev/null +++ b/changelog/3807.changed.md @@ -0,0 +1 @@ +- Updated `OpenAIRealtimeLLMService` default model to `gpt-realtime-1.5`. \ No newline at end of file diff --git a/changelog/3808.fixed.md b/changelog/3808.fixed.md new file mode 100644 index 000000000..6bf105bf6 --- /dev/null +++ b/changelog/3808.fixed.md @@ -0,0 +1 @@ +- Fixed `SentryMetrics` method signatures to match updated `FrameProcessorMetrics` base class, resolving `TypeError` when using `start_time`/`end_time` keyword arguments. diff --git a/changelog/3809.added.md b/changelog/3809.added.md new file mode 100644 index 000000000..99047dc76 --- /dev/null +++ b/changelog/3809.added.md @@ -0,0 +1 @@ +- Added `TurnMetricsData` as a generic metrics class for turn detection, with e2e processing time measurement. `KrispVivaTurn` now emits `TurnMetricsData` with `e2e_processing_time_ms` tracking the interval from VAD speech-to-silence transition to turn completion. diff --git a/changelog/3809.changed.md b/changelog/3809.changed.md new file mode 100644 index 000000000..479eaf6ed --- /dev/null +++ b/changelog/3809.changed.md @@ -0,0 +1 @@ +- Added `api_key` parameter to `KrispVivaSDKManager`, `KrispVivaTurn`, and `KrispVivaFilter` for Krisp SDK v1.6.1+ licensing. Falls back to `KRISP_VIVA_API_KEY` environment variable. diff --git a/changelog/3809.deprecated.md b/changelog/3809.deprecated.md new file mode 100644 index 000000000..f1498ec0b --- /dev/null +++ b/changelog/3809.deprecated.md @@ -0,0 +1 @@ +- Deprecated `SmartTurnMetricsData` in favor of `TurnMetricsData`. `BaseSmartTurn` now emits `TurnMetricsData` directly. diff --git a/changelog/3811.changed.md b/changelog/3811.changed.md new file mode 100644 index 000000000..eb3eb492e --- /dev/null +++ b/changelog/3811.changed.md @@ -0,0 +1 @@ +- Bumped `nltk` minimum version from 3.9.1 to 3.9.3 to resolve a security vulnerability. 
diff --git a/changelog/3813.fixed.md b/changelog/3813.fixed.md new file mode 100644 index 000000000..9d9115e77 --- /dev/null +++ b/changelog/3813.fixed.md @@ -0,0 +1 @@ +- Fixed STT TTFB metrics not being reported for `SonioxSTTService` and `AWSTranscribeSTTService` due to missing `can_generate_metrics()` override. diff --git a/changelog/3814.added.md b/changelog/3814.added.md new file mode 100644 index 000000000..b6b2ebbf8 --- /dev/null +++ b/changelog/3814.added.md @@ -0,0 +1 @@ +- Added `on_audio_context_interrupted()` and `on_audio_context_completed()` callbacks to `AudioContextTTSService`. Subclasses can override these to perform provider-specific cleanup instead of overriding `_handle_interruption()`. diff --git a/changelog/3814.fixed.md b/changelog/3814.fixed.md new file mode 100644 index 000000000..ecd4871f6 --- /dev/null +++ b/changelog/3814.fixed.md @@ -0,0 +1 @@ +- Fixed an issue where `AudioContextTTSService`-based providers (AsyncAI, ElevenLabs, Inworld, Rime) did not close or clean up their server-side audio contexts after normal speech completion, only on interruption. diff --git a/changelog/3819.changed.md b/changelog/3819.changed.md new file mode 100644 index 000000000..7b43c399c --- /dev/null +++ b/changelog/3819.changed.md @@ -0,0 +1,4 @@ +- `ServiceSettingsUpdateFrame`s are now `UninterruptibleFrame`s. Generally speaking, you don't want a user interruption to prevent a service setting change from going into effect. Note that you usually don't use `ServiceSettingsUpdateFrame` directly, you use one of its subclasses: + - `LLMUpdateSettingsFrame` + - `TTSUpdateSettingsFrame` + - `STTUpdateSettingsFrame` diff --git a/changelog/3822.fixed.md b/changelog/3822.fixed.md new file mode 100644 index 000000000..48218845f --- /dev/null +++ b/changelog/3822.fixed.md @@ -0,0 +1 @@ +- Fixed STT TTFB metrics measuring timeout expiry time instead of actual transcript arrival time. 
\ No newline at end of file diff --git a/changelog/3825.fixed.md b/changelog/3825.fixed.md new file mode 100644 index 000000000..7cd9ba508 --- /dev/null +++ b/changelog/3825.fixed.md @@ -0,0 +1 @@ +- Fixed `InterimTranscriptionFrame` and `TranslationFrame` being unintentionally pushed downstream in `LLMUserAggregator`. They are now consumed like `TranscriptionFrame`. diff --git a/changelog/3828.fixed.md b/changelog/3828.fixed.md new file mode 100644 index 000000000..dd2ee257d --- /dev/null +++ b/changelog/3828.fixed.md @@ -0,0 +1 @@ +- Fixed misleading "Empty audio frame received for STT service" warnings when using audio filters (e.g. `RNNoiseFilter`, `KrispVivaFilter`, `AICFilter`) that buffer audio internally. diff --git a/changelog/3837.fixed.md b/changelog/3837.fixed.md new file mode 100644 index 000000000..767e79f45 --- /dev/null +++ b/changelog/3837.fixed.md @@ -0,0 +1 @@ +- Fixed issues with `RimeNonJsonTTSService` where trailing punctuation is sometimes vocalized diff --git a/changelog/3838.removed.md b/changelog/3838.removed.md new file mode 100644 index 000000000..fa811cb71 --- /dev/null +++ b/changelog/3838.removed.md @@ -0,0 +1 @@ +- ⚠️ Removed `PlayHTTTSService` and `PlayHTHttpTTSService`. PlayHT has been shut down and is no longer available. diff --git a/changelog/3845.fixed.md b/changelog/3845.fixed.md new file mode 100644 index 000000000..423853700 --- /dev/null +++ b/changelog/3845.fixed.md @@ -0,0 +1 @@ +- Fixed `TTSSpeakFrame` not committing spoken text to the conversation context when used outside of an LLM response (e.g., bot greetings or injected speech). \ No newline at end of file diff --git a/changelog/3850.fixed.md b/changelog/3850.fixed.md new file mode 100644 index 000000000..cfbdc6cf7 --- /dev/null +++ b/changelog/3850.fixed.md @@ -0,0 +1 @@ +- Removed verbose per-chunk audio logging from `GenesysAudioHookSerializer` that flooded production logs. 
diff --git a/changelog/3855.added.2.md b/changelog/3855.added.2.md new file mode 100644 index 000000000..01cd23efe --- /dev/null +++ b/changelog/3855.added.2.md @@ -0,0 +1 @@ +- Added optional `llm` field to `LLMContextSummarizationConfig` for routing summarization to a dedicated LLM service (e.g., a cheaper/faster model) instead of the pipeline's primary model. diff --git a/changelog/3855.added.3.md b/changelog/3855.added.3.md new file mode 100644 index 000000000..b93fdec60 --- /dev/null +++ b/changelog/3855.added.3.md @@ -0,0 +1 @@ +- Added `summarization_timeout` to `LLMContextSummarizationConfig` (default 120s) to prevent hung LLM calls from permanently blocking future summarizations. diff --git a/changelog/3855.added.4.md b/changelog/3855.added.4.md new file mode 100644 index 000000000..b712b4ac9 --- /dev/null +++ b/changelog/3855.added.4.md @@ -0,0 +1 @@ +- Added `on_summary_applied` event to `LLMContextSummarizer` for observability, providing message counts before and after context summarization. diff --git a/changelog/3855.added.md b/changelog/3855.added.md new file mode 100644 index 000000000..79d37eeba --- /dev/null +++ b/changelog/3855.added.md @@ -0,0 +1 @@ +- Added `summary_message_template` to `LLMContextSummarizationConfig` for customizing how summaries are formatted when injected into context (e.g., wrapping in XML tags). diff --git a/changelog/3855.changed.md b/changelog/3855.changed.md new file mode 100644 index 000000000..2eac6785a --- /dev/null +++ b/changelog/3855.changed.md @@ -0,0 +1 @@ +- Updated context summarization to use `user` role instead of `assistant` for summary messages. diff --git a/changelog/3857.fixed.md b/changelog/3857.fixed.md new file mode 100644 index 000000000..869c54111 --- /dev/null +++ b/changelog/3857.fixed.md @@ -0,0 +1 @@ +- Fixed `LocalSmartTurnAnalyzerV3` producing incorrect end-of-turn predictions at non-16kHz sample rates (e.g. 
8kHz Twilio telephony) by adding automatic resampling to 16kHz before Whisper feature extraction. diff --git a/changelog/3863.added.2.md b/changelog/3863.added.2.md new file mode 100644 index 000000000..9c0ab90ba --- /dev/null +++ b/changelog/3863.added.2.md @@ -0,0 +1 @@ +- Added `LLMContextSummaryConfig` (summary generation params: `target_context_tokens`, `min_messages_after_summary`, `summarization_prompt`) and `LLMAutoContextSummarizationConfig` (auto-trigger thresholds: `max_context_tokens`, `max_unsummarized_messages`, plus a nested `summary_config`). These replace the monolithic `LLMContextSummarizationConfig`. diff --git a/changelog/3863.added.md b/changelog/3863.added.md new file mode 100644 index 000000000..d6214aed0 --- /dev/null +++ b/changelog/3863.added.md @@ -0,0 +1 @@ +- Added `LLMSummarizeContextFrame` to trigger on-demand context summarization from anywhere in the pipeline (e.g. a function call tool). Accepts an optional `config: LLMContextSummaryConfig` to override summary generation settings per request. diff --git a/changelog/3863.changed.md b/changelog/3863.changed.md new file mode 100644 index 000000000..faf5712d8 --- /dev/null +++ b/changelog/3863.changed.md @@ -0,0 +1 @@ +- ⚠️ Renamed `LLMAssistantAggregatorParams` fields: `enable_context_summarization` → `enable_auto_context_summarization` and `context_summarization_config` → `auto_context_summarization_config` (now accepts `LLMAutoContextSummarizationConfig`). The old names still work with a `DeprecationWarning` for one release cycle. diff --git a/changelog/3863.deprecated.md b/changelog/3863.deprecated.md new file mode 100644 index 000000000..ba2311fbd --- /dev/null +++ b/changelog/3863.deprecated.md @@ -0,0 +1 @@ +- Deprecated `LLMContextSummarizationConfig`. Use `LLMAutoContextSummarizationConfig` with a nested `LLMContextSummaryConfig` instead. The old class emits a `DeprecationWarning`. 
diff --git a/changelog/3865.changed.md b/changelog/3865.changed.md new file mode 100644 index 000000000..7a70eb0d7 --- /dev/null +++ b/changelog/3865.changed.md @@ -0,0 +1 @@ +- `ElevenLabsRealtimeSTTService` now sets `TranscriptionFrame.finalized` to `True` when using `CommitStrategy.MANUAL`. diff --git a/changelog/3867.fixed.md b/changelog/3867.fixed.md new file mode 100644 index 000000000..41ee584a2 --- /dev/null +++ b/changelog/3867.fixed.md @@ -0,0 +1 @@ +- Fixed `PipelineTask` double-inserting `RTVIProcessor` into the frame chain when the user provides both an `RTVIProcessor` in the pipeline and a custom `RTVIObserver` subclass in observers. diff --git a/changelog/3868.changed.md b/changelog/3868.changed.md new file mode 100644 index 000000000..4f019cca2 --- /dev/null +++ b/changelog/3868.changed.md @@ -0,0 +1 @@ +- Updated `numba` version pin from `==` to `>=0.61.2`. diff --git a/changelog/3873.added.md b/changelog/3873.added.md new file mode 100644 index 000000000..ed01b8e5d --- /dev/null +++ b/changelog/3873.added.md @@ -0,0 +1 @@ +- Added support for the `speed_alpha` parameter to the `arcana` model in `RimeTTSService`. diff --git a/changelog/3879.changed.md b/changelog/3879.changed.md new file mode 100644 index 000000000..2b69f63ce --- /dev/null +++ b/changelog/3879.changed.md @@ -0,0 +1 @@ +- Updated tracing code to use `ServiceSettings` dataclass API (`given_fields()`, attribute access) instead of dict-style access (`.items()`, `in`, subscript). diff --git a/changelog/3881.added.2.md b/changelog/3881.added.2.md new file mode 100644 index 000000000..a5bda94c1 --- /dev/null +++ b/changelog/3881.added.2.md @@ -0,0 +1 @@ +- Added `ClientConnectedFrame`, a new `SystemFrame` pushed by all transports (Daily, LiveKit, FastAPI WebSocket, WebSocket Server, SmallWebRTC, HeyGen, Tavus) when a client connects. Enables observers to track transport readiness timing.
diff --git a/changelog/3881.added.3.md b/changelog/3881.added.3.md new file mode 100644 index 000000000..cad26e876 --- /dev/null +++ b/changelog/3881.added.3.md @@ -0,0 +1 @@ +- Added `BotConnectedFrame` for SFU transports and `on_transport_timing_report` event to `StartupTimingObserver` with bot and client connection timing. diff --git a/changelog/3881.added.md b/changelog/3881.added.md new file mode 100644 index 000000000..cbf6d0293 --- /dev/null +++ b/changelog/3881.added.md @@ -0,0 +1 @@ +- Added `StartupTimingObserver` for measuring how long each processor's `start()` method takes during pipeline startup. Also measures transport readiness — the time from `StartFrame` to first client connection — via the `on_transport_readiness_measured` event. Useful for diagnosing cold start slowness and identifying initialization bottlenecks. diff --git a/changelog/3883.added.md b/changelog/3883.added.md new file mode 100644 index 000000000..84360a891 --- /dev/null +++ b/changelog/3883.added.md @@ -0,0 +1 @@ +- Added optional `direction` parameter to `PipelineTask.queue_frame()` and `PipelineTask.queue_frames()`, allowing frames to be pushed upstream from the end of the pipeline. diff --git a/changelog/3886.other.md b/changelog/3886.other.md new file mode 100644 index 000000000..0e9fdafed --- /dev/null +++ b/changelog/3886.other.md @@ -0,0 +1 @@ +- Standardized Sarvam STT/TTS User-Agent header handling to consistently send Pipecat SDK identity in websocket requests. \ No newline at end of file diff --git a/changelog/3888.fixed.md b/changelog/3888.fixed.md new file mode 100644 index 000000000..99e9ad0e0 --- /dev/null +++ b/changelog/3888.fixed.md @@ -0,0 +1 @@ +- Fixed turn completion instructions being lost when `LLMMessagesUpdateFrame` replaces the LLM context. When `filter_incomplete_user_turns` is enabled, the turn completion system message is now re-injected after context replacement.
diff --git a/changelog/3893.fixed.md b/changelog/3893.fixed.md new file mode 100644 index 000000000..0209571e3 --- /dev/null +++ b/changelog/3893.fixed.md @@ -0,0 +1 @@ +- Fixed Azure TTS and STT services silently swallowing cancellation errors (invalid API key, network failures, rate limiting) instead of propagating them as `ErrorFrame`s to the pipeline. diff --git a/changelog/3896.added.md b/changelog/3896.added.md new file mode 100644 index 000000000..08921c004 --- /dev/null +++ b/changelog/3896.added.md @@ -0,0 +1 @@ +- Added `broadcast_interruption()` to `FrameProcessor`. This method pushes an `InterruptionFrame` both upstream and downstream directly from the calling processor, avoiding the round-trip through the pipeline task that `push_interruption_task_frame_and_wait()` required. diff --git a/changelog/3896.changed.md b/changelog/3896.changed.md new file mode 100644 index 000000000..3b7e4f807 --- /dev/null +++ b/changelog/3896.changed.md @@ -0,0 +1 @@ +- ⚠️ Removed `event` field and `complete()` method from `InterruptionFrame`. Removed `event` field from `InterruptionTaskFrame`. These are no longer needed since `broadcast_interruption()` does not require a round-trip completion signal. diff --git a/changelog/3896.deprecated.md b/changelog/3896.deprecated.md new file mode 100644 index 000000000..421e10e92 --- /dev/null +++ b/changelog/3896.deprecated.md @@ -0,0 +1 @@ +- Deprecated `push_interruption_task_frame_and_wait()` in `FrameProcessor`. Use `broadcast_interruption()` instead. The old method now delegates to `broadcast_interruption()` and logs a deprecation warning. 
diff --git a/docs/api/README.md b/docs/api/README.md index 22b62d45e..e181bc898 100644 --- a/docs/api/README.md +++ b/docs/api/README.md @@ -42,7 +42,7 @@ This script: - Creates a fresh virtual environment - Installs all dependencies as specified in requirements files -- Handles conflicting dependencies (like grpcio versions for Riva and PlayHT) +- Handles conflicting dependencies (like grpcio versions for Riva) - Builds the documentation in an isolated environment - Provides detailed logging of the build process @@ -74,7 +74,6 @@ start _build/html/index.html ├── index.rst # Main documentation entry point ├── requirements-base.txt # Base documentation dependencies ├── requirements-riva.txt # Riva-specific dependencies -├── requirements-playht.txt # PlayHT-specific dependencies ├── build-docs.sh # Local build script └── rtd-test.py # ReadTheDocs test build script ``` diff --git a/env.example b/env.example index da52b84dc..81f3f895d 100644 --- a/env.example +++ b/env.example @@ -104,6 +104,7 @@ INWORLD_API_KEY=... KRISP_MODEL_PATH=... # Krisp Viva +KRISP_VIVA_API_KEY=... KRISP_VIVA_FILTER_MODEL_PATH=... KRISP_VIVA_TURN_MODEL_PATH=... @@ -150,10 +151,6 @@ KOALA_ACCESS_KEY=... # Piper PIPER_BASE_URL=... -# PlayHT -PLAYHT_USER_ID=... -PLAYHT_API_KEY=... - # Plivo PLIVO_AUTH_ID=... PLIVO_AUTH_TOKEN=... 
diff --git a/examples/foundational/07-interruptible.py b/examples/foundational/07-interruptible.py index c5964506a..074e091ea 100644 --- a/examples/foundational/07-interruptible.py +++ b/examples/foundational/07-interruptible.py @@ -24,6 +24,7 @@ from pipecat.runner.utils import create_transport from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.tts_service import TextAggregationMode from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -56,6 +57,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): tts = CartesiaTTSService( api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + # Alternatively, you can use TextAggregationMode.TOKEN to stream tokens instead of + # sentences for faster response times.
+ # text_aggregation_mode=TextAggregationMode.TOKEN, ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) diff --git a/examples/foundational/07c-interruptible-deepgram-flux.py b/examples/foundational/07c-interruptible-deepgram-flux.py index e51a30c1b..d2bcceaf7 100644 --- a/examples/foundational/07c-interruptible-deepgram-flux.py +++ b/examples/foundational/07c-interruptible-deepgram-flux.py @@ -10,6 +10,7 @@ import os from dotenv import load_dotenv from loguru import logger +from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -72,7 +73,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): context = LLMContext(messages) user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, - user_params=LLMUserAggregatorParams(user_turn_strategies=ExternalUserTurnStrategies()), + user_params=LLMUserAggregatorParams( + user_turn_strategies=ExternalUserTurnStrategies(), + vad_analyzer=SileroVADAnalyzer(), + ), ) pipeline = Pipeline( diff --git a/examples/foundational/07g-interruptible-openai-http.py b/examples/foundational/07g-interruptible-openai-http.py index 325fd4ae4..65b2f8b9b 100644 --- a/examples/foundational/07g-interruptible-openai-http.py +++ b/examples/foundational/07g-interruptible-openai-http.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner diff --git a/examples/foundational/07p-interruptible-krisp-viva.py b/examples/foundational/07p-interruptible-krisp-viva.py index 259f02aa5..24929a825 100644 --- a/examples/foundational/07p-interruptible-krisp-viva.py +++ 
b/examples/foundational/07p-interruptible-krisp-viva.py @@ -31,6 +31,8 @@ from pipecat.audio.filters.krisp_viva_filter import KrispVivaFilter from pipecat.audio.turn.krisp_viva_turn import KrispVivaTurn from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame +from pipecat.metrics.metrics import TurnMetricsData +from pipecat.observers.loggers.metrics_log_observer import MetricsLogObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -41,32 +43,37 @@ from pipecat.processors.aggregators.llm_response_universal import ( ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService from pipecat.services.deepgram.stt import DeepgramSTTService -from pipecat.services.deepgram.tts import DeepgramTTSService from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies load_dotenv(override=True) # We use lambdas to defer transport parameter creation until the transport # type is selected at runtime. 
+ +krisp_viva_filter = KrispVivaFilter() + transport_params = { "daily": lambda: DailyParams( audio_in_enabled=True, audio_out_enabled=True, - audio_in_filter=KrispVivaFilter(), + audio_in_filter=krisp_viva_filter, ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, - audio_in_filter=KrispVivaFilter(), + audio_in_filter=krisp_viva_filter, ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, - audio_in_filter=KrispVivaFilter(), + audio_in_filter=krisp_viva_filter, ), } @@ -76,7 +83,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY"), voice="aura-helios-en") + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), voice_id="71a7ad14-091c-4e8e-a314-022ece01c121" + ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) @@ -117,6 +126,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): enable_usage_metrics=True, ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + observers=[MetricsLogObserver(include_metrics={TurnMetricsData})], ) @transport.event_handler("on_client_connected") diff --git a/examples/foundational/07r-interruptible-nvidia.py b/examples/foundational/07r-interruptible-nvidia.py index 18e0b5d5f..d3e34c61f 100644 --- a/examples/foundational/07r-interruptible-nvidia.py +++ b/examples/foundational/07r-interruptible-nvidia.py @@ -55,7 +55,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = NvidiaSTTService(api_key=os.getenv("NVIDIA_API_KEY")) llm = NvidiaLLMService( - api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct" + api_key=os.getenv("NVIDIA_API_KEY"), + model="meta/llama-3.3-70b-instruct", ) tts = NvidiaTTSService(api_key=os.getenv("NVIDIA_API_KEY")) diff --git a/examples/foundational/14a-function-calling-anthropic.py 
b/examples/foundational/14a-function-calling-anthropic.py index 165d4b220..36030bc2b 100644 --- a/examples/foundational/14a-function-calling-anthropic.py +++ b/examples/foundational/14a-function-calling-anthropic.py @@ -72,10 +72,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) - llm = AnthropicLLMService( - api_key=os.getenv("ANTHROPIC_API_KEY"), - model="claude-3-7-sonnet-latest", - ) + llm = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY")) llm.register_function("get_weather", get_weather) llm.register_function("get_restaurant_recommendation", fetch_restaurant_recommendation) diff --git a/examples/foundational/14n-function-calling-perplexity.py b/examples/foundational/14n-function-calling-perplexity.py index 40041aa34..2f1a18d52 100644 --- a/examples/foundational/14n-function-calling-perplexity.py +++ b/examples/foundational/14n-function-calling-perplexity.py @@ -65,7 +65,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) - llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY"), model="sonar") + llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY")) messages = [ { diff --git a/examples/foundational/14o-function-calling-gemini-openai-format.py b/examples/foundational/14o-function-calling-gemini-openai-format.py index c87c5278e..c3772eb2c 100644 --- a/examples/foundational/14o-function-calling-gemini-openai-format.py +++ b/examples/foundational/14o-function-calling-gemini-openai-format.py @@ -12,12 +12,15 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer from 
pipecat.frames.frames import LLMRunFrame, TTSSpeakFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport @@ -42,20 +45,14 @@ transport_params = { "daily": lambda: DailyParams( audio_in_enabled=True, audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - turn_analyzer=LocalSmartTurnAnalyzerV3(), ), "twilio": lambda: FastAPIWebsocketParams( audio_in_enabled=True, audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - turn_analyzer=LocalSmartTurnAnalyzerV3(), ), "webrtc": lambda: TransportParams( audio_in_enabled=True, audio_out_enabled=True, - vad_analyzer=SileroVADAnalyzer(), - turn_analyzer=LocalSmartTurnAnalyzerV3(), ), } @@ -104,17 +101,20 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] context = OpenAILLMContext(messages, tools) - context_aggregator = llm.create_context_aggregator(context) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) pipeline = Pipeline( [ transport.input(), stt, - context_aggregator.user(), + user_aggregator, llm, tts, transport.output(), - context_aggregator.assistant(), + assistant_aggregator, ] ) diff --git a/examples/foundational/14s-function-calling-sambanova.py b/examples/foundational/14s-function-calling-sambanova.py index 79c43a473..76eb390c0 100644 --- a/examples/foundational/14s-function-calling-sambanova.py +++ b/examples/foundational/14s-function-calling-sambanova.py @@ -70,10 +70,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): 
voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) - llm = SambaNovaLLMService( - api_key=os.getenv("SAMBANOVA_API_KEY"), - model="Llama-4-Maverick-17B-128E-Instruct", - ) + llm = SambaNovaLLMService(api_key=os.getenv("SAMBANOVA_API_KEY")) # You can also register a function_name of None to get all functions # sent to the same callback with an additional function_name parameter. llm.register_function("get_current_weather", fetch_weather_from_api) diff --git a/examples/foundational/29-turn-tracking-observer.py b/examples/foundational/29-turn-tracking-observer.py index 321197db2..4af28f1ed 100644 --- a/examples/foundational/29-turn-tracking-observer.py +++ b/examples/foundational/29-turn-tracking-observer.py @@ -12,6 +12,7 @@ from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame +from pipecat.observers.startup_timing_observer import StartupTimingObserver from pipecat.observers.user_bot_latency_observer import UserBotLatencyObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner @@ -87,8 +88,8 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): ] ) - # Create latency tracking observer latency_observer = UserBotLatencyObserver() + startup_observer = StartupTimingObserver() task = PipelineTask( pipeline, @@ -97,14 +98,25 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): enable_usage_metrics=True, ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, - observers=[latency_observer], + observers=[latency_observer, startup_observer], ) - # Log latency measurements using the event handler @latency_observer.event_handler("on_latency_measured") async def on_latency_measured(observer, latency_seconds): logger.info(f"⏱️ User-to-bot latency: {latency_seconds:.3f}s") + @startup_observer.event_handler("on_startup_timing_report") + async def on_startup_timing_report(observer, 
report): + logger.info(f"Total startup: {report.total_duration_secs:.3f}s") + for timing in report.processor_timings: + logger.info(f" {timing.processor_name}: {timing.duration_secs:.3f}s") + + @startup_observer.event_handler("on_transport_timing_report") + async def on_transport_timing_report(observer, report): + if report.bot_connected_secs is not None: + logger.info(f"Bot connected: {report.bot_connected_secs:.3f}s") + logger.info(f"Client connected: {report.client_connected_secs:.3f}s") + turn_observer = task.turn_tracking_observer if turn_observer: diff --git a/examples/foundational/35-pattern-pair-voice-switching.py b/examples/foundational/35-pattern-pair-voice-switching.py index 4b269ac3e..cacc04459 100644 --- a/examples/foundational/35-pattern-pair-voice-switching.py +++ b/examples/foundational/35-pattern-pair-voice-switching.py @@ -117,7 +117,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): # First flush any existing audio to finish the current context await tts.flush_audio() # Then set the new voice - tts.set_voice(VOICE_IDS[voice_name]) + await tts.set_voice(VOICE_IDS[voice_name]) logger.info(f"Switched to {voice_name} voice") else: logger.warning(f"Unknown voice: {voice_name}") diff --git a/examples/foundational/38b-smart-turn-local.py b/examples/foundational/38b-smart-turn-local.py index 2872a0e76..dc62010fb 100644 --- a/examples/foundational/38b-smart-turn-local.py +++ b/examples/foundational/38b-smart-turn-local.py @@ -12,6 +12,8 @@ from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer from pipecat.frames.frames import LLMRunFrame +from pipecat.metrics.metrics import TurnMetricsData +from pipecat.observers.loggers.metrics_log_observer import MetricsLogObserver from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -77,7 +79,6 @@ async def run_bot(transport: BaseTransport, runner_args: 
RunnerArguments): pipeline = Pipeline( [ transport.input(), # Transport user input - rtvi, stt, user_aggregator, # User responses llm, # LLM @@ -94,17 +95,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): enable_usage_metrics=True, ), idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + observers=[MetricsLogObserver(include_metrics={TurnMetricsData})], ) - @task.rtvi.event_handler("on_client_ready") - async def on_client_ready(rtvi): - # Kick off the conversation - messages.append({"role": "system", "content": "Please introduce yourself to the user."}) - await task.queue_frames([LLMRunFrame()]) - @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") + # Kick off the conversation + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/50-ultravox-realtime.py b/examples/foundational/50-ultravox-realtime.py index 5038cbb4c..0908c518c 100644 --- a/examples/foundational/50-ultravox-realtime.py +++ b/examples/foundational/50-ultravox-realtime.py @@ -12,11 +12,18 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + 
LLMContextAggregatorPair, + LLMUserAggregatorParams, + UserTurnStoppedMessage, +) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport from pipecat.services.llm_service import FunctionCallParams @@ -24,6 +31,8 @@ from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLL from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies # Load environment variables load_dotenv(override=True) @@ -168,8 +177,21 @@ There is also a secret menu that changes daily. If the user asks about it, use t llm.register_function("get_secret_menu", get_secret_menu) - # Necessary to complete the function call lifecycle in Pipecat. - user_aggregator, assistant_aggregator = LLMContextAggregatorPair(LLMContext([])) + context = LLMContext([]) + + # Necessary to complete the function call lifecycle in Pipecat and + # to produce user and assistant turn stopped events. + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + user_turn_strategies=UserTurnStrategies( + stop=[SpeechTimeoutUserTurnStopStrategy()], + ), + # Set the VAD analyzer to create reliable TTFB measurements and + # user stop events. + vad_analyzer=SileroVADAnalyzer(), + ), + ) # Build the pipeline pipeline = Pipeline( @@ -177,8 +199,8 @@ There is also a secret menu that changes daily. If the user asks about it, use t transport.input(), user_aggregator, llm, - assistant_aggregator, transport.output(), + assistant_aggregator, ] ) @@ -203,6 +225,18 @@ There is also a secret menu that changes daily. 
If the user asks about it, use t logger.info(f"Client disconnected") await task.cancel() + @user_aggregator.event_handler("on_user_turn_stopped") + async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}user: {message.content}" + logger.info(f"Transcript: {line}") + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + # Run the pipeline runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) await runner.run(task) diff --git a/examples/foundational/50a-ultravox-realtime-text.py b/examples/foundational/50a-ultravox-realtime-text.py new file mode 100644 index 000000000..8b876048a --- /dev/null +++ b/examples/foundational/50a-ultravox-realtime-text.py @@ -0,0 +1,263 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import datetime +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.audio.vad.vad_analyzer import VADParams +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, + LLMUserAggregatorParams, + UserTurnStoppedMessage, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import 
create_transport +from pipecat.services.inworld.tts import InworldTTSService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.services.ultravox.llm import OneShotInputParams, UltravoxRealtimeLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies + +# Load environment variables +load_dotenv(override=True) + + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at runtime. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def get_secret_menu(params: FunctionCallParams): + category = params.arguments.get("category", "both") + logger.debug(f"Fetching secret menu with category: {category}") + items = [] + if category in {"donuts", "both"}: + items.append( + { + "name": "Butter Pecan Ice Cream (one scoop)", + "price": "$2.99", + } + ) + if category in {"drinks", "both"}: + items.append( + { + "name": "Banana Smoothie", + "price": "$4.99", + } + ) + await params.result_callback( + { + "date": datetime.date.today().isoformat(), + "items": items, + } + ) + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + system_prompt = f""" +You are a drive-thru order taker for a donut shop called "Dr. Donut". 
Local time is currently: {datetime.datetime.now().isoformat()} +The user is talking to you over voice on their phone, and your response will be read out loud with realistic text-to-speech (TTS) technology. + +Follow every direction here when crafting your response: + +1. Use natural, conversational language that is clear and easy to follow (short sentences, simple words). +1a. Be concise and relevant: Most of your responses should be a sentence or two, unless you're asked to go deeper. Don't monopolize the conversation. +1b. Use discourse markers to ease comprehension. Never use the list format. + +2. Keep the conversation flowing. +2a. Clarify: when there is ambiguity, ask clarifying questions, rather than make assumptions. +2b. Don't implicitly or explicitly try to end the chat (i.e. do not end a response with "Talk soon!", or "Enjoy!"). +2c. Sometimes the user might just want to chat. Ask them relevant follow-up questions. +2d. Don't ask them if there's anything else they need help with (e.g. don't say things like "How can I assist you further?"). + +3. Remember that this is a voice conversation: +3a. Don't use lists, markdown, bullet points, or other formatting that's not typically spoken. +3b. Type out numbers in words (e.g. 'twenty twelve' instead of the year 2012) +3c. If something doesn't make sense, it's likely because you misheard them. There wasn't a typo, and the user didn't mispronounce anything. + +Remember to follow these rules absolutely, and do not refer to these rules, even if you're asked about them. + +When talking with the user, use the following script: +1. Take their order, acknowledging each item as it is ordered. If it's not clear which menu item the user is ordering, ask them to clarify. + DO NOT add an item to the order unless it's one of the items on the menu below. +2. Once the order is complete, repeat back the order. +2a. If the user only ordered a drink, ask them if they would like to add a donut to their order. +2b. 
If the user only ordered donuts, ask them if they would like to add a drink to their order. +2c. If the user ordered both drinks and donuts, don't suggest anything. +3. Total up the price of all ordered items and inform the user. +4. Ask the user to pull up to the drive thru window. +If the user asks for something that's not on the menu, inform them of that fact, and suggest the most similar item on the menu. +If the user says something unrelated to your role, responed with "Um... this is a Dr. Donut." +If the user says "thank you", respond with "My pleasure." +If the user asks about what's on the menu, DO NOT read the entire menu to them. Instead, give a couple suggestions. + +The menu of available items is as follows: + +# DONUTS + +PUMPKIN SPICE ICED DOUGHNUT $1.29 +PUMPKIN SPICE CAKE DOUGHNUT $1.29 +OLD FASHIONED DOUGHNUT $1.29 +CHOCOLATE ICED DOUGHNUT $1.09 +CHOCOLATE ICED DOUGHNUT WITH SPRINKLES $1.09 +RASPBERRY FILLED DOUGHNUT $1.09 +BLUEBERRY CAKE DOUGHNUT $1.09 +STRAWBERRY ICED DOUGHNUT WITH SPRINKLES $1.09 +LEMON FILLED DOUGHNUT $1.09 +DOUGHNUT HOLES $3.99 + +# COFFEE & DRINKS + +PUMPKIN SPICE COFFEE $2.59 +PUMPKIN SPICE LATTE $4.59 +REGULAR BREWED COFFEE $1.79 +DECAF BREWED COFFEE $1.79 +LATTE $3.49 +CAPPUCINO $3.49 +CARAMEL MACCHIATO $3.49 +MOCHA LATTE $3.49 +CARAMEL MOCHA LATTE $3.49 + +There is also a secret menu that changes daily. If the user asks about it, use the get_secret_menu tool to look up today's secret menu items. +""" + + secret_menu_function = FunctionSchema( + name="get_secret_menu", + description="Get today's secret menu items", + properties={ + "category": { + "type": "string", + "enum": ["donuts", "drinks", "both"], + "description": "The category of secret menu items to retrieve. 
Defaults to both.", + }, + }, + required=[], + ) + + llm = UltravoxRealtimeLLMService( + params=OneShotInputParams( + api_key=os.getenv("ULTRAVOX_API_KEY"), + system_prompt=system_prompt, + temperature=0.3, + max_duration=datetime.timedelta(minutes=3), + output_medium="text", + ), + one_shot_selected_tools=ToolsSchema(standard_tools=[secret_menu_function]), + ) + + llm.register_function("get_secret_menu", get_secret_menu) + + tts = InworldTTSService( + api_key=os.getenv("INWORLD_API_KEY", ""), + voice_id="Ashley", + model="inworld-tts-1", + temperature=1.1, + ) + + context = LLMContext([]) + + # Necessary to complete the function call lifecycle in Pipecat and + # to produce user and assistant turn stopped events. + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + user_turn_strategies=UserTurnStrategies( + stop=[SpeechTimeoutUserTurnStopStrategy()], + ), + # Set the VAD analyzer to emulate timing of the model. + vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.5)), + ), + ) + + # Build the pipeline + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + # Configure the pipeline task + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + # Handle client connection event + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + + # Handle client disconnection events + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + @user_aggregator.event_handler("on_user_turn_stopped") + async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMessage): + timestamp = 
f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}user: {message.content}" + logger.info(f"Transcript: {line}") + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + + # Run the pipeline + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/52-live-translation.py b/examples/foundational/52-live-translation.py index 30583c1b8..861d23e37 100644 --- a/examples/foundational/52-live-translation.py +++ b/examples/foundational/52-live-translation.py @@ -11,6 +11,7 @@ from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import TTSSpeakFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -110,6 +111,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") + await task.queue_frames( + [ + TTSSpeakFrame( + text="Hello, welcome to live translation. Everything you say will be automatically translated to Spanish. 
Let's begin!", + append_to_context=True, + ), + ] + ) @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): diff --git a/examples/foundational/54-context-summarization-openai.py b/examples/foundational/54-context-summarization-openai.py index 652a3af13..ff6701bec 100644 --- a/examples/foundational/54-context-summarization-openai.py +++ b/examples/foundational/54-context-summarization-openai.py @@ -20,14 +20,13 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context_summarizer import SummaryAppliedEvent from pipecat.processors.aggregators.llm_response_universal import ( LLMAssistantAggregatorParams, LLMContextAggregatorPair, @@ -42,9 +41,10 @@ from pipecat.services.openai.llm import OpenAILLMService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams -from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy -from pipecat.turns.user_turn_strategies import UserTurnStrategies -from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummaryConfig, +) load_dotenv(override=True) @@ -120,24 +120,36 @@ 
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, user_params=LLMUserAggregatorParams( - user_turn_strategies=UserTurnStrategies( - stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] - ), - vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + vad_analyzer=SileroVADAnalyzer(), ), assistant_params=LLMAssistantAggregatorParams( - enable_context_summarization=True, + enable_auto_context_summarization=True, # Optional: customize context summarization behavior # Using low limits to demonstrate the feature quickly - context_summarization_config=LLMContextSummarizationConfig( + auto_context_summarization_config=LLMAutoContextSummarizationConfig( max_context_tokens=1000, # Trigger summarization at 1000 tokens - target_context_tokens=800, # Target context size for the summarization max_unsummarized_messages=10, # Or when 10 new messages accumulate - min_messages_after_summary=2, # Keep last 2 messages uncompressed + summary_config=LLMContextSummaryConfig( + target_context_tokens=800, # Target context size for the summarization + min_messages_after_summary=2, # Keep last 2 messages uncompressed + ), ), ), ) + # Listen for summarization events + summarizer = assistant_aggregator._summarizer + if summarizer: + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event: SummaryAppliedEvent): + logger.info( + f"Context summarized: {event.original_message_count} messages -> " + f"{event.new_message_count} messages " + f"({event.summarized_message_count} summarized, " + f"{event.preserved_message_count} preserved)" + ) + pipeline = Pipeline( [ transport.input(), # Transport user input diff --git a/examples/foundational/54a-context-summarization-google.py b/examples/foundational/54a-context-summarization-google.py index a7fe4ba5e..7d2a91310 100644 --- 
a/examples/foundational/54a-context-summarization-google.py +++ b/examples/foundational/54a-context-summarization-google.py @@ -20,14 +20,13 @@ from loguru import logger from pipecat.adapters.schemas.function_schema import FunctionSchema from pipecat.adapters.schemas.tools_schema import ToolsSchema -from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3 from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.frames.frames import LLMRunFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context_summarizer import SummaryAppliedEvent from pipecat.processors.aggregators.llm_response_universal import ( LLMAssistantAggregatorParams, LLMContextAggregatorPair, @@ -42,9 +41,10 @@ from pipecat.services.llm_service import FunctionCallParams from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams -from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy -from pipecat.turns.user_turn_strategies import UserTurnStrategies -from pipecat.utils.context.llm_context_summarization import LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummaryConfig, +) load_dotenv(override=True) @@ -120,24 +120,36 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): user_aggregator, assistant_aggregator = LLMContextAggregatorPair( context, user_params=LLMUserAggregatorParams( - user_turn_strategies=UserTurnStrategies( - stop=[TurnAnalyzerUserTurnStopStrategy(turn_analyzer=LocalSmartTurnAnalyzerV3())] - ), - 
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)), + vad_analyzer=SileroVADAnalyzer(), ), assistant_params=LLMAssistantAggregatorParams( - enable_context_summarization=True, + enable_auto_context_summarization=True, # Optional: customize context summarization behavior # Using low limits to demonstrate the feature quickly - context_summarization_config=LLMContextSummarizationConfig( + auto_context_summarization_config=LLMAutoContextSummarizationConfig( max_context_tokens=1000, # Trigger summarization at 1000 tokens - target_context_tokens=800, # Target context size for the summarization max_unsummarized_messages=10, # Or when 10 new messages accumulate - min_messages_after_summary=2, # Keep last 2 messages uncompressed + summary_config=LLMContextSummaryConfig( + target_context_tokens=800, # Target context size for the summarization + min_messages_after_summary=2, # Keep last 2 messages uncompressed + ), ), ), ) + # Listen for summarization events + summarizer = assistant_aggregator._summarizer + if summarizer: + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event: SummaryAppliedEvent): + logger.info( + f"Context summarized: {event.original_message_count} messages -> " + f"{event.new_message_count} messages " + f"({event.summarized_message_count} summarized, " + f"{event.preserved_message_count} preserved)" + ) + pipeline = Pipeline( [ transport.input(), # Transport user input diff --git a/examples/foundational/54b-context-summarization-manual-openai.py b/examples/foundational/54b-context-summarization-manual-openai.py new file mode 100644 index 000000000..c1ff83ef0 --- /dev/null +++ b/examples/foundational/54b-context-summarization-manual-openai.py @@ -0,0 +1,172 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Example demonstrating manual context summarization via a function call. 
+ +This example shows how to trigger context summarization on demand rather than +automatically. The user can ask the bot to "summarize the conversation" and the +bot will call a function that pushes an LLMSummarizeContextFrame into the +pipeline, causing the LLM service to compress the conversation history. + +Unlike example 54, automatic summarization is NOT enabled here. Summarization +only happens when the user explicitly requests it through the function call. +""" + +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMSummarizeContextFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.turns.user_stop import TurnAnalyzerUserTurnStopStrategy +from pipecat.turns.user_turn_strategies import UserTurnStrategies + +load_dotenv(override=True) + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at 
runtime. +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def summarize_conversation(params: FunctionCallParams): + """Trigger manual context summarization via a pipeline frame.""" + logger.info("Tool called: summarize_conversation") + await params.result_callback({"status": "summarization_requested"}) + await params.llm.queue_frame(LLMSummarizeContextFrame()) + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + llm.register_function("summarize_conversation", summarize_conversation) + + summarize_function = FunctionSchema( + name="summarize_conversation", + description=( + "Summarize and compress the conversation history. " + "Call this when the user asks you to summarize the conversation " + "or when you want to free up context space." + ), + properties={}, + required=[], + ) + tools = ToolsSchema(standard_tools=[summarize_function]) + + messages = [ + { + "role": "system", + "content": ( + "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your " + "capabilities in a succinct way. Your output will be spoken aloud, so avoid " + "special characters that can't easily be spoken, such as emojis or bullet points. " + "Respond to what the user said in a creative and helpful way. " + "If the user asks you to summarize the conversation, call the " + "summarize_conversation function. 
After summarization, briefly acknowledge " + "that the conversation history has been compressed." + ), + }, + ] + + context = LLMContext(messages, tools=tools) + + # Automatic summarization is NOT enabled here (enable_auto_context_summarization + # defaults to False). The summarizer is still created internally so that + # LLMSummarizeContextFrame frames pushed via the function call are handled. + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), # Transport user input + stt, + user_aggregator, # User responses + llm, # LLM + tts, # TTS + transport.output(), # Transport bot output + assistant_aggregator, # Assistant spoken responses + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + # Kick off the conversation. 
+ messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/54c-context-summarization-dedicated-llm.py b/examples/foundational/54c-context-summarization-dedicated-llm.py new file mode 100644 index 000000000..1dce3890f --- /dev/null +++ b/examples/foundational/54c-context-summarization-dedicated-llm.py @@ -0,0 +1,236 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Example demonstrating advanced context summarization configuration. 
+ +This example shows how to customize context summarization with: +- A dedicated cheap/fast LLM for generating summaries (Gemini Flash) +- A custom summary message template (XML tags) +- A custom summarization prompt +- A summarization timeout +- The on_summary_applied event for observability +""" + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.function_schema import FunctionSchema +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context_summarizer import SummaryAppliedEvent +from pipecat.processors.aggregators.llm_response_universal import ( + LLMAssistantAggregatorParams, + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google import GoogleLLMService +from pipecat.services.llm_service import FunctionCallParams +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummaryConfig, +) + +load_dotenv(override=True) + +# We use lambdas to defer transport parameter creation until the transport +# type is selected at runtime. 
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + +# Custom summarization prompt tailored to the application +CUSTOM_SUMMARIZATION_PROMPT = """Summarize this conversation, preserving: +- Key decisions and agreements +- Important facts and user preferences +- Any pending action items or unresolved questions + +Be concise. Use clear, factual statements grouped by topic. +Omit greetings, small talk, and resolved tangents.""" + + +# Tool functions for the LLM +async def get_current_weather(params: FunctionCallParams): + """Get the current weather.""" + logger.info("Tool called: get_current_weather") + await asyncio.sleep(1) # Simulate some processing + await params.result_callback({"conditions": "nice", "temperature": "75"}) + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + # Primary LLM for conversation (could be any provider) + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + # Dedicated cheap/fast LLM for summarization only + summarization_llm = GoogleLLMService( + api_key=os.getenv("GOOGLE_API_KEY"), + model="gemini-2.5-flash", + ) + + # Register tool functions + llm.register_function("get_current_weather", get_current_weather) + + weather_function = FunctionSchema( + name="get_current_weather", + description="Get the current weather", + properties={ + "location": { + "type": "string", + "description": "The city and state, e.g.
San Francisco, CA", + }, + "format": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "The temperature unit to use. Infer this from the user's location.", + }, + }, + required=["location", "format"], + ) + tools = ToolsSchema(standard_tools=[weather_function]) + + messages = [ + { + "role": "system", + "content": ( + "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate " + "your capabilities in a succinct way. Your output will be spoken aloud, " + "so avoid special characters that can't easily be spoken. Respond to what " + "the user said in a creative and helpful way. You have access to tools to " + "get the current weather - use them when relevant.\n\n" + "When you see a <conversation_summary> block, it contains a compressed summary " + "of earlier conversation. Use it as reference but don't mention it to the user." + ), + }, + ] + + context = LLMContext(messages, tools=tools) + + # Create aggregators with custom summarization + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams( + vad_analyzer=SileroVADAnalyzer(), + ), + assistant_params=LLMAssistantAggregatorParams( + enable_auto_context_summarization=True, + auto_context_summarization_config=LLMAutoContextSummarizationConfig( + # Trigger thresholds (low values to demonstrate quickly) + max_context_tokens=1000, + max_unsummarized_messages=10, + summary_config=LLMContextSummaryConfig( + # Summary generation + target_context_tokens=800, + min_messages_after_summary=2, + summarization_prompt=CUSTOM_SUMMARIZATION_PROMPT, + # Custom summary format - wrap in XML tags so the system + # prompt can identify summaries vs.
live conversation + summary_message_template="<conversation_summary>\n{summary}\n</conversation_summary>", + # Use a dedicated cheap LLM for summarization instead of + # the primary conversation model + llm=summarization_llm, + # Cancel summarization if it takes longer than 60 seconds + summarization_timeout=60.0, + ), + ), + ), + ) + + # Listen for summarization events + summarizer = assistant_aggregator._summarizer + if summarizer: + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event: SummaryAppliedEvent): + logger.info( + f"Context summarized: {event.original_message_count} messages -> " + f"{event.new_message_count} messages " + f"({event.summarized_message_count} summarized, " + f"{event.preserved_message_count} preserved)" + ) + + pipeline = Pipeline( + [ + transport.input(), # Transport user input + stt, + user_aggregator, # User responses + llm, # LLM + tts, # TTS + transport.output(), # Transport bot output + assistant_aggregator, # Assistant spoken responses + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + # Kick off the conversation.
+ messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55a-update-settings-deepgram-flux-stt.py b/examples/foundational/55a-update-settings-deepgram-flux-stt.py new file mode 100644 index 000000000..a482e513c --- /dev/null +++ b/examples/foundational/55a-update-settings-deepgram-flux-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.flux.stt import DeepgramFluxSTTService, DeepgramFluxSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from
pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = DeepgramFluxSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points.
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Deepgram Flux STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=DeepgramFluxSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py new file mode 100644 index 000000000..e8094183a --- /dev/null +++ b/examples/foundational/55a-update-settings-deepgram-sagemaker-stt.py @@ -0,0 +1,148 @@ +# +# Copyright (c) 2024-2026, Daily +# +#
SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from deepgram import LiveOptions +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt_sagemaker import ( + DeepgramSageMakerSTTService, + DeepgramSageMakerSTTSettings, +) +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = DeepgramSageMakerSTTService( + endpoint_name=os.getenv("SAGEMAKER_STT_ENDPOINT_NAME"), + region=os.getenv("AWS_REGION"), + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + )
+ + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + # NOTE: after this change, the bot will only respond if you speak Spanish + await asyncio.sleep(10) + logger.info("Updating Deepgram SageMaker STT settings: language=es, punctuate=False") + await task.queue_frame( + STTUpdateSettingsFrame( + delta=DeepgramSageMakerSTTSettings( + language=Language.ES, + live_options=LiveOptions(punctuate=False), + ) + ) + ) + + # Old-style dict update (for backward-compat testing): + # await asyncio.sleep(10) + # logger.info("Updating Deepgram SageMaker STT settings via dict: punctuate=False, filler_words=True") + # await task.queue_frame( + # STTUpdateSettingsFrame(settings={"punctuate": False, "filler_words": True}) + # ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): +
logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55a-update-settings-deepgram-stt.py b/examples/foundational/55a-update-settings-deepgram-stt.py new file mode 100644 index 000000000..8808f6f4c --- /dev/null +++ b/examples/foundational/55a-update-settings-deepgram-stt.py @@ -0,0 +1,142 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from deepgram import LiveOptions +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService, DeepgramSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams
+ +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points.
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + # NOTE: after this change, the bot will only respond if you speak Spanish + await asyncio.sleep(10) + logger.info("Updating Deepgram STT settings: language=es, punctuate=False") + await task.queue_frame( + STTUpdateSettingsFrame( + delta=DeepgramSTTSettings( + language=Language.ES, + live_options=LiveOptions(punctuate=False), + ) + ) + ) + + # Old-style dict update (for backward-compat testing): + # await asyncio.sleep(10) + # logger.info("Updating Deepgram STT settings via dict: punctuate=False, filler_words=True") + # await task.queue_frame( + # STTUpdateSettingsFrame(settings={"punctuate": False, "filler_words": True}) + # ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if
__name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55b-update-settings-azure-stt.py b/examples/foundational/55b-update-settings-azure-stt.py new file mode 100644 index 000000000..96e4041d0 --- /dev/null +++ b/examples/foundational/55b-update-settings-azure-stt.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.stt import AzureSTTService, AzureSTTSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): +
logger.info("Starting bot") + + stt = AzureSTTService( + api_key=os.getenv("AZURE_SPEECH_API_KEY"), + region=os.getenv("AZURE_SPEECH_REGION"), + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Azure STT settings: language=es") + await task.queue_frame(STTUpdateSettingsFrame(delta=AzureSTTSettings(language=Language.ES))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args:
RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55c-update-settings-google-stt.py b/examples/foundational/55c-update-settings-google-stt.py new file mode 100644 index 000000000..dede5b173 --- /dev/null +++ b/examples/foundational/55c-update-settings-google-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.google.stt import GoogleSTTService, GoogleSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, +
audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = GoogleSTTService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Google STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=GoogleSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client
disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55d-update-settings-assemblyai-stt.py b/examples/foundational/55d-update-settings-assemblyai-stt.py new file mode 100644 index 000000000..d37c3ec7b --- /dev/null +++ b/examples/foundational/55d-update-settings-assemblyai-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.assemblyai.stt import AssemblyAISTTService, AssemblyAISTTSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) +
+transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info("Starting bot") + + stt = AssemblyAISTTService(api_key=os.getenv("ASSEMBLYAI_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info("Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating AssemblyAI STT settings: language=es") + await task.queue_frame( +
STTUpdateSettingsFrame(delta=AssemblyAISTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info("Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55e-update-settings-gladia-stt.py b/examples/foundational/55e-update-settings-gladia-stt.py new file mode 100644 index 000000000..e5bd5486a --- /dev/null +++ b/examples/foundational/55e-update-settings-gladia-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.gladia.stt import GladiaSTTService, GladiaSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport,
TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = GladiaSTTService(api_key=os.getenv("GLADIA_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Gladia STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=GladiaSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py b/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py new file mode 100644 index 000000000..c3f0a6325 --- /dev/null +++ b/examples/foundational/55f-update-settings-elevenlabs-realtime-stt.py @@ -0,0 +1,131 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 
2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.elevenlabs.stt import ( + ElevenLabsRealtimeSTTService, + ElevenLabsRealtimeSTTSettings, +) +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = ElevenLabsRealtimeSTTService(api_key=os.getenv("ELEVENLABS_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You 
are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating ElevenLabs Realtime STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=ElevenLabsRealtimeSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55g-update-settings-elevenlabs-stt.py 
b/examples/foundational/55g-update-settings-elevenlabs-stt.py new file mode 100644 index 000000000..9435bc1ac --- /dev/null +++ b/examples/foundational/55g-update-settings-elevenlabs-stt.py @@ -0,0 +1,133 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.elevenlabs.stt import ElevenLabsSTTService, ElevenLabsSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = ElevenLabsSTTService( + 
api_key=os.getenv("ELEVENLABS_API_KEY"), + aiohttp_session=session, + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating ElevenLabs STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=ElevenLabsSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat 
Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55h-update-settings-speechmatics-stt.py b/examples/foundational/55h-update-settings-speechmatics-stt.py new file mode 100644 index 000000000..c362d2f9f --- /dev/null +++ b/examples/foundational/55h-update-settings-speechmatics-stt.py @@ -0,0 +1,153 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.speechmatics.stt import SpeechmaticsSTTService, SpeechmaticsSTTSettings +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: 
TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = SpeechmaticsSTTService( + api_key=os.getenv("SPEECHMATICS_API_KEY"), + params=SpeechmaticsSTTService.InputParams( + enable_diarization=True, + speaker_active_format="<{speaker_id}>{text}", + speaker_passive_format="<{speaker_id}>{text}", + ), + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Speechmatics STT settings: language=es") + await task.queue_frame( + 
STTUpdateSettingsFrame(delta=SpeechmaticsSTTSettings(language=Language.ES)) + ) + + await asyncio.sleep(10) + logger.info("Updating Speechmatics STT settings: focus_speakers=['S1']") + await task.queue_frame( + STTUpdateSettingsFrame(delta=SpeechmaticsSTTSettings(focus_speakers=["S1"])) + ) + + await asyncio.sleep(10) + logger.info( + "Updating Speechmatics STT settings: speaker_active_format={text}" + ) + await task.queue_frame( + STTUpdateSettingsFrame( + delta=SpeechmaticsSTTSettings( + speaker_active_format="{text}" + ) + ) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55i-update-settings-whisper-api-stt.py b/examples/foundational/55i-update-settings-whisper-api-stt.py new file mode 100644 index 000000000..741601c83 --- /dev/null +++ b/examples/foundational/55i-update-settings-whisper-api-stt.py @@ -0,0 +1,132 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + 
LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.openai.stt import OpenAISTTService +from pipecat.services.whisper.base_stt import BaseWhisperSTTSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + # This file is meant to exercise Whisper API-based STT services, so we use + # OpenAI's Whisper STT as an example here. Here we could've also used: + # - SambaNova + # - Groq + stt = OpenAISTTService( + api_key=os.getenv("OPENAI_API_KEY"), + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating OpenAI STT settings: language="es"') + await task.queue_frame(STTUpdateSettingsFrame(delta=BaseWhisperSTTSettings(language="es"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55j-update-settings-sarvam-stt.py b/examples/foundational/55j-update-settings-sarvam-stt.py new file mode 100644 index 000000000..cab9656f8 --- /dev/null +++ b/examples/foundational/55j-update-settings-sarvam-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import 
os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.sarvam.stt import SarvamSTTService, SarvamSTTSettings +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = SarvamSTTService(api_key=os.getenv("SARVAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Sarvam STT settings: language=en-IN") + await task.queue_frame( + STTUpdateSettingsFrame(delta=SarvamSTTSettings(language=Language.EN_IN)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55k-update-settings-soniox-stt.py b/examples/foundational/55k-update-settings-soniox-stt.py new file mode 100644 index 000000000..85b5d2ba4 --- /dev/null +++ b/examples/foundational/55k-update-settings-soniox-stt.py 
@@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.soniox.stt import SonioxSTTService, SonioxSTTSettings +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = SonioxSTTService(api_key=os.getenv("SONIOX_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + 
"role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Soniox STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=SonioxSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55l-update-settings-aws-transcribe-stt.py 
b/examples/foundational/55l-update-settings-aws-transcribe-stt.py new file mode 100644 index 000000000..3bfeb2faf --- /dev/null +++ b/examples/foundational/55l-update-settings-aws-transcribe-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.aws.stt import AWSTranscribeSTTService, AWSTranscribeSTTSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = AWSTranscribeSTTService() + + tts = CartesiaTTSService( + 
api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating AWS Transcribe STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=AWSTranscribeSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await 
run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55m-update-settings-cartesia-stt.py b/examples/foundational/55m-update-settings-cartesia-stt.py new file mode 100644 index 000000000..a87847a5a --- /dev/null +++ b/examples/foundational/55m-update-settings-cartesia-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.stt import CartesiaSTTService, CartesiaSTTSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: 
BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Cartesia STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=CartesiaSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: 
RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55n-update-settings-cartesia-http-tts.py b/examples/foundational/55n-update-settings-cartesia-http-tts.py new file mode 100644 index 000000000..02d3bca2a --- /dev/null +++ b/examples/foundational/55n-update-settings-cartesia-http-tts.py @@ -0,0 +1,133 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import ( + CartesiaHttpTTSService, + CartesiaTTSSettings, + GenerationConfig, +) +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + 
audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaHttpTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Cartesia HTTP TTS settings: speed increased to 1.5") + await task.queue_frame( + TTSUpdateSettingsFrame( + delta=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5)) + ) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + 
logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/07e-interruptible-playht-http.py b/examples/foundational/55n-update-settings-cartesia-tts.py similarity index 85% rename from examples/foundational/07e-interruptible-playht-http.py rename to examples/foundational/55n-update-settings-cartesia-tts.py index c56de3b9f..04e9d8fee 100644 --- a/examples/foundational/07e-interruptible-playht-http.py +++ b/examples/foundational/55n-update-settings-cartesia-tts.py @@ -4,14 +4,14 @@ # SPDX-License-Identifier: BSD 2-Clause License # - +import asyncio import os from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -22,9 +22,9 @@ from pipecat.processors.aggregators.llm_response_universal import ( ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService, CartesiaTTSSettings, GenerationConfig from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.services.playht.tts import PlayHTHttpTTSService from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -54,10 +54,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - tts = PlayHTHttpTTSService( - user_id=os.getenv("PLAYHT_USER_ID"), - api_key=os.getenv("PLAYHT_API_KEY"), - voice_url="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json", + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) @@ -103,6 +102,14 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) + await asyncio.sleep(10) + logger.info("Updating Cartesia TTS settings: speed increased to 1.5") + await task.queue_frame( + TTSUpdateSettingsFrame( + delta=CartesiaTTSSettings(generation_config=GenerationConfig(speed=1.5)) + ) + ) + @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") diff --git a/examples/foundational/55o-update-settings-elevenlabs-http-tts.py b/examples/foundational/55o-update-settings-elevenlabs-http-tts.py new file mode 100644 index 000000000..2ca51730f --- /dev/null +++ b/examples/foundational/55o-update-settings-elevenlabs-http-tts.py @@ -0,0 +1,132 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task 
import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.elevenlabs.tts import ElevenLabsHttpTTSService, ElevenLabsHttpTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = ElevenLabsHttpTTSService( + api_key=os.getenv("ELEVENLABS_API_KEY"), + voice_id=os.getenv("ELEVENLABS_VOICE_ID"), + aiohttp_session=session, + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating ElevenLabs TTS settings: speed=0.7") + await task.queue_frame( + TTSUpdateSettingsFrame(delta=ElevenLabsHttpTTSSettings(speed=0.7)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55o-update-settings-elevenlabs-tts.py b/examples/foundational/55o-update-settings-elevenlabs-tts.py new file mode 100644 index 000000000..ddbfd8b8f --- /dev/null +++ b/examples/foundational/55o-update-settings-elevenlabs-tts.py @@ -0,0 +1,134 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import 
asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.elevenlabs.tts import ElevenLabsTTSService, ElevenLabsTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = ElevenLabsTTSService( + api_key=os.getenv("ELEVENLABS_API_KEY"), + voice_id=os.getenv("ELEVENLABS_VOICE_ID"), + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating ElevenLabs TTS settings: speed=0.7") + await task.queue_frame(TTSUpdateSettingsFrame(delta=ElevenLabsTTSSettings(speed=0.7))) + + await asyncio.sleep(10) + logger.info("Updating ElevenLabs TTS settings: switching to a different voice") + await task.queue_frame( + TTSUpdateSettingsFrame( + delta=ElevenLabsTTSSettings(voice=os.getenv("ELEVENLABS_VOICE_ID_ALT")) + ) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from 
pipecat.runner.run import main + + main() diff --git a/examples/foundational/55p-update-settings-openai-tts.py b/examples/foundational/55p-update-settings-openai-tts.py new file mode 100644 index 000000000..fcc24fb76 --- /dev/null +++ b/examples/foundational/55p-update-settings-openai-tts.py @@ -0,0 +1,123 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.openai.tts import OpenAITTSService, OpenAITTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = 
DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = OpenAITTSService(api_key=os.getenv("OPENAI_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + audio_out_sample_rate=24000, + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OpenAI TTS settings: speed=2.0") + await task.queue_frame(TTSUpdateSettingsFrame(delta=OpenAITTSSettings(speed=2.0))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await 
run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55q-update-settings-deepgram-http-tts.py b/examples/foundational/55q-update-settings-deepgram-http-tts.py new file mode 100644 index 000000000..d94bf631a --- /dev/null +++ b/examples/foundational/55q-update-settings-deepgram-http-tts.py @@ -0,0 +1,137 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.deepgram.tts import DeepgramHttpTTSService, DeepgramTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, 
runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = DeepgramHttpTTSService( + api_key=os.getenv("DEEPGRAM_API_KEY"), + aiohttp_session=session, + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-aries-en")) + ) + + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-luna-en")) + ) + + @transport.event_handler("on_client_disconnected") + async def 
on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py b/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py new file mode 100644 index 000000000..85087d0d2 --- /dev/null +++ b/examples/foundational/55q-update-settings-deepgram-sagemaker-tts.py @@ -0,0 +1,137 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.deepgram.tts_sagemaker import ( + DeepgramSageMakerTTSService, + DeepgramSageMakerTTSSettings, +) +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi 
import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = DeepgramSageMakerTTSService( + endpoint_name=os.getenv("SAGEMAKER_TTS_ENDPOINT_NAME"), + region=os.getenv("AWS_REGION"), + voice="aura-2-helena-en", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Deepgram SageMaker TTS settings: voice="aura-2-aries-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=DeepgramSageMakerTTSSettings(voice="aura-2-aries-en")) + ) + + await asyncio.sleep(10) + logger.info('Updating Deepgram SageMaker TTS settings: voice="aura-2-luna-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=DeepgramSageMakerTTSSettings(voice="aura-2-luna-en")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55q-update-settings-deepgram-tts.py 
b/examples/foundational/55q-update-settings-deepgram-tts.py new file mode 100644 index 000000000..e205ffa73 --- /dev/null +++ b/examples/foundational/55q-update-settings-deepgram-tts.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.deepgram.tts import DeepgramTTSService, DeepgramTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = DeepgramTTSService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + llm = 
OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-aries-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-aries-en")) + ) + + await asyncio.sleep(10) + logger.info('Updating Deepgram TTS settings: voice="aura-2-luna-en"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=DeepgramTTSSettings(voice="aura-2-luna-en")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + 
transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55r-update-settings-azure-http-tts.py b/examples/foundational/55r-update-settings-azure-http-tts.py new file mode 100644 index 000000000..0e4df5e7c --- /dev/null +++ b/examples/foundational/55r-update-settings-azure-http-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.tts import AzureHttpTTSService, AzureTTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def 
run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = AzureHttpTTSService( + api_key=os.getenv("AZURE_SPEECH_API_KEY"), + region=os.getenv("AZURE_SPEECH_REGION"), + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Azure TTS settings: rate="0.7", style="sad"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=AzureTTSSettings(rate="0.7", style="sad")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: 
RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55r-update-settings-azure-tts.py b/examples/foundational/55r-update-settings-azure-tts.py new file mode 100644 index 000000000..a32dad5ed --- /dev/null +++ b/examples/foundational/55r-update-settings-azure-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.tts import AzureTTSService, AzureTTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + 
audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = AzureTTSService( + api_key=os.getenv("AZURE_SPEECH_API_KEY"), + region=os.getenv("AZURE_SPEECH_REGION"), + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Azure TTS settings: rate="0.7", style="sad"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=AzureTTSSettings(rate="0.7", style="sad")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = 
PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55s-update-settings-google-http-tts.py b/examples/foundational/55s-update-settings-google-http-tts.py new file mode 100644 index 000000000..ae3070124 --- /dev/null +++ b/examples/foundational/55s-update-settings-google-http-tts.py @@ -0,0 +1,124 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google.tts import GoogleHttpTTSService, GoogleHttpTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + 
"twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = GoogleHttpTTSService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Google HTTP TTS settings: speaking_rate=1.4") + await task.queue_frame( + TTSUpdateSettingsFrame(delta=GoogleHttpTTSSettings(speaking_rate=1.4)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client 
disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55s-update-settings-google-stream-tts.py b/examples/foundational/55s-update-settings-google-stream-tts.py new file mode 100644 index 000000000..1aba64254 --- /dev/null +++ b/examples/foundational/55s-update-settings-google-stream-tts.py @@ -0,0 +1,124 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google.tts import GoogleStreamTTSSettings, GoogleTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + 
audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = GoogleTTSService(credentials=os.getenv("GOOGLE_TEST_CREDENTIALS")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Google Stream TTS settings: speaking_rate=1.4") + await task.queue_frame( + TTSUpdateSettingsFrame(delta=GoogleStreamTTSSettings(speaking_rate=1.4)) + ) + + @transport.event_handler("on_client_disconnected") + async def 
on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55u-update-settings-rime-http-tts.py b/examples/foundational/55u-update-settings-rime-http-tts.py new file mode 100644 index 000000000..28e58ba08 --- /dev/null +++ b/examples/foundational/55u-update-settings-rime-http-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.rime.tts import RimeHttpTTSService, RimeTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) 
+ + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = RimeHttpTTSService( + api_key=os.getenv("RIME_API_KEY"), voice_id="eva", aiohttp_session=session + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Rime TTS settings: voice=rex") + await 
task.queue_frame(TTSUpdateSettingsFrame(delta=RimeTTSSettings(voice="rex"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55u-update-settings-rime-tts.py b/examples/foundational/55u-update-settings-rime-tts.py new file mode 100644 index 000000000..8992cb6db --- /dev/null +++ b/examples/foundational/55u-update-settings-rime-tts.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.rime.tts import RimeTTSService, RimeTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import 
DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = RimeTTSService( + api_key=os.getenv("RIME_API_KEY"), + voice_id="luna", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Rime TTS settings: voice=bond") + await task.queue_frame(TTSUpdateSettingsFrame(delta=RimeTTSSettings(voice="bond"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55v-update-settings-lmnt-tts.py b/examples/foundational/55v-update-settings-lmnt-tts.py new file mode 100644 index 000000000..01bc15ddf --- /dev/null +++ b/examples/foundational/55v-update-settings-lmnt-tts.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv 
import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.lmnt.tts import LmntTTSService, LmntTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = LmntTTSService( + api_key=os.getenv("LMNT_API_KEY"), + voice_id="lily", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating LMNT TTS settings: voice="tyler"') + await task.queue_frame(TTSUpdateSettingsFrame(delta=LmntTTSSettings(voice="tyler"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55w-update-settings-fish-tts.py b/examples/foundational/55w-update-settings-fish-tts.py new file mode 100644 index 000000000..72a2160ba --- /dev/null +++ b/examples/foundational/55w-update-settings-fish-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from 
dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.fish.tts import FishAudioTTSService, FishAudioTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = FishAudioTTSService( + api_key=os.getenv("FISH_API_KEY"), + model="4ce7e917cedd4bc2bb2e6ff3a46acaa1", # Barack Obama + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Fish Audio TTS settings: prosody_speed=1.5") + await task.queue_frame( + TTSUpdateSettingsFrame(delta=FishAudioTTSSettings(prosody_speed=1.5)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55x-update-settings-minimax-tts.py b/examples/foundational/55x-update-settings-minimax-tts.py new file mode 100644 index 000000000..fdb486415 --- /dev/null +++ 
b/examples/foundational/55x-update-settings-minimax-tts.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.minimax.tts import MiniMaxHttpTTSService, MiniMaxTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = MiniMaxHttpTTSService( + api_key=os.getenv("MINIMAX_API_KEY", ""), + group_id=os.getenv("MINIMAX_GROUP_ID", ""), + aiohttp_session=session, + ) + 
+ llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating MiniMax TTS settings: speed=1.5, emotion="happy"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=MiniMaxTTSSettings(speed=1.5, emotion="happy")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55y-update-settings-groq-tts.py b/examples/foundational/55y-update-settings-groq-tts.py new file mode 100644 index 000000000..3531509f2 --- /dev/null +++ b/examples/foundational/55y-update-settings-groq-tts.py @@ -0,0 +1,122 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.groq.tts import GroqTTSService, GroqTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = 
GroqTTSService(api_key=os.getenv("GROQ_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Groq TTS settings: voice=troy") + await task.queue_frame(TTSUpdateSettingsFrame(delta=GroqTTSSettings(voice="troy"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() 
diff --git a/examples/foundational/55z-update-settings-hume-tts.py b/examples/foundational/55z-update-settings-hume-tts.py new file mode 100644 index 000000000..493550469 --- /dev/null +++ b/examples/foundational/55z-update-settings-hume-tts.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.hume.tts import HumeTTSService, HumeTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = HumeTTSService( + 
api_key=os.getenv("HUME_API_KEY"), + voice_id="f898a92e-685f-43fa-985b-a46920f0650b", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Hume TTS settings: speed=2.0, description="Speak with excitement"') + await task.queue_frame( + TTSUpdateSettingsFrame( + delta=HumeTTSSettings(speed=2.0, description="Speak with excitement") + ) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + 
await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55za-update-settings-neuphonic-http-tts.py b/examples/foundational/55za-update-settings-neuphonic-http-tts.py new file mode 100644 index 000000000..6e1d18e4a --- /dev/null +++ b/examples/foundational/55za-update-settings-neuphonic-http-tts.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.neuphonic.tts import NeuphonicHttpTTSService, NeuphonicTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: 
BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + async with aiohttp.ClientSession() as session: + tts = NeuphonicHttpTTSService( + api_key=os.getenv("NEUPHONIC_API_KEY"), + aiohttp_session=session, + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Neuphonic HTTP TTS settings: speed=1.4") + await task.queue_frame(TTSUpdateSettingsFrame(delta=NeuphonicTTSSettings(speed=1.4))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: 
RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55za-update-settings-neuphonic-tts.py b/examples/foundational/55za-update-settings-neuphonic-tts.py new file mode 100644 index 000000000..861167a20 --- /dev/null +++ b/examples/foundational/55za-update-settings-neuphonic-tts.py @@ -0,0 +1,122 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.neuphonic.tts import NeuphonicTTSService, NeuphonicTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: 
TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = NeuphonicTTSService(api_key=os.getenv("NEUPHONIC_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Neuphonic TTS settings: speed=1.4") + await task.queue_frame(TTSUpdateSettingsFrame(delta=NeuphonicTTSSettings(speed=1.4))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def 
bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zb-update-settings-inworld-http-tts.py b/examples/foundational/55zb-update-settings-inworld-http-tts.py new file mode 100644 index 000000000..99353b87f --- /dev/null +++ b/examples/foundational/55zb-update-settings-inworld-http-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.inworld.tts import InworldHttpTTSService, InworldTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + 
audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = InworldHttpTTSService(api_key=os.getenv("INWORLD_API_KEY"), aiohttp_session=session) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Inworld TTS settings: speaking_rate=1.5, temperature=0.8") + await task.queue_frame( + TTSUpdateSettingsFrame(delta=InworldTTSSettings(speaking_rate=1.5, temperature=0.8)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + 
logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zb-update-settings-inworld-tts.py b/examples/foundational/55zb-update-settings-inworld-tts.py new file mode 100644 index 000000000..104001c15 --- /dev/null +++ b/examples/foundational/55zb-update-settings-inworld-tts.py @@ -0,0 +1,124 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.inworld.tts import InworldTTSService, InworldTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + 
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that updates Inworld TTS settings mid-session.

    Builds a Deepgram STT -> OpenAI LLM -> Inworld TTS pipeline on ``transport``
    and runs it. Ten seconds after a client connects, a
    ``TTSUpdateSettingsFrame`` with ``speaking_rate=1.5`` and
    ``temperature=0.8`` is queued on the pipeline task.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = InworldTTSService(api_key=os.getenv("INWORLD_API_KEY"))

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
    # -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): this mutates the list passed to LLMContext above; it presumably
        # relies on the context keeping a reference rather than a copy -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Wait 10 seconds, then queue the runtime settings update.
        await asyncio.sleep(10)
        logger.info("Updating Inworld TTS settings: speaking_rate=1.5, temperature=0.8")
        await task.queue_frame(
            TTSUpdateSettingsFrame(delta=InworldTTSSettings(speaking_rate=1.5, temperature=0.8))
        )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that updates the Gemini TTS prompt mid-session.

    Builds a Deepgram STT -> OpenAI LLM -> Gemini TTS pipeline on ``transport``
    and runs it. Ten seconds after a client connects, a
    ``TTSUpdateSettingsFrame`` with ``prompt="Speak slowly and dramatically"``
    is queued on the pipeline task.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = GeminiTTSService(
        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
        model="gemini-2.5-flash-tts",
        voice_id="Charon",
        params=GeminiTTSService.InputParams(
            language=Language.EN_US,
            prompt="You are a helpful AI assistant. Speak in a natural, conversational tone.",
        ),
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
    # -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): this mutates the list passed to LLMContext above; it presumably
        # relies on the context keeping a reference rather than a copy -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Wait 10 seconds, then queue the runtime settings update.
        await asyncio.sleep(10)
        logger.info('Updating Gemini TTS settings: prompt="Speak slowly and dramatically"')
        await task.queue_frame(
            TTSUpdateSettingsFrame(delta=GeminiTTSSettings(prompt="Speak slowly and dramatically"))
        )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that updates AWS Polly TTS settings mid-session.

    Builds a Deepgram STT -> OpenAI LLM -> AWS Polly TTS pipeline on
    ``transport`` and runs it. Ten seconds after a client connects, a
    ``TTSUpdateSettingsFrame`` with ``rate="fast"`` is queued on the pipeline
    task.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    # No explicit credentials are passed here.
    tts = AWSPollyTTSService()

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
    # -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): this mutates the list passed to LLMContext above; it presumably
        # relies on the context keeping a reference rather than a copy -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Wait 10 seconds, then queue the runtime settings update.
        await asyncio.sleep(10)
        logger.info('Updating AWS Polly TTS settings: rate="fast"')
        await task.queue_frame(TTSUpdateSettingsFrame(delta=AWSPollyTTSSettings(rate="fast")))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that updates Sarvam HTTP TTS settings mid-session.

    Builds a Deepgram STT -> OpenAI LLM -> Sarvam HTTP TTS pipeline on
    ``transport`` and runs it inside a single ``aiohttp.ClientSession`` that is
    handed to the TTS service. Ten seconds after a client connects, a
    ``TTSUpdateSettingsFrame`` with ``pace=1.5`` is queued on the pipeline task.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    # The session stays open until the runner returns, then is closed by the
    # context manager.
    async with aiohttp.ClientSession() as session:
        stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

        tts = SarvamHttpTTSService(api_key=os.getenv("SARVAM_API_KEY"), aiohttp_session=session)

        llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

        messages = [
            {
                "role": "system",
                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
            },
        ]

        context = LLMContext(messages)
        user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
            context,
            user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
        )

        # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
        # -> assistant context.
        pipeline = Pipeline(
            [
                transport.input(),
                stt,
                user_aggregator,
                llm,
                tts,
                transport.output(),
                assistant_aggregator,
            ]
        )

        task = PipelineTask(
            pipeline,
            params=PipelineParams(
                enable_metrics=True,
                enable_usage_metrics=True,
            ),
            idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
        )

        @transport.event_handler("on_client_connected")
        async def on_client_connected(transport, client):
            logger.info("Client connected")
            # NOTE(review): this mutates the list passed to LLMContext above; it presumably
            # relies on the context keeping a reference rather than a copy -- confirm.
            messages.append({"role": "system", "content": "Please introduce yourself to the user."})
            await task.queue_frames([LLMRunFrame()])

            # Wait 10 seconds, then queue the runtime settings update.
            await asyncio.sleep(10)
            logger.info("Updating Sarvam TTS settings: pace=1.5")
            await task.queue_frame(TTSUpdateSettingsFrame(delta=SarvamHttpTTSSettings(pace=1.5)))

        @transport.event_handler("on_client_disconnected")
        async def on_client_disconnected(transport, client):
            logger.info("Client disconnected")
            await task.cancel()

        runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

        await runner.run(task)
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that updates Sarvam TTS settings mid-session.

    Builds a Deepgram STT -> OpenAI LLM -> Sarvam TTS pipeline on ``transport``
    and runs it. Ten seconds after a client connects, a
    ``TTSUpdateSettingsFrame`` with ``pace=1.5`` is queued on the pipeline task.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = SarvamTTSService(api_key=os.getenv("SARVAM_API_KEY"))

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
    # -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): this mutates the list passed to LLMContext above; it presumably
        # relies on the context keeping a reference rather than a copy -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Wait 10 seconds, then queue the runtime settings update.
        await asyncio.sleep(10)
        logger.info("Updating Sarvam TTS settings: pace=1.5")
        await task.queue_frame(TTSUpdateSettingsFrame(delta=SarvamTTSSettings(pace=1.5)))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that switches the Camb TTS language mid-session.

    Builds a Deepgram STT -> OpenAI LLM -> Camb TTS pipeline on ``transport``
    and runs it. Ten seconds after a client connects, a
    ``TTSUpdateSettingsFrame`` with ``language=Language.ES`` is queued on the
    pipeline task.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CambTTSService(api_key=os.getenv("CAMB_API_KEY"))

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
    # -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): this mutates the list passed to LLMContext above; it presumably
        # relies on the context keeping a reference rather than a copy -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Wait 10 seconds, then queue the runtime settings update.
        await asyncio.sleep(10)
        logger.info("Updating Camb TTS settings: language -> Spanish")
        await task.queue_frame(TTSUpdateSettingsFrame(delta=CambTTSSettings(language=Language.ES)))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that updates Hathora TTS settings mid-session.

    Builds a Deepgram STT -> OpenAI LLM -> Hathora TTS pipeline on ``transport``
    and runs it. Ten seconds after a client connects, a
    ``TTSUpdateSettingsFrame`` with ``speed=1.5`` is queued on the pipeline
    task.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = HathoraTTSService(
        api_key=os.getenv("HATHORA_API_KEY"),
        model="hexgrad-kokoro-82m",
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
    # -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): this mutates the list passed to LLMContext above; it presumably
        # relies on the context keeping a reference rather than a copy -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Wait 10 seconds, then queue the runtime settings update.
        await asyncio.sleep(10)
        logger.info("Updating Hathora TTS settings: speed=1.5")
        await task.queue_frame(TTSUpdateSettingsFrame(delta=HathoraTTSSettings(speed=1.5)))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that switches the ResembleAI TTS voice mid-session.

    Builds a Deepgram STT -> OpenAI LLM -> ResembleAI TTS pipeline on
    ``transport`` and runs it. Ten seconds after a client connects, a
    ``TTSUpdateSettingsFrame`` is queued that sets the voice to the value of the
    ``RESEMBLE_VOICE_UUID_ALT`` environment variable. If that variable is unset
    or empty, the update is skipped with a warning.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = ResembleAITTSService(
        api_key=os.getenv("RESEMBLE_API_KEY"),
        voice_id=os.getenv("RESEMBLE_VOICE_UUID"),
    )

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
    # -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): this mutates the list passed to LLMContext above; it presumably
        # relies on the context keeping a reference rather than a copy -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Wait 10 seconds, then queue the runtime settings update.
        await asyncio.sleep(10)

        # os.getenv returns None when the variable is unset; queueing that would
        # put voice=None into the settings delta, so skip the update instead.
        alt_voice = os.getenv("RESEMBLE_VOICE_UUID_ALT")
        if not alt_voice:
            logger.warning(
                "RESEMBLE_VOICE_UUID_ALT is not set; skipping ResembleAI TTS voice update"
            )
            return

        logger.info("Updating ResembleAI TTS settings: voice (changed)")
        await task.queue_frame(
            TTSUpdateSettingsFrame(delta=ResembleAITTSSettings(voice=alt_voice))
        )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Run a voice bot that updates Azure LLM settings mid-session.

    Builds a Deepgram STT -> Azure LLM -> Cartesia TTS pipeline on ``transport``
    and runs it. Ten seconds after a client connects, an
    ``LLMUpdateSettingsFrame`` carrying ``OpenAILLMSettings(temperature=0.1)``
    is queued on the pipeline task.

    Args:
        transport: Transport providing the pipeline's input/output processors
            and the client connect/disconnect events.
        runner_args: Supplies ``pipeline_idle_timeout_secs`` for the task and
            ``handle_sigint`` for the runner.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = AzureLLMService(
        api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
        endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
        model=os.getenv("AZURE_CHATGPT_MODEL"),
    )

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    # Frame flow: transport in -> STT -> user context -> LLM -> TTS -> transport out
    # -> assistant context.
    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): this mutates the list passed to LLMContext above; it presumably
        # relies on the context keeping a reference rather than a copy -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Wait 10 seconds, then queue the runtime settings update.
        await asyncio.sleep(10)
        logger.info("Updating Azure LLM settings: temperature=0.1")
        await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1)))

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)
bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zi-update-settings-openai-llm.py b/examples/foundational/55zi-update-settings-openai-llm.py new file mode 100644 index 000000000..d84259cc3 --- /dev/null +++ b/examples/foundational/55zi-update-settings-openai-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + 
audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OpenAI LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + 
await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zj-update-settings-anthropic-llm.py b/examples/foundational/55zj-update-settings-anthropic-llm.py new file mode 100644 index 000000000..354702880 --- /dev/null +++ b/examples/foundational/55zj-update-settings-anthropic-llm.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.anthropic.llm import AnthropicLLMService, AnthropicLLMSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + 
audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = AnthropicLLMService(api_key=os.getenv("ANTHROPIC_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Anthropic LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=AnthropicLLMSettings(temperature=0.1))) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zk-update-settings-google-llm.py b/examples/foundational/55zk-update-settings-google-llm.py new file mode 100644 index 000000000..cd03a34cb --- /dev/null +++ b/examples/foundational/55zk-update-settings-google-llm.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.google.llm import GoogleLLMService, GoogleLLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import 
FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = GoogleLLMService(api_key=os.getenv("GOOGLE_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Google LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=GoogleLLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zk-update-settings-google-vertex-llm.py b/examples/foundational/55zk-update-settings-google-vertex-llm.py new file mode 100644 index 000000000..3feba582f --- /dev/null +++ b/examples/foundational/55zk-update-settings-google-vertex-llm.py @@ -0,0 +1,130 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + 
"""Foundational example: updating Google Vertex LLM settings at runtime.

Runs a Deepgram STT -> Google Vertex LLM -> Cartesia TTS voice pipeline. A fixed
delay after a client connects, an ``LLMUpdateSettingsFrame`` carrying a
``GoogleLLMSettings`` delta (a new ``temperature``) is queued on the pipeline task.

Environment variables read by this script: ``DEEPGRAM_API_KEY``,
``CARTESIA_API_KEY``, ``GOOGLE_VERTEX_TEST_CREDENTIALS``,
``GOOGLE_CLOUD_PROJECT_ID`` and ``GOOGLE_CLOUD_LOCATION``.
"""

import asyncio
import os

from dotenv import load_dotenv
from loguru import logger

from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import (
    LLMContextAggregatorPair,
    LLMUserAggregatorParams,
)
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.google.llm import GoogleLLMSettings
from pipecat.services.google.llm_vertex import GoogleVertexLLMService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams

load_dotenv(override=True)

# Seconds to wait after the client connects before queuing the settings update.
SETTINGS_UPDATE_DELAY_SECS = 10

# Temperature sent in the settings update. The log message below is derived from
# this value so the two cannot drift apart.
UPDATED_TEMPERATURE = 0.1

# Transport parameter factories keyed by transport name; handed to
# create_transport() in bot(). Each entry is a lambda, so the params object is
# only built when that entry is called.
transport_params = {
    "daily": lambda: DailyParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
    ),
    "twilio": lambda: FastAPIWebsocketParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
    ),
    "webrtc": lambda: TransportParams(
        audio_in_enabled=True,
        audio_out_enabled=True,
    ),
}


async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    """Assemble the voice pipeline on ``transport`` and run it.

    Args:
        transport: Transport whose ``input()`` / ``output()`` processors bracket
            the pipeline and whose client connect/disconnect events are handled.
        runner_args: Runner arguments; ``pipeline_idle_timeout_secs`` and
            ``handle_sigint`` are read from it.
    """
    logger.info("Starting bot")

    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
        voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    )

    llm = GoogleVertexLLMService(
        credentials=os.getenv("GOOGLE_VERTEX_TEST_CREDENTIALS"),
        project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
        location=os.getenv("GOOGLE_CLOUD_LOCATION"),
    )

    messages = [
        {
            "role": "system",
            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.",
        },
    ]

    context = LLMContext(messages)
    # The VAD analyzer is supplied through the user aggregator params here; the
    # transport params above do not set one.
    user_aggregator, assistant_aggregator = LLMContextAggregatorPair(
        context,
        user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()),
    )

    pipeline = Pipeline(
        [
            transport.input(),
            stt,
            user_aggregator,
            llm,
            tts,
            transport.output(),
            assistant_aggregator,
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        idle_timeout_secs=runner_args.pipeline_idle_timeout_secs,
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # NOTE(review): presumably LLMContext keeps a reference to this list, so
        # the appended message is part of the context for the run below -- confirm.
        messages.append({"role": "system", "content": "Please introduce yourself to the user."})
        await task.queue_frames([LLMRunFrame()])

        # Let the conversation start with the initial settings, then queue the
        # settings delta.
        await asyncio.sleep(SETTINGS_UPDATE_DELAY_SECS)
        logger.info(f"Updating Google Vertex LLM settings: temperature={UPDATED_TEMPERATURE}")
        await task.queue_frame(
            LLMUpdateSettingsFrame(delta=GoogleLLMSettings(temperature=UPDATED_TEMPERATURE))
        )

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    await runner.run(task)


async def bot(runner_args: RunnerArguments):
    """Main bot entry point compatible with Pipecat Cloud."""
    transport = await create_transport(runner_args, transport_params)
    await run_bot(transport, runner_args)


if __name__ == "__main__":
    from pipecat.runner.run import main

    main()
b/examples/foundational/55zl-update-settings-azure-realtime.py new file mode 100644 index 000000000..247bde14b --- /dev/null +++ b/examples/foundational/55zl-update-settings-azure-realtime.py @@ -0,0 +1,140 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService +from pipecat.services.openai.realtime import events +from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = AzureRealtimeLLMService( + 
api_key=os.getenv("AZURE_REALTIME_API_KEY"), + base_url=os.getenv("AZURE_REALTIME_BASE_URL"), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Azure Realtime LLM settings: output_modalities=['text']") + await task.queue_frame( + LLMUpdateSettingsFrame( + delta=OpenAIRealtimeLLMSettings( + session_properties=events.SessionProperties(output_modalities=["text"]) + ) + ) + ) + + await asyncio.sleep(10) + logger.info("Updating Azure Realtime LLM settings: output_modalities=['audio']") + await task.queue_frame( + LLMUpdateSettingsFrame( + delta=OpenAIRealtimeLLMSettings( + session_properties=events.SessionProperties(output_modalities=["audio"]) + ) + ) + ) + + @transport.event_handler("on_client_disconnected") 
+ async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zl-update-settings-openai-realtime.py b/examples/foundational/55zl-update-settings-openai-realtime.py new file mode 100644 index 000000000..f5c4afa26 --- /dev/null +++ b/examples/foundational/55zl-update-settings-openai-realtime.py @@ -0,0 +1,139 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.openai.realtime import events +from pipecat.services.openai.realtime.llm import ( + OpenAIRealtimeLLMService, + OpenAIRealtimeLLMSettings, +) +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = 
{ + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = OpenAIRealtimeLLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OpenAI Realtime LLM settings: output_modalities=['text']") + await task.queue_frame( + 
LLMUpdateSettingsFrame( + delta=OpenAIRealtimeLLMSettings( + session_properties=events.SessionProperties(output_modalities=["text"]) + ) + ) + ) + + await asyncio.sleep(10) + logger.info("Updating OpenAI Realtime LLM settings: output_modalities=['audio']") + await task.queue_frame( + LLMUpdateSettingsFrame( + delta=OpenAIRealtimeLLMSettings( + session_properties=events.SessionProperties(output_modalities=["audio"]) + ) + ) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zm-update-settings-gemini-live-vertex.py b/examples/foundational/55zm-update-settings-gemini-live-vertex.py new file mode 100644 index 000000000..96bd7a1c6 --- /dev/null +++ b/examples/foundational/55zm-update-settings-gemini-live-vertex.py @@ -0,0 +1,115 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.runner.types import RunnerArguments +from 
pipecat.runner.utils import create_transport +from pipecat.services.google.gemini_live.llm import GeminiLiveLLMSettings +from pipecat.services.google.gemini_live.llm_vertex import GeminiLiveVertexLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = GeminiLiveVertexLLMService( + credentials=os.getenv("GOOGLE_VERTEX_TEST_CREDENTIALS"), + project_id=os.getenv("GOOGLE_CLOUD_PROJECT_ID"), + location=os.getenv("GOOGLE_CLOUD_LOCATION"), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Gemini Live Vertex LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=GeminiLiveLLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zm-update-settings-gemini-live.py b/examples/foundational/55zm-update-settings-gemini-live.py new file mode 100644 index 000000000..a00343ac3 --- /dev/null +++ b/examples/foundational/55zm-update-settings-gemini-live.py @@ -0,0 +1,113 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import 
LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.google.gemini_live.llm import ( + GeminiLiveLLMService, + GeminiLiveLLMSettings, +) +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = GeminiLiveLLMService(api_key=os.getenv("GOOGLE_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Gemini Live LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=GeminiLiveLLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zn-update-settings-ultravox-realtime.py b/examples/foundational/55zn-update-settings-ultravox-realtime.py new file mode 100644 index 000000000..5bcbded6b --- /dev/null +++ b/examples/foundational/55zn-update-settings-ultravox-realtime.py @@ -0,0 +1,143 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import datetime +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.adapters.schemas.tools_schema import ToolsSchema +from 
pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.ultravox.llm import ( + OneShotInputParams, + UltravoxRealtimeLLMService, + UltravoxRealtimeLLMSettings, +) +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + system_prompt = "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way." 
+ + llm = UltravoxRealtimeLLMService( + params=OneShotInputParams( + api_key=os.getenv("ULTRAVOX_API_KEY"), + system_prompt=system_prompt, + temperature=0.3, + max_duration=datetime.timedelta(minutes=3), + ), + one_shot_selected_tools=ToolsSchema(standard_tools=[]), + ) + + messages = [ + { + "role": "system", + "content": system_prompt, + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Ultravox Realtime LLM settings: output_medium=text") + await task.queue_frame( + LLMUpdateSettingsFrame(delta=UltravoxRealtimeLLMSettings(output_medium="text")) + ) + + await asyncio.sleep(10) + logger.info("Updating Ultravox Realtime LLM settings: output_medium=voice") + await task.queue_frame( + LLMUpdateSettingsFrame(delta=UltravoxRealtimeLLMSettings(output_medium="voice")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def 
bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zo-update-settings-grok-realtime.py b/examples/foundational/55zo-update-settings-grok-realtime.py new file mode 100644 index 000000000..9444f126a --- /dev/null +++ b/examples/foundational/55zo-update-settings-grok-realtime.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + AssistantTurnStoppedMessage, + LLMContextAggregatorPair, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.grok.realtime import events +from pipecat.services.grok.realtime.llm import ( + GrokRealtimeLLMService, + GrokRealtimeLLMSettings, +) +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + 
vad_analyzer=SileroVADAnalyzer(), + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + vad_analyzer=SileroVADAnalyzer(), + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = GrokRealtimeLLMService(api_key=os.getenv("GROK_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair(context) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @assistant_aggregator.event_handler("on_assistant_turn_stopped") + async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMessage): + timestamp = f"[{message.timestamp}] " if message.timestamp else "" + line = f"{timestamp}assistant: {message.content}" + logger.info(f"Transcript: {line}") + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Grok Realtime LLM settings: voice='Rex'") + await task.queue_frame( + LLMUpdateSettingsFrame( + delta=GrokRealtimeLLMSettings( + session_properties=events.SessionProperties(voice="Rex") + ) + ) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + 
logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zp-update-settings-aws-bedrock-llm.py b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py new file mode 100644 index 000000000..3d3ee8fb5 --- /dev/null +++ b/examples/foundational/55zp-update-settings-aws-bedrock-llm.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.aws.llm import AWSBedrockLLMService, AWSBedrockLLMSettings +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: 
DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = AWSBedrockLLMService( + aws_region="us-west-2", + model="us.anthropic.claude-haiku-4-5-20251001-v1:0", + params=AWSBedrockLLMService.InputParams(temperature=0.8), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating AWS Bedrock LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=AWSBedrockLLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zq-update-settings-fal-stt.py b/examples/foundational/55zq-update-settings-fal-stt.py new file mode 100644 index 000000000..c0f0a134a --- /dev/null +++ b/examples/foundational/55zq-update-settings-fal-stt.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import 
os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.fal.stt import FalSTTService, FalSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = FalSTTService(api_key=os.getenv("FAL_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Fal STT settings: task="translate"') + await task.queue_frame(STTUpdateSettingsFrame(delta=FalSTTSettings(task="translate"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zr-update-settings-gradium-stt.py b/examples/foundational/55zr-update-settings-gradium-stt.py new file mode 100644 index 000000000..636d27bd8 --- /dev/null +++ b/examples/foundational/55zr-update-settings-gradium-stt.py @@ -0,0 
+1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.gradium.stt import GradiumSTTService, GradiumSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = GradiumSTTService( + api_key=os.getenv("GRADIUM_API_KEY"), + api_endpoint_base_url="wss://us.api.gradium.ai/api/speech/asr", + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + 
messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Gradium STT settings: delay_in_frames=5") + await task.queue_frame(STTUpdateSettingsFrame(delta=GradiumSTTSettings(delay_in_frames=5))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zs-update-settings-hathora-stt.py 
b/examples/foundational/55zs-update-settings-hathora-stt.py new file mode 100644 index 000000000..7a033490a --- /dev/null +++ b/examples/foundational/55zs-update-settings-hathora-stt.py @@ -0,0 +1,131 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.hathora.stt import HathoraSTTService, HathoraSTTSettings +from pipecat.services.hathora.utils import ConfigOption +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = HathoraSTTService( + 
api_key=os.getenv("HATHORA_API_KEY"), model="nvidia-parakeet-tdt-0.6b-v3" + ) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Hathora STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=HathoraSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat 
Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py b/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py new file mode 100644 index 000000000..60a042c5f --- /dev/null +++ b/examples/foundational/55zt-update-settings-nvidia-segmented-stt.py @@ -0,0 +1,127 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.nvidia.stt import NvidiaSegmentedSTTService, NvidiaSegmentedSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + 
audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = NvidiaSegmentedSTTService(api_key=os.getenv("NVIDIA_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating NVIDIA Segmented STT settings: profanity_filter=True") + await task.queue_frame( + STTUpdateSettingsFrame(delta=NvidiaSegmentedSTTSettings(profanity_filter=True)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + 
runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zt-update-settings-nvidia-stt.py b/examples/foundational/55zt-update-settings-nvidia-stt.py new file mode 100644 index 000000000..415f10b12 --- /dev/null +++ b/examples/foundational/55zt-update-settings-nvidia-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.nvidia.stt import NvidiaSTTService, NvidiaSTTSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + 
audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = NvidiaSTTService(api_key=os.getenv("NVIDIA_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating NVIDIA STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=NvidiaSTTSettings(language=Language.ES)) + ) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zu-update-settings-openai-realtime-stt.py b/examples/foundational/55zu-update-settings-openai-realtime-stt.py new file mode 100644 index 000000000..2bcd35f52 --- /dev/null +++ b/examples/foundational/55zu-update-settings-openai-realtime-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.openai.stt import OpenAIRealtimeSTTService, OpenAIRealtimeSTTSettings +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from 
pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = OpenAIRealtimeSTTService(api_key=os.getenv("OPENAI_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OpenAI Realtime STT settings: language=es") + await task.queue_frame( + STTUpdateSettingsFrame(delta=OpenAIRealtimeSTTSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zv-update-settings-asyncai-http-tts.py b/examples/foundational/55zv-update-settings-asyncai-http-tts.py new file mode 100644 index 000000000..9688f1bac --- /dev/null +++ b/examples/foundational/55zv-update-settings-asyncai-http-tts.py @@ -0,0 +1,133 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 
2-Clause License +# + + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.asyncai.tts import AsyncAIHttpTTSService, AsyncAITTSSettings +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + async with aiohttp.ClientSession() as session: + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = AsyncAIHttpTTSService( + api_key=os.getenv("ASYNCAI_API_KEY", ""), + voice_id=os.getenv("ASYNCAI_VOICE_ID", "e0f39dc4-f691-4e78-bba5-5c636692cc04"), + aiohttp_session=session, + ) + + llm = 
OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating AsyncAI HTTP TTS settings: language=es") + await task.queue_frame( + TTSUpdateSettingsFrame(delta=AsyncAITTSSettings(language=Language.ES)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/07e-interruptible-playht.py b/examples/foundational/55zv-update-settings-asyncai-tts.py similarity index 80% rename from examples/foundational/07e-interruptible-playht.py rename to examples/foundational/55zv-update-settings-asyncai-tts.py index b42f8f6a2..fe096b4be 100644 --- a/examples/foundational/07e-interruptible-playht.py +++ b/examples/foundational/55zv-update-settings-asyncai-tts.py @@ -4,14 +4,14 @@ # SPDX-License-Identifier: BSD 2-Clause License # - +import asyncio import os from dotenv import load_dotenv from loguru import logger from pipecat.audio.vad.silero import SileroVADAnalyzer -from pipecat.frames.frames import LLMRunFrame +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame from pipecat.pipeline.pipeline import Pipeline from pipecat.pipeline.runner import PipelineRunner from pipecat.pipeline.task import PipelineParams, PipelineTask @@ -22,9 +22,9 @@ from pipecat.processors.aggregators.llm_response_universal import ( ) from pipecat.runner.types import RunnerArguments from pipecat.runner.utils import create_transport +from pipecat.services.asyncai.tts import AsyncAITTSService, AsyncAITTSSettings from pipecat.services.deepgram.stt import DeepgramSTTService from pipecat.services.openai.llm import OpenAILLMService -from pipecat.services.playht.tts import PlayHTTTSService from pipecat.transcriptions.language import Language from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams @@ -32,8 +32,6 @@ from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams load_dotenv(override=True) -# We use lambdas to defer transport parameter creation until the transport -# type is selected at runtime. 
transport_params = { "daily": lambda: DailyParams( audio_in_enabled=True, @@ -55,11 +53,9 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - tts = PlayHTTTSService( - user_id=os.getenv("PLAYHT_USER_ID"), - api_key=os.getenv("PLAYHT_API_KEY"), - voice_url="s3://voice-cloning-zero-shot/e46b4027-b38d-4d24-b292-38fbca2be0ef/original/manifest.json", - params=PlayHTTTSService.InputParams(language=Language.EN), + tts = AsyncAITTSService( + api_key=os.getenv("ASYNCAI_API_KEY", ""), + voice_id=os.getenv("ASYNCAI_VOICE_ID", "e0f39dc4-f691-4e78-bba5-5c636692cc04"), ) llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) @@ -79,13 +75,13 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): pipeline = Pipeline( [ - transport.input(), # Transport user input + transport.input(), stt, - user_aggregator, # User responses - llm, # LLM - tts, # TTS - transport.output(), # Transport bot output - assistant_aggregator, # Assistant spoken responses + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, ] ) @@ -101,10 +97,15 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): @transport.event_handler("on_client_connected") async def on_client_connected(transport, client): logger.info(f"Client connected") - # Kick off the conversation. 
messages.append({"role": "system", "content": "Please introduce yourself to the user."}) await task.queue_frames([LLMRunFrame()]) + await asyncio.sleep(10) + logger.info("Updating AsyncAI TTS settings: language=es") + await task.queue_frame( + TTSUpdateSettingsFrame(delta=AsyncAITTSSettings(language=Language.ES)) + ) + @transport.event_handler("on_client_disconnected") async def on_client_disconnected(transport, client): logger.info(f"Client disconnected") diff --git a/examples/foundational/55zw-update-settings-gradium-tts.py b/examples/foundational/55zw-update-settings-gradium-tts.py new file mode 100644 index 000000000..d1069bfa4 --- /dev/null +++ b/examples/foundational/55zw-update-settings-gradium-tts.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.gradium.tts import GradiumTTSService, GradiumTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: 
DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = GradiumTTSService( + api_key=os.getenv("GRADIUM_API_KEY"), + voice_id="YTpq7expH9539ERJ", + url="wss://us.api.gradium.ai/api/speech/tts", + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Gradium TTS settings: voice="LFZvm12tW_z0xfGo"') + await task.queue_frame( + 
TTSUpdateSettingsFrame(delta=GradiumTTSSettings(voice="LFZvm12tW_z0xfGo")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zx-update-settings-cerebras-llm.py b/examples/foundational/55zx-update-settings-cerebras-llm.py new file mode 100644 index 000000000..6123487a3 --- /dev/null +++ b/examples/foundational/55zx-update-settings-cerebras-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.cerebras.llm import CerebrasLLMService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import 
BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = CerebrasLLMService(api_key=os.getenv("CEREBRAS_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Cerebras LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zy-update-settings-deepseek-llm.py b/examples/foundational/55zy-update-settings-deepseek-llm.py new file mode 100644 index 000000000..60cbab30b --- /dev/null +++ b/examples/foundational/55zy-update-settings-deepseek-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio 
+import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.deepseek.llm import DeepSeekLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = DeepSeekLLMService(api_key=os.getenv("DEEPSEEK_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating DeepSeek LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zz-update-settings-fireworks-llm.py b/examples/foundational/55zz-update-settings-fireworks-llm.py new file mode 100644 index 000000000..97554ae19 --- /dev/null 
+++ b/examples/foundational/55zz-update-settings-fireworks-llm.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.fireworks.llm import FireworksLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = 
FireworksLLMService( + api_key=os.getenv("FIREWORKS_API_KEY"), + model="accounts/fireworks/models/gpt-oss-20b", + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Fireworks LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import 
main + + main() diff --git a/examples/foundational/55zza-update-settings-grok-llm.py b/examples/foundational/55zza-update-settings-grok-llm.py new file mode 100644 index 000000000..8ce081e66 --- /dev/null +++ b/examples/foundational/55zza-update-settings-grok-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.grok.llm import GrokLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = 
DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = GrokLLMService(api_key=os.getenv("GROK_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Grok LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await 
create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzb-update-settings-groq-llm.py b/examples/foundational/55zzb-update-settings-groq-llm.py new file mode 100644 index 000000000..afde4499d --- /dev/null +++ b/examples/foundational/55zzb-update-settings-groq-llm.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.groq.llm import GroqLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} 
+ + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = GroqLLMService( + api_key=os.getenv("GROQ_API_KEY"), model="meta-llama/llama-4-maverick-17b-128e-instruct" + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Groq LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = 
PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzc-update-settings-mistral-llm.py b/examples/foundational/55zzc-update-settings-mistral-llm.py new file mode 100644 index 000000000..7eba98e97 --- /dev/null +++ b/examples/foundational/55zzc-update-settings-mistral-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.mistral.llm import MistralLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, 
+ audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = MistralLLMService(api_key=os.getenv("MISTRAL_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Mistral LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + 
@transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzd-update-settings-nvidia-llm.py b/examples/foundational/55zzd-update-settings-nvidia-llm.py new file mode 100644 index 000000000..ee57a3a24 --- /dev/null +++ b/examples/foundational/55zzd-update-settings-nvidia-llm.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.nvidia.llm import NvidiaLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from 
pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = NvidiaLLMService( + api_key=os.getenv("NVIDIA_API_KEY"), model="meta/llama-3.1-405b-instruct" + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating NVIDIA LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zze-update-settings-ollama-llm.py b/examples/foundational/55zze-update-settings-ollama-llm.py new file mode 100644 index 000000000..e22719ec1 --- /dev/null +++ b/examples/foundational/55zze-update-settings-ollama-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio 
+import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.ollama.llm import OLLamaLLMService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OLLamaLLMService(model="llama3.2") # Update to the model you're running locally + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. 
Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OLLama LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzf-update-settings-openrouter-llm.py b/examples/foundational/55zzf-update-settings-openrouter-llm.py new file mode 100644 index 000000000..fc3732192 --- /dev/null 
+++ b/examples/foundational/55zzf-update-settings-openrouter-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.openrouter.llm import OpenRouterLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = 
OpenRouterLLMService(api_key=os.getenv("OPENROUTER_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating OpenRouter LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55zzg-update-settings-perplexity-llm.py b/examples/foundational/55zzg-update-settings-perplexity-llm.py new file mode 100644 index 000000000..f55975685 --- /dev/null +++ b/examples/foundational/55zzg-update-settings-perplexity-llm.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.perplexity.llm import PerplexityLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = 
DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = PerplexityLLMService(api_key=os.getenv("PERPLEXITY_API_KEY")) + + messages = [ + { + "role": "user", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way. Start by introducing yourself.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Perplexity LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + 
await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzh-update-settings-qwen-llm.py b/examples/foundational/55zzh-update-settings-qwen-llm.py new file mode 100644 index 000000000..f31dc05a5 --- /dev/null +++ b/examples/foundational/55zzh-update-settings-qwen-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.qwen.llm import QwenLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, 
runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = QwenLLMService(api_key=os.getenv("QWEN_API_KEY"), model="qwen2.5-72b-instruct") + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Qwen LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: 
RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzi-update-settings-sambanova-llm.py b/examples/foundational/55zzi-update-settings-sambanova-llm.py new file mode 100644 index 000000000..96122cc03 --- /dev/null +++ b/examples/foundational/55zzi-update-settings-sambanova-llm.py @@ -0,0 +1,126 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.sambanova.llm import SambaNovaLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + 
audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = SambaNovaLLMService(api_key=os.getenv("SAMBANOVA_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating SambaNova LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client 
disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzj-update-settings-together-llm.py b/examples/foundational/55zzj-update-settings-together-llm.py new file mode 100644 index 000000000..710ef894a --- /dev/null +++ b/examples/foundational/55zzj-update-settings-together-llm.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.base_llm import OpenAILLMSettings +from pipecat.services.together.llm import TogetherLLMService +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + 
"daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = TogetherLLMService( + api_key=os.getenv("TOGETHER_API_KEY"), + model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating Together LLM settings: temperature=0.1") + await task.queue_frame(LLMUpdateSettingsFrame(delta=OpenAILLMSettings(temperature=0.1))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py b/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py new file mode 100644 index 000000000..301270797 --- /dev/null +++ b/examples/foundational/55zzk-update-settings-aws-nova-sonic-llm.py @@ -0,0 +1,124 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License 
+# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, LLMUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.aws.nova_sonic.llm import AWSNovaSonicLLMService, AWSNovaSonicLLMSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + llm = AWSNovaSonicLLMService( + secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"), + access_key_id=os.getenv("AWS_ACCESS_KEY_ID"), + region=os.getenv("AWS_REGION"), + ) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. 
Respond to what the user said in a creative and helpful way.", + }, + { + "role": "user", + "content": "Tell me a fun fact!", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + user_aggregator, + llm, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info("Updating AWS Nova Sonic LLM settings: temperature=0.1") + await task.queue_frame( + LLMUpdateSettingsFrame(delta=AWSNovaSonicLLMSettings(temperature=0.1)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzl-update-settings-nvidia-tts.py b/examples/foundational/55zzl-update-settings-nvidia-tts.py new file mode 100644 index 000000000..a8bd50dcd --- /dev/null +++ b/examples/foundational/55zzl-update-settings-nvidia-tts.py @@ -0,0 +1,125 @@ +# +# Copyright (c) 2024-2026, Daily +# +# 
SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.nvidia.tts import NvidiaTTSService, NvidiaTTSSettings +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.transcriptions.language import Language +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + tts = NvidiaTTSService(api_key=os.getenv("NVIDIA_API_KEY")) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. 
Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating NVIDIA TTS settings: language="ES_US"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=NvidiaTTSSettings(language=Language.ES_US)) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/examples/foundational/55zzm-update-settings-speechmatics-tts.py b/examples/foundational/55zzm-update-settings-speechmatics-tts.py new file mode 100644 index 000000000..39ed792dd --- /dev/null +++ 
b/examples/foundational/55zzm-update-settings-speechmatics-tts.py @@ -0,0 +1,129 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +import aiohttp +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, TTSUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.deepgram.stt import DeepgramSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.speechmatics.tts import SpeechmaticsTTSService, SpeechmaticsTTSSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) + + async with aiohttp.ClientSession() as session: + tts = SpeechmaticsTTSService( + api_key=os.getenv("SPEECHMATICS_API_KEY"), + aiohttp_session=session, + ) + + llm = 
OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Speechmatics TTS settings: voice="theo"') + await task.queue_frame( + TTSUpdateSettingsFrame(delta=SpeechmaticsTTSSettings(voice="theo")) + ) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git 
a/examples/foundational/55zzn-update-settings-groq-stt.py b/examples/foundational/55zzn-update-settings-groq-stt.py new file mode 100644 index 000000000..b00ecda81 --- /dev/null +++ b/examples/foundational/55zzn-update-settings-groq-stt.py @@ -0,0 +1,128 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import asyncio +import os + +from dotenv import load_dotenv +from loguru import logger + +from pipecat.audio.vad.silero import SileroVADAnalyzer +from pipecat.frames.frames import LLMRunFrame, STTUpdateSettingsFrame +from pipecat.pipeline.pipeline import Pipeline +from pipecat.pipeline.runner import PipelineRunner +from pipecat.pipeline.task import PipelineParams, PipelineTask +from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_response_universal import ( + LLMContextAggregatorPair, + LLMUserAggregatorParams, +) +from pipecat.runner.types import RunnerArguments +from pipecat.runner.utils import create_transport +from pipecat.services.cartesia.tts import CartesiaTTSService +from pipecat.services.groq.stt import GroqSTTService +from pipecat.services.openai.llm import OpenAILLMService +from pipecat.services.whisper.base_stt import BaseWhisperSTTSettings +from pipecat.transports.base_transport import BaseTransport, TransportParams +from pipecat.transports.daily.transport import DailyParams +from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams + +load_dotenv(override=True) + +transport_params = { + "daily": lambda: DailyParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "twilio": lambda: FastAPIWebsocketParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), + "webrtc": lambda: TransportParams( + audio_in_enabled=True, + audio_out_enabled=True, + ), +} + + +async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): + logger.info(f"Starting bot") + + stt = GroqSTTService( + api_key=os.getenv("GROQ_API_KEY"), + ) 
+ + tts = CartesiaTTSService( + api_key=os.getenv("CARTESIA_API_KEY"), + voice_id="71a7ad14-091c-4e8e-a314-022ece01c121", # British Reading Lady + ) + + llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY")) + + messages = [ + { + "role": "system", + "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be spoken aloud, so avoid special characters that can't easily be spoken, such as emojis or bullet points. Respond to what the user said in a creative and helpful way.", + }, + ] + + context = LLMContext(messages) + user_aggregator, assistant_aggregator = LLMContextAggregatorPair( + context, + user_params=LLMUserAggregatorParams(vad_analyzer=SileroVADAnalyzer()), + ) + + pipeline = Pipeline( + [ + transport.input(), + stt, + user_aggregator, + llm, + tts, + transport.output(), + assistant_aggregator, + ] + ) + + task = PipelineTask( + pipeline, + params=PipelineParams( + enable_metrics=True, + enable_usage_metrics=True, + ), + idle_timeout_secs=runner_args.pipeline_idle_timeout_secs, + ) + + @transport.event_handler("on_client_connected") + async def on_client_connected(transport, client): + logger.info(f"Client connected") + messages.append({"role": "system", "content": "Please introduce yourself to the user."}) + await task.queue_frames([LLMRunFrame()]) + + await asyncio.sleep(10) + logger.info('Updating Groq STT settings: language="es"') + await task.queue_frame(STTUpdateSettingsFrame(delta=BaseWhisperSTTSettings(language="es"))) + + @transport.event_handler("on_client_disconnected") + async def on_client_disconnected(transport, client): + logger.info(f"Client disconnected") + await task.cancel() + + runner = PipelineRunner(handle_sigint=runner_args.handle_sigint) + + await runner.run(task) + + +async def bot(runner_args: RunnerArguments): + """Main bot entry point compatible with Pipecat Cloud.""" + transport = await create_transport(runner_args, transport_params) + await 
run_bot(transport, runner_args) + + +if __name__ == "__main__": + from pipecat.runner.run import main + + main() diff --git a/pyproject.toml b/pyproject.toml index b312d553e..85e9ede36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "docstring_parser~=0.16", "loguru~=0.7.3", "Markdown>=3.7,<4", - "nltk>=3.9.1,<4", + "nltk>=3.9.3,<4", "numpy>=1.26.4,<3", "Pillow>=11.1.0,<13", "protobuf~=5.29.6", @@ -36,10 +36,12 @@ dependencies = [ "soxr~=0.5.0", "openai>=1.74.0,<3", # Pinning numba to resolve package dependencies - "numba==0.61.2", + "numba>=0.61.2", "wait_for2>=0.4.1; python_version<'3.12'", - # Pipecat optionals - "pipecat-ai[local-smart-turn-v3]", + # Required by LocalSmartTurnAnalyzerV3 + # Inlined here instead of using a self-referential extra for Poetry compatibility. + "transformers", + "onnxruntime~=1.23.2", ] [project.urls] @@ -85,7 +87,6 @@ livekit = [ "livekit~=1.0.13", "livekit-api~=1.0.5", "tenacity>=8.2.3,<10.0.0", lmnt = [ "pipecat-ai[websockets-base]" ] local = [ "pyaudio~=0.2.14" ] local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "torchaudio>=2.5.0,<3" ] -local-smart-turn-v3 = [ "transformers", "onnxruntime~=1.23.2" ] mcp = [ "mcp[cli]>=1.11.0,<2" ] mem0 = [ "mem0ai~=0.1.94" ] mistral = [] @@ -100,13 +101,12 @@ openpipe = [ "openpipe>=4.50.0,<6" ] openrouter = [] perplexity = [] piper = [ "piper-tts>=1.3.0,<2", "requests>=2.32.5,<3" ] -playht = [ "pipecat-ai[websockets-base]" ] qwen = [] remote-smart-turn = [] resembleai = [ "pipecat-ai[websockets-base]" ] rime = [ "pipecat-ai[websockets-base]" ] riva = [ "pipecat-ai[nvidia]" ] -runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.128.0", "pipecat-ai-small-webrtc-prebuilt>=2.2.0"] +runner = [ "python-dotenv>=1.0.0,<2.0.0", "uvicorn>=0.32.0,<1.0.0", "fastapi>=0.115.6,<0.128.0", "pipecat-ai-small-webrtc-prebuilt>=2.3.0"] sagemaker = ["aws_sdk_sagemaker_runtime_http2; python_version>='3.12'"] sambanova = 
[] sarvam = [ "sarvamai==0.1.26a2", "pipecat-ai[websockets-base]" ] @@ -257,6 +257,11 @@ directory = "fixed" name = "Fixed" showcontent = true +[[tool.towncrier.type]] +directory = "performance" +name = "Performance" +showcontent = true + [[tool.towncrier.type]] directory = "security" name = "Security" diff --git a/scripts/evals/run-release-evals.py b/scripts/evals/run-release-evals.py index 19e5d2649..77fc23a33 100644 --- a/scripts/evals/run-release-evals.py +++ b/scripts/evals/run-release-evals.py @@ -123,6 +123,7 @@ TESTS_07 = [ ("07n-interruptible-google.py", EVAL_SIMPLE_MATH), ("07n-interruptible-google-http.py", EVAL_SIMPLE_MATH), ("07o-interruptible-assemblyai.py", EVAL_SIMPLE_MATH), + ("07p-interruptible-krisp-viva.py", EVAL_SIMPLE_MATH), ("07q-interruptible-rime.py", EVAL_SIMPLE_MATH), ("07q-interruptible-rime-http.py", EVAL_SIMPLE_MATH), ("07r-interruptible-nvidia.py", EVAL_SIMPLE_MATH), @@ -148,8 +149,6 @@ TESTS_07 = [ ("07zj-interruptible-kokoro.py", EVAL_SIMPLE_MATH), # Needs a local XTTS docker instance running. # ("07i-interruptible-xtts.py", EVAL_SIMPLE_MATH), - # Needs a Krisp license. - # ("07p-interruptible-krisp.py", EVAL_SIMPLE_MATH), ] TESTS_12 = [ diff --git a/src/pipecat/audio/filters/krisp_viva_filter.py b/src/pipecat/audio/filters/krisp_viva_filter.py index ea5bfb8de..1e2f6c81b 100644 --- a/src/pipecat/audio/filters/krisp_viva_filter.py +++ b/src/pipecat/audio/filters/krisp_viva_filter.py @@ -39,7 +39,11 @@ class KrispVivaFilter(BaseAudioFilter): """ def __init__( - self, model_path: str = None, frame_duration: int = 10, noise_suppression_level: int = 100 + self, + model_path: str = None, + frame_duration: int = 10, + noise_suppression_level: int = 100, + api_key: str = "", ) -> None: """Initialize the Krisp noise reduction filter. @@ -48,6 +52,8 @@ class KrispVivaFilter(BaseAudioFilter): If None, uses KRISP_VIVA_FILTER_MODEL_PATH environment variable. frame_duration: Frame duration in milliseconds. 
noise_suppression_level: Noise suppression level. + api_key: Krisp SDK API key. If empty, falls back to + the KRISP_VIVA_API_KEY environment variable. Raises: ValueError: If model_path is not provided and KRISP_VIVA_FILTER_MODEL_PATH is not set. @@ -57,6 +63,8 @@ class KrispVivaFilter(BaseAudioFilter): """ super().__init__() + self._api_key = api_key + try: # Set model path, checking environment if not specified if model_path: @@ -132,7 +140,7 @@ class KrispVivaFilter(BaseAudioFilter): """ try: # Acquire SDK reference (will initialize on first call) - KrispVivaSDKManager.acquire() + KrispVivaSDKManager.acquire(api_key=self._api_key) self._session = self._create_session(sample_rate, self._frame_duration_ms) except Exception as e: logger.error(f"Failed to start Krisp session: {e}", exc_info=True) diff --git a/src/pipecat/audio/krisp_instance.py b/src/pipecat/audio/krisp_instance.py index fae2c691e..5ebfd24cc 100644 --- a/src/pipecat/audio/krisp_instance.py +++ b/src/pipecat/audio/krisp_instance.py @@ -7,6 +7,7 @@ """Krisp Instance manager for pipecat audio.""" import atexit +import os from threading import Lock from loguru import logger @@ -88,17 +89,26 @@ class KrispVivaSDKManager: _lock = Lock() _reference_count = 0 + @staticmethod + def _license_callback(error, error_message): + """Callback for Krisp SDK licensing errors.""" + logger.error(f"Krisp licensing error: {error} - {error_message}") + @staticmethod def _log_callback(log_message, log_level): """Thread-safe callback for Krisp SDK logging.""" logger.info(f"[{log_level}] {log_message}") @classmethod - def acquire(cls): + def acquire(cls, api_key: str = ""): """Acquire a reference to the SDK (initializes if needed). Call this when creating a filter instance. + Args: + api_key: Krisp SDK API key. If empty, falls back to the + KRISP_VIVA_API_KEY environment variable. 
+ Raises: Exception: If SDK initialization fails (propagated from krisp_audio) """ @@ -106,7 +116,19 @@ class KrispVivaSDKManager: # Initialize SDK on first acquire if cls._reference_count == 0: try: - krisp_audio.globalInit("", cls._log_callback, krisp_audio.LogLevel.Off) + key = api_key or os.environ.get("KRISP_VIVA_API_KEY", "") + try: + # New SDK signature (requires license key) + krisp_audio.globalInit( + "", + key, + cls._license_callback, + cls._log_callback, + krisp_audio.LogLevel.Off, + ) + except TypeError: + # Old SDK signature (no license key) + krisp_audio.globalInit("", cls._log_callback, krisp_audio.LogLevel.Off) cls._initialized = True diff --git a/src/pipecat/audio/turn/krisp_viva_turn.py b/src/pipecat/audio/turn/krisp_viva_turn.py index 04e59421f..3aa540491 100644 --- a/src/pipecat/audio/turn/krisp_viva_turn.py +++ b/src/pipecat/audio/turn/krisp_viva_turn.py @@ -15,6 +15,7 @@ passed directly to the constructor. """ import os +import time from typing import Optional, Tuple import numpy as np @@ -26,7 +27,7 @@ from pipecat.audio.krisp_instance import ( int_to_krisp_sample_rate, ) from pipecat.audio.turn.base_turn_analyzer import BaseTurnAnalyzer, BaseTurnParams, EndOfTurnState -from pipecat.metrics.metrics import MetricsData +from pipecat.metrics.metrics import MetricsData, TurnMetricsData try: import krisp_audio @@ -63,6 +64,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): model_path: Optional[str] = None, sample_rate: Optional[int] = None, params: Optional[KrispTurnParams] = None, + api_key: str = "", ) -> None: """Initialize the Krisp turn analyzer. @@ -72,6 +74,8 @@ class KrispVivaTurn(BaseTurnAnalyzer): sample_rate: Optional initial sample rate for audio processing. If provided, this will be used as the fixed sample rate. params: Configuration parameters for turn analysis behavior. + api_key: Krisp SDK API key. If empty, falls back to + the KRISP_VIVA_API_KEY environment variable. 
Raises: ValueError: If model_path is not provided and KRISP_VIVA_TURN_MODEL_PATH is not set. @@ -83,7 +87,7 @@ class KrispVivaTurn(BaseTurnAnalyzer): # Acquire SDK reference (will initialize on first call) try: - KrispVivaSDKManager.acquire() + KrispVivaSDKManager.acquire(api_key=api_key) self._sdk_acquired = True except Exception as e: self._sdk_acquired = False @@ -115,6 +119,9 @@ class KrispVivaTurn(BaseTurnAnalyzer): self._last_probability = None self._frame_probabilities = [] self._last_state = EndOfTurnState.INCOMPLETE + self._speech_stopped_time: Optional[float] = None + self._e2e_processing_time_ms: Optional[float] = None + self._last_metrics: Optional[TurnMetricsData] = None # Create session with provided sample rate or default to 16000 Hz # This preloads the model to improve latency when set_sample_rate is called later @@ -288,7 +295,14 @@ class KrispVivaTurn(BaseTurnAnalyzer): # Track speech start time if not self._speech_triggered: logger.trace("Speech detected, turn analysis started") + self._e2e_processing_time_ms = None self._speech_triggered = True + # Reset speech stopped time when speech resumes + self._speech_stopped_time = None + else: + # Record the moment speech transitions to non-speech + if self._speech_triggered and self._speech_stopped_time is None: + self._speech_stopped_time = time.perf_counter() # Note: We don't immediately mark as complete on silence detection. # Instead, we wait for the model's probability check below to confirm # end-of-turn based on the threshold. 
@@ -308,6 +322,18 @@ class KrispVivaTurn(BaseTurnAnalyzer): # Only mark as complete if we've detected speech and the model # confirms with sufficient confidence if self._speech_triggered and prob >= self._params.threshold: + # Calculate e2e processing time: time from speech stop to threshold crossing + if self._speech_stopped_time is not None: + self._e2e_processing_time_ms = ( + time.perf_counter() - self._speech_stopped_time + ) * 1000 + self._last_metrics = TurnMetricsData( + processor="KrispVivaTurn", + is_complete=True, + probability=prob, + e2e_processing_time_ms=self._e2e_processing_time_ms, + ) + logger.debug(f"Krisp turn complete") state = EndOfTurnState.COMPLETE self.clear() break @@ -329,12 +355,15 @@ class KrispVivaTurn(BaseTurnAnalyzer): Tuple containing the end-of-turn state and optional metrics data. Returns the last state determined by append_audio(). """ - # For real-time processing, the state is determined in append_audio - # Return the last state that was computed - return self._last_state, None + # For real-time processing, the state is determined in append_audio. + # Consume metrics so they aren't pushed twice. 
+ metrics = self._last_metrics + self._last_metrics = None + return self._last_state, metrics def clear(self): """Reset the turn analyzer to its initial state.""" self._speech_triggered = False self._audio_buffer.clear() self._last_state = EndOfTurnState.INCOMPLETE + self._speech_stopped_time = None diff --git a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py index 66b45a8f6..fa652d884 100644 --- a/src/pipecat/audio/turn/smart_turn/base_smart_turn.py +++ b/src/pipecat/audio/turn/smart_turn/base_smart_turn.py @@ -21,7 +21,7 @@ import numpy as np from loguru import logger from pipecat.audio.turn.base_turn_analyzer import BaseTurnAnalyzer, BaseTurnParams, EndOfTurnState -from pipecat.metrics.metrics import MetricsData, SmartTurnMetricsData +from pipecat.metrics.metrics import MetricsData, TurnMetricsData # Default timing parameters STOP_SECS = 3 @@ -222,18 +222,11 @@ class BaseSmartTurn(BaseTurnAnalyzer): # Calculate processing time e2e_processing_time_ms = (end_time - start_time) * 1000 - # Extract metrics from the nested structure - metrics = result.get("metrics", {}) - inference_time = metrics.get("inference_time", 0) - total_time = metrics.get("total_time", 0) - # Prepare the result data - result_data = SmartTurnMetricsData( + result_data = TurnMetricsData( processor="BaseSmartTurn", is_complete=result["prediction"] == 1, probability=result["probability"], - inference_time_ms=inference_time * 1000, - server_total_time_ms=total_time * 1000, e2e_processing_time_ms=e2e_processing_time_ms, ) @@ -241,8 +234,6 @@ class BaseSmartTurn(BaseTurnAnalyzer): f"Prediction: {'Complete' if result_data.is_complete else 'Incomplete'}" ) logger.trace(f"Probability of complete: {result_data.probability:.4f}") - logger.trace(f"Inference time: {result_data.inference_time_ms:.2f}ms") - logger.trace(f"Server total time: {result_data.server_total_time_ms:.2f}ms") logger.trace(f"E2E processing time: 
{result_data.e2e_processing_time_ms:.2f}ms") except SmartTurnTimeoutException: logger.debug( diff --git a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py index 1eae7cc02..a8cc249fd 100644 --- a/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +++ b/src/pipecat/audio/turn/smart_turn/local_smart_turn_v3.py @@ -13,20 +13,16 @@ local end-of-turn detection without requiring network connectivity. from typing import Any, Dict, Optional import numpy as np +import onnxruntime as ort +import soxr from loguru import logger +from transformers import WhisperFeatureExtractor from pipecat.audio.turn.smart_turn.base_smart_turn import BaseSmartTurn from pipecat.utils.env import env_truthy -try: - import onnxruntime as ort - from transformers import WhisperFeatureExtractor -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error( - "In order to use LocalSmartTurnAnalyzerV3, you need to `pip install pipecat-ai[local-smart-turn-v3]`." - ) - raise Exception(f"Missing module: {e}") +# The Whisper-based ONNX model expects 16 kHz audio input. +_MODEL_SAMPLE_RATE = 16000 class LocalSmartTurnAnalyzerV3(BaseSmartTurn): @@ -85,7 +81,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): logger.debug("Loaded Local Smart Turn v3.x") def _write_audio_to_wav( - self, audio_array: np.ndarray, sample_rate: int = 16000, suffix: str = "" + self, audio_array: np.ndarray, sample_rate: int = _MODEL_SAMPLE_RATE, suffix: str = "" ) -> None: """Write audio data to a WAV file in a background thread. @@ -127,10 +123,27 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): thread = threading.Thread(target=write_wav, daemon=True) thread.start() + def _resample_to_model_rate(self, audio_array: np.ndarray) -> np.ndarray: + """Resample audio to the model's expected sample rate (16 kHz). + + Args: + audio_array: Audio data as a float32 numpy array. + + Returns: + Resampled audio array at 16 kHz. 
+ """ + actual_rate = self._sample_rate or _MODEL_SAMPLE_RATE + if actual_rate == _MODEL_SAMPLE_RATE: + return audio_array + + return soxr.resample(audio_array, actual_rate, _MODEL_SAMPLE_RATE, quality="VHQ") + def _predict_endpoint(self, audio_array: np.ndarray) -> Dict[str, Any]: """Predict end-of-turn using local ONNX model.""" - def truncate_audio_to_last_n_seconds(audio_array, n_seconds=8, sample_rate=16000): + def truncate_audio_to_last_n_seconds( + audio_array, n_seconds=8, sample_rate=_MODEL_SAMPLE_RATE + ): """Truncate audio to last n seconds or pad with zeros to meet n seconds.""" max_samples = n_seconds * sample_rate if len(audio_array) > max_samples: @@ -142,6 +155,10 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): return audio_array audio_for_logging = audio_array + actual_rate = self._sample_rate or _MODEL_SAMPLE_RATE + + # Resample to 16 kHz if the pipeline uses a different sample rate + audio_array = self._resample_to_model_rate(audio_array) # Truncate to 8 seconds (keeping the end) or pad to 8 seconds audio_array = truncate_audio_to_last_n_seconds(audio_array, n_seconds=8) @@ -149,10 +166,10 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): # Process audio using Whisper's feature extractor inputs = self._feature_extractor( audio_array, - sampling_rate=16000, + sampling_rate=_MODEL_SAMPLE_RATE, return_tensors="np", padding="max_length", - max_length=8 * 16000, + max_length=8 * _MODEL_SAMPLE_RATE, truncation=True, do_normalize=True, ) @@ -172,7 +189,7 @@ class LocalSmartTurnAnalyzerV3(BaseSmartTurn): if self._log_data: suffix = "_complete" if prediction == 1 else "_incomplete" - self._write_audio_to_wav(audio_for_logging, sample_rate=16000, suffix=suffix) + self._write_audio_to_wav(audio_for_logging, sample_rate=actual_rate, suffix=suffix) return { "prediction": prediction, diff --git a/src/pipecat/extensions/voicemail/voicemail_detector.py b/src/pipecat/extensions/voicemail/voicemail_detector.py index 7e22e535a..470f5dd54 100644 --- 
a/src/pipecat/extensions/voicemail/voicemail_detector.py +++ b/src/pipecat/extensions/voicemail/voicemail_detector.py @@ -368,7 +368,7 @@ class ClassificationProcessor(FrameProcessor): await self._voicemail_notifier.notify() # Clear buffered TTS frames # Interrupt the current pipeline to stop any ongoing processing - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() # Set the voicemail event to trigger the voicemail handler self._voicemail_event.clear() diff --git a/src/pipecat/frames/frames.py b/src/pipecat/frames/frames.py index 2d4bbc65e..390eb93dd 100644 --- a/src/pipecat/frames/frames.py +++ b/src/pipecat/frames/frames.py @@ -11,10 +11,8 @@ including data frames, system frames, and control frames for audio, video, text, and LLM processing. """ -import asyncio import time from dataclasses import dataclass, field -from enum import Enum from typing import ( TYPE_CHECKING, Any, @@ -36,12 +34,15 @@ from pipecat.audio.turn.base_turn_analyzer import BaseTurnParams from pipecat.audio.vad.vad_analyzer import VADParams from pipecat.metrics.metrics import MetricsData from pipecat.transcriptions.language import Language +from pipecat.utils.text.base_text_aggregator import AggregationType from pipecat.utils.time import nanoseconds_to_str from pipecat.utils.utils import obj_count, obj_id if TYPE_CHECKING: from pipecat.processors.aggregators.llm_context import LLMContext, NotGiven from pipecat.processors.frame_processor import FrameProcessor + from pipecat.services.settings import ServiceSettings + from pipecat.utils.context.llm_context_summarization import LLMContextSummaryConfig from pipecat.utils.tracing.tracing_context import TracingContext @@ -392,16 +393,6 @@ class LLMTextFrame(TextFrame): self.includes_inter_frame_spaces = True -class AggregationType(str, Enum): - """Built-in aggregation strings.""" - - SENTENCE = "sentence" - WORD = "word" - - def __str__(self): - return self.value - - @dataclass class 
AggregatedTextFrame(TextFrame): """Text frame representing an aggregation of TextFrames. @@ -1149,24 +1140,9 @@ class InterruptionFrame(SystemFrame): This frame is used to interrupt the pipeline. For example, when a user starts speaking to cancel any in-progress bot output. It can also be pushed by any processor. - - Parameters: - event: Optional event set when the frame has fully traversed the - pipeline. - """ - event: Optional[asyncio.Event] = None - - def complete(self): - """Signal that this interruption has been fully processed. - - Called automatically when the frame reaches the pipeline sink, or - manually when the frame is consumed before reaching it (e.g. when - the user is muted). - """ - if self.event: - self.event.set() + pass @dataclass @@ -1833,16 +1809,11 @@ class InterruptionTaskFrame(TaskFrame): """Frame indicating the pipeline should be interrupted. This frame should be pushed upstream to indicate the pipeline should be - interrupted. The pipeline task converts this into an `InterruptionFrame` and - sends it downstream. The `event` is passed to the `InterruptionFrame` so it - can signal when the interruption has fully traversed the pipeline. - - Parameters: - event: Optional event passed to the corresponding `InterruptionFrame`. - + interrupted. The pipeline task converts this into an `InterruptionFrame` + and sends it downstream. """ - event: Optional[asyncio.Event] = None + pass @dataclass @@ -1918,6 +1889,29 @@ class StopFrame(ControlFrame, UninterruptibleFrame): pass +@dataclass +class BotConnectedFrame(SystemFrame): + """Frame indicating the bot has connected to the transport service. + + Pushed downstream by SFU transports (Daily, LiveKit, HeyGen, Tavus) + when the bot successfully joins the room. Non-SFU transports do not + emit this frame. + """ + + pass + + +@dataclass +class ClientConnectedFrame(SystemFrame): + """Frame indicating that a client has connected to the transport. 
+ + Pushed downstream by the input transport when a client (participant) + connects. Used by observers to measure transport readiness timing. + """ + + pass + + @dataclass class OutputTransportReadyFrame(ControlFrame): """Frame indicating that the output transport is ready. @@ -1999,6 +1993,32 @@ class LLMFullResponseEndFrame(ControlFrame): self.skip_tts = None +@dataclass +class LLMAssistantPushAggregationFrame(ControlFrame): + """Frame that forces the LLM assistant aggregator to push its current aggregation to context. + + When received by ``LLMAssistantAggregator``, any text that has been accumulated + in the aggregation buffer is immediately committed to the conversation context as + an assistant message, without waiting for an ``LLMFullResponseEndFrame``. + """ + + +@dataclass +class LLMSummarizeContextFrame(ControlFrame): + """Frame requesting on-demand context summarization. + + Push this frame into the pipeline to trigger a manual context summarization. + + Parameters: + config: Optional per-request override for summary generation settings + (prompt, token budget, messages to keep). If ``None``, the + summarizer's default :class:`~pipecat.utils.context.llm_context_summarization.LLMContextSummaryConfig` + is used. + """ + + config: Optional["LLMContextSummaryConfig"] = None + + @dataclass class LLMContextSummaryRequestFrame(ControlFrame): """Frame requesting context summarization from an LLM service. @@ -2018,6 +2038,8 @@ class LLMContextSummaryRequestFrame(ControlFrame): the summary text. summarization_prompt: System prompt instructing the LLM how to generate the summary. + summarization_timeout: Maximum time in seconds for the LLM to generate a + summary. When None, a default timeout of 120s is applied. 
""" request_id: str @@ -2025,6 +2047,7 @@ class LLMContextSummaryRequestFrame(ControlFrame): min_messages_to_keep: int target_context_tokens: int summarization_prompt: str + summarization_timeout: Optional[float] = None @dataclass @@ -2117,16 +2140,24 @@ class TTSStoppedFrame(ControlFrame): @dataclass -class ServiceUpdateSettingsFrame(ControlFrame): +class ServiceUpdateSettingsFrame(ControlFrame, UninterruptibleFrame): """Base frame for updating service settings. - A control frame containing a request to update service settings. + Supports both a ``settings`` dict (for backward compatibility) and a + ``delta`` object. When both are provided, ``delta`` takes precedence. Parameters: settings: Dictionary of setting name to value mappings. + + .. deprecated:: 0.0.104 + Use ``delta`` with a typed settings object instead. + + delta: :class:`~pipecat.services.settings.ServiceSettings` delta-mode + object describing the fields to change. """ - settings: Mapping[str, Any] + settings: Mapping[str, Any] = field(default_factory=dict) + delta: Optional["ServiceSettings"] = None @dataclass diff --git a/src/pipecat/metrics/metrics.py b/src/pipecat/metrics/metrics.py index 98903483a..2030306e5 100644 --- a/src/pipecat/metrics/metrics.py +++ b/src/pipecat/metrics/metrics.py @@ -87,19 +87,44 @@ class TTSUsageMetricsData(MetricsData): value: int -class SmartTurnMetricsData(MetricsData): - """Metrics data for smart turn predictions. +class TextAggregationMetricsData(MetricsData): + """Text aggregation time metrics data. + + Measures the time from the first LLM token to the first complete sentence, + representing the latency cost of sentence aggregation in the TTS pipeline. + + Parameters: + value: Aggregation time in seconds. + """ + + value: float + + +class TurnMetricsData(MetricsData): + """Metrics data for turn detection predictions. Parameters: is_complete: Whether the turn is predicted to be complete. probability: Confidence probability of the turn completion prediction. 
- inference_time_ms: Time taken for inference in milliseconds. - server_total_time_ms: Total server processing time in milliseconds. - e2e_processing_time_ms: End-to-end processing time in milliseconds. + e2e_processing_time_ms: End-to-end processing time in milliseconds, + measured from VAD speech-to-silence transition to turn completion. """ is_complete: bool probability: float - inference_time_ms: float - server_total_time_ms: float e2e_processing_time_ms: float + + +class SmartTurnMetricsData(TurnMetricsData): + """Metrics data for smart turn predictions. + + .. deprecated:: 0.0.104 + Use :class:`TurnMetricsData` instead. This class will be removed in a future version. + + Parameters: + inference_time_ms: Time taken for inference in milliseconds. + server_total_time_ms: Total server processing time in milliseconds. + """ + + inference_time_ms: float = 0.0 + server_total_time_ms: float = 0.0 diff --git a/src/pipecat/observers/base_observer.py b/src/pipecat/observers/base_observer.py index 78e36fec8..70c79224a 100644 --- a/src/pipecat/observers/base_observer.py +++ b/src/pipecat/observers/base_observer.py @@ -100,3 +100,11 @@ class BaseObserver(BaseObject): data: The event data containing details about the frame transfer. """ pass + + async def on_pipeline_started(self): + """Called when the pipeline has fully started. + + Fired after the ``StartFrame`` has been processed by all processors + in the pipeline, including nested ``ParallelPipeline`` branches. 
+ """ + pass diff --git a/src/pipecat/observers/loggers/metrics_log_observer.py b/src/pipecat/observers/loggers/metrics_log_observer.py index a36ab510e..7f4c1635c 100644 --- a/src/pipecat/observers/loggers/metrics_log_observer.py +++ b/src/pipecat/observers/loggers/metrics_log_observer.py @@ -24,6 +24,7 @@ from pipecat.metrics.metrics import ( SmartTurnMetricsData, TTFBMetricsData, TTSUsageMetricsData, + TurnMetricsData, ) from pipecat.observers.base_observer import BaseObserver, FramePushed @@ -37,7 +38,7 @@ class MetricsLogObserver(BaseObserver): - ProcessingMetricsData (General processing time) - LLMUsageMetricsData (Token usage statistics) - TTSUsageMetricsData (Text-to-Speech character counts) - - SmartTurnMetricsData (Turn prediction metrics) + - TurnMetricsData (Turn prediction metrics) This allows developers to track performance metrics, token usage, and other statistics throughout the pipeline. @@ -70,6 +71,17 @@ class MetricsLogObserver(BaseObserver): **kwargs: Additional arguments passed to parent class. 
""" super().__init__(**kwargs) + # Normalize deprecated types in include_metrics + if include_metrics and SmartTurnMetricsData in include_metrics: + import warnings + + warnings.warn( + "SmartTurnMetricsData is deprecated in include_metrics, " + "use TurnMetricsData instead.", + DeprecationWarning, + stacklevel=2, + ) + include_metrics = (include_metrics - {SmartTurnMetricsData}) | {TurnMetricsData} self._include_metrics = include_metrics self._frames_seen = set() @@ -144,8 +156,8 @@ class MetricsLogObserver(BaseObserver): logger.debug( f"📊 {processor_info} TTS USAGE{model_info}: {metrics_data.value} characters at {time_sec:.3f}s" ) - elif isinstance(metrics_data, SmartTurnMetricsData): - self._log_smart_turn(metrics_data, processor_info, model_info, time_sec) + elif isinstance(metrics_data, TurnMetricsData): + self._log_turn(metrics_data, processor_info, model_info, time_sec) else: # Generic fallback for unknown metrics types logger.debug( @@ -191,28 +203,27 @@ class MetricsLogObserver(BaseObserver): f"📊 {processor_info} LLM TOKEN USAGE{model_info}: {usage_str} at {time_sec:.2f}s" ) - def _log_smart_turn( + def _log_turn( self, - metrics_data: SmartTurnMetricsData, + metrics_data: TurnMetricsData, processor_info: str, model_info: str, time_sec: float, ): - """Log smart turn prediction metrics. + """Log turn prediction metrics. Args: - metrics_data: The smart turn metrics data. + metrics_data: The turn metrics data. processor_info: Formatted processor name string. model_info: Formatted model name string. time_sec: Timestamp in seconds. 
""" complete_str = "COMPLETE" if metrics_data.is_complete else "INCOMPLETE" + e2e_str = f"{metrics_data.e2e_processing_time_ms:.1f}ms" logger.debug( - f"📊 {processor_info} SMART TURN{model_info}: {complete_str} " + f"📊 {processor_info} TURN{model_info}: {complete_str} " f"(probability: {metrics_data.probability:.2%}, " - f"inference: {metrics_data.inference_time_ms:.1f}ms, " - f"server: {metrics_data.server_total_time_ms:.1f}ms, " - f"e2e: {metrics_data.e2e_processing_time_ms:.1f}ms) " + f"e2e: {e2e_str}) " f"at {time_sec:.2f}s" ) diff --git a/src/pipecat/observers/startup_timing_observer.py b/src/pipecat/observers/startup_timing_observer.py new file mode 100644 index 000000000..a1ea04d47 --- /dev/null +++ b/src/pipecat/observers/startup_timing_observer.py @@ -0,0 +1,328 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Observer for tracking pipeline startup timing. + +This module provides an observer that measures how long each processor's +``start()`` method takes during pipeline startup. It works by tracking +when a ``StartFrame`` arrives at a processor (``on_process_frame``) versus +when it leaves (``on_push_frame``), giving the exact ``start()`` duration +for each processor in the pipeline. + +It also measures transport timing — the time from ``StartFrame`` to the +first ``BotConnectedFrame`` (SFU transports only) and ``ClientConnectedFrame`` +— via a separate ``on_transport_timing_report`` event. 
+ +Example:: + + observer = StartupTimingObserver() + + @observer.event_handler("on_startup_timing_report") + async def on_report(observer, report): + for t in report.processor_timings: + print(f"{t.processor_name}: {t.duration_secs:.3f}s") + + @observer.event_handler("on_transport_timing_report") + async def on_transport(observer, report): + if report.bot_connected_secs is not None: + print(f"Bot connected in {report.bot_connected_secs:.3f}s") + print(f"Client connected in {report.client_connected_secs:.3f}s") + + task = PipelineTask(pipeline, observers=[observer]) +""" + +import time +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple, Type + +from pydantic import BaseModel, Field + +from pipecat.frames.frames import BotConnectedFrame, ClientConnectedFrame, StartFrame +from pipecat.observers.base_observer import BaseObserver, FrameProcessed, FramePushed +from pipecat.pipeline.base_pipeline import BasePipeline +from pipecat.pipeline.pipeline import PipelineSource +from pipecat.processors.frame_processor import FrameProcessor + +# Internal pipeline types excluded from tracking by default. +_INTERNAL_TYPES = (PipelineSource, BasePipeline) + + +@dataclass +class _ArrivalInfo: + """Internal record of when a StartFrame arrived at a processor.""" + + processor: FrameProcessor + arrival_ts_ns: int + + +class ProcessorStartupTiming(BaseModel): + """Startup timing for a single processor. + + Parameters: + processor_name: The name of the processor. + start_offset_secs: Offset in seconds from the StartFrame to when this + processor's start() began. + duration_secs: How long the processor's start() took, in seconds. + """ + + processor_name: str + start_offset_secs: float + duration_secs: float + + +class StartupTimingReport(BaseModel): + """Report of startup timings for all measured processors. + + Parameters: + start_time: Unix timestamp when the first processor began starting. 
+ total_duration_secs: Total wall-clock time from first to last processor start. + processor_timings: Per-processor timing data, in pipeline order. + """ + + start_time: float + total_duration_secs: float + processor_timings: List[ProcessorStartupTiming] = Field(default_factory=list) + + +class TransportTimingReport(BaseModel): + """Time from pipeline start to transport connection milestones. + + Parameters: + start_time: Unix timestamp of the StartFrame (pipeline start). + bot_connected_secs: Seconds from StartFrame to first BotConnectedFrame + (only set for SFU transports). + client_connected_secs: Seconds from StartFrame to first ClientConnectedFrame. + """ + + start_time: float + bot_connected_secs: Optional[float] = None + client_connected_secs: Optional[float] = None + + +class StartupTimingObserver(BaseObserver): + """Observer that measures processor startup times during pipeline initialization. + + Tracks how long each processor's ``start()`` method takes by measuring the + time between when a ``StartFrame`` arrives at a processor and when it is + pushed downstream. This captures WebSocket connections, API authentication, + model loading, and other initialization work. + + Also measures transport timing, the time from ``StartFrame`` to connection + milestones: + + - ``bot_connected_secs``: When the bot joins the transport room + (SFU transports only, triggered by ``BotConnectedFrame``). + - ``client_connected_secs``: When a remote participant connects + (triggered by ``ClientConnectedFrame``). + + By default, internal pipeline processors (``PipelineSource``, ``Pipeline``) + are excluded from the report. Pass ``processor_types`` to measure only + specific types. + + Event handlers available: + + - on_startup_timing_report: Called once after startup completes with the full + timing report. 
+ - on_transport_timing_report: Called once when the first client connects with a + TransportTimingReport containing client_connected_secs and bot_connected_secs + (if available). + + Example:: + + observer = StartupTimingObserver( + processor_types=(STTService, TTSService) + ) + + @observer.event_handler("on_startup_timing_report") + async def on_report(observer, report): + for t in report.processor_timings: + logger.info(f"{t.processor_name}: {t.duration_secs:.3f}s") + + @observer.event_handler("on_transport_timing_report") + async def on_transport(observer, report): + if report.bot_connected_secs is not None: + logger.info(f"Bot connected in {report.bot_connected_secs:.3f}s") + logger.info(f"Client connected in {report.client_connected_secs:.3f}s") + + task = PipelineTask(pipeline, observers=[observer]) + + Args: + processor_types: Optional tuple of processor types to measure. If None, + all non-internal processors are measured. + """ + + def __init__( + self, + *, + processor_types: Optional[Tuple[Type[FrameProcessor], ...]] = None, + **kwargs, + ): + """Initialize the startup timing observer. + + Args: + processor_types: Optional tuple of processor types to measure. + If None, all non-internal processors are measured. + **kwargs: Additional arguments passed to parent class. + """ + super().__init__(**kwargs) + self._processor_types = processor_types + + # Map processor ID -> arrival info. + self._arrivals: Dict[int, _ArrivalInfo] = {} + + # Collected timings in pipeline order. + self._timings: List[ProcessorStartupTiming] = [] + + # Lock onto the first StartFrame we see (by frame ID). + self._start_frame_id: Optional[str] = None + + # Whether we've already emitted the startup timing report. + self._startup_timing_reported = False + + # Whether we've already measured transport timing. + self._transport_timing_reported = False + + # Timestamp (ns) when we first see a StartFrame arrive at a processor. 
+ self._start_frame_arrival_ns: Optional[int] = None + + # Bot connected timing (stored for inclusion in the transport report). + self._bot_connected_secs: Optional[float] = None + + # Wall clock time when the StartFrame was first seen. + self._start_wall_clock: Optional[float] = None + + self._register_event_handler("on_startup_timing_report") + self._register_event_handler("on_transport_timing_report") + + def _should_track(self, processor: FrameProcessor) -> bool: + """Check if a processor should be tracked for timing. + + Args: + processor: The processor to check. + + Returns: + True if the processor matches the filter or no filter is set. + """ + if self._processor_types is not None: + return isinstance(processor, self._processor_types) + # Default: exclude internal pipeline plumbing. + return not isinstance(processor, _INTERNAL_TYPES) + + async def on_pipeline_started(self): + """Emit the startup timing report when the pipeline has fully started. + + Called by the ``PipelineTask`` after the ``StartFrame`` has been + processed by all processors, including nested ``ParallelPipeline`` + branches. + """ + if self._timings: + await self._emit_report() + + async def on_process_frame(self, data: FrameProcessed): + """Record when a StartFrame arrives at a processor. + + Args: + data: The frame processing event data. + """ + if self._startup_timing_reported: + return + + if not isinstance(data.frame, StartFrame): + return + + # Lock onto the first StartFrame. + if self._start_frame_id is None: + self._start_frame_id = data.frame.id + self._start_frame_arrival_ns = data.timestamp + self._start_wall_clock = time.time() + elif data.frame.id != self._start_frame_id: + return + + if self._should_track(data.processor): + self._arrivals[data.processor.id] = _ArrivalInfo( + processor=data.processor, arrival_ts_ns=data.timestamp + ) + + async def on_push_frame(self, data: FramePushed): + """Record when a StartFrame leaves a processor and compute the delta. 
+ + Also handles ``BotConnectedFrame`` and ``ClientConnectedFrame`` to + measure transport timing. + + Args: + data: The frame push event data. + """ + if isinstance(data.frame, BotConnectedFrame): + self._handle_bot_connected(data) + return + + if isinstance(data.frame, ClientConnectedFrame): + await self._handle_client_connected(data) + return + + if self._startup_timing_reported: + return + + if not isinstance(data.frame, StartFrame): + return + + if self._start_frame_id is not None and data.frame.id != self._start_frame_id: + return + + arrival = self._arrivals.pop(data.source.id, None) + if arrival is None: + return + + duration_ns = data.timestamp - arrival.arrival_ts_ns + duration_secs = duration_ns / 1e9 + start_offset_secs = (arrival.arrival_ts_ns - self._start_frame_arrival_ns) / 1e9 + + self._timings.append( + ProcessorStartupTiming( + processor_name=arrival.processor.name, + start_offset_secs=start_offset_secs, + duration_secs=duration_secs, + ) + ) + + def _handle_bot_connected(self, data: FramePushed): + """Record bot connected timing on first BotConnectedFrame.""" + if self._bot_connected_secs is not None or self._start_frame_arrival_ns is None: + return + + delta_ns = data.timestamp - self._start_frame_arrival_ns + self._bot_connected_secs = delta_ns / 1e9 + + async def _handle_client_connected(self, data: FramePushed): + """Emit transport timing report on first ClientConnectedFrame.""" + if self._transport_timing_reported or self._start_frame_arrival_ns is None: + return + + self._transport_timing_reported = True + delta_ns = data.timestamp - self._start_frame_arrival_ns + client_connected_secs = delta_ns / 1e9 + report = TransportTimingReport( + start_time=self._start_wall_clock or 0.0, + bot_connected_secs=self._bot_connected_secs, + client_connected_secs=client_connected_secs, + ) + await self._call_event_handler("on_transport_timing_report", report) + + async def _emit_report(self): + """Build and emit the startup timing report.""" + if 
self._startup_timing_reported: + return + self._startup_timing_reported = True + + total = sum(t.duration_secs for t in self._timings) + + report = StartupTimingReport( + start_time=self._start_wall_clock or 0.0, + total_duration_secs=total, + processor_timings=self._timings, + ) + + await self._call_event_handler("on_startup_timing_report", report) diff --git a/src/pipecat/pipeline/task.py b/src/pipecat/pipeline/task.py index 2cfe26606..e795961a1 100644 --- a/src/pipecat/pipeline/task.py +++ b/src/pipecat/pipeline/task.py @@ -330,6 +330,7 @@ class PipelineTask(BasePipelineTask): # RTVI support self._rtvi = None + prepend_rtvi = False external_rtvi = self._find_processor(pipeline, RTVIProcessor) external_observer_found = any(isinstance(o, RTVIObserver) for o in observers) @@ -352,6 +353,7 @@ class PipelineTask(BasePipelineTask): elif enable_rtvi: self._rtvi = rtvi_processor or RTVIProcessor() observers.append(self._rtvi.create_rtvi_observer(params=rtvi_observer_params)) + prepend_rtvi = True if self._rtvi: # Automatically call RTVIProcessor.set_bot_ready() @@ -387,9 +389,12 @@ class PipelineTask(BasePipelineTask): # source allows us to receive and react to upstream frames, and the sink # allows us to receive and react to downstream frames. source = PipelineSource(self._source_push_frame, name=f"{self}::Source") - sink = PipelineSink(self._sink_push_frame, name=f"{self}::Sink") - processors = [self._rtvi, pipeline] if self._rtvi else [pipeline] - self._pipeline = Pipeline(processors, source=source, sink=sink) + self._sink = PipelineSink(self._sink_push_frame, name=f"{self}::Sink") + # Only prepend the RTVIProcessor if we created it ourselves. When the + # user already placed it inside their pipeline we must not insert it + # again or it will appear twice in the frame chain. 
+ processors = [self._rtvi, pipeline] if prepend_rtvi else [pipeline] + self._pipeline = Pipeline(processors, source=source, sink=self._sink) # The task observer acts as a proxy to the provided observers. This way, # we only need to pass a single observer (using the StartFrame) which @@ -620,26 +625,43 @@ class PipelineTask(BasePipelineTask): self._finished = True logger.debug(f"Pipeline task {self} has finished") - async def queue_frame(self, frame: Frame): - """Queue a single frame to be pushed down the pipeline. + async def queue_frame( + self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM + ): + """Queue a single frame to be pushed through the pipeline. + + Downstream frames are pushed from the beginning of the pipeline. + Upstream frames are pushed from the end of the pipeline. Args: frame: The frame to be processed. + direction: The direction to push the frame. Defaults to downstream. """ - await self._push_queue.put(frame) + if direction == FrameDirection.DOWNSTREAM: + await self._push_queue.put(frame) + else: + await self._sink.queue_frame(frame, direction) - async def queue_frames(self, frames: Iterable[Frame] | AsyncIterable[Frame]): - """Queues multiple frames to be pushed down the pipeline. + async def queue_frames( + self, + frames: Iterable[Frame] | AsyncIterable[Frame], + direction: FrameDirection = FrameDirection.DOWNSTREAM, + ): + """Queue multiple frames to be pushed through the pipeline. + + Downstream frames are pushed from the beginning of the pipeline. + Upstream frames are pushed from the end of the pipeline. Args: frames: An iterable or async iterable of frames to be processed. + direction: The direction to push the frames. Defaults to downstream. 
""" if isinstance(frames, AsyncIterable): async for frame in frames: - await self.queue_frame(frame) + await self.queue_frame(frame, direction) elif isinstance(frames, Iterable): for frame in frames: - await self.queue_frame(frame) + await self.queue_frame(frame, direction) async def _cancel(self, *, reason: Optional[str] = None): """Internal cancellation logic for the pipeline task. @@ -870,7 +892,7 @@ class PipelineTask(BasePipelineTask): # pipeline. This is in case the push task is blocked waiting for a # pipeline-ending frame to finish traversing the pipeline. logger.debug(f"{self}: received interruption task frame {frame}") - await self._pipeline.queue_frame(InterruptionFrame(event=frame.event)) + await self._pipeline.queue_frame(InterruptionFrame()) elif isinstance(frame, ErrorFrame): await self._call_event_handler("on_pipeline_error", frame) if frame.fatal: @@ -893,6 +915,7 @@ class PipelineTask(BasePipelineTask): if isinstance(frame, StartFrame): await self._call_event_handler("on_pipeline_started", frame) + await self._observer.on_pipeline_started() # Start heartbeat tasks now that StartFrame has been processed # by all processors in the pipeline @@ -909,8 +932,6 @@ class PipelineTask(BasePipelineTask): self._pipeline_end_event.set() elif isinstance(frame, CancelFrame): self._pipeline_end_event.set() - elif isinstance(frame, InterruptionFrame): - frame.complete() elif isinstance(frame, HeartbeatFrame): await self._heartbeat_queue.put(frame) diff --git a/src/pipecat/pipeline/task_observer.py b/src/pipecat/pipeline/task_observer.py index 4d33fd60e..dc2040e07 100644 --- a/src/pipecat/pipeline/task_observer.py +++ b/src/pipecat/pipeline/task_observer.py @@ -39,6 +39,12 @@ class Proxy: observer: BaseObserver +class _PipelineStartedSignal: + """Internal sentinel queued to observers when the pipeline has started.""" + + pass + + class TaskObserver(BaseObserver): """Proxy observer that manages multiple observers without blocking the pipeline. 
@@ -129,6 +135,10 @@ class TaskObserver(BaseObserver): for proxy in self._proxies: await proxy.cleanup() + async def on_pipeline_started(self): + """Forward pipeline started signal to all managed observers.""" + await self._send_to_proxy(_PipelineStartedSignal()) + async def on_process_frame(self, data: FrameProcessed): """Queue frame data for all managed observers. @@ -186,7 +196,9 @@ class TaskObserver(BaseObserver): while True: data = await queue.get() - if isinstance(data, FramePushed): + if isinstance(data, _PipelineStartedSignal): + await observer.on_pipeline_started() + elif isinstance(data, FramePushed): if on_push_frame_deprecated: await observer.on_push_frame( data.source, data.destination, data.frame, data.direction, data.timestamp diff --git a/src/pipecat/processors/aggregators/dtmf_aggregator.py b/src/pipecat/processors/aggregators/dtmf_aggregator.py index 1b9c59158..ea56ba6fc 100644 --- a/src/pipecat/processors/aggregators/dtmf_aggregator.py +++ b/src/pipecat/processors/aggregators/dtmf_aggregator.py @@ -104,7 +104,7 @@ class DTMFAggregator(FrameProcessor): # For first digit, schedule interruption. 
if is_first_digit: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() # Check for immediate flush conditions if frame.button == self._termination_digit: diff --git a/src/pipecat/processors/aggregators/llm_context_summarizer.py b/src/pipecat/processors/aggregators/llm_context_summarizer.py index a1a613ccc..54879a8bb 100644 --- a/src/pipecat/processors/aggregators/llm_context_summarizer.py +++ b/src/pipecat/processors/aggregators/llm_context_summarizer.py @@ -6,8 +6,10 @@ """This module defines a summarizer for managing LLM context summarization.""" +import asyncio import uuid -from typing import Optional +from dataclasses import dataclass +from typing import TYPE_CHECKING, Optional from loguru import logger @@ -17,28 +19,68 @@ from pipecat.frames.frames import ( LLMContextSummaryRequestFrame, LLMContextSummaryResultFrame, LLMFullResponseStartFrame, + LLMSummarizeContextFrame, ) -from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage from pipecat.utils.asyncio.task_manager import BaseTaskManager from pipecat.utils.base_object import BaseObject from pipecat.utils.context.llm_context_summarization import ( - LLMContextSummarizationConfig, + DEFAULT_SUMMARIZATION_TIMEOUT, + LLMAutoContextSummarizationConfig, LLMContextSummarizationUtil, + LLMContextSummaryConfig, ) +if TYPE_CHECKING: + from pipecat.services.llm_service import LLMService + + +@dataclass +class SummaryAppliedEvent: + """Event data emitted when context summarization completes successfully. + + Parameters: + original_message_count: Number of messages before summarization. + new_message_count: Number of messages after summarization. + summarized_message_count: Number of messages that were compressed + into the summary. + preserved_message_count: Number of recent messages preserved + uncompressed. 
+ """ + + original_message_count: int + new_message_count: int + summarized_message_count: int + preserved_message_count: int + class LLMContextSummarizer(BaseObject): """Summarizer for managing LLM context summarization. - This class manages automatic context summarization when token or message - limits are reached. It monitors the LLM context size, triggers - summarization requests, and applies the results to compress conversation history. + This class manages context summarization, either automatically when token or + message limits are reached, or on-demand when an ``LLMSummarizeContextFrame`` + is received. It monitors the LLM context size, triggers summarization requests, + and applies the results to compress conversation history. + + When ``auto_trigger=True`` (the default), summarization is triggered + automatically based on the configured thresholds in + ``LLMAutoContextSummarizationConfig``. When ``auto_trigger=False``, + threshold checks are skipped and summarization only happens when an + ``LLMSummarizeContextFrame`` is explicitly pushed into the pipeline. + + Both modes can coexist: set ``auto_trigger=True`` and also push + ``LLMSummarizeContextFrame`` at any time to force an immediate summarization + (subject to the ``_summarization_in_progress`` guard). Event handlers available: - on_request_summarization: Emitted when summarization should be triggered. The aggregator should broadcast this frame to the LLM service. + - on_summary_applied: Emitted after a summary has been successfully applied + to the context. Receives a SummaryAppliedEvent with metrics about the + compression. + Example:: @summarizer.event_handler("on_request_summarization") @@ -49,24 +91,36 @@ class LLMContextSummarizer(BaseObject): context=frame.context, ... 
) + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event: SummaryAppliedEvent): + logger.info(f"Compressed {event.original_message_count} -> {event.new_message_count} messages") """ def __init__( self, *, context: LLMContext, - config: Optional[LLMContextSummarizationConfig] = None, + config: Optional[LLMAutoContextSummarizationConfig] = None, + auto_trigger: bool = True, ): """Initialize the context summarizer. Args: context: The LLM context to monitor and summarize. - config: Configuration for summarization behavior. If None, uses default config. + config: Auto-summarization configuration controlling both trigger + thresholds and default summary generation parameters. If None, + uses default ``LLMAutoContextSummarizationConfig`` values. + auto_trigger: Whether to automatically trigger summarization when + thresholds are reached. When False, summarization only happens + when an ``LLMSummarizeContextFrame`` is pushed into the pipeline. + Defaults to True. 
""" super().__init__() self._context = context - self._config = config or LLMContextSummarizationConfig() + self._auto_config = config or LLMAutoContextSummarizationConfig() + self._auto_trigger = auto_trigger self._task_manager: Optional[BaseTaskManager] = None @@ -74,6 +128,7 @@ class LLMContextSummarizer(BaseObject): self._pending_summary_request_id: Optional[str] = None self._register_event_handler("on_request_summarization", sync=True) + self._register_event_handler("on_summary_applied") @property def task_manager(self) -> BaseTaskManager: @@ -103,6 +158,8 @@ class LLMContextSummarizer(BaseObject): """ if isinstance(frame, LLMFullResponseStartFrame): await self._handle_llm_response_start(frame) + elif isinstance(frame, LLMSummarizeContextFrame): + await self._handle_manual_summarization_request(frame) elif isinstance(frame, LLMContextSummaryResultFrame): await self._handle_summary_result(frame) elif isinstance(frame, InterruptionFrame): @@ -117,12 +174,24 @@ class LLMContextSummarizer(BaseObject): if self._should_summarize(): await self._request_summarization() - async def _handle_interruption(self): - """Handle interruption by canceling summarization in progress. + async def _handle_manual_summarization_request(self, frame: LLMSummarizeContextFrame): + """Handle an explicit on-demand summarization request. + + Reuses the same ``_request_summarization()`` code path as auto mode, + so bookkeeping (``_summarization_in_progress``, + ``_pending_summary_request_id``) is always updated correctly. Args: - frame: The interruption frame. + frame: The manual summarization request frame, optionally carrying + a per-request :class:`~pipecat.utils.context.llm_context_summarization.LLMContextSummaryConfig`. 
""" + if self._summarization_in_progress: + logger.debug(f"{self}: Summarization already in progress, ignoring manual request") + return + await self._request_summarization(config_override=frame.config) + + async def _handle_interruption(self): + """Handle interruption by canceling summarization in progress.""" # Reset summarization state to allow new requests. This is necessary because # the request frame (LLMContextSummaryRequestFrame) may have been cancelled # during interruption. We preserve _pending_summary_request_id to handle the @@ -145,13 +214,17 @@ class LLMContextSummarizer(BaseObject): Returns: True if all conditions are met: + - ``auto_trigger`` is enabled - No summarization currently in progress - AND either: - - Token count exceeds max_context_tokens - - OR message count exceeds max_unsummarized_messages since last summary + - Token count exceeds ``max_context_tokens`` + - OR message count exceeds ``max_unsummarized_messages`` since last summary """ logger.trace(f"{self}: Checking if context summarization is needed") + if not self._auto_trigger: + return False + if self._summarization_in_progress: logger.debug(f"{self}: Summarization already in progress") return False @@ -161,20 +234,20 @@ class LLMContextSummarizer(BaseObject): num_messages = len(self._context.messages) # Check if we've reached the token limit - token_limit = self._config.max_context_tokens + token_limit = self._auto_config.max_context_tokens token_limit_exceeded = total_tokens >= token_limit # Check if we've exceeded max unsummarized messages messages_since_summary = len(self._context.messages) - 1 message_threshold_exceeded = ( - messages_since_summary >= self._config.max_unsummarized_messages + messages_since_summary >= self._auto_config.max_unsummarized_messages ) logger.trace( f"{self}: Context has {num_messages} messages, " f"~{total_tokens} tokens (limit: {token_limit}), " f"{messages_since_summary} messages since last summary " - f"(message threshold: 
{self._config.max_unsummarized_messages})" + f"(message threshold: {self._auto_config.max_unsummarized_messages})" ) # Trigger if either limit is exceeded @@ -189,21 +262,30 @@ class LLMContextSummarizer(BaseObject): reason.append(f"~{total_tokens} tokens (>={token_limit} limit)") if message_threshold_exceeded: reason.append( - f"{messages_since_summary} messages (>={self._config.max_unsummarized_messages} threshold)" + f"{messages_since_summary} messages (>={self._auto_config.max_unsummarized_messages} threshold)" ) logger.debug(f"{self}: ✓ Summarization needed - {', '.join(reason)}") return True - async def _request_summarization(self): + async def _request_summarization( + self, config_override: Optional[LLMContextSummaryConfig] = None + ): """Request context summarization from LLM service. - Creates a summarization request frame and emits it via event handler. + Creates a summarization request frame and either handles it directly + using a dedicated LLM (if configured) or emits it via event handler + for the pipeline's primary LLM. Tracks the request ID to match async responses and prevent race conditions. + + Args: + config_override: Optional per-request summary configuration. If provided, + overrides the default summary generation settings from + ``self._auto_config.summary_config``. 
""" # Generate unique request ID request_id = str(uuid.uuid4()) - min_keep = self._config.min_messages_after_summary + summary_config = config_override or self._auto_config.summary_config # Mark summarization in progress self._summarization_in_progress = True @@ -215,13 +297,66 @@ class LLMContextSummarizer(BaseObject): request_frame = LLMContextSummaryRequestFrame( request_id=request_id, context=self._context, - min_messages_to_keep=min_keep, - target_context_tokens=self._config.target_context_tokens, - summarization_prompt=self._config.summary_prompt, + min_messages_to_keep=summary_config.min_messages_after_summary, + target_context_tokens=summary_config.target_context_tokens, + summarization_prompt=summary_config.summary_prompt, + summarization_timeout=summary_config.summarization_timeout, ) - # Emit event for aggregator to broadcast - await self._call_event_handler("on_request_summarization", request_frame) + if summary_config.llm: + # Use dedicated LLM directly — no need to involve the pipeline + self.task_manager.create_task( + self._generate_summary_with_dedicated_llm(summary_config.llm, request_frame), + f"{self}-dedicated-llm-summary", + ) + else: + # Emit event for aggregator to broadcast to the pipeline LLM + await self._call_event_handler("on_request_summarization", request_frame) + + async def _generate_summary_with_dedicated_llm( + self, llm: "LLMService", frame: LLMContextSummaryRequestFrame + ): + """Generate summary using a dedicated LLM service. + + Calls the dedicated LLM's _generate_summary directly and feeds the + result back through _handle_summary_result, bypassing the pipeline. + + Args: + llm: The dedicated LLM service to use for summarization. + frame: The summarization request frame. 
+ """ + timeout = frame.summarization_timeout or DEFAULT_SUMMARIZATION_TIMEOUT + + try: + summary, last_index = await asyncio.wait_for( + llm._generate_summary(frame), + timeout=timeout, + ) + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary=summary, + last_summarized_index=last_index, + ) + except asyncio.TimeoutError: + error = f"Context summarization timed out after {timeout}s" + logger.error(f"{self}: {error}") + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary="", + last_summarized_index=-1, + error=error, + ) + except Exception as e: + error = f"Error generating context summary: {e}" + logger.error(f"{self}: {error}") + result_frame = LLMContextSummaryResultFrame( + request_id=frame.request_id, + summary="", + last_summarized_index=-1, + error=error, + ) + + await self._handle_summary_result(result_frame) async def _handle_summary_result(self, frame: LLMContextSummaryResultFrame): """Handle context summarization result from LLM service. @@ -234,7 +369,9 @@ class LLMContextSummarizer(BaseObject): """ logger.debug(f"{self}: Received summary result (request_id={frame.request_id})") - # Check if this is the result we're waiting for + # Check if this is the result we're waiting for. Both auto and manual + # summarization set _pending_summary_request_id via _request_summarization(), + # so this check always applies. 
if frame.request_id != self._pending_summary_request_id: logger.debug(f"{self}: Ignoring stale summary result (request_id={frame.request_id})") return @@ -271,7 +408,7 @@ class LLMContextSummarizer(BaseObject): if last_summarized_index >= len(self._context.messages): return False - min_keep = self._config.min_messages_after_summary + min_keep = self._auto_config.summary_config.min_messages_after_summary remaining = len(self._context.messages) - 1 - last_summarized_index if remaining < min_keep: return False @@ -288,16 +425,29 @@ class LLMContextSummarizer(BaseObject): summary: The generated summary text. last_summarized_index: Index of the last message that was summarized. """ + config = self._auto_config.summary_config messages = self._context.messages - # Find the first system message to preserve - first_system_msg = next((msg for msg in messages if msg.get("role") == "system"), None) + # Find the first system message to preserve. LLMSpecificMessage instances are excluded + # because they are not dict-like and never represent a system message; they hold + # service-specific metadata (e.g. thinking blocks) that is always paired with a + # standard message. 
+ first_system_msg = next( + ( + msg + for msg in messages + if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system" + ), + None, + ) # Get recent messages to keep recent_messages = messages[last_summarized_index + 1 :] - # Create summary message as an assistant message - summary_message = {"role": "assistant", "content": f"Conversation summary: {summary}"} + # Create summary message as a user message (the summary is context + # provided *to* the assistant, not something the assistant said) + summary_content = config.summary_message_template.format(summary=summary) + summary_message = {"role": "user", "content": summary_content} # Reconstruct context new_messages = [] @@ -307,9 +457,23 @@ class LLMContextSummarizer(BaseObject): new_messages.extend(recent_messages) # Update context + original_message_count = len(messages) + num_system_preserved = 1 if first_system_msg else 0 self._context.set_messages(new_messages) + # Messages actually summarized = index range minus the preserved system message + summarized_count = last_summarized_index + 1 - num_system_preserved + logger.info( - f"{self}: Applied context summary, compressed {last_summarized_index + 1} messages " - f"into summary. Context now has {len(new_messages)} messages (was {len(messages)})" + f"{self}: Applied context summary, compressed {summarized_count} messages " + f"into summary. 
Context now has {len(new_messages)} messages (was {original_message_count})" ) + + # Emit event for observability + event = SummaryAppliedEvent( + original_message_count=original_message_count, + new_message_count=len(new_messages), + summarized_message_count=summarized_count, + preserved_message_count=len(recent_messages) + num_system_preserved, + ) + await self._call_event_handler("on_summary_applied", event) diff --git a/src/pipecat/processors/aggregators/llm_response.py b/src/pipecat/processors/aggregators/llm_response.py index 44e5ce252..7c246b209 100644 --- a/src/pipecat/processors/aggregators/llm_response.py +++ b/src/pipecat/processors/aggregators/llm_response.py @@ -581,7 +581,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator): logger.debug( "Interruption conditions met - pushing interruption and aggregation" ) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self._process_aggregation() else: logger.debug("Interruption conditions not met - not pushing aggregation") diff --git a/src/pipecat/processors/aggregators/llm_response_universal.py b/src/pipecat/processors/aggregators/llm_response_universal.py index e5884a868..cf6c81e5f 100644 --- a/src/pipecat/processors/aggregators/llm_response_universal.py +++ b/src/pipecat/processors/aggregators/llm_response_universal.py @@ -35,6 +35,7 @@ from pipecat.frames.frames import ( InputAudioRawFrame, InterimTranscriptionFrame, InterruptionFrame, + LLMAssistantPushAggregationFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, LLMContextSummaryRequestFrame, @@ -78,7 +79,10 @@ from pipecat.turns.user_stop import BaseUserTurnStopStrategy, UserTurnStoppedPar from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionConfig from pipecat.turns.user_turn_controller import UserTurnController from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies, UserTurnStrategies -from pipecat.utils.context.llm_context_summarization import 
LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummarizationConfig, +) from pipecat.utils.string import TextPartForConcatenation, concatenate_aggregated_text from pipecat.utils.time import time_now_iso8601 @@ -124,18 +128,54 @@ class LLMAssistantAggregatorParams: in text frames by adding spaces between tokens. This parameter is ignored when used with the newer LLMAssistantAggregator, which handles word spacing automatically. - enable_context_summarization: Enable automatic context summarization when token - limits are reached (disabled by default). When enabled, older conversation - messages are automatically compressed into summaries to manage context size. - context_summarization_config: Configuration for context summarization behavior. - Controls thresholds, message preservation, and summarization prompts. If None - and summarization is enabled, uses default configuration values. + enable_auto_context_summarization: Enable automatic context summarization when token + or message-count limits are reached (disabled by default). When enabled, + older conversation messages are automatically compressed into summaries to + manage context size. + auto_context_summarization_config: Configuration for automatic context + summarization. Controls trigger thresholds, message preservation, and + summarization prompts. If None, uses default + ``LLMAutoContextSummarizationConfig`` values. """ expect_stripped_words: bool = True - enable_context_summarization: bool = False + enable_auto_context_summarization: bool = False + auto_context_summarization_config: Optional[LLMAutoContextSummarizationConfig] = None + + # --------------------------------------------------------------------------- + # Deprecated field names — kept for backward compatibility. + # Use enable_auto_context_summarization and auto_context_summarization_config instead. 
+ # --------------------------------------------------------------------------- + enable_context_summarization: Optional[bool] = None context_summarization_config: Optional[LLMContextSummarizationConfig] = None + def __post_init__(self): + if self.enable_context_summarization is not None: + warnings.warn( + "LLMAssistantAggregatorParams.enable_context_summarization is deprecated. " + "Use enable_auto_context_summarization instead.", + DeprecationWarning, + stacklevel=2, + ) + self.enable_auto_context_summarization = self.enable_context_summarization + self.enable_context_summarization = None + + if self.context_summarization_config is not None: + warnings.warn( + "LLMAssistantAggregatorParams.context_summarization_config is deprecated. " + "Use auto_context_summarization_config (LLMAutoContextSummarizationConfig) instead.", + DeprecationWarning, + stacklevel=2, + ) + if isinstance(self.context_summarization_config, LLMContextSummarizationConfig): + self.auto_context_summarization_config = ( + self.context_summarization_config.to_auto_config() + ) + else: + # Accept LLMAutoContextSummarizationConfig passed to the deprecated field + self.auto_context_summarization_config = self.context_summarization_config # type: ignore[assignment] + self.context_summarization_config = None + @dataclass class UserTurnStoppedMessage: @@ -461,6 +501,10 @@ class LLMUserAggregator(LLMContextAggregator): await self.push_frame(frame, direction) elif isinstance(frame, TranscriptionFrame): await self._handle_transcription(frame) + elif isinstance(frame, (InterimTranscriptionFrame, TranslationFrame)): + # Interim transcriptions and translations are consumed here + # and not pushed downstream, same as final TranscriptionFrame. 
+ pass elif isinstance(frame, LLMRunFrame): await self._handle_llm_run(frame) elif isinstance(frame, LLMMessagesAppendFrame): @@ -564,12 +608,6 @@ class LLMUserAggregator(LLMContextAggregator): if should_mute_frame: logger.trace(f"{frame.name} suppressed - user currently muted") - # When muted, the InterruptionFrame won't propagate further and - # will never reach the pipeline sink. Complete it here so - # push_interruption_task_frame_and_wait() doesn't hang. - if should_mute_frame and isinstance(frame, InterruptionFrame): - frame.complete() - should_mute_next_time = False for s in self._params.user_mute_strategies: should_mute_next_time |= await s.process_frame(frame) @@ -598,6 +636,9 @@ class LLMUserAggregator(LLMContextAggregator): async def _handle_llm_messages_update(self, frame: LLMMessagesUpdateFrame): self.set_messages(frame.messages) + if self._params.filter_incomplete_user_turns: + config = self._params.user_turn_completion_config or UserTurnCompletionConfig() + self._context.add_message({"role": "system", "content": config.completion_instructions}) if frame.run_llm: await self.push_context_frame() @@ -690,7 +731,7 @@ class LLMUserAggregator(LLMContextAggregator): await self._user_idle_controller.process_frame(UserStartedSpeakingFrame()) if params.enable_interruptions and self._allow_interruptions: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self._call_event_handler("on_user_turn_started", strategy) @@ -820,16 +861,18 @@ class LLMAssistantAggregator(LLMContextAggregator): self._thought_aggregation: List[TextPartForConcatenation] = [] self._thought_start_time: str = "" - # Context summarization - self._summarizer: Optional[LLMContextSummarizer] = None - if self._params.enable_context_summarization: - self._summarizer = LLMContextSummarizer( - context=self._context, - config=self._params.context_summarization_config, - ) - self._summarizer.add_event_handler( - "on_request_summarization", 
self._on_request_summarization - ) + # Context summarization — always create the summarizer so that manually + # pushed LLMSummarizeContextFrame frames are always handled. + # Auto-triggering based on thresholds is only enabled when + # enable_auto_context_summarization is True. + self._summarizer: Optional[LLMContextSummarizer] = LLMContextSummarizer( + context=self._context, + config=self._params.auto_context_summarization_config, + auto_trigger=self._params.enable_auto_context_summarization, + ) + self._summarizer.add_event_handler( + "on_request_summarization", self._on_request_summarization + ) self._register_event_handler("on_assistant_turn_started") self._register_event_handler("on_assistant_turn_stopped") @@ -875,6 +918,8 @@ class LLMAssistantAggregator(LLMContextAggregator): elif isinstance(frame, (EndFrame, CancelFrame)): await self._handle_end_or_cancel(frame) await self.push_frame(frame, direction) + elif isinstance(frame, LLMAssistantPushAggregationFrame): + await self.push_aggregation() elif isinstance(frame, LLMFullResponseStartFrame): await self._handle_llm_start(frame) elif isinstance(frame, LLMFullResponseEndFrame): diff --git a/src/pipecat/processors/filters/stt_mute_filter.py b/src/pipecat/processors/filters/stt_mute_filter.py index f5d008e28..9f522a20d 100644 --- a/src/pipecat/processors/filters/stt_mute_filter.py +++ b/src/pipecat/processors/filters/stt_mute_filter.py @@ -234,12 +234,6 @@ class STTMuteFilter(FrameProcessor): await self.push_frame(frame, direction) else: logger.trace(f"{frame.__class__.__name__} suppressed - STT currently muted") - - # When muted, the InterruptionFrame won't propagate further - # and will never reach the pipeline sink. Complete it here so - # push_interruption_task_frame_and_wait() doesn't hang. 
- if isinstance(frame, InterruptionFrame): - frame.complete() else: # Pass all other frames through await self.push_frame(frame, direction) diff --git a/src/pipecat/processors/frame_processor.py b/src/pipecat/processors/frame_processor.py index bcdb2d57b..f3d9fbdea 100644 --- a/src/pipecat/processors/frame_processor.py +++ b/src/pipecat/processors/frame_processor.py @@ -41,7 +41,6 @@ from pipecat.frames.frames import ( FrameProcessorResumeFrame, FrameProcessorResumeUrgentFrame, InterruptionFrame, - InterruptionTaskFrame, StartFrame, SystemFrame, UninterruptibleFrame, @@ -240,10 +239,6 @@ class FrameProcessor(BaseObject): self.__process_frame_task: Optional[asyncio.Task] = None self.__process_current_frame: Optional[Frame] = None - # Set while awaiting push_interruption_task_frame_and_wait() so that - # _start_interruption() knows not to cancel the process task. - self._wait_for_interruption = False - # Frame processor events. self._register_event_handler("on_before_process_frame", sync=True) self._register_event_handler("on_after_process_frame", sync=True) @@ -329,7 +324,7 @@ class FrameProcessor(BaseObject): warnings.simplefilter("always") warnings.warn( "`FrameProcessor.interruptions_allowed` is deprecated. 
" - "Use `LLMUserAggregator`'s new `user_mute_strategies` parameter instead.", + "Use `LLMUserAggregator`'s new `user_mute_strategies` parameter instead.", DeprecationWarning, stacklevel=2, ) @@ -485,10 +480,23 @@ class FrameProcessor(BaseObject): if frame: await self.push_frame(frame) + async def start_text_aggregation_metrics(self): + """Start text aggregation time metrics collection.""" + if self.can_generate_metrics() and self.metrics_enabled: + await self._metrics.start_text_aggregation_metrics() + + async def stop_text_aggregation_metrics(self): + """Stop text aggregation time metrics collection and push results.""" + if self.can_generate_metrics() and self.metrics_enabled: + frame = await self._metrics.stop_text_aggregation_metrics() + if frame: + await self.push_frame(frame) + async def stop_all_metrics(self): """Stop all active metrics collection.""" await self.stop_ttfb_metrics() await self.stop_processing_metrics() + await self.stop_text_aggregation_metrics() def create_task(self, coroutine: Coroutine, name: Optional[str] = None) -> asyncio.Task: """Create a new task managed by this processor. @@ -618,15 +626,6 @@ class FrameProcessor(BaseObject): if self._cancelling: return - # If we are waiting for an interruption, bypass all queued system frames - # and process the frame right away. This is because a previous system - # frame might be waiting for the interruption frame blocking the input - # task, so this InterruptionFrame would never be dequeued and we'd - # deadlock. 
- if self._wait_for_interruption and isinstance(frame, InterruptionFrame): - await self.__process_frame(frame, direction, callback) - return - if self._enable_direct_mode: await self.__process_frame(frame, direction, callback) else: @@ -761,43 +760,32 @@ class FrameProcessor(BaseObject): await self._call_event_handler("on_after_push_frame", frame) + async def broadcast_interruption(self): + """Broadcast an `InterruptionFrame` both upstream and downstream.""" + logger.debug(f"{self}: broadcasting interruption") + self.__reset_process_task() + await self.stop_all_metrics() + await self.broadcast_frame(InterruptionFrame) + async def push_interruption_task_frame_and_wait(self, *, timeout: float = 5.0): """Push an interruption task frame upstream and wait for the interruption. - This function sends an `InterruptionTaskFrame` upstream to the - pipeline task. The task creates a corresponding `InterruptionFrame` - and sends it downstream through the pipeline. An `asyncio.Event` is - attached to both frames so the caller can wait until the interruption - has fully traversed the pipeline. The event is set when the - `InterruptionFrame` reaches the pipeline sink. If the frame does - not complete within the given timeout, a warning is logged and the - event is forcibly set so the caller is unblocked. - - Args: - timeout: Maximum seconds to wait for the interruption to complete. + .. deprecated:: 0.0.104 + Use :meth:`broadcast_interruption` instead. This method now + delegates to ``broadcast_interruption()`` and ignores *timeout*. """ - self._wait_for_interruption = True + import warnings - event = asyncio.Event() + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "`FrameProcessor.push_interruption_task_frame_and_wait()` is deprecated. 
" + "Use `FrameProcessor.broadcast_interruption()` instead.", + DeprecationWarning, + stacklevel=2, + ) - await self.push_frame(InterruptionTaskFrame(event=event), FrameDirection.UPSTREAM) - - # Wait for the `InterruptionFrame` to complete and log a warning if it - # takes too long. If it does take too long make sure we unblock it, - # otherwise we will hang here forever. - while not event.is_set(): - try: - await asyncio.wait_for(event.wait(), timeout=timeout) - except asyncio.TimeoutError: - logger.warning( - f"{self}: InterruptionFrame has not completed after" - f" {timeout}s. Make sure InterruptionFrame.complete()" - " is being called (e.g. if the frame is being blocked" - " or consumed before reaching the pipeline sink)." - ) - event.set() - - self._wait_for_interruption = False + await self.broadcast_interruption() async def broadcast_frame(self, frame_cls: Type[Frame], **kwargs): """Broadcasts a frame of the specified class upstream and downstream. @@ -904,15 +892,7 @@ class FrameProcessor(BaseObject): async def _start_interruption(self): """Start handling an interruption by cancelling current tasks.""" try: - if self._wait_for_interruption: - # If we get here we know the process task was just waiting for - # an interruption (push_interruption_task_frame_and_wait()), so - # we can't cancel the task because it might still need to do - # more things (e.g. pushing a frame after the - # interruption). Instead we just drain the queue because this is - # an interruption. - self.__reset_process_task() - elif isinstance(self.__process_current_frame, UninterruptibleFrame): + if isinstance(self.__process_current_frame, UninterruptibleFrame): # We don't want to cancel UninterruptibleFrame, so we simply # cleanup the queue. 
self.__reset_process_queue() @@ -936,7 +916,7 @@ class FrameProcessor(BaseObject): try: timestamp = self._clock.get_time() if self._clock else 0 if direction == FrameDirection.DOWNSTREAM and self._next: - logger.trace(f"Pushing {frame} from {self} to {self._next}") + logger.trace(f"Pushing {frame} downstream from {self} to {self._next}") if self._observer: data = FramePushed( diff --git a/src/pipecat/processors/frameworks/rtvi.py b/src/pipecat/processors/frameworks/rtvi.py index e01e95714..eb1e79f3e 100644 --- a/src/pipecat/processors/frameworks/rtvi.py +++ b/src/pipecat/processors/frameworks/rtvi.py @@ -1702,7 +1702,7 @@ class RTVIProcessor(FrameProcessor): async def interrupt_bot(self): """Send a bot interruption frame upstream.""" - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def send_server_message(self, data: Any): """Send a server message to the client.""" diff --git a/src/pipecat/processors/metrics/frame_processor_metrics.py b/src/pipecat/processors/metrics/frame_processor_metrics.py index c82fd9698..7a52895a2 100644 --- a/src/pipecat/processors/metrics/frame_processor_metrics.py +++ b/src/pipecat/processors/metrics/frame_processor_metrics.py @@ -17,6 +17,7 @@ from pipecat.metrics.metrics import ( LLMUsageMetricsData, MetricsData, ProcessingMetricsData, + TextAggregationMetricsData, TTFBMetricsData, TTSUsageMetricsData, ) @@ -43,6 +44,7 @@ class FrameProcessorMetrics(BaseObject): self._task_manager = None self._start_ttfb_time = 0 self._start_processing_time = 0 + self._start_text_aggregation_time = 0 self._last_ttfb_time = 0 self._should_report_ttfb = True @@ -211,3 +213,24 @@ class FrameProcessorMetrics(BaseObject): ) logger.debug(f"{self._processor_name()} usage characters: {characters.value}") return MetricsFrame(data=[characters]) + + async def start_text_aggregation_metrics(self): + """Start measuring text aggregation time (first token to first sentence).""" + self._start_text_aggregation_time = 
time.time() + + async def stop_text_aggregation_metrics(self): + """Stop text aggregation measurement and generate metrics frame. + + Returns: + MetricsFrame containing text aggregation time, or None if not measuring. + """ + if self._start_text_aggregation_time == 0: + return None + + value = time.time() - self._start_text_aggregation_time + logger.debug(f"{self._processor_name()} text aggregation time: {value}") + aggregation = TextAggregationMetricsData( + processor=self._processor_name(), value=value, model=self._model_name() + ) + self._start_text_aggregation_time = 0 + return MetricsFrame(data=[aggregation]) diff --git a/src/pipecat/processors/metrics/sentry.py b/src/pipecat/processors/metrics/sentry.py index db2c6de63..c865ee470 100644 --- a/src/pipecat/processors/metrics/sentry.py +++ b/src/pipecat/processors/metrics/sentry.py @@ -7,6 +7,7 @@ """Sentry integration for frame processor metrics.""" import asyncio +from typing import Optional from loguru import logger @@ -70,13 +71,18 @@ class SentryMetrics(FrameProcessorMetrics): logger.trace(f"{self} Flushing Sentry metrics") sentry_sdk.flush(timeout=5.0) - async def start_ttfb_metrics(self, report_only_initial_ttfb): + async def start_ttfb_metrics( + self, *, start_time: Optional[float] = None, report_only_initial_ttfb: bool + ): """Start tracking time-to-first-byte metrics. Args: + start_time: Optional start timestamp override. report_only_initial_ttfb: Whether to report only the initial TTFB measurement. 
""" - await super().start_ttfb_metrics(report_only_initial_ttfb) + await super().start_ttfb_metrics( + start_time=start_time, report_only_initial_ttfb=report_only_initial_ttfb + ) if self._should_report_ttfb and self._sentry_available: self._ttfb_metrics_tx = sentry_sdk.start_transaction( @@ -87,23 +93,25 @@ class SentryMetrics(FrameProcessorMetrics): f"{self} Sentry transaction started (ID: {self._ttfb_metrics_tx.span_id} Name: {self._ttfb_metrics_tx.name})" ) - async def stop_ttfb_metrics(self): + async def stop_ttfb_metrics(self, *, end_time: Optional[float] = None): """Stop tracking time-to-first-byte metrics. - Queues the TTFB transaction for completion and transmission to Sentry. + Args: + end_time: Optional end timestamp override. """ - await super().stop_ttfb_metrics() + await super().stop_ttfb_metrics(end_time=end_time) if self._sentry_available and self._ttfb_metrics_tx: await self._sentry_queue.put(self._ttfb_metrics_tx) self._ttfb_metrics_tx = None - async def start_processing_metrics(self): + async def start_processing_metrics(self, *, start_time: Optional[float] = None): """Start tracking frame processing metrics. - Creates a new Sentry transaction to track processing performance. + Args: + start_time: Optional start timestamp override. """ - await super().start_processing_metrics() + await super().start_processing_metrics(start_time=start_time) if self._sentry_available: self._processing_metrics_tx = sentry_sdk.start_transaction( @@ -114,12 +122,13 @@ class SentryMetrics(FrameProcessorMetrics): f"{self} Sentry transaction started (ID: {self._processing_metrics_tx.span_id} Name: {self._processing_metrics_tx.name})" ) - async def stop_processing_metrics(self): + async def stop_processing_metrics(self, *, end_time: Optional[float] = None): """Stop tracking frame processing metrics. - Queues the processing transaction for completion and transmission to Sentry. + Args: + end_time: Optional end timestamp override. 
""" - await super().stop_processing_metrics() + await super().stop_processing_metrics(end_time=end_time) if self._sentry_available and self._processing_metrics_tx: await self._sentry_queue.put(self._processing_metrics_tx) diff --git a/src/pipecat/serializers/genesys.py b/src/pipecat/serializers/genesys.py index 24b68eb81..a25287b5c 100644 --- a/src/pipecat/serializers/genesys.py +++ b/src/pipecat/serializers/genesys.py @@ -642,7 +642,6 @@ class GenesysAudioHookSerializer(FrameSerializer): """ # Binary data = audio if isinstance(data, bytes): - logger.debug(f"[AUDIO IN] Received {len(data)} bytes from Genesys") return await self._deserialize_audio(data) # Text data = JSON control message diff --git a/src/pipecat/services/ai_service.py b/src/pipecat/services/ai_service.py index 52b42663f..c4e45a417 100644 --- a/src/pipecat/services/ai_service.py +++ b/src/pipecat/services/ai_service.py @@ -10,7 +10,7 @@ Provides the foundation for all AI services in the Pipecat framework, including model management, settings handling, and frame processing lifecycle methods. """ -from typing import Any, AsyncGenerator, Dict, Mapping +from typing import Any, AsyncGenerator, Dict from loguru import logger @@ -23,6 +23,7 @@ from pipecat.frames.frames import ( ) from pipecat.metrics.metrics import MetricsData from pipecat.processors.frame_processor import FrameDirection, FrameProcessor +from pipecat.services.settings import ServiceSettings class AIService(FrameProcessor): @@ -34,36 +35,38 @@ class AIService(FrameProcessor): this base infrastructure. """ - def __init__(self, **kwargs): + def __init__(self, settings: ServiceSettings | None = None, **kwargs): """Initialize the AI service. Args: + settings: The runtime-updatable settings for the AI service. **kwargs: Additional arguments passed to the parent FrameProcessor. 
""" super().__init__(**kwargs) - self._model_name: str = "" - self._settings: Dict[str, Any] = {} + self._settings: ServiceSettings = ( + settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or ServiceSettings() + ) + self._sync_model_name_to_metrics() self._session_properties: Dict[str, Any] = {} self._tracing_enabled: bool = False self._tracing_context = None - @property - def model_name(self) -> str: - """Get the current model name. + def _sync_model_name_to_metrics(self): + """Sync the current AI model name (in `self._settings.model`) for usage in metrics. - Returns: - The name of the AI model being used. - """ - return self._model_name - - def set_model_name(self, model: str): - """Set the AI model name and update metrics. + We don't store model name here because there's already a single source + of truth for it in `self._settings.model`. This method is just for + syncing the model name to the metrics data. Args: model: The name of the AI model to use. """ - self._model_name = model - self.set_core_metrics_data(MetricsData(processor=self.name, model=self._model_name)) + self.set_core_metrics_data( + MetricsData(processor=self.name, model=self._settings.model or "") + ) async def start(self, frame: StartFrame): """Start the AI service. @@ -74,6 +77,7 @@ class AIService(FrameProcessor): Args: frame: The start frame containing initialization parameters. """ + self._settings.validate_complete() self._tracing_enabled = frame.enable_tracing self._tracing_context = frame.tracing_context @@ -99,44 +103,45 @@ class AIService(FrameProcessor): """ pass - async def _update_settings(self, settings: Mapping[str, Any]): - from pipecat.services.openai.realtime.events import SessionProperties + async def _update_settings(self, delta: ServiceSettings) -> Dict[str, Any]: + """Apply a settings delta and return the changed fields. 
- for key, value in settings.items(): - logger.debug("Update request for:", key, value) + The delta is applied to ``_settings`` and a dict mapping each changed + field name to its **pre-update** value is returned. The ``model`` + field is handled specially: when it changes, ``_sync_model_name_to_metrics`` is + called. - if key in self._settings: - logger.info(f"Updating LLM setting {key} to: [{value}]") - self._settings[key] = value - elif key in SessionProperties.model_fields: - logger.debug("Attempting to update", key, value) + Concrete services should override this method (calling ``super()``) + to react to specific changed fields (e.g. reconnect on voice change). - try: - from pipecat.services.openai.realtime.events import TurnDetection + Args: + delta: A delta-mode settings object. - if isinstance(self._session_properties, SessionProperties): - current_properties = self._session_properties - else: - current_properties = SessionProperties(**self._session_properties) + Returns: + Dict mapping changed field names to their previous values. 
+ """ + changed = self._settings.apply_update(delta) - if key == "turn_detection" and isinstance(value, dict): - turn_detection = TurnDetection(**value) - setattr(current_properties, key, turn_detection) - else: - setattr(current_properties, key, value) + if "model" in changed: + self._sync_model_name_to_metrics() - validated_properties = SessionProperties.model_validate( - current_properties.model_dump() - ) - logger.info(f"Updating LLM setting {key} to: [{value}]") - self._session_properties = validated_properties.model_dump() - except Exception as e: - logger.warning(f"Unexpected error updating session property {key}: {e}") - elif key == "model": - logger.info(f"Updating LLM setting {key} to: [{value}]") - self.set_model_name(value) - else: - logger.warning(f"Unknown setting for {self.name} service: {key}") + if changed: + logger.info(f"{self.name}: updated settings fields: {set(changed)}") + + return changed + + def _warn_unhandled_updated_settings(self, unhandled): + """Log a warning for settings changes that won't take effect at runtime. + + Convenience helper for ``_update_settings`` overrides. Accepts any + iterable of field names (a ``dict``, ``set``, ``dict_keys``, etc.). + + Args: + unhandled: Field names that changed but are not applied. + """ + if unhandled: + fields = ", ".join(sorted(unhandled)) + logger.warning(f"{self.name}: runtime update of [{fields}] is not currently supported") async def process_frame(self, frame: Frame, direction: FrameDirection): """Process frames and handle service lifecycle. 
diff --git a/src/pipecat/services/anthropic/llm.py b/src/pipecat/services/anthropic/llm.py index a21296fe3..03190ef99 100644 --- a/src/pipecat/services/anthropic/llm.py +++ b/src/pipecat/services/anthropic/llm.py @@ -16,8 +16,8 @@ import copy import io import json import re -from dataclasses import dataclass -from typing import Any, Dict, List, Literal, Optional, Union +from dataclasses import dataclass, field +from typing import Any, ClassVar, Dict, List, Literal, Optional, Union import httpx from loguru import logger @@ -42,7 +42,6 @@ from pipecat.frames.frames import ( LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, - LLMUpdateSettingsFrame, UserImageRawFrame, ) from pipecat.metrics.metrics import LLMTokenUsage @@ -59,6 +58,8 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN +from pipecat.services.settings import LLMSettings, _NotGiven, is_given from pipecat.utils.tracing.service_decorators import traced_llm try: @@ -69,6 +70,50 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +class AnthropicThinkingConfig(BaseModel): + """Configuration for extended thinking. + + Parameters: + type: Type of thinking mode (currently only "enabled" or "disabled"). + budget_tokens: Maximum number of tokens for thinking. + With today's models, the minimum is 1024. + Only allowed if type is "enabled". + """ + + # Why `| str` here? To not break compatibility in case Anthropic adds + # more types in the future. + type: Literal["enabled", "disabled"] | str + + # Why not enforce minimum of 1024 here? To not break compatibility in + # case Anthropic changes this requirement in the future. + budget_tokens: int + + +@dataclass +class AnthropicLLMSettings(LLMSettings): + """Settings for Anthropic LLM services. 
+ + Parameters: + enable_prompt_caching: Whether to enable prompt caching. + thinking: Extended thinking configuration. + """ + + enable_prompt_caching: bool | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) + thinking: AnthropicThinkingConfig | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) + + @classmethod + def from_mapping(cls, settings): + """Convert a plain dict to settings, coercing thinking dicts. + + For backward compatibility, a ``thinking`` value that is a plain dict + is converted to a :class:`AnthropicThinkingConfig`. + """ + instance = super().from_mapping(settings) + if is_given(instance.thinking) and isinstance(instance.thinking, dict): + instance.thinking = AnthropicThinkingConfig(**instance.thinking) + return instance + + @dataclass class AnthropicContextAggregatorPair: """Pair of context aggregators for Anthropic conversations. @@ -115,26 +160,13 @@ class AnthropicLLMService(LLMService): Can use custom clients like AsyncAnthropicBedrock and AsyncAnthropicVertex. """ + _settings: AnthropicLLMSettings + # Overriding the default adapter to use the Anthropic one. adapter_class = AnthropicLLMAdapter - class ThinkingConfig(BaseModel): - """Configuration for extended thinking. - - Parameters: - type: Type of thinking mode (currently only "enabled" or "disabled"). - budget_tokens: Maximum number of tokens for thinking. - With today's models, the minimum is 1024. - Only allowed if type is "enabled". - """ - - # Why `| str` here? To not break compatibility in case Anthropic adds - # more types in the future. - type: Literal["enabled", "disabled"] | str - - # Why not enforce minimnum of 1024 here? To not break compatibility in - # case Anthropic changes this requirement in the future. - budget_tokens: int + # Backward compatibility: ThinkingConfig used to be defined inline here. + ThinkingConfig = AnthropicThinkingConfig class InputParams(BaseModel): """Input parameters for Anthropic model inference. 
@@ -163,9 +195,7 @@ class AnthropicLLMService(LLMService): temperature: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) top_k: Optional[int] = Field(default_factory=lambda: NOT_GIVEN, ge=0) top_p: Optional[float] = Field(default_factory=lambda: NOT_GIVEN, ge=0.0, le=1.0) - thinking: Optional["AnthropicLLMService.ThinkingConfig"] = Field( - default_factory=lambda: NOT_GIVEN - ) + thinking: Optional[AnthropicThinkingConfig] = Field(default_factory=lambda: NOT_GIVEN) extra: Optional[Dict[str, Any]] = Field(default_factory=dict) def model_post_init(self, __context): @@ -184,7 +214,7 @@ class AnthropicLLMService(LLMService): self, *, api_key: str, - model: str = "claude-sonnet-4-5-20250929", + model: str = "claude-sonnet-4-6", params: Optional[InputParams] = None, client=None, retry_timeout_secs: Optional[float] = 5.0, @@ -195,38 +225,46 @@ class AnthropicLLMService(LLMService): Args: api_key: Anthropic API key for authentication. - model: Model name to use. Defaults to "claude-sonnet-4-5-20250929". + model: Model name to use. Defaults to "claude-sonnet-4-6". params: Optional model parameters for inference. client: Optional custom Anthropic client instance. retry_timeout_secs: Request timeout in seconds for retry logic. retry_on_timeout: Whether to retry the request once if it times out. **kwargs: Additional arguments passed to parent LLMService. 
""" - super().__init__(**kwargs) params = params or AnthropicLLMService.InputParams() + + super().__init__( + settings=AnthropicLLMSettings( + model=model, + max_tokens=params.max_tokens, + enable_prompt_caching=( + params.enable_prompt_caching + if params.enable_prompt_caching is not None + else ( + params.enable_prompt_caching_beta + if params.enable_prompt_caching_beta is not None + else False + ) + ), + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + thinking=params.thinking, + extra=params.extra if isinstance(params.extra, dict) else {}, + ), + **kwargs, + ) self._client = client or AsyncAnthropic( api_key=api_key ) # if the client is provided, use it and remove it, otherwise create a new one - self.set_model_name(model) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self._settings = { - "max_tokens": params.max_tokens, - "enable_prompt_caching": ( - params.enable_prompt_caching - if params.enable_prompt_caching is not None - else ( - params.enable_prompt_caching_beta - if params.enable_prompt_caching_beta is not None - else False - ) - ), - "temperature": params.temperature, - "top_k": params.top_k, - "top_p": params.top_p, - "thinking": params.thinking, - "extra": params.extra if isinstance(params.extra, dict) else {}, - } def can_generate_metrics(self) -> bool: """Check if this service can generate usage metrics. 
@@ -280,7 +318,7 @@ class AnthropicLLMService(LLMService): if isinstance(context, LLMContext): adapter: AnthropicLLMAdapter = self.get_llm_adapter() invocation_params = adapter.get_llm_invocation_params( - context, enable_prompt_caching=self._settings["enable_prompt_caching"] + context, enable_prompt_caching=self._settings.enable_prompt_caching ) messages = invocation_params["messages"] system = invocation_params["system"] @@ -293,21 +331,21 @@ class AnthropicLLMService(LLMService): # Build params using the same method as streaming completions params = { - "model": self.model_name, - "max_tokens": max_tokens if max_tokens is not None else self._settings["max_tokens"], + "model": self._settings.model, + "max_tokens": max_tokens if max_tokens is not None else self._settings.max_tokens, "stream": False, - "temperature": self._settings["temperature"], - "top_k": self._settings["top_k"], - "top_p": self._settings["top_p"], + "temperature": self._settings.temperature, + "top_k": self._settings.top_k, + "top_p": self._settings.top_p, "messages": messages, "system": system, "tools": tools, "betas": ["interleaved-thinking-2025-05-14"], } - if self._settings["thinking"]: - params["thinking"] = self._settings["thinking"].model_dump(exclude_unset=True) + if self._settings.thinking: + params["thinking"] = self._settings.thinking.model_dump(exclude_unset=True) - params.update(self._settings["extra"]) + params.update(self._settings.extra) # LLM completion response = await self._client.beta.messages.create(**params) @@ -358,14 +396,14 @@ class AnthropicLLMService(LLMService): if isinstance(context, LLMContext): adapter: AnthropicLLMAdapter = self.get_llm_adapter() params = adapter.get_llm_invocation_params( - context, enable_prompt_caching=self._settings["enable_prompt_caching"] + context, enable_prompt_caching=self._settings.enable_prompt_caching ) return params # Anthropic-specific context messages = ( context.get_messages_with_cache_control_markers() - if 
self._settings["enable_prompt_caching"] + if self._settings.enable_prompt_caching else context.messages ) return AnthropicLLMInvocationParams( @@ -407,22 +445,22 @@ class AnthropicLLMService(LLMService): await self.start_ttfb_metrics() params = { - "model": self.model_name, - "max_tokens": self._settings["max_tokens"], + "model": self._settings.model, + "max_tokens": self._settings.max_tokens, "stream": True, - "temperature": self._settings["temperature"], - "top_k": self._settings["top_k"], - "top_p": self._settings["top_p"], + "temperature": self._settings.temperature, + "top_k": self._settings.top_k, + "top_p": self._settings.top_p, } # Add thinking parameter if set - if self._settings["thinking"]: - params["thinking"] = self._settings["thinking"].model_dump(exclude_unset=True) + if self._settings.thinking: + params["thinking"] = self._settings.thinking.model_dump(exclude_unset=True) # Messages, system, tools params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) # "Interleaved thinking" needed to allow thinking between sequences # of function calls, when extended thinking is enabled. 
@@ -576,11 +614,9 @@ class AnthropicLLMService(LLMService): # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal # LLMContext with it context = AnthropicLLMContext.from_messages(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) elif isinstance(frame, LLMEnablePromptCachingFrame): logger.debug(f"Setting enable prompt caching to: [{frame.enable}]") - self._settings["enable_prompt_caching"] = frame.enable + self._settings.enable_prompt_caching = frame.enable else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/assemblyai/stt.py b/src/pipecat/services/assemblyai/stt.py index 41a0ae2a0..a89f5fe52 100644 --- a/src/pipecat/services/assemblyai/stt.py +++ b/src/pipecat/services/assemblyai/stt.py @@ -12,6 +12,7 @@ WebSocket API for streaming audio transcription. import asyncio import json +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, Dict, Optional from urllib.parse import urlencode @@ -29,6 +30,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import ASSEMBLYAI_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -52,6 +54,21 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AssemblyAISTTSettings(STTSettings): + """Settings for the AssemblyAI STT service. + + See :class:`AssemblyAIConnectionParams` for detailed parameter descriptions. + + Parameters: + connection_params: Connection configuration parameters. + """ + + connection_params: AssemblyAIConnectionParams | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + class AssemblyAISTTService(WebsocketSTTService): """AssemblyAI real-time speech-to-text service. 
@@ -60,6 +77,8 @@ class AssemblyAISTTService(WebsocketSTTService): for audio processing and connection management. """ + _settings: AssemblyAISTTSettings + def __init__( self, *, @@ -92,13 +111,18 @@ class AssemblyAISTTService(WebsocketSTTService): connection_params = self._configure_manual_turn_mode(connection_params) super().__init__( - sample_rate=connection_params.sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs + sample_rate=connection_params.sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=AssemblyAISTTSettings( + model=None, + language=language, + connection_params=connection_params, + ), + **kwargs, ) self._api_key = api_key - self._language = language self._api_endpoint_base_url = api_endpoint_base_url - self._connection_params = connection_params self._vad_force_turn_endpoint = vad_force_turn_endpoint self._termination_event = asyncio.Event() @@ -165,6 +189,37 @@ class AssemblyAISTTService(WebsocketSTTService): """ return True + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active connection. + + Args: + delta: A :class:`STTSettings` (or ``AssemblyAISTTSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # # Re-apply manual turn mode config if vad_force_turn_endpoint is active + # # and connection_params were updated. 
+ # if self._vad_force_turn_endpoint and "connection_params" in changed: + # self._settings.connection_params = self._configure_manual_turn_mode( + # self._settings.connection_params + # ) + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def start(self, frame: StartFrame): """Start the speech-to-text service. @@ -239,7 +294,7 @@ class AssemblyAISTTService(WebsocketSTTService): def _build_ws_url(self) -> str: """Build WebSocket URL with query parameters using urllib.parse.urlencode.""" params = {} - for k, v in self._connection_params.model_dump().items(): + for k, v in self._settings.connection_params.model_dump().items(): if v is not None: if k == "keyterms_prompt": params[k] = json.dumps(v) @@ -415,18 +470,18 @@ class AssemblyAISTTService(WebsocketSTTService): if not message.transcript: return if message.end_of_turn and ( - not self._connection_params.formatted_finals or message.turn_is_formatted + not self._settings.connection_params.formatted_finals or message.turn_is_formatted ): await self.push_frame( TranscriptionFrame( message.transcript, self._user_id, time_now_iso8601(), - self._language, + self._settings.language, message, ) ) - await self._trace_transcription(message.transcript, True, self._language) + await self._trace_transcription(message.transcript, True, self._settings.language) await self.stop_processing_metrics() else: await self.push_frame( @@ -434,7 +489,7 @@ class AssemblyAISTTService(WebsocketSTTService): message.transcript, self._user_id, time_now_iso8601(), - self._language, + self._settings.language, message, ) ) diff --git a/src/pipecat/services/asyncai/tts.py b/src/pipecat/services/asyncai/tts.py index 69ed90ca1..4f1fd5a58 100644 --- a/src/pipecat/services/asyncai/tts.py +++ b/src/pipecat/services/asyncai/tts.py @@ -9,7 +9,8 @@ import asyncio import base64 import json -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field 
+from typing import Any, AsyncGenerator, Mapping, Optional import aiohttp from loguru import logger @@ -20,14 +21,14 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, StartFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import AudioContextTTSService, TTSService +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven +from pipecat.services.tts_service import AudioContextTTSService, TextAggregationMode, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -72,12 +73,40 @@ def language_to_async_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class AsyncAITTSSettings(TTSSettings): + """Settings for Async AI TTS services. + + Parameters: + output_container: Audio container format (e.g. "raw"). + output_encoding: Audio encoding format (e.g. "pcm_s16le"). + output_sample_rate: Audio sample rate in Hz. 
+ """ + + output_container: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "AsyncAITTSSettings": + """Construct settings from a plain dict, destructuring legacy nested ``output_format``.""" + flat = dict(settings) + nested = flat.pop("output_format", None) + if isinstance(nested, dict): + flat.setdefault("output_container", nested.get("container")) + flat.setdefault("output_encoding", nested.get("encoding")) + flat.setdefault("output_sample_rate", nested.get("sample_rate")) + return super().from_mapping(flat) + + class AsyncAITTSService(AudioContextTTSService): """Async TTS service with WebSocket streaming. Provides text-to-speech using Async's streaming WebSocket API. """ + _settings: AsyncAITTSSettings + class InputParams(BaseModel): """Input parameters for Async TTS configuration. @@ -99,7 +128,8 @@ class AsyncAITTSService(AudioContextTTSService): encoding: str = "pcm_s16le", container: str = "raw", params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, **kwargs, ): """Initialize the Async TTS service. @@ -115,39 +145,56 @@ class AsyncAITTSService(AudioContextTTSService): encoding: Audio encoding format. container: Audio container format. params: Additional input parameters for voice customization. - aggregate_sentences: Whether to aggregate sentences within the TTSService. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. **kwargs: Additional arguments passed to the parent service. 
""" + params = params or AsyncAITTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, pause_frame_processing=True, push_stop_frames=True, sample_rate=sample_rate, + settings=AsyncAITTSSettings( + model=model, + voice=voice_id, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) + if params.language + else None, + ), **kwargs, ) - params = params or AsyncAITTSService.InputParams() - self._api_key = api_key self._api_version = version self._url = url - self._settings = { - "output_format": { - "container": container, - "encoding": encoding, - "sample_rate": 0, - }, - "language": self.language_to_service_language(params.language) - if params.language - else None, - } - - self.set_model_name(model) - self.set_voice(voice_id) self._receive_task = None self._keepalive_task = None + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + self._warn_unhandled_updated_settings(changed) + + return changed + def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -178,7 +225,7 @@ class AsyncAITTSService(AudioContextTTSService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -232,10 +279,14 @@ class AsyncAITTSService(AudioContextTTSService): f"{self._url}?api_key={self._api_key}&version={self._api_version}" ) init_msg = { - "model_id": self._model_name, - "voice": {"mode": "id", "id": self._voice_id}, - "output_format": self._settings["output_format"], - "language": self._settings["language"], + "model_id": self._settings.model, + "voice": {"mode": "id", "id": self._settings.voice}, + "output_format": { + "container": self._settings.output_container, + "encoding": self._settings.output_encoding, + "sample_rate": self._settings.output_sample_rate, + }, + "language": self._settings.language, } await self._get_websocket().send(json.dumps(init_msg)) @@ -346,18 +397,29 @@ class AsyncAITTSService(AudioContextTTSService): logger.warning(f"{self} keepalive error: {e}") break - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by closing the current context.""" - context_id = self.get_active_audio_context_id() - await super()._handle_interruption(frame, direction) - # Close the current context when interrupted without closing the websocket + async def _close_context(self, context_id: str): + # Async AI requires explicit context closure to free server-side resources, + # both on interruption and on normal completion. 
if context_id and self._websocket: try: await self._websocket.send( json.dumps({"context_id": context_id, "close_context": True, "transcript": ""}) ) except Exception as e: - logger.error(f"Error closing context on interruption: {e}") + logger.error(f"{self}: Error closing context {context_id}: {e}") + + async def on_audio_context_interrupted(self, context_id: str): + """Close the Async AI context when the bot is interrupted.""" + await self._close_context(context_id) + + async def on_audio_context_completed(self, context_id: str): + """Close the Async AI context after all audio has been played. + + Async AI does not send a server-side signal when a context is + exhausted, so Pipecat must explicitly close it with + ``close_context: True`` to free server-side resources. + """ + await self._close_context(context_id) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -404,6 +466,8 @@ class AsyncAIHttpTTSService(TTSService): connection is not required or desired. """ + _settings: AsyncAITTSSettings + class InputParams(BaseModel): """Input parameters for Async API. @@ -443,25 +507,26 @@ class AsyncAIHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or AsyncAIHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=AsyncAITTSSettings( + model=model, + voice=voice_id, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) + if params.language + else None, + ), + **kwargs, + ) + self._api_key = api_key self._base_url = url self._api_version = version - self._settings = { - "output_format": { - "container": container, - "encoding": encoding, - "sample_rate": 0, - }, - "language": self.language_to_service_language(params.language) - if params.language - else None, - } - self.set_voice(voice_id) - self.set_model_name(model) self._session = aiohttp_session @@ -491,7 +556,7 @@ class AsyncAIHttpTTSService(TTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -507,14 +572,18 @@ class AsyncAIHttpTTSService(TTSService): logger.debug(f"{self}: Generating TTS [{text}]") try: - voice_config = {"mode": "id", "id": self._voice_id} + voice_config = {"mode": "id", "id": self._settings.voice} await self.start_ttfb_metrics() payload = { - "model_id": self._model_name, + "model_id": self._settings.model, "transcript": text, "voice": voice_config, - "output_format": self._settings["output_format"], - "language": self._settings["language"], + "output_format": { + "container": self._settings.output_container, + "encoding": self._settings.output_encoding, + "sample_rate": self._settings.output_sample_rate, + }, + "language": self._settings.language, } yield TTSStartedFrame(context_id=context_id) headers = { diff --git a/src/pipecat/services/aws/llm.py 
b/src/pipecat/services/aws/llm.py index 1778ae74e..540ac4a8e 100644 --- a/src/pipecat/services/aws/llm.py +++ b/src/pipecat/services/aws/llm.py @@ -18,8 +18,8 @@ import io import json import os import re -from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from dataclasses import dataclass, field +from typing import Any, ClassVar, Dict, List, Optional from loguru import logger from PIL import Image @@ -40,7 +40,6 @@ from pipecat.frames.frames import ( LLMFullResponseStartFrame, LLMMessagesFrame, LLMTextFrame, - LLMUpdateSettingsFrame, UserImageRawFrame, ) from pipecat.metrics.metrics import LLMTokenUsage @@ -57,6 +56,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.utils.tracing.service_decorators import traced_llm try: @@ -71,6 +71,21 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AWSBedrockLLMSettings(LLMSettings): + """Settings for AWS Bedrock LLM services. + + Parameters: + latency: Performance mode - "standard" or "optimized". + additional_model_request_fields: Additional model-specific parameters. + """ + + latency: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + additional_model_request_fields: Dict[str, Any] | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + @dataclass class AWSBedrockContextAggregatorPair: """Container for AWS Bedrock context aggregators. @@ -730,6 +745,8 @@ class AWSBedrockLLMService(LLMService): vision capabilities. """ + _settings: AWSBedrockLLMSettings + # Overriding the default adapter to use the Anthropic one. adapter_class = AWSBedrockLLMAdapter @@ -780,10 +797,28 @@ class AWSBedrockLLMService(LLMService): retry_on_timeout: Whether to retry the request once if it times out. 
**kwargs: Additional arguments passed to parent LLMService. """ - super().__init__(**kwargs) - params = params or AWSBedrockLLMService.InputParams() + super().__init__( + settings=AWSBedrockLLMSettings( + model=model, + max_tokens=params.max_tokens, + temperature=params.temperature, + top_p=params.top_p, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + latency=params.latency, + additional_model_request_fields=params.additional_model_request_fields + if isinstance(params.additional_model_request_fields, dict) + else {}, + ), + **kwargs, + ) + # Initialize the AWS Bedrock client if not client_config: client_config = Config( @@ -803,18 +838,8 @@ class AWSBedrockLLMService(LLMService): "config": client_config, } - self.set_model_name(model) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self._settings = { - "max_tokens": params.max_tokens, - "temperature": params.temperature, - "top_p": params.top_p, - "latency": params.latency, - "additional_model_request_fields": params.additional_model_request_fields - if isinstance(params.additional_model_request_fields, dict) - else {}, - } logger.info(f"Using AWS Bedrock model: {model}") @@ -836,12 +861,12 @@ class AWSBedrockLLMService(LLMService): Dictionary containing only the inference parameters that are not None. 
""" inference_config = {} - if self._settings["max_tokens"] is not None: - inference_config["maxTokens"] = self._settings["max_tokens"] - if self._settings["temperature"] is not None: - inference_config["temperature"] = self._settings["temperature"] - if self._settings["top_p"] is not None: - inference_config["topP"] = self._settings["top_p"] + if self._settings.max_tokens is not None: + inference_config["maxTokens"] = self._settings.max_tokens + if self._settings.temperature is not None: + inference_config["temperature"] = self._settings.temperature + if self._settings.top_p is not None: + inference_config["topP"] = self._settings.top_p return inference_config async def run_inference( @@ -877,9 +902,9 @@ class AWSBedrockLLMService(LLMService): inference_config["maxTokens"] = max_tokens request_params = { - "modelId": self.model_name, + "modelId": self._settings.model, "messages": messages, - "additionalModelRequestFields": self._settings["additional_model_request_fields"], + "additionalModelRequestFields": self._settings.additional_model_request_fields, } if inference_config: @@ -1034,9 +1059,9 @@ class AWSBedrockLLMService(LLMService): # Prepare request parameters request_params = { - "modelId": self.model_name, + "modelId": self._settings.model, "messages": messages, - "additionalModelRequestFields": self._settings["additional_model_request_fields"], + "additionalModelRequestFields": self._settings.additional_model_request_fields, } # Only add inference config if it has parameters @@ -1081,8 +1106,8 @@ class AWSBedrockLLMService(LLMService): request_params["toolConfig"] = tool_config # Add performance config if latency is specified - if self._settings["latency"] in ["standard", "optimized"]: - request_params["performanceConfig"] = {"latency": self._settings["latency"]} + if self._settings.latency in ["standard", "optimized"]: + request_params["performanceConfig"] = {"latency": self._settings.latency} # Log request params with messages redacted for logging if 
isinstance(context, LLMContext): @@ -1207,8 +1232,6 @@ class AWSBedrockLLMService(LLMService): # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal # LLMContext with it context = AWSBedrockLLMContext.from_messages(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/aws/nova_sonic/llm.py b/src/pipecat/services/aws/nova_sonic/llm.py index 05baba2bd..29612e593 100644 --- a/src/pipecat/services/aws/nova_sonic/llm.py +++ b/src/pipecat/services/aws/nova_sonic/llm.py @@ -16,7 +16,7 @@ import json import time import uuid import wave -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum from importlib.resources import files from typing import Any, List, Optional @@ -60,6 +60,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import LLMService +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.utils.time import time_now_iso8601 try: @@ -185,6 +186,20 @@ class Params(BaseModel): endpointing_sensitivity: Optional[str] = Field(default=None) +@dataclass +class AWSNovaSonicLLMSettings(LLMSettings): + """Settings for AWS Nova Sonic LLM service. + + Parameters: + voice_id: Voice for speech synthesis. + endpointing_sensitivity: Controls how quickly Nova Sonic decides the + user has stopped speaking. Can be "LOW", "MEDIUM", or "HIGH". + """ + + voice_id: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + endpointing_sensitivity: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class AWSNovaSonicLLMService(LLMService): """AWS Nova Sonic speech-to-speech LLM service. @@ -192,6 +207,8 @@ class AWSNovaSonicLLMService(LLMService): and function calling capabilities using AWS Nova Sonic model. 
""" + _settings: AWSNovaSonicLLMSettings + # Override the default adapter to use the AWSNovaSonicLLMAdapter one adapter_class = AWSNovaSonicLLMAdapter @@ -237,28 +254,51 @@ class AWSNovaSonicLLMService(LLMService): **kwargs: Additional arguments passed to the parent LLMService. """ - super().__init__(**kwargs) + params = params or Params() + + super().__init__( + settings=AWSNovaSonicLLMSettings( + model=model, + voice_id=voice_id, + temperature=params.temperature, + max_tokens=params.max_tokens, + top_p=params.top_p, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + endpointing_sensitivity=params.endpointing_sensitivity, + ), + **kwargs, + ) self._secret_access_key = secret_access_key self._access_key_id = access_key_id self._session_token = session_token self._region = region - self._model = model self._client: Optional[BedrockRuntimeClient] = None - self._voice_id = voice_id - self._params = params or Params() + + # Audio I/O config (hardware settings, not runtime-tunable) + self._input_sample_rate = params.input_sample_rate + self._input_sample_size = params.input_sample_size + self._input_channel_count = params.input_channel_count + self._output_sample_rate = params.output_sample_rate + self._output_sample_size = params.output_sample_size + self._output_channel_count = params.output_channel_count self._system_instruction = system_instruction self._tools = tools # Validate endpointing_sensitivity parameter if ( - self._params.endpointing_sensitivity + self._settings.endpointing_sensitivity and not self._is_endpointing_sensitivity_supported() ): logger.warning( f"endpointing_sensitivity is not supported for model '{model}' and will be ignored. " "This parameter is only supported starting with Nova 2 Sonic (amazon.nova-2-sonic-v1:0)." 
) - self._params.endpointing_sensitivity = None + self._settings.endpointing_sensitivity = None if not send_transcription_frames: import warnings @@ -302,6 +342,29 @@ class AWSNovaSonicLLMService(LLMService): with wave.open(file_path.open("rb"), "rb") as wav_file: self._assistant_response_trigger_audio = wav_file.readframes(wav_file.getnframes()) + # + # settings + # + + async def _update_settings(self, delta: AWSNovaSonicLLMSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._start_connecting() + + self._warn_unhandled_updated_settings(changed) + + return changed + # # standard AIService frame handling # @@ -472,7 +535,7 @@ class AWSNovaSonicLLMService(LLMService): # Start the bidirectional stream self._stream = await self._client.invoke_model_with_bidirectional_stream( - InvokeModelWithBidirectionalStreamOperationInput(model_id=self._model) + InvokeModelWithBidirectionalStreamOperationInput(model_id=self._settings.model) ) # Send session start event @@ -639,7 +702,7 @@ class AWSNovaSonicLLMService(LLMService): def _is_first_generation_sonic_model(self) -> bool: # Nova Sonic (the older model) is identified by "amazon.nova-sonic-v1:0" - return self._model == "amazon.nova-sonic-v1:0" + return self._settings.model == "amazon.nova-sonic-v1:0" def _is_endpointing_sensitivity_supported(self) -> bool: # endpointing_sensitivity is only supported with Nova 2 Sonic (and, @@ -658,9 +721,9 @@ class AWSNovaSonicLLMService(LLMService): turn_detection_config = ( f""", "turnDetectionConfiguration": {{ - "endpointingSensitivity": "{self._params.endpointing_sensitivity}" + "endpointingSensitivity": "{self._settings.endpointing_sensitivity}" }}""" - if 
self._params.endpointing_sensitivity + if self._settings.endpointing_sensitivity else "" ) @@ -669,9 +732,9 @@ class AWSNovaSonicLLMService(LLMService): "event": {{ "sessionStart": {{ "inferenceConfiguration": {{ - "maxTokens": {self._params.max_tokens}, - "topP": {self._params.top_p}, - "temperature": {self._params.temperature} + "maxTokens": {self._settings.max_tokens}, + "topP": {self._settings.top_p}, + "temperature": {self._settings.temperature} }}{turn_detection_config} }} }} @@ -706,10 +769,10 @@ class AWSNovaSonicLLMService(LLMService): }}, "audioOutputConfiguration": {{ "mediaType": "audio/lpcm", - "sampleRateHertz": {self._params.output_sample_rate}, - "sampleSizeBits": {self._params.output_sample_size}, - "channelCount": {self._params.output_channel_count}, - "voiceId": "{self._voice_id}", + "sampleRateHertz": {self._output_sample_rate}, + "sampleSizeBits": {self._output_sample_size}, + "channelCount": {self._output_channel_count}, + "voiceId": "{self._settings.voice_id}", "encoding": "base64", "audioType": "SPEECH" }}{tools_config} @@ -734,9 +797,9 @@ class AWSNovaSonicLLMService(LLMService): "role": "USER", "audioInputConfiguration": {{ "mediaType": "audio/lpcm", - "sampleRateHertz": {self._params.input_sample_rate}, - "sampleSizeBits": {self._params.input_sample_size}, - "channelCount": {self._params.input_channel_count}, + "sampleRateHertz": {self._input_sample_rate}, + "sampleSizeBits": {self._input_sample_size}, + "channelCount": {self._input_channel_count}, "audioType": "SPEECH", "encoding": "base64" }} @@ -1019,8 +1082,8 @@ class AWSNovaSonicLLMService(LLMService): audio = base64.b64decode(audio_content) frame = TTSAudioRawFrame( audio=audio, - sample_rate=self._params.output_sample_rate, - num_channels=self._params.output_channel_count, + sample_rate=self._output_sample_rate, + num_channels=self._output_channel_count, ) await self.push_frame(frame) @@ -1304,7 +1367,7 @@ class AWSNovaSonicLLMService(LLMService): """ if not 
self._is_assistant_response_trigger_needed(): logger.warning( - f"Assistant response trigger not needed for model '{self._model}'; skipping. " + f"Assistant response trigger not needed for model '{self._settings.model}'; skipping. " "An LLMRunFrame() should be sufficient to prompt the assistant to respond, " "assuming the context ends in a user message." ) @@ -1332,9 +1395,9 @@ class AWSNovaSonicLLMService(LLMService): chunk_duration = 0.02 # what we might get from InputAudioRawFrame chunk_size = int( chunk_duration - * self._params.input_sample_rate - * self._params.input_channel_count - * (self._params.input_sample_size / 8) + * self._input_sample_rate + * self._input_channel_count + * (self._input_sample_size / 8) ) # e.g. 0.02 seconds of 16-bit (2-byte) PCM mono audio at 16kHz is 640 bytes # Lead with a bit of blank audio, if needed. diff --git a/src/pipecat/services/aws/stt.py b/src/pipecat/services/aws/stt.py index f78bc4d4b..7c3fb398e 100644 --- a/src/pipecat/services/aws/stt.py +++ b/src/pipecat/services/aws/stt.py @@ -14,7 +14,8 @@ import json import os import random import string -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -28,6 +29,7 @@ from pipecat.frames.frames import ( TranscriptionFrame, ) from pipecat.services.aws.utils import build_event_message, decode_event, get_presigned_url +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import AWS_TRANSCRIBE_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -43,6 +45,25 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AWSTranscribeSTTSettings(STTSettings): + """Settings for the AWS Transcribe STT service. + + Parameters: + sample_rate: Audio sample rate in Hz (8000 or 16000). 
+ media_encoding: Audio encoding format (e.g. "linear16"). + number_of_channels: Number of audio channels. + show_speaker_label: Whether to show speaker labels. + enable_channel_identification: Whether to enable channel identification. + """ + + sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + media_encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + number_of_channels: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + show_speaker_label: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_channel_identification: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class AWSTranscribeSTTService(WebsocketSTTService): """AWS Transcribe Speech-to-Text service using WebSocket streaming. @@ -51,6 +72,8 @@ class AWSTranscribeSTTService(WebsocketSTTService): final transcription results. """ + _settings: AWSTranscribeSTTSettings + def __init__( self, *, @@ -76,23 +99,25 @@ class AWSTranscribeSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to parent STTService class. """ - super().__init__(ttfs_p99_latency=ttfs_p99_latency, **kwargs) - - self._settings = { - "sample_rate": sample_rate, - "language": language, - "media_encoding": "linear16", # AWS expects raw PCM - "number_of_channels": 1, - "show_speaker_label": False, - "enable_channel_identification": False, - } + super().__init__( + ttfs_p99_latency=ttfs_p99_latency, + settings=AWSTranscribeSTTSettings( + language=self.language_to_service_language(language) or "en-US", + sample_rate=sample_rate, + media_encoding="linear16", + number_of_channels=1, + show_speaker_label=False, + enable_channel_identification=False, + ), + **kwargs, + ) # Validate sample rate - AWS Transcribe only supports 8000 Hz or 16000 Hz if sample_rate not in [8000, 16000]: logger.warning( f"AWS Transcribe only supports 8000 Hz or 16000 Hz sample rates. 
Converting from {sample_rate} Hz to 16000 Hz." ) - self._settings["sample_rate"] = 16000 + self._settings.sample_rate = 16000 self._credentials = { "aws_access_key_id": aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"), @@ -103,6 +128,14 @@ class AWSTranscribeSTTService(WebsocketSTTService): self._receive_task = None + def can_generate_metrics(self) -> bool: + """Check if this service can generate processing metrics. + + Returns: + True, as AWS Transcribe STT supports metrics generation. + """ + return True + def get_service_encoding(self, encoding: str) -> str: """Convert internal encoding format to AWS Transcribe format. @@ -117,6 +150,26 @@ class AWSTranscribeSTTService(WebsocketSTTService): } return encoding_map.get(encoding, encoding) + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # if changed and self._websocket: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def start(self, frame: StartFrame): """Initialize the connection when the service starts. 
@@ -208,9 +261,9 @@ class AWSTranscribeSTTService(WebsocketSTTService): logger.debug("Connecting to AWS Transcribe WebSocket") - language_code = self.language_to_service_language(Language(self._settings["language"])) + language_code = self._settings.language if not language_code: - raise ValueError(f"Unsupported language: {self._settings['language']}") + raise ValueError(f"Unsupported language: {language_code}") # Generate random websocket key websocket_key = "".join( @@ -237,14 +290,14 @@ class AWSTranscribeSTTService(WebsocketSTTService): }, language_code=language_code, media_encoding=self.get_service_encoding( - self._settings["media_encoding"] + self._settings.media_encoding ), # Convert to AWS format - sample_rate=self._settings["sample_rate"], - number_of_channels=self._settings["number_of_channels"], + sample_rate=self._settings.sample_rate, + number_of_channels=self._settings.number_of_channels, enable_partial_results_stabilization=True, partial_results_stability="high", - show_speaker_label=self._settings["show_speaker_label"], - enable_channel_identification=self._settings["enable_channel_identification"], + show_speaker_label=self._settings.show_speaker_label, + enable_channel_identification=self._settings.enable_channel_identification, ) logger.debug(f"{self} Connecting to WebSocket with URL: {presigned_url[:100]}...") @@ -479,14 +532,14 @@ class AWSTranscribeSTTService(WebsocketSTTService): transcript, self._user_id, time_now_iso8601(), - self._settings["language"], + self._settings.language, result=result, ) ) await self._handle_transcription( transcript, is_final, - self._settings["language"], + self._settings.language, ) await self.stop_processing_metrics() else: @@ -495,7 +548,7 @@ class AWSTranscribeSTTService(WebsocketSTTService): transcript, self._user_id, time_now_iso8601(), - self._settings["language"], + self._settings.language, result=result, ) ) diff --git a/src/pipecat/services/aws/tts.py b/src/pipecat/services/aws/tts.py index 
b902564d2..017477a7a 100644 --- a/src/pipecat/services/aws/tts.py +++ b/src/pipecat/services/aws/tts.py @@ -11,6 +11,7 @@ supporting multiple languages, voices, and SSML features. """ import os +from dataclasses import dataclass, field from typing import AsyncGenerator, List, Optional from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -121,6 +123,25 @@ def language_to_aws_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class AWSPollyTTSSettings(TTSSettings): + """Settings for AWS Polly TTS service. + + Parameters: + engine: TTS engine to use ('standard', 'neural', etc.). + pitch: Voice pitch adjustment (for standard engine only). + rate: Speech rate adjustment. + volume: Voice volume adjustment. + lexicon_names: List of pronunciation lexicons to apply. + """ + + engine: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + volume: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + lexicon_names: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class AWSPollyTTSService(TTSService): """AWS Polly text-to-speech service. @@ -129,6 +150,8 @@ class AWSPollyTTSService(TTSService): options including prosody controls. """ + _settings: AWSPollyTTSSettings + class InputParams(BaseModel): """Input parameters for AWS Polly TTS configuration. @@ -172,10 +195,25 @@ class AWSPollyTTSService(TTSService): params: Additional input parameters for voice customization. 
**kwargs: Additional arguments passed to parent TTSService class. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or AWSPollyTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=AWSPollyTTSSettings( + model=None, + voice=voice_id, + engine=params.engine, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + pitch=params.pitch, + rate=params.rate, + volume=params.volume, + lexicon_names=params.lexicon_names, + ), + **kwargs, + ) + # Get credentials from environment variables if not provided self._aws_params = { "aws_access_key_id": aws_access_key_id or os.getenv("AWS_ACCESS_KEY_ID"), @@ -185,21 +223,9 @@ class AWSPollyTTSService(TTSService): } self._aws_session = aioboto3.Session() - self._settings = { - "engine": params.engine, - "language": self.language_to_service_language(params.language) - if params.language - else "en-US", - "pitch": params.pitch, - "rate": params.rate, - "volume": params.volume, - "lexicon_names": params.lexicon_names, - } self._resampler = create_stream_resampler() - self.set_voice(voice_id) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -222,19 +248,19 @@ class AWSPollyTTSService(TTSService): def _construct_ssml(self, text: str) -> str: ssml = "" - language = self._settings["language"] + language = self._settings.language ssml += f"" prosody_attrs = [] # Prosody tags are only supported for standard and neural engines - if self._settings["engine"] == "standard": - if self._settings["pitch"]: - prosody_attrs.append(f"pitch='{self._settings['pitch']}'") + if self._settings.engine == "standard": + if self._settings.pitch: + prosody_attrs.append(f"pitch='{self._settings.pitch}'") - if self._settings["rate"]: - prosody_attrs.append(f"rate='{self._settings['rate']}'") - if self._settings["volume"]: - prosody_attrs.append(f"volume='{self._settings['volume']}'") + if self._settings.rate: + prosody_attrs.append(f"rate='{self._settings.rate}'") + if self._settings.volume: + prosody_attrs.append(f"volume='{self._settings.volume}'") if prosody_attrs: ssml += f"" @@ -275,11 +301,11 @@ class AWSPollyTTSService(TTSService): "Text": ssml, "TextType": "ssml", "OutputFormat": "pcm", - "VoiceId": self._voice_id, - "Engine": self._settings["engine"], + "VoiceId": self._settings.voice, + "Engine": self._settings.engine, # AWS only supports 8000 and 16000 for PCM. We select 16000. "SampleRate": "16000", - "LexiconNames": self._settings["lexicon_names"], + "LexiconNames": self._settings.lexicon_names, } # Filter out None values diff --git a/src/pipecat/services/azure/image.py b/src/pipecat/services/azure/image.py index 2bddf6c43..66cc28504 100644 --- a/src/pipecat/services/azure/image.py +++ b/src/pipecat/services/azure/image.py @@ -12,6 +12,7 @@ using REST endpoints for creating images from text prompts. 
import asyncio import io +from dataclasses import dataclass from typing import AsyncGenerator import aiohttp @@ -19,6 +20,16 @@ from PIL import Image from pipecat.frames.frames import ErrorFrame, Frame, URLImageRawFrame from pipecat.services.image_service import ImageGenService +from pipecat.services.settings import ImageGenSettings + + +@dataclass +class AzureImageGenSettings(ImageGenSettings): + """Settings for the Azure image generation service. + + Parameters: + model: Azure image generation model identifier. + """ class AzureImageGenServiceREST(ImageGenService): @@ -49,12 +60,11 @@ class AzureImageGenServiceREST(ImageGenService): aiohttp_session: Shared aiohttp session for HTTP requests. api_version: Azure API version string. Defaults to "2023-06-01-preview". """ - super().__init__() + super().__init__(settings=AzureImageGenSettings(model=model)) self._api_key = api_key self._azure_endpoint = endpoint self._api_version = api_version - self.set_model_name(model) self._image_size = image_size self._aiohttp_session = aiohttp_session diff --git a/src/pipecat/services/azure/stt.py b/src/pipecat/services/azure/stt.py index 1bc7ec70a..5533e350e 100644 --- a/src/pipecat/services/azure/stt.py +++ b/src/pipecat/services/azure/stt.py @@ -11,7 +11,8 @@ Speech SDK for real-time audio transcription. 
""" import asyncio -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -25,6 +26,7 @@ from pipecat.frames.frames import ( TranscriptionFrame, ) from pipecat.services.azure.common import language_to_azure_language +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import AZURE_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -33,6 +35,7 @@ from pipecat.utils.tracing.service_decorators import traced_stt try: from azure.cognitiveservices.speech import ( + CancellationReason, ResultReason, SpeechConfig, SpeechRecognizer, @@ -48,6 +51,19 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class AzureSTTSettings(STTSettings): + """Settings for the Azure STT service. + + Parameters: + region: Azure region for the Speech service. + sample_rate: Audio sample rate in Hz. + """ + + region: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + sample_rate: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class AzureSTTService(STTService): """Azure Speech-to-Text service for real-time audio transcription. @@ -56,6 +72,8 @@ class AzureSTTService(STTService): provides real-time transcription results with timing information. """ + _settings: AzureSTTSettings + def __init__( self, *, @@ -63,6 +81,7 @@ class AzureSTTService(STTService): region: str, language: Language = Language.EN_US, sample_rate: Optional[int] = None, + private_endpoint: Optional[str] = None, endpoint_id: Optional[str] = None, ttfs_p99_latency: Optional[float] = AZURE_TTFS_P99, **kwargs, @@ -74,17 +93,30 @@ class AzureSTTService(STTService): region: Azure region for the Speech service (e.g., 'eastus'). language: Language for speech recognition. Defaults to English (US). sample_rate: Audio sample rate in Hz. 
If None, uses service default. + private_endpoint: Private endpoint for STT behind firewall. + See https://docs.azure.cn/en-us/ai-services/speech-service/speech-services-private-link?tabs=portal endpoint_id: Custom model endpoint id. ttfs_p99_latency: P99 latency from speech end to final transcript in seconds. Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to parent STTService. """ - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=AzureSTTSettings( + model=None, + region=region, + language=language_to_azure_language(language), + sample_rate=sample_rate, + ), + **kwargs, + ) self._speech_config = SpeechConfig( subscription=api_key, region=region, speech_recognition_language=language_to_azure_language(language), + endpoint=private_endpoint, ) if endpoint_id: @@ -92,11 +124,6 @@ class AzureSTTService(STTService): self._audio_stream = None self._speech_recognizer = None - self._settings = { - "region": region, - "language": language_to_azure_language(language), - "sample_rate": sample_rate, - } def can_generate_metrics(self) -> bool: """Check if this service can generate performance metrics. @@ -106,6 +133,38 @@ class AzureSTTService(STTService): """ return True + def language_to_service_language(self, language: Language) -> Optional[str]: + """Convert a Language enum to Azure service-specific language code. + + Args: + language: The language to convert. + + Returns: + The Azure-specific language identifier, or None if not supported. + """ + return language_to_azure_language(language) + + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active recognizer. 
+ """ + changed = await super()._update_settings(delta) + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # if "language" in changed: + # self._speech_config.speech_recognition_language = self._settings.language + # if self._speech_recognizer: + # # Requires refactoring to set up and tear down recognizer, as + # # language is applied at recognizer initialization + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]: """Process audio data for speech-to-text conversion. @@ -151,6 +210,7 @@ class AzureSTTService(STTService): ) self._speech_recognizer.recognizing.connect(self._on_handle_recognizing) self._speech_recognizer.recognized.connect(self._on_handle_recognized) + self._speech_recognizer.canceled.connect(self._on_handle_canceled) self._speech_recognizer.start_continuous_recognition_async() except Exception as e: await self.push_error( @@ -198,7 +258,7 @@ class AzureSTTService(STTService): def _on_handle_recognized(self, event): if event.result.reason == ResultReason.RecognizedSpeech and len(event.result.text) > 0: - language = getattr(event.result, "language", None) or self._settings.get("language") + language = getattr(event.result, "language", None) or self._settings.language frame = TranscriptionFrame( event.result.text, self._user_id, @@ -213,7 +273,7 @@ class AzureSTTService(STTService): def _on_handle_recognizing(self, event): if event.result.reason == ResultReason.RecognizingSpeech and len(event.result.text) > 0: - language = getattr(event.result, "language", None) or self._settings.get("language") + language = getattr(event.result, "language", None) or self._settings.language frame = InterimTranscriptionFrame( event.result.text, self._user_id, @@ -222,3 +282,13 @@ class AzureSTTService(STTService): result=event, ) 
asyncio.run_coroutine_threadsafe(self.push_frame(frame), self.get_event_loop()) + + def _on_handle_canceled(self, event): + details = event.result.cancellation_details + if details.reason == CancellationReason.Error: + error_msg = f"Azure STT recognition canceled: {details.reason}" + if details.error_details: + error_msg += f" - {details.error_details}" + asyncio.run_coroutine_threadsafe( + self.push_error(error_msg=error_msg), self.get_event_loop() + ) diff --git a/src/pipecat/services/azure/tts.py b/src/pipecat/services/azure/tts.py index 7d4aa0253..6e62c73bf 100644 --- a/src/pipecat/services/azure/tts.py +++ b/src/pipecat/services/azure/tts.py @@ -7,6 +7,7 @@ """Azure Cognitive Services Text-to-Speech service implementations.""" import asyncio +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional from loguru import logger @@ -25,7 +26,8 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.azure.common import language_to_azure_language -from pipecat.services.tts_service import TTSService, WordTTSService +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven +from pipecat.services.tts_service import TextAggregationMode, TTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -65,6 +67,31 @@ def sample_rate_to_output_format(sample_rate: int) -> SpeechSynthesisOutputForma return sample_rate_map.get(sample_rate, SpeechSynthesisOutputFormat.Raw24Khz16BitMonoPcm) +@dataclass +class AzureTTSSettings(TTSSettings): + """Settings for Azure TTS services. + + Parameters: + emphasis: Emphasis level for speech ("strong", "moderate", "reduced"). + language: Language for synthesis. Defaults to English (US). + pitch: Voice pitch adjustment (e.g., "+10%", "-5Hz", "high"). + rate: Speech rate adjustment (e.g., "1.0", "1.25", "slow", "fast"). 
+ role: Voice role for expression (e.g., "YoungAdultFemale"). + style: Speaking style (e.g., "cheerful", "sad", "excited"). + style_degree: Intensity of the speaking style (0.01 to 2.0). + volume: Volume level (e.g., "+20%", "loud", "x-soft"). + """ + + emphasis: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + role: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + style: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + style_degree: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + volume: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class AzureBaseTTSService: """Base mixin class for Azure Cognitive Services text-to-speech implementations. @@ -73,6 +100,8 @@ class AzureBaseTTSService: This is a mixin class and should be used alongside TTSService or its subclasses. """ + _settings: AzureTTSSettings + # Define SSML escape mappings based on SSML reserved characters # See - https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-structure SSML_ESCAPE_CHARS = { @@ -112,7 +141,6 @@ class AzureBaseTTSService: api_key: str, region: str, voice: str = "en-US-SaraNeural", - params: Optional[InputParams] = None, ): """Initialize Azure-specific configuration. @@ -122,26 +150,9 @@ class AzureBaseTTSService: api_key: Azure Cognitive Services subscription key. region: Azure region identifier (e.g., "eastus", "westus2"). voice: Voice name to use for synthesis. Defaults to "en-US-SaraNeural". - params: Voice and synthesis parameters configuration. 
""" - params = params or AzureBaseTTSService.InputParams() - - self._settings = { - "emphasis": params.emphasis, - "language": self.language_to_service_language(params.language) - if params.language - else "en-US", - "pitch": params.pitch, - "rate": params.rate, - "role": params.role, - "style": params.style, - "style_degree": params.style_degree, - "volume": params.volume, - } - self._api_key = api_key self._region = region - self._voice_id = voice self._speech_synthesizer = None def language_to_service_language(self, language: Language) -> Optional[str]: @@ -156,7 +167,7 @@ class AzureBaseTTSService: return language_to_azure_language(language) def _construct_ssml(self, text: str) -> str: - language = self._settings["language"] + language = self._settings.language # Escape special characters escaped_text = self._escape_text(text) @@ -165,42 +176,42 @@ class AzureBaseTTSService: f"" - f"" + f"" "" ) - if self._settings["style"]: - ssml += f"" - if self._settings["emphasis"]: - ssml += f"" + if self._settings.emphasis: + ssml += f"" ssml += escaped_text - if self._settings["emphasis"]: + if self._settings.emphasis: ssml += "" if prosody_attrs: ssml += "" - if self._settings["style"]: + if self._settings.style: ssml += "" ssml += "" @@ -229,7 +240,7 @@ class AzureBaseTTSService: return escaped_text -class AzureTTSService(WordTTSService, AzureBaseTTSService): +class AzureTTSService(TTSService, AzureBaseTTSService): """Azure Cognitive Services streaming TTS service with word timestamps. Provides real-time text-to-speech synthesis using Azure's WebSocket-based @@ -245,7 +256,8 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): voice: str = "en-US-SaraNeural", sample_rate: Optional[int] = None, params: Optional[AzureBaseTTSService.InputParams] = None, - aggregate_sentences: bool = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, **kwargs, ): """Initialize the Azure streaming TTS service. 
@@ -256,21 +268,43 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): voice: Voice name to use for synthesis. Defaults to "en-US-SaraNeural". sample_rate: Audio sample rate in Hz. If None, uses service default. params: Voice and synthesis parameters configuration. - aggregate_sentences: Whether to aggregate sentences before synthesis. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. **kwargs: Additional arguments passed to parent WordTTSService. """ - # Initialize WordTTSService first to set up word timestamp tracking + params = params or AzureBaseTTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, push_text_frames=False, # We'll push text frames based on word timestamps push_stop_frames=True, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=sample_rate, + settings=AzureTTSSettings( + model=None, + emphasis=params.emphasis, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + pitch=params.pitch, + rate=params.rate, + role=params.role, + style=params.style, + style_degree=params.style_degree, + voice=voice, + volume=params.volume, + ), **kwargs, ) # Initialize Azure-specific functionality from mixin - self._init_azure_base(api_key=api_key, region=region, voice=voice, params=params) + self._init_azure_base(api_key=api_key, region=region, voice=voice) self._speech_config = None self._speech_synthesizer = None @@ -314,7 +348,7 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): subscription=self._api_key, region=self._region, ) - self._speech_config.speech_synthesis_language = self._settings["language"] + self._speech_config.speech_synthesis_language = self._settings.language self._speech_config.set_speech_synthesis_output_format( 
sample_rate_to_output_format(self.sample_rate) ) @@ -364,7 +398,7 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): Returns: True if the language is CJK, False otherwise. """ - language = self._settings.get("language", "").lower() + language = (self._settings.language if self._settings.language else "").lower() # Check if language starts with CJK language codes return language.startswith(("zh", "ja", "ko", "cmn", "yue", "wuu")) @@ -527,9 +561,13 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): # User cancellation (from interruption) is expected, not an error if reason == CancellationReason.CancelledByUser: logger.debug(f"{self}: Speech synthesis canceled by user (interruption)") + self._audio_queue.put_nowait(None) else: - logger.warning(f"{self}: Speech synthesis canceled: {reason}") - self._audio_queue.put_nowait(None) + details = evt.result.cancellation_details + error_msg = f"Azure TTS synthesis canceled: {reason}" + if details.error_details: + error_msg += f" - {details.error_details}" + self._audio_queue.put_nowait(Exception(error_msg)) async def push_frame(self, frame: Frame, direction: FrameDirection = FrameDirection.DOWNSTREAM): """Push a frame and handle state changes. @@ -642,6 +680,9 @@ class AzureTTSService(WordTTSService, AzureBaseTTSService): chunk = await self._audio_queue.get() if chunk is None: # End of stream break + if isinstance(chunk, Exception): # Error from _handle_canceled + yield ErrorFrame(error=str(chunk)) + break if self._first_chunk: await self.stop_ttfb_metrics() @@ -704,10 +745,29 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService): params: Voice and synthesis parameters configuration. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) + params = params or AzureBaseTTSService.InputParams() + + super().__init__( + sample_rate=sample_rate, + settings=AzureTTSSettings( + model=None, + emphasis=params.emphasis, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + pitch=params.pitch, + rate=params.rate, + role=params.role, + style=params.style, + style_degree=params.style_degree, + voice=voice, + volume=params.volume, + ), + **kwargs, + ) # Initialize Azure-specific functionality from mixin - self._init_azure_base(api_key=api_key, region=region, voice=voice, params=params) + self._init_azure_base(api_key=api_key, region=region, voice=voice) self._speech_config = None self._speech_synthesizer = None @@ -735,7 +795,7 @@ class AzureHttpTTSService(TTSService, AzureBaseTTSService): subscription=self._api_key, region=self._region, ) - self._speech_config.speech_synthesis_language = self._settings["language"] + self._speech_config.speech_synthesis_language = self._settings.language self._speech_config.set_speech_synthesis_output_format( sample_rate_to_output_format(self.sample_rate) ) diff --git a/src/pipecat/services/camb/tts.py b/src/pipecat/services/camb/tts.py index def57d3a0..75b299569 100644 --- a/src/pipecat/services/camb/tts.py +++ b/src/pipecat/services/camb/tts.py @@ -16,6 +16,7 @@ Features: - Model-specific sample rates: mars-pro (48kHz), mars-flash (22.05kHz) """ +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, Dict, Optional from camb import StreamTtsOutputConfiguration @@ -31,6 +32,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -133,6 +135,18 @@ def 
_get_aligned_audio(buffer: bytes) -> tuple[bytes, bytes]: return buffer[:aligned_size], buffer[aligned_size:] +@dataclass +class CambTTSSettings(TTSSettings): + """Settings for Camb.ai TTS service. + + Parameters: + user_instructions: Custom instructions for mars-instruct model only. + Ignored for other models. Max 1000 characters. + """ + + user_instructions: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class CambTTSService(TTSService): """Camb.ai MARS text-to-speech service using the official SDK. @@ -156,6 +170,8 @@ class CambTTSService(TTSService): ) """ + _settings: CambTTSSettings + class InputParams(BaseModel): """Input parameters for Camb.ai TTS configuration. @@ -197,11 +213,6 @@ class CambTTSService(TTSService): params: Additional voice parameters. If None, uses defaults. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - - self._api_key = api_key - self._timeout = timeout - params = params or CambTTSService.InputParams() # Warn if sample rate doesn't match model's supported rate @@ -211,17 +222,23 @@ class CambTTSService(TTSService): f"sample rate. Current rate of {sample_rate}Hz may cause issues." 
) - # Build settings - self._settings = { - "language": ( - self.language_to_service_language(params.language) if params.language else "en-us" + super().__init__( + sample_rate=sample_rate, + settings=CambTTSSettings( + model=model, + voice=voice_id, + language=( + self.language_to_service_language(params.language) + if params.language + else "en-us" + ), + user_instructions=params.user_instructions, ), - "user_instructions": params.user_instructions, - } + **kwargs, + ) - self.set_model_name(model) - self.set_voice(str(voice_id)) - self._voice_id = voice_id + self._api_key = api_key + self._timeout = timeout self._client = None @@ -256,7 +273,7 @@ class CambTTSService(TTSService): # Use model-specific sample rate if not explicitly specified if not self._init_sample_rate: - self._sample_rate = MODEL_SAMPLE_RATES.get(self.model_name, 22050) + self._sample_rate = MODEL_SAMPLE_RATES.get(self._settings.model, 22050) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -282,15 +299,15 @@ class CambTTSService(TTSService): # Build SDK parameters tts_kwargs: Dict[str, Any] = { "text": text, - "voice_id": self._voice_id, - "language": self._settings["language"], - "speech_model": self.model_name, + "voice_id": self._settings.voice, + "language": self._settings.language, + "speech_model": self._settings.model, "output_configuration": StreamTtsOutputConfiguration(format="pcm_s16le"), } # Add user instructions if using mars-instruct model - if self._model_name == "mars-instruct" and self._settings.get("user_instructions"): - tts_kwargs["user_instructions"] = self._settings["user_instructions"] + if self._settings.model == "mars-instruct" and self._settings.user_instructions: + tts_kwargs["user_instructions"] = self._settings.user_instructions await self.start_tts_usage_metrics(text) yield TTSStartedFrame(context_id=context_id) diff --git a/src/pipecat/services/cartesia/stt.py b/src/pipecat/services/cartesia/stt.py index 
c4429226f..526fc9116 100644 --- a/src/pipecat/services/cartesia/stt.py +++ b/src/pipecat/services/cartesia/stt.py @@ -12,7 +12,8 @@ the Cartesia Live transcription API for real-time speech recognition. import json import urllib.parse -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -27,6 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import CARTESIA_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -42,6 +44,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class CartesiaSTTSettings(STTSettings): + """Settings for the Cartesia STT service. + + Parameters: + encoding: Audio encoding format (e.g. ``"pcm_s16le"``). + """ + + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class CartesiaLiveOptions: """Configuration options for Cartesia Live STT service. @@ -136,6 +149,8 @@ class CartesiaSTTService(WebsocketSTTService): See: https://docs.cartesia.ai/api-reference/stt/stt """ + _settings: CartesiaSTTSettings + def __init__( self, *, @@ -158,13 +173,6 @@ class CartesiaSTTService(WebsocketSTTService): **kwargs: Additional arguments passed to parent STTService. 
""" sample_rate = sample_rate or (live_options.sample_rate if live_options else None) - super().__init__( - sample_rate=sample_rate, - ttfs_p99_latency=ttfs_p99_latency, - keepalive_timeout=120, - keepalive_interval=30, - **kwargs, - ) default_options = CartesiaLiveOptions( model="ink-whisper", @@ -181,8 +189,19 @@ class CartesiaSTTService(WebsocketSTTService): k: v for k, v in merged_options.items() if not isinstance(v, str) or v != "None" } - self._settings = merged_options - self.set_model_name(merged_options["model"]) + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + keepalive_timeout=120, + keepalive_interval=30, + settings=CartesiaSTTSettings( + model=merged_options["model"], + language=merged_options.get("language"), + encoding=merged_options.get("encoding", "pcm_s16le"), + ), + **kwargs, + ) + self._api_key = api_key self._base_url = base_url or "api.cartesia.ai" self._receive_task = None @@ -275,13 +294,39 @@ class CartesiaSTTService(WebsocketSTTService): await self._disconnect_websocket() + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta. + + Args: + delta: A :class:`STTSettings` (or ``CartesiaSTTSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(delta) + + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # if changed: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def _connect_websocket(self): try: if self._websocket and self._websocket.state is State.OPEN: return logger.debug("Connecting to Cartesia STT") - params = self._settings + params = { + "model": self._settings.model, + "language": self._settings.language, + "encoding": self._settings.encoding, + "sample_rate": str(self.sample_rate), + } ws_url = f"wss://{self._base_url}/stt/websocket?{urllib.parse.urlencode(params)}" headers = {"Cartesia-Version": "2025-04-16", "X-API-Key": self._api_key} diff --git a/src/pipecat/services/cartesia/tts.py b/src/pipecat/services/cartesia/tts.py index e30acdcd1..2e637c339 100644 --- a/src/pipecat/services/cartesia/tts.py +++ b/src/pipecat/services/cartesia/tts.py @@ -9,8 +9,9 @@ import base64 import json import warnings +from dataclasses import dataclass, field from enum import Enum -from typing import AsyncGenerator, List, Literal, Optional +from typing import Any, AsyncGenerator, List, Literal, Mapping, Optional from loguru import logger from pydantic import BaseModel, Field @@ -20,14 +21,13 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, StartFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import AudioContextWordTTSService, TTSService +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven +from pipecat.services.tts_service import AudioContextTTSService, TextAggregationMode, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator from pipecat.utils.text.skip_tags_aggregator import SkipTagsAggregator @@ -191,7 +191,43 @@ class CartesiaEmotion(str, Enum): DETERMINED = "determined" 
-class CartesiaTTSService(AudioContextWordTTSService): +@dataclass +class CartesiaTTSSettings(TTSSettings): + """Settings for Cartesia TTS services. + + Parameters: + output_container: Audio container format (e.g. "raw"). + output_encoding: Audio encoding format (e.g. "pcm_s16le"). + output_sample_rate: Audio sample rate in Hz. + speed: Voice speed control for non-Sonic-3 models (literal values). + emotion: List of emotion controls for non-Sonic-3 models. + generation_config: Generation configuration for Sonic-3 models. Includes volume, + speed (numeric), and emotion (string) parameters. + pronunciation_dict_id: The ID of the pronunciation dictionary to use for + custom pronunciations. + """ + + output_container: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: Literal["slow", "normal", "fast"] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + emotion: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + generation_config: GenerationConfig | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pronunciation_dict_id: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "CartesiaTTSSettings": + """Construct settings from a plain dict, destructuring legacy nested ``output_format``.""" + flat = dict(settings) + nested = flat.pop("output_format", None) + if isinstance(nested, dict): + flat.setdefault("output_container", nested.get("container")) + flat.setdefault("output_encoding", nested.get("encoding")) + flat.setdefault("output_sample_rate", nested.get("sample_rate")) + return super().from_mapping(flat) + + +class CartesiaTTSService(AudioContextTTSService): """Cartesia TTS service with WebSocket streaming and word timestamps. Provides text-to-speech using Cartesia's streaming WebSocket API. 
@@ -199,6 +235,8 @@ class CartesiaTTSService(AudioContextWordTTSService): customization options including speed and emotion controls. """ + _settings: CartesiaTTSSettings + class InputParams(BaseModel): """Input parameters for Cartesia TTS configuration. @@ -234,7 +272,8 @@ class CartesiaTTSService(AudioContextWordTTSService): container: str = "raw", params: Optional[InputParams] = None, text_aggregator: Optional[BaseTextAggregator] = None, - aggregate_sentences: Optional[bool] = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, **kwargs, ): """Initialize the Cartesia TTS service. @@ -254,25 +293,51 @@ class CartesiaTTSService(AudioContextWordTTSService): .. deprecated:: 0.0.95 Use an LLMTextProcessor before the TTSService for custom text aggregation. + text_aggregation_mode: How to aggregate incoming text before synthesis. aggregate_sentences: Whether to aggregate sentences within the TTSService. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + **kwargs: Additional arguments passed to the parent service. """ - # Aggregating sentences still gives cleaner-sounding results and fewer - # artifacts than streaming one word at a time. On average, waiting for a - # full sentence should only "cost" us 15ms or so with GPT-4o or a Llama - # 3 model, and it's worth it for the better audio quality. + # By default, we aggregate sentences before sending to TTS. This adds + # ~200-300ms of latency per sentence (waiting for the sentence-ending + # punctuation token from the LLM). Setting + # text_aggregation_mode=TextAggregationMode.TOKEN streams tokens + # directly, which reduces latency. Streaming quality is good but less + # tested than sentence aggregation. + # TODO: Consider making TOKEN the default for Cartesia in 1.0. # # We also don't want to automatically push LLM response text frames, # because the context aggregators will add them to the LLM context even # if we're interrupted. 
Cartesia gives us word-by-word timestamps. We # can use those to generate text frames ourselves aligned with the # playout timing of the audio! + params = params or CartesiaTTSService.InputParams() + super().__init__( + text_aggregation_mode=text_aggregation_mode, aggregate_sentences=aggregate_sentences, push_text_frames=False, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=sample_rate, text_aggregator=text_aggregator, + settings=CartesiaTTSSettings( + model=model, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) + if params.language + else None, + speed=params.speed, + emotion=params.emotion, + generation_config=params.generation_config, + pronunciation_dict_id=params.pronunciation_dict_id, + voice=voice_id, + ), **kwargs, ) @@ -282,29 +347,13 @@ class CartesiaTTSService(AudioContextWordTTSService): # The preferred way of taking advantage of Cartesia SSML Tags is # to use an LLMTextProcessor and/or a text_transformer to identify # and insert these tags for the purpose of the TTS service alone. 
- self._text_aggregator = SkipTagsAggregator([("", "")]) - - params = params or CartesiaTTSService.InputParams() + self._text_aggregator = SkipTagsAggregator( + [("", "")], aggregation_type=self._text_aggregation_mode + ) self._api_key = api_key self._cartesia_version = cartesia_version self._url = url - self._settings = { - "output_format": { - "container": container, - "encoding": encoding, - "sample_rate": 0, - }, - "language": self.language_to_service_language(params.language) - if params.language - else None, - "speed": params.speed, - "emotion": params.emotion, - "generation_config": params.generation_config, - "pronunciation_dict_id": params.pronunciation_dict_id, - } - self.set_model_name(model) - self.set_voice(voice_id) self._receive_task = None @@ -316,16 +365,6 @@ class CartesiaTTSService(AudioContextWordTTSService): """ return True - async def set_model(self, model: str): - """Set the TTS model. - - Args: - model: The model name to use for synthesis. - """ - self._model_id = model - await super().set_model(model) - logger.info(f"Switching TTS model to: [{model}]") - def language_to_service_language(self, language: Language) -> Optional[str]: """Convert a Language enum to Cartesia language format. @@ -390,7 +429,7 @@ class CartesiaTTSService(AudioContextWordTTSService): Returns: List of (word, start_time) tuples processed for the language. 
""" - current_language = self._settings.get("language") + current_language = self._settings.language # Check if this is a CJK language (if language is None, treat as non-CJK) if current_language and self._is_cjk_language(current_language): @@ -411,9 +450,9 @@ class CartesiaTTSService(AudioContextWordTTSService): ): voice_config = {} voice_config["mode"] = "id" - voice_config["id"] = self._voice_id + voice_config["id"] = self._settings.voice - if self._settings["emotion"]: + if self._settings.emotion: with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( @@ -422,33 +461,36 @@ class CartesiaTTSService(AudioContextWordTTSService): stacklevel=2, ) voice_config["__experimental_controls"] = {} - if self._settings["emotion"]: - voice_config["__experimental_controls"]["emotion"] = self._settings["emotion"] + voice_config["__experimental_controls"]["emotion"] = self._settings.emotion msg = { "transcript": text, "continue": continue_transcript, "context_id": self.get_active_audio_context_id(), - "model_id": self.model_name, + "model_id": self._settings.model, "voice": voice_config, - "output_format": self._settings["output_format"], + "output_format": { + "container": self._settings.output_container, + "encoding": self._settings.output_encoding, + "sample_rate": self._settings.output_sample_rate, + }, "add_timestamps": add_timestamps, - "use_original_timestamps": False if self.model_name == "sonic" else True, + "use_original_timestamps": False if self._settings.model == "sonic" else True, } - if self._settings["language"]: - msg["language"] = self._settings["language"] + if self._settings.language: + msg["language"] = self._settings.language - if self._settings["speed"]: - msg["speed"] = self._settings["speed"] + if self._settings.speed: + msg["speed"] = self._settings.speed - if self._settings["generation_config"]: - msg["generation_config"] = self._settings["generation_config"].model_dump( + if self._settings.generation_config: + 
msg["generation_config"] = self._settings.generation_config.model_dump( exclude_none=True ) - if self._settings["pronunciation_dict_id"]: - msg["pronunciation_dict_id"] = self._settings["pronunciation_dict_id"] + if self._settings.pronunciation_dict_id: + msg["pronunciation_dict_id"] = self._settings.pronunciation_dict_id return json.dumps(msg) @@ -459,7 +501,7 @@ class CartesiaTTSService(AudioContextWordTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -530,14 +572,22 @@ class CartesiaTTSService(AudioContextWordTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - context_id = self.get_active_audio_context_id() - await super()._handle_interruption(frame, direction) + async def on_audio_context_interrupted(self, context_id: str): + """Cancel the active Cartesia context when the bot is interrupted.""" await self.stop_all_metrics() if context_id: cancel_msg = json.dumps({"context_id": context_id, "cancel": True}) await self._get_websocket().send(cancel_msg) + async def on_audio_context_completed(self, context_id: str): + """Close the Cartesia context after all audio has been played. + + No close message is needed: the server already considers the context + done once it has sent its ``done`` message, which is handled in + ``_process_messages``. + """ + pass + async def flush_audio(self): """Flush any pending audio and finalize the current context.""" context_id = self.get_active_audio_context_id() @@ -601,7 +651,10 @@ class CartesiaTTSService(AudioContextWordTTSService): Yields: Frame: Audio frames containing the synthesized speech. 
""" - logger.debug(f"{self}: Generating TTS [{text}]") + if not self._is_streaming_tokens: + logger.debug(f"{self}: Generating TTS [{text}]") + else: + logger.trace(f"{self}: Generating TTS [{text}]") try: if not self._websocket or self._websocket.state is State.CLOSED: @@ -636,6 +689,8 @@ class CartesiaHttpTTSService(TTSService): integration is preferred. """ + _settings: CartesiaTTSSettings + class InputParams(BaseModel): """Input parameters for Cartesia HTTP TTS configuration. @@ -686,29 +741,30 @@ class CartesiaHttpTTSService(TTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or CartesiaHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=CartesiaTTSSettings( + model=model, + voice=voice_id, + output_container=container, + output_encoding=encoding, + output_sample_rate=0, + language=self.language_to_service_language(params.language) + if params.language + else None, + speed=params.speed, + emotion=params.emotion, + generation_config=params.generation_config, + pronunciation_dict_id=params.pronunciation_dict_id, + ), + **kwargs, + ) + self._api_key = api_key self._base_url = base_url self._cartesia_version = cartesia_version - self._settings = { - "output_format": { - "container": container, - "encoding": encoding, - "sample_rate": 0, - }, - "language": self.language_to_service_language(params.language) - if params.language - else None, - "speed": params.speed, - "emotion": params.emotion, - "generation_config": params.generation_config, - "pronunciation_dict_id": params.pronunciation_dict_id, - } - self.set_voice(voice_id) - self.set_model_name(model) self._client = AsyncCartesia( api_key=api_key, @@ -741,7 +797,7 @@ class CartesiaHttpTTSService(TTSService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["output_format"]["sample_rate"] = self.sample_rate + self._settings.output_sample_rate = self.sample_rate async def stop(self, frame: EndFrame): """Stop the Cartesia HTTP TTS service. @@ -775,9 +831,9 @@ class CartesiaHttpTTSService(TTSService): logger.debug(f"{self}: Generating TTS [{text}]") try: - voice_config = {"mode": "id", "id": self._voice_id} + voice_config = {"mode": "id", "id": self._settings.voice} - if self._settings["emotion"]: + if self._settings.emotion: with warnings.catch_warnings(): warnings.simplefilter("always") warnings.warn( @@ -785,30 +841,36 @@ class CartesiaHttpTTSService(TTSService): DeprecationWarning, stacklevel=2, ) - voice_config["__experimental_controls"] = {"emotion": self._settings["emotion"]} + voice_config["__experimental_controls"] = {"emotion": self._settings.emotion} await self.start_ttfb_metrics() - payload = { - "model_id": self._model_name, - "transcript": text, - "voice": voice_config, - "output_format": self._settings["output_format"], + output_format = { + "container": self._settings.output_container, + "encoding": self._settings.output_encoding, + "sample_rate": self._settings.output_sample_rate, } - if self._settings["language"]: - payload["language"] = self._settings["language"] + payload = { + "model_id": self._settings.model, + "transcript": text, + "voice": voice_config, + "output_format": output_format, + } - if self._settings["speed"]: - payload["speed"] = self._settings["speed"] + if self._settings.language: + payload["language"] = self._settings.language - if self._settings["generation_config"]: - payload["generation_config"] = self._settings["generation_config"].model_dump( + if self._settings.speed: + payload["speed"] = self._settings.speed + + if self._settings.generation_config: + payload["generation_config"] = self._settings.generation_config.model_dump( exclude_none=True ) - if self._settings["pronunciation_dict_id"]: - payload["pronunciation_dict_id"] = 
self._settings["pronunciation_dict_id"] + if self._settings.pronunciation_dict_id: + payload["pronunciation_dict_id"] = self._settings.pronunciation_dict_id yield TTSStartedFrame(context_id=context_id) diff --git a/src/pipecat/services/cerebras/llm.py b/src/pipecat/services/cerebras/llm.py index 54ea45ddb..e1ecceef7 100644 --- a/src/pipecat/services/cerebras/llm.py +++ b/src/pipecat/services/cerebras/llm.py @@ -66,16 +66,16 @@ class CerebrasLLMService(OpenAILLMService): Dictionary of parameters for the chat completion request. """ params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, - "seed": self._settings["seed"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_completion_tokens": self._settings["max_completion_tokens"], + "seed": self._settings.seed, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_completion_tokens": self._settings.max_completion_tokens, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params diff --git a/src/pipecat/services/deepgram/flux/stt.py b/src/pipecat/services/deepgram/flux/stt.py index 5b091862c..984906c6c 100644 --- a/src/pipecat/services/deepgram/flux/stt.py +++ b/src/pipecat/services/deepgram/flux/stt.py @@ -9,6 +9,7 @@ import asyncio import json import time +from dataclasses import dataclass, field from enum import Enum from typing import Any, AsyncGenerator, Dict, Optional from urllib.parse import urlencode @@ -27,6 +28,7 @@ from pipecat.frames.frames import ( UserStartedSpeakingFrame, UserStoppedSpeakingFrame, ) +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 @@ -67,6 +69,34 @@ class FluxEventType(str, Enum): UPDATE = "Update" 
+@dataclass +class DeepgramFluxSTTSettings(STTSettings): + """Settings for the Deepgram Flux STT service. + + Parameters: + eager_eot_threshold: EagerEndOfTurn/TurnResumed threshold. Off by default. + Lower values = more aggressive (faster response, more LLM calls). + Higher values = more conservative (slower response, fewer LLM calls). + eot_threshold: End-of-turn confidence required to finish a turn (default 0.7). + eot_timeout_ms: Time in ms after speech to finish a turn regardless of EOT + confidence (default 5000). + keyterm: Keyterms to boost recognition accuracy for specialized terminology. + mip_opt_out: Opt out of the Deepgram Model Improvement Program (default False). + tag: Tags to label requests for identification during usage reporting. + min_confidence: Minimum confidence required to create a TranscriptionFrame. + encoding: Audio encoding format (e.g. ``"linear16"``). + """ + + eager_eot_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + eot_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + eot_timeout_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + keyterm: list | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + mip_opt_out: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + tag: list | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + min_confidence: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class DeepgramFluxSTTService(WebsocketSTTService): """Deepgram Flux speech-to-text service. @@ -89,6 +119,8 @@ class DeepgramFluxSTTService(WebsocketSTTService): ... """ + _settings: DeepgramFluxSTTSettings + class InputParams(BaseModel): """Configuration parameters for Deepgram Flux API. @@ -175,20 +207,27 @@ class DeepgramFluxSTTService(WebsocketSTTService): # was never destroyed. 
# So we can keep it here as false, because inside the method send_with_retry, it will # already try to reconnect if needed. + params = params or DeepgramFluxSTTService.InputParams() super().__init__( sample_rate=sample_rate, reconnect_on_error=False, + settings=DeepgramFluxSTTSettings( + model=model, + language=Language.EN, + encoding=flux_encoding, + eager_eot_threshold=params.eager_eot_threshold, + eot_threshold=params.eot_threshold, + eot_timeout_ms=params.eot_timeout_ms, + keyterm=params.keyterm or [], + mip_opt_out=params.mip_opt_out, + tag=params.tag or [], + min_confidence=params.min_confidence, + ), **kwargs, ) - self._api_key = api_key self._url = url - self._model = model - self._params = params or DeepgramFluxSTTService.InputParams() self._should_interrupt = should_interrupt - self._flux_encoding = flux_encoding - # This is the currently only supported language - self._language = Language.EN self._websocket_url = None self._receive_task = None # Flux event handlers @@ -343,6 +382,25 @@ class DeepgramFluxSTTService(WebsocketSTTService): """ return True + async def _update_settings(self, delta: DeepgramFluxSTTSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def start(self, frame: StartFrame): """Start the Deepgram Flux STT service. 
@@ -355,29 +413,29 @@ class DeepgramFluxSTTService(WebsocketSTTService): await super().start(frame) url_params = [ - f"model={self._model}", + f"model={self._settings.model}", f"sample_rate={self.sample_rate}", - f"encoding={self._flux_encoding}", + f"encoding={self._settings.encoding}", ] - if self._params.eager_eot_threshold is not None: - url_params.append(f"eager_eot_threshold={self._params.eager_eot_threshold}") + if self._settings.eager_eot_threshold is not None: + url_params.append(f"eager_eot_threshold={self._settings.eager_eot_threshold}") - if self._params.eot_threshold is not None: - url_params.append(f"eot_threshold={self._params.eot_threshold}") + if self._settings.eot_threshold is not None: + url_params.append(f"eot_threshold={self._settings.eot_threshold}") - if self._params.eot_timeout_ms is not None: - url_params.append(f"eot_timeout_ms={self._params.eot_timeout_ms}") + if self._settings.eot_timeout_ms is not None: + url_params.append(f"eot_timeout_ms={self._settings.eot_timeout_ms}") - if self._params.mip_opt_out is not None: - url_params.append(f"mip_opt_out={str(self._params.mip_opt_out).lower()}") + if self._settings.mip_opt_out is not None: + url_params.append(f"mip_opt_out={str(self._settings.mip_opt_out).lower()}") # Add keyterm parameters (can have multiple) - for keyterm in self._params.keyterm: + for keyterm in self._settings.keyterm: url_params.append(urlencode({"keyterm": keyterm})) # Add tag parameters (can have multiple) - for tag_value in self._params.tag: + for tag_value in self._settings.tag: url_params.append(urlencode({"tag": tag_value})) self._websocket_url = f"{self._url}?{'&'.join(url_params)}" @@ -617,7 +675,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): self._user_is_speaking = True await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self.start_metrics() await 
self._call_event_handler("on_start_of_turn", transcript) if transcript: @@ -676,7 +734,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): # Compute the average confidence average_confidence = self._calculate_average_confidence(data) - if not self._params.min_confidence or average_confidence > self._params.min_confidence: + if not self._settings.min_confidence or average_confidence > self._settings.min_confidence: # EndOfTurn means Flux has determined the turn is complete, # so this TranscriptionFrame is always finalized await self.push_frame( @@ -684,7 +742,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): transcript, self._user_id, time_now_iso8601(), - self._language, + self._settings.language, result=data, finalized=True, ) @@ -694,7 +752,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): f"Transcription confidence below min_confidence threshold: {average_confidence}" ) - await self._handle_transcription(transcript, True, self._language) + await self._handle_transcription(transcript, True, self._settings.language) await self.stop_processing_metrics() await self.broadcast_frame(UserStoppedSpeakingFrame) await self._call_event_handler("on_end_of_turn", transcript) @@ -738,7 +796,7 @@ class DeepgramFluxSTTService(WebsocketSTTService): transcript, self._user_id, time_now_iso8601(), - self._language, + self._settings.language, result=data, ) ) diff --git a/src/pipecat/services/deepgram/stt.py b/src/pipecat/services/deepgram/stt.py index c4f72e6c3..8eb246cf2 100644 --- a/src/pipecat/services/deepgram/stt.py +++ b/src/pipecat/services/deepgram/stt.py @@ -6,7 +6,9 @@ """Deepgram speech-to-text service implementation.""" -from typing import AsyncGenerator, Dict, Optional +import inspect +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Dict, Mapping, Optional, Type from loguru import logger @@ -23,6 +25,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import 
FrameDirection +from pipecat.services.settings import _S, NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import DEEPGRAM_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -45,6 +48,168 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class _DeepgramSTTSettingsBase(STTSettings): + """Base settings for Deepgram STT services that use ``LiveOptions``. + + Shared by ``DeepgramSTTSettings`` and ``DeepgramSageMakerSTTSettings``. + Not intended for other Deepgram services that don't use ``LiveOptions``. + + Wraps the Deepgram SDK's ``LiveOptions`` in a single ``live_options`` + field and provides delta-merge semantics: when used as a delta (e.g. + via ``STTUpdateSettingsFrame``), only the non-None fields of + ``live_options`` are merged into the stored options rather than + replacing them wholesale. + + ``model`` and ``language`` are kept in sync bidirectionally between + the top-level settings fields and the nested ``live_options``. + + Parameters: + live_options: Deepgram ``LiveOptions`` for STT configuration. + In delta mode only its non-None fields are merged into the + stored options. + """ + + live_options: LiveOptions | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + # Valid LiveOptions __init__ parameter names (cached at class level). + _live_options_params: set[str] | None = field(default=None, init=False, repr=False) + + @classmethod + def _get_live_options_params(cls) -> set[str]: + """Return the set of valid ``LiveOptions.__init__`` parameter names.""" + if cls._live_options_params is None: + cls._live_options_params = set(inspect.signature(LiveOptions.__init__).parameters) - { + "self" + } + return cls._live_options_params + + def _merge_live_options_delta(self, delta: LiveOptions) -> Dict[str, Any]: + """Merge a ``LiveOptions`` delta into the stored ``live_options``. 
+ + Non-None fields from *delta* overwrite corresponding fields in the + stored ``LiveOptions``. ``model`` and ``language`` are synced to + the top-level settings fields when they change. + + Args: + delta: A ``LiveOptions`` whose non-None fields are the desired + overrides. + + Returns: + Dict mapping each changed key to its **previous** value (same + contract as ``apply_update``). + """ + old_dict = self.live_options.to_dict() # type: ignore[union-attr] + delta_dict = delta.to_dict() + + # Deepgram SDK bug: model initialised to the *string* "None". + if delta_dict.get("model") == "None": + del delta_dict["model"] + + if not delta_dict: + return {} + + merged = {**old_dict, **delta_dict} + self.live_options = LiveOptions(**merged) + + # Track what changed. + changed: Dict[str, Any] = {} + for key in delta_dict: + old_val = old_dict.get(key, NOT_GIVEN) + if old_val != delta_dict[key]: + changed[key] = old_val + + # Sync model/language from live_options delta to top-level fields. + if "model" in delta_dict and delta_dict["model"] != self.model: + changed.setdefault("model", self.model) + self.model = delta_dict["model"] + if "language" in delta_dict and delta_dict["language"] != self.language: + changed.setdefault("language", self.language) + self.language = delta_dict["language"] + + return changed + + def apply_update(self: _S, delta: _S) -> Dict[str, Any]: + """Merge a delta into this store, with delta-merge for ``live_options``. + + ``live_options`` is merged field-by-field via + ``_merge_live_options_delta`` rather than being replaced wholesale. + + ``model`` and ``language`` are kept in sync bidirectionally between + the top-level settings fields and ``live_options``. + """ + # Pull live_options out of the delta so super() doesn't replace it. + delta_lo = getattr(delta, "live_options", NOT_GIVEN) + if is_given(delta_lo): + delta.live_options = NOT_GIVEN # type: ignore[assignment] + + # Let the base class handle model, language, extra. 
+ changed = super().apply_update(delta) + + # Sync top-level model/language changes into stored live_options. + if "model" in changed: + self.live_options.model = self.model # type: ignore[union-attr] + if "language" in changed: + self.live_options.language = self.language # type: ignore[union-attr] + + # Merge live_options delta. Top-level model/language take precedence + # over conflicting values in live_options, so write them into the + # delta before merging. + if is_given(delta_lo): + if "model" in changed: + delta_lo.model = self.model + if "language" in changed: + delta_lo.language = self.language + + for key, old_val in self._merge_live_options_delta(delta_lo).items(): + changed.setdefault(key, old_val) + + return changed + + @classmethod + def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: + """Build a delta from a plain dict, routing LiveOptions keys correctly. + + Keys that are valid ``LiveOptions.__init__`` parameters (and not + top-level ``STTSettings`` fields like ``model`` / ``language``) are + collected into a ``LiveOptions`` object. ``model`` and ``language`` + are routed to the top-level settings fields. Truly unknown keys go + to ``extra``. + """ + lo_params = cls._get_live_options_params() + stt_field_names = {"model", "language"} + + kwargs: Dict[str, Any] = {} + lo_kwargs: Dict[str, Any] = {} + extra: Dict[str, Any] = {} + + for key, value in settings.items(): + canonical = cls._aliases.get(key, key) + if canonical in stt_field_names: + kwargs[canonical] = value + elif canonical in lo_params: + lo_kwargs[canonical] = value + else: + extra[key] = value + + if lo_kwargs: + kwargs["live_options"] = LiveOptions(**lo_kwargs) + + instance = cls(**kwargs) + instance.extra = extra + return instance + + +@dataclass +class DeepgramSTTSettings(_DeepgramSTTSettingsBase): + """Settings for the Deepgram STT service. + + See ``_DeepgramSTTSettingsBase`` for full documentation. 
+ """ + + pass + + class DeepgramSTTService(STTService): """Deepgram speech-to-text service. @@ -63,6 +228,8 @@ class DeepgramSTTService(STTService): ... """ + _settings: DeepgramSTTSettings + def __init__( self, *, @@ -87,7 +254,9 @@ class DeepgramSTTService(STTService): base_url: Custom Deepgram API base URL. sample_rate: Audio sample rate. If None, uses default or live_options value. - live_options: Deepgram LiveOptions for detailed configuration. + live_options: Deepgram LiveOptions configuration. Treated as a + delta from a set of sensible defaults — only the fields you + set are overridden; all others keep their default values. addons: Additional Deepgram features to enable. should_interrupt: Determine whether the bot should be interrupted when Deepgram VAD events are enabled and the system detects that the user is speaking. @@ -102,7 +271,6 @@ class DeepgramSTTService(STTService): The `vad_events` option in LiveOptions is deprecated as of version 0.0.99 and will be removed in a future version. Please use the Silero VAD instead. """ sample_rate = sample_rate or (live_options.sample_rate if live_options else None) - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) if url: import warnings @@ -127,24 +295,25 @@ class DeepgramSTTService(STTService): vad_events=False, ) - merged_options = default_options.to_dict() + settings = DeepgramSTTSettings( + model=default_options.model, + language=default_options.language, + live_options=default_options, + ) if live_options: - default_model = default_options.model - merged_options.update(live_options.to_dict()) - # NOTE(aleix): Fixes an in deepgram-sdk where `model` is initialized - # to the string "None" instead of the value `None`. 
- if "model" in merged_options and merged_options["model"] == "None": - merged_options["model"] = default_model + settings._merge_live_options_delta(live_options) - if "language" in merged_options and isinstance(merged_options["language"], Language): - merged_options["language"] = merged_options["language"].value + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=settings, + **kwargs, + ) - self.set_model_name(merged_options["model"]) - self._settings = merged_options self._addons = addons self._should_interrupt = should_interrupt - if merged_options.get("vad_events"): + if self._settings.live_options.vad_events: import warnings with warnings.catch_warnings(): @@ -175,7 +344,7 @@ class DeepgramSTTService(STTService): Returns: True if VAD events are enabled in the current settings. """ - return self._settings["vad_events"] + return self._settings.live_options.vad_events def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -185,28 +354,17 @@ class DeepgramSTTService(STTService): """ return True - async def set_model(self, model: str): - """Set the Deepgram model and reconnect. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if anything changed.""" + changed = await super()._update_settings(delta) + + if not changed: + return changed - Args: - model: The Deepgram model name to use. - """ - await super().set_model(model) - logger.info(f"Switching STT model to: [{model}]") - self._settings["model"] = model await self._disconnect() await self._connect() - async def set_language(self, language: Language): - """Set the recognition language and reconnect. - - Args: - language: The language to use for speech recognition. 
- """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = language - await self._disconnect() - await self._connect() + return changed async def start(self, frame: StartFrame): """Start the Deepgram STT service. @@ -215,7 +373,6 @@ class DeepgramSTTService(STTService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["sample_rate"] = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -268,7 +425,11 @@ class DeepgramSTTService(STTService): self._on_utterance_end, ) - if not await self._connection.start(options=self._settings, addons=self._addons): + live_options = LiveOptions( + **{**self._settings.live_options.to_dict(), "sample_rate": self.sample_rate} + ) + + if not await self._connection.start(options=live_options, addons=self._addons): await self.push_error(error_msg=f"Unable to connect to Deepgram") else: headers = { @@ -310,7 +471,7 @@ class DeepgramSTTService(STTService): await self._call_event_handler("on_speech_started", *args, **kwargs) await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _on_utterance_end(self, *args, **kwargs): await self._call_event_handler("on_utterance_end", *args, **kwargs) diff --git a/src/pipecat/services/deepgram/stt_sagemaker.py b/src/pipecat/services/deepgram/stt_sagemaker.py index 8fc95b726..ba4b7dfda 100644 --- a/src/pipecat/services/deepgram/stt_sagemaker.py +++ b/src/pipecat/services/deepgram/stt_sagemaker.py @@ -14,7 +14,8 @@ languages, and various Deepgram features. 
import asyncio import json -from typing import AsyncGenerator, Optional +from dataclasses import dataclass +from typing import Any, AsyncGenerator, Dict, Optional from loguru import logger @@ -31,6 +32,8 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.aws.sagemaker.bidi_client import SageMakerBidiClient +from pipecat.services.deepgram.stt import _DeepgramSTTSettingsBase +from pipecat.services.settings import STTSettings from pipecat.services.stt_latency import DEEPGRAM_SAGEMAKER_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language @@ -47,6 +50,16 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class DeepgramSageMakerSTTSettings(_DeepgramSTTSettingsBase): + """Settings for the Deepgram SageMaker STT service. + + See ``_DeepgramSTTSettingsBase`` for full documentation. + """ + + pass + + class DeepgramSageMakerSTTService(STTService): """Deepgram speech-to-text service for AWS SageMaker. @@ -75,6 +88,8 @@ class DeepgramSageMakerSTTService(STTService): ) """ + _settings: DeepgramSageMakerSTTSettings + def __init__( self, *, @@ -93,19 +108,15 @@ class DeepgramSageMakerSTTService(STTService): region: AWS region where the endpoint is deployed (e.g., "us-east-2"). sample_rate: Audio sample rate in Hz. If None, uses value from live_options or defaults to the value from StartFrame. - live_options: Deepgram LiveOptions for detailed configuration. If None, - uses sensible defaults (nova-3 model, English, interim results enabled). + live_options: Deepgram LiveOptions configuration. Treated as a + delta from a set of sensible defaults — only the fields you + set are overridden; all others keep their default values. ttfs_p99_latency: P99 latency from speech end to final transcript in seconds. Override for your deployment. 
See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to the parent STTService. """ sample_rate = sample_rate or (live_options.sample_rate if live_options else None) - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - self._endpoint_name = endpoint_name - self._region = region - - # Create default options similar to DeepgramSTTService default_options = LiveOptions( encoding="linear16", language=Language.EN, @@ -115,21 +126,23 @@ class DeepgramSageMakerSTTService(STTService): punctuate=True, ) - # Merge with provided options - merged_options = default_options.to_dict() + settings = DeepgramSageMakerSTTSettings( + model=default_options.model, + language=default_options.language, + live_options=default_options, + ) if live_options: - default_model = default_options.model - merged_options.update(live_options.to_dict()) - # Handle the "None" string bug from deepgram-sdk - if "model" in merged_options and merged_options["model"] == "None": - merged_options["model"] = default_model + settings._merge_live_options_delta(live_options) - # Convert Language enum to string if needed - if "language" in merged_options and isinstance(merged_options["language"], Language): - merged_options["language"] = merged_options["language"].value + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=settings, + **kwargs, + ) - self.set_model_name(merged_options["model"]) - self._settings = merged_options + self._endpoint_name = endpoint_name + self._region = region self._client: Optional[SageMakerBidiClient] = None self._response_task: Optional[asyncio.Task] = None @@ -143,35 +156,21 @@ class DeepgramSageMakerSTTService(STTService): """ return True - async def set_model(self, model: str): - """Set the Deepgram model and reconnect. 
+ async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and warn about unhandled changes.""" + changed = await super()._update_settings(delta) - Disconnects from the current session, updates the model setting, and - establishes a new connection with the updated model. + if not changed: + return changed - Args: - model: The Deepgram model name to use (e.g., "nova-3"). - """ - await super().set_model(model) - logger.info(f"Switching STT model to: [{model}]") - self._settings["model"] = model - await self._disconnect() - await self._connect() + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._connect() - async def set_language(self, language: Language): - """Set the recognition language and reconnect. + self._warn_unhandled_updated_settings(changed) - Disconnects from the current session, updates the language setting, and - establishes a new connection with the updated language. - - Args: - language: The language to use for speech recognition (e.g., Language.EN, - Language.ES). - """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = language - await self._disconnect() - await self._connect() + return changed async def start(self, frame: StartFrame): """Start the Deepgram SageMaker STT service. @@ -180,7 +179,6 @@ class DeepgramSageMakerSTTService(STTService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["sample_rate"] = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -226,12 +224,13 @@ class DeepgramSageMakerSTTService(STTService): """ logger.debug("Connecting to Deepgram on SageMaker...") - # Update sample rate in settings - self._settings["sample_rate"] = self.sample_rate + live_options = LiveOptions( + **{**self._settings.live_options.to_dict(), "sample_rate": self.sample_rate} + ) - # Build query string from settings, converting booleans to strings + # Build query string from live_options, converting booleans to strings query_params = {} - for key, value in self._settings.items(): + for key, value in live_options.to_dict().items(): if value is not None: # Convert boolean values to lowercase strings for Deepgram API if isinstance(value, bool): diff --git a/src/pipecat/services/deepgram/tts.py b/src/pipecat/services/deepgram/tts.py index 12aba4905..c05b90868 100644 --- a/src/pipecat/services/deepgram/tts.py +++ b/src/pipecat/services/deepgram/tts.py @@ -11,7 +11,8 @@ for generating speech from text using various voice models. """ import json -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional import aiohttp from loguru import logger @@ -29,6 +30,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService, WebsocketTTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -43,6 +45,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class DeepgramTTSSettings(TTSSettings): + """Settings for Deepgram TTS service. + + Parameters: + encoding: Audio encoding format (linear16, mulaw, alaw). 
+ """ + + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class DeepgramTTSService(WebsocketTTSService): """Deepgram WebSocket-based text-to-speech service. @@ -51,6 +64,8 @@ class DeepgramTTSService(WebsocketTTSService): message for conversational AI use cases. """ + _settings: DeepgramTTSSettings + SUPPORTED_ENCODINGS = ("linear16", "mulaw", "alaw") def __init__( @@ -86,15 +101,17 @@ class DeepgramTTSService(WebsocketTTSService): pause_frame_processing=True, push_stop_frames=True, append_trailing_space=True, + settings=DeepgramTTSSettings( + model=voice, + voice=voice, + language=None, + encoding=encoding, + ), **kwargs, ) self._api_key = api_key self._base_url = base_url - self._settings = { - "encoding": encoding, - } - self.set_voice(voice) self._receive_task = None self._context_id: Optional[str] = None @@ -166,6 +183,28 @@ class DeepgramTTSService(WebsocketTTSService): await self._disconnect_websocket() + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. + + Args: + delta: A :class:`TTSSettings` (or ``DeepgramTTSSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. 
+ """ + changed = await super()._update_settings(delta) + + # Deepgram uses voice as the model, so keep them in sync for metrics + if "voice" in changed: + self._settings.model = self._settings.voice + self._sync_model_name_to_metrics() + + if changed: + await self._disconnect() + await self._connect() + + return changed + async def _connect_websocket(self): """Connect to Deepgram WebSocket API with configured settings.""" try: @@ -176,8 +215,8 @@ class DeepgramTTSService(WebsocketTTSService): # Build WebSocket URL with query parameters params = [] - params.append(f"model={self._voice_id}") - params.append(f"encoding={self._settings['encoding']}") + params.append(f"model={self._settings.voice}") + params.append(f"encoding={self._settings.encoding}") params.append(f"sample_rate={self.sample_rate}") url = f"{self._base_url}/v1/speak?{'&'.join(params)}" @@ -330,6 +369,8 @@ class DeepgramHttpTTSService(TTSService): configurable sample rates and quality settings. """ + _settings: DeepgramTTSSettings + def __init__( self, *, @@ -352,15 +393,20 @@ class DeepgramHttpTTSService(TTSService): encoding: Audio encoding format. Defaults to "linear16". **kwargs: Additional arguments passed to parent TTSService class. """ - super().__init__(sample_rate=sample_rate, **kwargs) + super().__init__( + sample_rate=sample_rate, + settings=DeepgramTTSSettings( + model=voice, + voice=voice, + language=None, + encoding=encoding, + ), + **kwargs, + ) self._api_key = api_key self._session = aiohttp_session self._base_url = base_url - self._settings = { - "encoding": encoding, - } - self.set_voice(voice) def can_generate_metrics(self) -> bool: """Check if the service can generate metrics. 
@@ -389,8 +435,8 @@ class DeepgramHttpTTSService(TTSService): headers = {"Authorization": f"Token {self._api_key}", "Content-Type": "application/json"} params = { - "model": self._voice_id, - "encoding": self._settings["encoding"], + "model": self._settings.voice, + "encoding": self._settings.encoding, "sample_rate": self.sample_rate, "container": "none", } diff --git a/src/pipecat/services/deepgram/tts_sagemaker.py b/src/pipecat/services/deepgram/tts_sagemaker.py index 7c04bc299..b583ce76c 100644 --- a/src/pipecat/services/deepgram/tts_sagemaker.py +++ b/src/pipecat/services/deepgram/tts_sagemaker.py @@ -14,7 +14,8 @@ streaming audio output. import asyncio import json -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -32,10 +33,22 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.aws.sagemaker.bidi_client import SageMakerBidiClient +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts +@dataclass +class DeepgramSageMakerTTSSettings(TTSSettings): + """Settings for Deepgram SageMaker TTS service. + + Parameters: + encoding: Audio encoding format (e.g. "linear16"). + """ + + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class DeepgramSageMakerTTSService(TTSService): """Deepgram text-to-speech service for AWS SageMaker. 
@@ -58,6 +71,8 @@ class DeepgramSageMakerTTSService(TTSService): ) """ + _settings: DeepgramSageMakerTTSSettings + def __init__( self, *, @@ -84,13 +99,17 @@ class DeepgramSageMakerTTSService(TTSService): push_stop_frames=True, pause_frame_processing=True, append_trailing_space=True, + settings=DeepgramSageMakerTTSSettings( + model=voice, + voice=voice, + language=None, + encoding=encoding, + ), **kwargs, ) self._endpoint_name = endpoint_name self._region = region - self._encoding = encoding - self.set_voice(voice) self._client: Optional[SageMakerBidiClient] = None self._response_task: Optional[asyncio.Task] = None @@ -156,7 +175,8 @@ class DeepgramSageMakerTTSService(TTSService): logger.debug("Connecting to Deepgram TTS on SageMaker...") query_string = ( - f"model={self._voice_id}&encoding={self._encoding}&sample_rate={self.sample_rate}" + f"model={self._settings.voice}&encoding={self._settings.encoding}" + f"&sample_rate={self.sample_rate}" ) self._client = SageMakerBidiClient( @@ -200,6 +220,31 @@ class DeepgramSageMakerTTSService(TTSService): logger.debug("Disconnected from Deepgram TTS on SageMaker") await self._call_event_handler("on_disconnected") + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if necessary. + + Since all settings are part of the SageMaker session query string, + any setting change requires reconnecting to apply the new values. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + # Deepgram uses voice as the model, so keep them in sync for metrics + if "voice" in changed: + self._settings.model = self._settings.voice + self._sync_model_name_to_metrics() + + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def _process_responses(self): """Process streaming responses from Deepgram TTS on SageMaker. diff --git a/src/pipecat/services/deepseek/llm.py b/src/pipecat/services/deepseek/llm.py index 56f1ddd18..70318c9ba 100644 --- a/src/pipecat/services/deepseek/llm.py +++ b/src/pipecat/services/deepseek/llm.py @@ -65,18 +65,18 @@ class DeepSeekLLMService(OpenAILLMService): Dictionary of parameters for the chat completion request. """ params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "stream_options": {"include_usage": True}, - "frequency_penalty": self._settings["frequency_penalty"], - "presence_penalty": self._settings["presence_penalty"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], + "frequency_penalty": self._settings.frequency_penalty, + "presence_penalty": self._settings.presence_penalty, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params diff --git a/src/pipecat/services/elevenlabs/stt.py b/src/pipecat/services/elevenlabs/stt.py index 388f7146b..0cf13121e 100644 --- a/src/pipecat/services/elevenlabs/stt.py +++ b/src/pipecat/services/elevenlabs/stt.py @@ -11,11 +11,13 @@ using segmented audio processing. The service uploads audio files and receives transcription results directly. 
""" +import asyncio import base64 import io import json +from dataclasses import dataclass, field from enum import Enum -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional import aiohttp from loguru import logger @@ -33,6 +35,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import ELEVENLABS_REALTIME_TTFS_P99, ELEVENLABS_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService, WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -167,6 +170,51 @@ def language_to_elevenlabs_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +class CommitStrategy(str, Enum): + """Commit strategies for transcript segmentation.""" + + MANUAL = "manual" + VAD = "vad" + + +@dataclass +class ElevenLabsSTTSettings(STTSettings): + """Settings for the ElevenLabs file-based STT service. + + Parameters: + tag_audio_events: Whether to include audio event tags in transcription. + """ + + tag_audio_events: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class ElevenLabsRealtimeSTTSettings(STTSettings): + """Settings for the ElevenLabs Realtime STT service. + + See ``ElevenLabsRealtimeSTTService.InputParams`` for detailed descriptions. + + Parameters: + commit_strategy: How to segment speech - manual (Pipecat VAD) or vad (ElevenLabs VAD). + vad_silence_threshold_secs: Seconds of silence before VAD commits (0.3-3.0). + vad_threshold: VAD sensitivity (0.1-0.9, lower is more sensitive). + min_speech_duration_ms: Minimum speech duration for VAD (50-2000ms). + min_silence_duration_ms: Minimum silence duration for VAD (50-2000ms). + include_timestamps: Whether to include word-level timestamps in transcripts. 
+ enable_logging: Whether to enable logging on ElevenLabs' side. + include_language_detection: Whether to include language detection in transcripts. + """ + + commit_strategy: CommitStrategy | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad_silence_threshold_secs: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad_threshold: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + min_speech_duration_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + min_silence_duration_ms: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + include_timestamps: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_logging: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + include_language_detection: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class ElevenLabsSTTService(SegmentedSTTService): """Speech-to-text service using ElevenLabs' file-based API. @@ -175,6 +223,8 @@ class ElevenLabsSTTService(SegmentedSTTService): The service uploads audio files to ElevenLabs and receives transcription results directly. """ + _settings: ElevenLabsSTTSettings + class InputParams(BaseModel): """Configuration parameters for ElevenLabs STT API. @@ -211,25 +261,24 @@ class ElevenLabsSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to SegmentedSTTService. 
""" + params = params or ElevenLabsSTTService.InputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, + settings=ElevenLabsSTTSettings( + model=model, + language=self.language_to_service_language(params.language) + if params.language + else "eng", + tag_audio_events=params.tag_audio_events, + ), **kwargs, ) - params = params or ElevenLabsSTTService.InputParams() - self._api_key = api_key self._base_url = base_url self._session = aiohttp_session - self._model_id = model - self._tag_audio_events = params.tag_audio_events - - self._settings = { - "language": self.language_to_service_language(params.language) - if params.language - else "eng", - } def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -250,28 +299,6 @@ class ElevenLabsSTTService(SegmentedSTTService): """ return language_to_elevenlabs_language(language) - async def set_language(self, language: Language): - """Set the transcription language. - - Args: - language: The language to use for speech-to-text transcription. - """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = self.language_to_service_language(language) - - async def set_model(self, model: str): - """Set the STT model. - - Args: - model: The model name to use for transcription. - - Note: - ElevenLabs STT API does not currently support model selection. - This method is provided for interface compatibility. - """ - await super().set_model(model) - logger.info(f"Model setting [{model}] noted, but ElevenLabs STT uses default model") - async def _transcribe_audio(self, audio_data: bytes) -> dict: """Upload audio data to ElevenLabs and get transcription result. 
@@ -297,9 +324,9 @@ class ElevenLabsSTTService(SegmentedSTTService): ) # Add required model_id, language_code, and tag_audio_events - data.add_field("model_id", self._model_id) - data.add_field("language_code", self._settings["language"]) - data.add_field("tag_audio_events", str(self._tag_audio_events).lower()) + data.add_field("model_id", self._settings.model) + data.add_field("language_code", self._settings.language) + data.add_field("tag_audio_events", str(self._settings.tag_audio_events).lower()) async with self._session.post(url, data=data, headers=headers) as response: if response.status != 200: @@ -385,13 +412,6 @@ def audio_format_from_sample_rate(sample_rate: int) -> str: return "pcm_16000" -class CommitStrategy(str, Enum): - """Commit strategies for transcript segmentation.""" - - MANUAL = "manual" - VAD = "vad" - - class ElevenLabsRealtimeSTTService(WebsocketSTTService): """Speech-to-text service using ElevenLabs' Realtime WebSocket API. @@ -404,6 +424,8 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): commit transcript segments, providing consistency with other STT services. """ + _settings: ElevenLabsRealtimeSTTSettings + class InputParams(BaseModel): """Configuration parameters for ElevenLabs Realtime STT API. @@ -456,24 +478,35 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to WebsocketSTTService. 
""" + params = params or ElevenLabsRealtimeSTTService.InputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, keepalive_timeout=10, keepalive_interval=5, + settings=ElevenLabsRealtimeSTTSettings( + model=model, + language=params.language_code, + commit_strategy=params.commit_strategy, + vad_silence_threshold_secs=params.vad_silence_threshold_secs, + vad_threshold=params.vad_threshold, + min_speech_duration_ms=params.min_speech_duration_ms, + min_silence_duration_ms=params.min_silence_duration_ms, + include_timestamps=params.include_timestamps, + enable_logging=params.enable_logging, + include_language_detection=params.include_language_detection, + ), **kwargs, ) - params = params or ElevenLabsRealtimeSTTService.InputParams() - self._api_key = api_key self._base_url = base_url - self._model_id = model - self._params = params self._audio_format = "" # initialized in start() self._receive_task = None - self._settings = {"language": params.language_code} + self._connected_event = asyncio.Event() + self._connected_event.set() def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -483,42 +516,24 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): """ return True - async def set_language(self, language: Language): - """Set the transcription language. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if anything changed. Args: - language: The language to use for speech-to-text transcription. + delta: A :class:`STTSettings` (or ``ElevenLabsRealtimeSTTSettings``) delta. - Note: - Changing language requires reconnecting to the WebSocket. + Returns: + Dict mapping changed field names to their previous values. 
""" - logger.info(f"Switching STT language to: [{language}]") - new_language = ( - language_to_elevenlabs_language(language) - if isinstance(language, Language) - else language - ) - self._params.language_code = new_language - self._settings["language"] = new_language - # Reconnect with new settings + changed = await super()._update_settings(delta) + + if not changed: + return changed + await self._disconnect() await self._connect() - async def set_model(self, model: str): - """Set the STT model. - - Args: - model: The model name to use for transcription. - - Note: - Changing model requires reconnecting to the WebSocket. - """ - await super().set_model(model) - logger.info(f"Switching STT model to: [{model}]") - self._model_id = model - # Reconnect with new settings - await self._disconnect() - await self._connect() + return changed async def start(self, frame: StartFrame): """Start the STT service and establish WebSocket connection. @@ -566,7 +581,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): await self._start_metrics() elif isinstance(frame, VADUserStoppedSpeakingFrame): # Send commit when user stops speaking (manual commit mode) - if self._params.commit_strategy == CommitStrategy.MANUAL: + if self._settings.commit_strategy == CommitStrategy.MANUAL: if self._websocket and self._websocket.state is State.OPEN: try: commit_message = { @@ -589,6 +604,9 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): Yields: None - transcription results are handled via WebSocket responses. 
""" + # Wait for any in-flight _connect() to finish before checking state + await self._connected_event.wait() + # Reconnect if connection is closed if not self._websocket or self._websocket.state is State.CLOSED: await self._connect() @@ -613,12 +631,18 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): async def _connect(self): """Establish WebSocket connection to ElevenLabs Realtime STT.""" - await self._connect_websocket() + self._connected_event.clear() + try: + await self._connect_websocket() - await super()._connect() + await super()._connect() - if self._websocket and not self._receive_task: - self._receive_task = self.create_task(self._receive_task_handler(self._report_error)) + if self._websocket and not self._receive_task: + self._receive_task = self.create_task( + self._receive_task_handler(self._report_error) + ) + finally: + self._connected_event.set() async def _disconnect(self): """Close WebSocket connection and cleanup tasks.""" @@ -654,38 +678,42 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): logger.debug("Connecting to ElevenLabs Realtime STT") # Build query parameters - params = [f"model_id={self._model_id}"] + params = [f"model_id={self._settings.model}"] - if self._params.language_code: - params.append(f"language_code={self._params.language_code}") + if self._settings.language: + params.append(f"language_code={self._settings.language}") params.append(f"audio_format={self._audio_format}") - params.append(f"commit_strategy={self._params.commit_strategy.value}") + params.append(f"commit_strategy={self._settings.commit_strategy.value}") # Add optional parameters - if self._params.include_timestamps: - params.append(f"include_timestamps={str(self._params.include_timestamps).lower()}") - - if self._params.enable_logging: - params.append(f"enable_logging={str(self._params.enable_logging).lower()}") - - if self._params.include_language_detection: + if self._settings.include_timestamps: params.append( - 
f"include_language_detection={str(self._params.include_language_detection).lower()}" + f"include_timestamps={str(self._settings.include_timestamps).lower()}" + ) + + if self._settings.enable_logging: + params.append(f"enable_logging={str(self._settings.enable_logging).lower()}") + + if self._settings.include_language_detection: + params.append( + f"include_language_detection={str(self._settings.include_language_detection).lower()}" ) # Add VAD parameters if using VAD commit strategy and values are specified - if self._params.commit_strategy == CommitStrategy.VAD: - if self._params.vad_silence_threshold_secs is not None: + if self._settings.commit_strategy == CommitStrategy.VAD: + if self._settings.vad_silence_threshold_secs is not None: params.append( - f"vad_silence_threshold_secs={self._params.vad_silence_threshold_secs}" + f"vad_silence_threshold_secs={self._settings.vad_silence_threshold_secs}" + ) + if self._settings.vad_threshold is not None: + params.append(f"vad_threshold={self._settings.vad_threshold}") + if self._settings.min_speech_duration_ms is not None: + params.append(f"min_speech_duration_ms={self._settings.min_speech_duration_ms}") + if self._settings.min_silence_duration_ms is not None: + params.append( + f"min_silence_duration_ms={self._settings.min_silence_duration_ms}" ) - if self._params.vad_threshold is not None: - params.append(f"vad_threshold={self._params.vad_threshold}") - if self._params.min_speech_duration_ms is not None: - params.append(f"min_speech_duration_ms={self._params.min_speech_duration_ms}") - if self._params.min_silence_duration_ms is not None: - params.append(f"min_silence_duration_ms={self._params.min_silence_duration_ms}") ws_url = f"wss://{self._base_url}/v1/speech-to-text/realtime?{'&'.join(params)}" @@ -817,7 +845,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): """ # If timestamps are enabled, skip this message and wait for the # committed_transcript_with_timestamps message which contains all the data - if 
self._params.include_timestamps: + if self._settings.include_timestamps: return text = data.get("text", "").strip() @@ -833,6 +861,8 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): await self._handle_transcription(text, True, language) + finalized = self._settings.commit_strategy == CommitStrategy.MANUAL + await self.push_frame( TranscriptionFrame( text, @@ -840,6 +870,7 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): time_now_iso8601(), language, result=data, + finalized=finalized, ) ) @@ -874,6 +905,8 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): await self._handle_transcription(text, True, language) + finalized = self._settings.commit_strategy == CommitStrategy.MANUAL + # This message is sent after committed_transcript when include_timestamps=true. # It contains the full transcript data including text and word-level timestamps. await self.push_frame( @@ -883,5 +916,6 @@ class ElevenLabsRealtimeSTTService(WebsocketSTTService): time_now_iso8601(), language, result=data, + finalized=finalized, ) ) diff --git a/src/pipecat/services/elevenlabs/tts.py b/src/pipecat/services/elevenlabs/tts.py index be57a1a3d..1811ed971 100644 --- a/src/pipecat/services/elevenlabs/tts.py +++ b/src/pipecat/services/elevenlabs/tts.py @@ -13,7 +13,19 @@ with support for streaming audio, word timestamps, and voice customization. 
import asyncio import base64 import json -from typing import Any, AsyncGenerator, Dict, List, Literal, Mapping, Optional, Tuple, Union +from dataclasses import dataclass, field +from typing import ( + Any, + AsyncGenerator, + ClassVar, + Dict, + List, + Literal, + Mapping, + Optional, + Tuple, + Union, +) import aiohttp from loguru import logger @@ -32,9 +44,11 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import ( - AudioContextWordTTSService, - WordTTSService, + AudioContextTTSService, + TextAggregationMode, + TTSService, ) from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -136,12 +150,12 @@ def output_format_from_sample_rate(sample_rate: int) -> str: def build_elevenlabs_voice_settings( - settings: Dict[str, Any], + settings: Union[Dict[str, Any], "TTSSettings"], ) -> Optional[Dict[str, Union[float, bool]]]: """Build voice settings dictionary for ElevenLabs based on provided settings. Args: - settings: Dictionary containing voice settings parameters. + settings: Dictionary or settings containing voice settings parameters. Returns: Dictionary of voice settings or None if no valid settings are provided. @@ -150,8 +164,11 @@ def build_elevenlabs_voice_settings( voice_settings = {} for key in voice_setting_keys: - if key in settings and settings[key] is not None: - voice_settings[key] = settings[key] + val = ( + getattr(settings, key, None) if isinstance(settings, TTSSettings) else settings.get(key) + ) + if val is not None: + voice_settings[key] = val return voice_settings or None @@ -168,6 +185,79 @@ class PronunciationDictionaryLocator(BaseModel): version_id: str +@dataclass +class ElevenLabsTTSSettings(TTSSettings): + """Settings for the ElevenLabs WebSocket TTS service. 
+ + Fields that appear in the WebSocket URL (``voice``, ``model``, + ``language``) require a full reconnect when changed. Fields that + affect the voice character (``stability``, ``similarity_boost``, + ``style``, ``use_speaker_boost``, ``speed``) can be applied by closing + the current audio context so a new one is opened with updated settings. + + Parameters: + stability: Voice stability control (0.0 to 1.0). + similarity_boost: Similarity boost control (0.0 to 1.0). + style: Style control for voice expression (0.0 to 1.0). + use_speaker_boost: Whether to use speaker boost enhancement. + speed: Voice speed control (0.7 to 1.2). + auto_mode: Whether to enable automatic mode optimization. + enable_ssml_parsing: Whether to parse SSML tags in text. + enable_logging: Whether to enable ElevenLabs logging. + apply_text_normalization: Text normalization mode ("auto", "on", "off"). + """ + + stability: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + similarity_boost: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + style: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + use_speaker_boost: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + auto_mode: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_ssml_parsing: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_logging: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: Literal["auto", "on", "off"] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + #: Fields in the WS URL — changing any of these requires a reconnect. + URL_FIELDS: ClassVar[frozenset[str]] = frozenset({"voice", "model", "language"}) + + #: Fields affecting voice character — changing these requires closing the + #: current audio context so the next one picks up new settings. 
+ VOICE_SETTINGS_FIELDS: ClassVar[frozenset[str]] = frozenset( + {"stability", "similarity_boost", "style", "use_speaker_boost", "speed"} + ) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + + +@dataclass +class ElevenLabsHttpTTSSettings(TTSSettings): + """Settings for the ElevenLabs HTTP TTS service. + + Parameters: + optimize_streaming_latency: Latency optimization level (0-4). + stability: Voice stability control (0.0 to 1.0). + similarity_boost: Similarity boost control (0.0 to 1.0). + style: Style control for voice expression (0.0 to 1.0). + use_speaker_boost: Whether to use speaker boost enhancement. + speed: Voice speed control (0.25 to 4.0). + apply_text_normalization: Text normalization mode ("auto", "on", "off"). + """ + + optimize_streaming_latency: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + stability: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + similarity_boost: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + style: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + use_speaker_boost: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: Literal["auto", "on", "off"] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + + def calculate_word_times( alignment_info: Mapping[str, Any], cumulative_time: float, @@ -228,7 +318,7 @@ def calculate_word_times( return (word_times, new_partial_word, new_partial_word_start_time) -class ElevenLabsTTSService(AudioContextWordTTSService): +class ElevenLabsTTSService(AudioContextTTSService): """ElevenLabs WebSocket-based TTS service with word timestamps. Provides real-time text-to-speech using ElevenLabs' WebSocket streaming API. 
@@ -236,6 +326,8 @@ class ElevenLabsTTSService(AudioContextWordTTSService): customization options including stability, similarity boost, and speed controls. """ + _settings: ElevenLabsTTSSettings + class InputParams(BaseModel): """Input parameters for ElevenLabs TTS configuration. @@ -274,7 +366,8 @@ class ElevenLabsTTSService(AudioContextWordTTSService): url: str = "wss://api.elevenlabs.io", sample_rate: Optional[int] = None, params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, **kwargs, ): """Initialize the ElevenLabs TTS service. @@ -286,13 +379,20 @@ class ElevenLabsTTSService(AudioContextWordTTSService): url: WebSocket URL for ElevenLabs TTS API. sample_rate: Audio sample rate. If None, uses default. params: Additional input parameters for voice customization. + text_aggregation_mode: How to aggregate incoming text before synthesis. aggregate_sentences: Whether to aggregate sentences within the TTSService. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + **kwargs: Additional arguments passed to the parent service. """ - # Aggregating sentences still gives cleaner-sounding results and fewer - # artifacts than streaming one word at a time. On average, waiting for a - # full sentence should only "cost" us 15ms or so with GPT-4o or a Llama - # 3 model, and it's worth it for the better audio quality. + # By default, we aggregate sentences before sending to TTS. This adds + # ~200-300ms of latency per sentence (waiting for the sentence-ending + # punctuation token from the LLM). Setting + # text_aggregation_mode=TextAggregationMode.TOKEN streams tokens + # directly. To use this mode, you must set auto_mode=False. This + # eliminates aggregation time, but slows down ElevenLabs. 
# # We also don't want to automatically push LLM response text frames, # because the context aggregators will add them to the LLM context even @@ -303,35 +403,38 @@ class ElevenLabsTTSService(AudioContextWordTTSService): # Finally, ElevenLabs doesn't provide information on when the bot stops # speaking for a while, so we want the parent class to send TTSStopFrame # after a short period not receiving any audio. + params = params or ElevenLabsTTSService.InputParams() + super().__init__( + text_aggregation_mode=text_aggregation_mode, aggregate_sentences=aggregate_sentences, push_text_frames=False, push_stop_frames=True, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=sample_rate, + settings=ElevenLabsTTSSettings( + model=model, + voice=voice_id, + language=( + self.language_to_service_language(params.language) if params.language else None + ), + stability=params.stability, + similarity_boost=params.similarity_boost, + style=params.style, + use_speaker_boost=params.use_speaker_boost, + speed=params.speed, + auto_mode=str(params.auto_mode).lower(), + enable_ssml_parsing=params.enable_ssml_parsing, + enable_logging=params.enable_logging, + apply_text_normalization=params.apply_text_normalization, + ), **kwargs, ) - params = params or ElevenLabsTTSService.InputParams() - self._api_key = api_key self._url = url - self._settings = { - "language": self.language_to_service_language(params.language) - if params.language - else None, - "stability": params.stability, - "similarity_boost": params.similarity_boost, - "style": params.style, - "use_speaker_boost": params.use_speaker_boost, - "speed": params.speed, - "auto_mode": str(params.auto_mode).lower(), - "enable_ssml_parsing": params.enable_ssml_parsing, - "enable_logging": params.enable_logging, - "apply_text_normalization": params.apply_text_normalization, - } - self.set_model_name(model) - self.set_voice(voice_id) + self._output_format = "" # initialized in start() self._voice_settings = 
self._set_voice_settings() self._pronunciation_dictionary_locators = params.pronunciation_dictionary_locators @@ -365,54 +468,57 @@ class ElevenLabsTTSService(AudioContextWordTTSService): return language_to_elevenlabs_language(language) def _set_voice_settings(self): - return build_elevenlabs_voice_settings(self._settings) + ts = self._settings + voice_setting_keys = [ + "stability", + "similarity_boost", + "style", + "use_speaker_boost", + "speed", + ] + voice_settings = {} + for key in voice_setting_keys: + val = getattr(ts, key, None) + if val is not None: + voice_settings[key] = val + return voice_settings or None - async def set_model(self, model: str): - """Set the TTS model and reconnect. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta, reconnecting as needed. + + Uses the declarative ``URL_FIELDS`` and ``VOICE_SETTINGS_FIELDS`` + sets on :class:`ElevenLabsTTSSettings` to decide whether to + reconnect the WebSocket or close the current audio context. Args: - model: The model name to use for synthesis. + delta: A :class:`TTSSettings` (or ``ElevenLabsTTSSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. 
""" - await super().set_model(model) - logger.info(f"Switching TTS model to: [{model}]") - await self._disconnect() - await self._connect() + changed = await super()._update_settings(delta) - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if voice, model, or language changed.""" - # Track previous values for settings that require reconnection - prev_voice = self._voice_id - prev_model = self.model_name - prev_language = self._settings.get("language") - # Create snapshot of current voice settings to detect changes after update - prev_voice_settings = self._voice_settings.copy() if self._voice_settings else None + if not changed: + return changed - await super()._update_settings(settings) - - # Update voice settings for the next context creation + # Rebuild voice settings for next context self._voice_settings = self._set_voice_settings() - # Check if URL-level settings changed (these require reconnection) - url_changed = ( - prev_voice != self._voice_id - or prev_model != self.model_name - or prev_language != self._settings.get("language") - ) - - # Check if only voice settings changed (speed, stability, etc.) 
- voice_settings_changed = prev_voice_settings != self._voice_settings + url_changed = bool(changed.keys() & ElevenLabsTTSSettings.URL_FIELDS) + voice_settings_changed = bool(changed.keys() & ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS) if url_changed: - # These settings are in the WebSocket URL, so we need to reconnect logger.debug( - f"URL-level setting changed (voice/model/language), reconnecting WebSocket" + f"URL-level setting changed ({changed.keys() & ElevenLabsTTSSettings.URL_FIELDS}), " + f"reconnecting WebSocket" ) await self._disconnect() await self._connect() elif voice_settings_changed and self.has_active_audio_context(): - # Voice settings can be updated by closing current context - # so new one gets created with updated voice settings - logger.debug(f"Voice settings changed, closing current context to apply changes") + logger.debug( + f"Voice settings changed ({changed.keys() & ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS}), " + f"closing current context to apply changes" + ) context_id = self.get_active_audio_context_id() try: if self._websocket: @@ -423,6 +529,14 @@ class ElevenLabsTTSService(AudioContextWordTTSService): await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) self.reset_active_audio_context() + if not url_changed: + # Reconnect applies all settings; only warn about fields not handled + # by voice settings or URL changes. + handled = ElevenLabsTTSSettings.URL_FIELDS | ElevenLabsTTSSettings.VOICE_SETTINGS_FIELDS + self._warn_unhandled_updated_settings(changed.keys() - handled) + + return changed + async def start(self, frame: StartFrame): """Start the ElevenLabs TTS service. 
@@ -503,22 +617,22 @@ class ElevenLabsTTSService(AudioContextWordTTSService): logger.debug("Connecting to ElevenLabs") - voice_id = self._voice_id - model = self.model_name + voice_id = self._settings.voice + model = self._settings.model output_format = self._output_format - url = f"{self._url}/v1/text-to-speech/{voice_id}/multi-stream-input?model_id={model}&output_format={output_format}&auto_mode={self._settings['auto_mode']}" + url = f"{self._url}/v1/text-to-speech/{voice_id}/multi-stream-input?model_id={model}&output_format={output_format}&auto_mode={self._settings.auto_mode}" - if self._settings["enable_ssml_parsing"]: - url += f"&enable_ssml_parsing={self._settings['enable_ssml_parsing']}" + if self._settings.enable_ssml_parsing: + url += f"&enable_ssml_parsing={self._settings.enable_ssml_parsing}" - if self._settings["enable_logging"]: - url += f"&enable_logging={self._settings['enable_logging']}" + if self._settings.enable_logging: + url += f"&enable_logging={self._settings.enable_logging}" - if self._settings["apply_text_normalization"] is not None: - url += f"&apply_text_normalization={self._settings['apply_text_normalization']}" + if self._settings.apply_text_normalization is not None: + url += f"&apply_text_normalization={self._settings.apply_text_normalization}" # Language can only be used with the ELEVENLABS_MULTILINGUAL_MODELS - language = self._settings["language"] + language = self._settings.language if model in ELEVENLABS_MULTILINGUAL_MODELS and language is not None: url += f"&language_code={language}" logger.debug(f"Using language code: {language}") @@ -561,14 +675,11 @@ class ElevenLabsTTSService(AudioContextWordTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by closing the current context.""" - # Close the current context when interrupted without closing the websocket - context_id = 
self.get_active_audio_context_id() - await super()._handle_interruption(frame, direction) - + async def _close_context(self, context_id: str): + # ElevenLabs requires that Pipecat explicitly closes contexts to free + # server-side resources, both on interruption and on normal completion. if context_id and self._websocket: - logger.trace(f"Closing context {context_id} due to interruption") + logger.trace(f"{self}: Closing context {context_id}") try: # ElevenLabs requires that Pipecat manages the contexts and closes them # when they're not longer in use. Since an InterruptionFrame is pushed @@ -581,8 +692,21 @@ class ElevenLabsTTSService(AudioContextWordTTSService): ) except Exception as e: await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) - self._partial_word = "" - self._partial_word_start_time = 0.0 + self._partial_word = "" + self._partial_word_start_time = 0.0 + + async def on_audio_context_interrupted(self, context_id: str): + """Close the ElevenLabs context when the bot is interrupted.""" + await self._close_context(context_id) + + async def on_audio_context_completed(self, context_id: str): + """Close the ElevenLabs context after all audio has been played. + + ElevenLabs does not send a server-side signal when a context is + exhausted, so Pipecat must explicitly close it with + ``close_context: True`` to free server-side resources. + """ + await self._close_context(context_id) async def _receive_messages(self): """Handle incoming WebSocket messages from ElevenLabs.""" @@ -734,7 +858,7 @@ class ElevenLabsTTSService(AudioContextWordTTSService): yield ErrorFrame(error=f"Unknown error occurred: {e}") -class ElevenLabsHttpTTSService(WordTTSService): +class ElevenLabsHttpTTSService(TTSService): """ElevenLabs HTTP-based TTS service with word timestamps. Provides text-to-speech using ElevenLabs' HTTP streaming API for simpler, @@ -742,6 +866,8 @@ class ElevenLabsHttpTTSService(WordTTSService): connection is not required or desired. 
""" + _settings: ElevenLabsHttpTTSSettings + class InputParams(BaseModel): """Input parameters for ElevenLabs HTTP TTS configuration. @@ -777,7 +903,8 @@ class ElevenLabsHttpTTSService(WordTTSService): base_url: str = "https://api.elevenlabs.io", sample_rate: Optional[int] = None, params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, **kwargs, ): """Initialize the ElevenLabs HTTP TTS service. @@ -790,38 +917,44 @@ class ElevenLabsHttpTTSService(WordTTSService): base_url: Base URL for ElevenLabs HTTP API. sample_rate: Audio sample rate. If None, uses default. params: Additional input parameters for voice customization. + text_aggregation_mode: How to aggregate incoming text before synthesis. aggregate_sentences: Whether to aggregate sentences within the TTSService. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + **kwargs: Additional arguments passed to the parent service. 
""" + params = params or ElevenLabsHttpTTSService.InputParams() + super().__init__( + text_aggregation_mode=text_aggregation_mode, aggregate_sentences=aggregate_sentences, push_text_frames=False, push_stop_frames=True, + supports_word_timestamps=True, sample_rate=sample_rate, + settings=ElevenLabsHttpTTSSettings( + model=model, + voice=voice_id, + language=self.language_to_service_language(params.language) + if params.language + else None, + optimize_streaming_latency=params.optimize_streaming_latency, + stability=params.stability, + similarity_boost=params.similarity_boost, + style=params.style, + use_speaker_boost=params.use_speaker_boost, + speed=params.speed, + apply_text_normalization=params.apply_text_normalization, + ), **kwargs, ) - params = params or ElevenLabsHttpTTSService.InputParams() - self._api_key = api_key self._base_url = base_url - self._params = params self._session = aiohttp_session - self._settings = { - "language": self.language_to_service_language(params.language) - if params.language - else None, - "optimize_streaming_latency": params.optimize_streaming_latency, - "stability": params.stability, - "similarity_boost": params.similarity_boost, - "style": params.style, - "use_speaker_boost": params.use_speaker_boost, - "speed": params.speed, - "apply_text_normalization": params.apply_text_normalization, - } - self.set_model_name(model) - self.set_voice(voice_id) self._output_format = "" # initialized in start() self._voice_settings = self._set_voice_settings() self._pronunciation_dictionary_locators = params.pronunciation_dictionary_locators @@ -858,10 +991,19 @@ class ElevenLabsHttpTTSService(WordTTSService): def _set_voice_settings(self): return build_elevenlabs_voice_settings(self._settings) - async def _update_settings(self, settings: Mapping[str, Any]): - await super()._update_settings(settings) - # Update voice settings for the next context creation - self._voice_settings = self._set_voice_settings() + async def _update_settings(self, 
delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and rebuild voice settings. + + Args: + delta: A :class:`TTSSettings` (or ``ElevenLabsHttpTTSSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(delta) + if changed: + self._voice_settings = self._set_voice_settings() + return changed def _reset_state(self): """Reset internal state variables.""" @@ -979,11 +1121,11 @@ class ElevenLabsHttpTTSService(WordTTSService): logger.debug(f"{self}: Generating TTS [{text}]") # Use the with-timestamps endpoint - url = f"{self._base_url}/v1/text-to-speech/{self._voice_id}/stream/with-timestamps" + url = f"{self._base_url}/v1/text-to-speech/{self._settings.voice}/stream/with-timestamps" payload: Dict[str, Union[str, Dict[str, Union[float, bool]]]] = { "text": text, - "model_id": self._model_name, + "model_id": self._settings.model, } # Include previous text as context if available @@ -998,11 +1140,11 @@ class ElevenLabsHttpTTSService(WordTTSService): locator.model_dump() for locator in self._pronunciation_dictionary_locators ] - if self._settings["apply_text_normalization"] is not None: - payload["apply_text_normalization"] = self._settings["apply_text_normalization"] + if self._settings.apply_text_normalization is not None: + payload["apply_text_normalization"] = self._settings.apply_text_normalization - language = self._settings["language"] - if self._model_name in ELEVENLABS_MULTILINGUAL_MODELS and language: + language = self._settings.language + if self._settings.model in ELEVENLABS_MULTILINGUAL_MODELS and language: payload["language_code"] = language logger.debug(f"Using language code: {language}") elif language: @@ -1019,8 +1161,8 @@ class ElevenLabsHttpTTSService(WordTTSService): params = { "output_format": self._output_format, } - if self._settings["optimize_streaming_latency"] is not None: - params["optimize_streaming_latency"] = 
self._settings["optimize_streaming_latency"] + if self._settings.optimize_streaming_latency is not None: + params["optimize_streaming_latency"] = self._settings.optimize_streaming_latency try: await self.start_ttfb_metrics() diff --git a/src/pipecat/services/fal/image.py b/src/pipecat/services/fal/image.py index 412cedfbd..c16d31b43 100644 --- a/src/pipecat/services/fal/image.py +++ b/src/pipecat/services/fal/image.py @@ -13,6 +13,7 @@ for creating images from text prompts using various AI models. import asyncio import io import os +from dataclasses import dataclass from typing import AsyncGenerator, Dict, Optional, Union import aiohttp @@ -22,6 +23,7 @@ from pydantic import BaseModel from pipecat.frames.frames import ErrorFrame, Frame, URLImageRawFrame from pipecat.services.image_service import ImageGenService +from pipecat.services.settings import ImageGenSettings try: import fal_client @@ -31,6 +33,15 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class FalImageGenSettings(ImageGenSettings): + """Settings for the Fal image generation service. + + Parameters: + model: Fal.ai model identifier. + """ + + class FalImageGenService(ImageGenService): """Fal's image generation service. @@ -77,8 +88,7 @@ class FalImageGenService(ImageGenService): key: Optional API key for Fal.ai. If provided, sets FAL_KEY environment variable. **kwargs: Additional arguments passed to parent ImageGenService. 
""" - super().__init__(**kwargs) - self.set_model_name(model) + super().__init__(settings=FalImageGenSettings(model=model), **kwargs) self._params = params self._aiohttp_session = aiohttp_session if key: @@ -103,7 +113,7 @@ class FalImageGenService(ImageGenService): logger.debug(f"Generating image from prompt: {prompt}") response = await fal_client.run_async( - self.model_name, + self._settings.model, arguments={"prompt": prompt, **self._params.model_dump(exclude_none=True)}, ) diff --git a/src/pipecat/services/fal/stt.py b/src/pipecat/services/fal/stt.py index 4e8a655ec..bf70c1c2a 100644 --- a/src/pipecat/services/fal/stt.py +++ b/src/pipecat/services/fal/stt.py @@ -11,12 +11,14 @@ transcription using segmented audio processing. """ import os -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import FAL_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -146,6 +148,22 @@ def language_to_fal_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class FalSTTSettings(STTSettings): + """Settings for the Fal Wizper STT service. + + Parameters: + task: Task to perform ('transcribe' or 'translate'). Defaults to + 'transcribe'. + chunk_level: Level of chunking ('segment'). Defaults to 'segment'. + version: Version of Wizper model to use. Defaults to '3'. 
+ """ + + task: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + chunk_level: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + version: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class FalSTTService(SegmentedSTTService): """Speech-to-text service using Fal's Wizper API. @@ -153,6 +171,8 @@ class FalSTTService(SegmentedSTTService): segments. It inherits from SegmentedSTTService to handle audio buffering and speech detection. """ + _settings: FalSTTSettings + class InputParams(BaseModel): """Configuration parameters for Fal's Wizper API. @@ -187,14 +207,23 @@ class FalSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to SegmentedSTTService. """ + params = params or FalSTTService.InputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, + settings=FalSTTSettings( + model=None, + language=self.language_to_service_language(params.language) + if params.language + else "en", + task=params.task, + chunk_level=params.chunk_level, + version=params.version, + ), **kwargs, ) - params = params or FalSTTService.InputParams() - if api_key: os.environ["FAL_KEY"] = api_key elif "FAL_KEY" not in os.environ: @@ -203,14 +232,6 @@ class FalSTTService(SegmentedSTTService): ) self._fal_client = fal_client.AsyncClient(key=api_key or os.getenv("FAL_KEY")) - self._settings = { - "task": params.task, - "language": self.language_to_service_language(params.language) - if params.language - else "en", - "chunk_level": params.chunk_level, - "version": params.version, - } def can_generate_metrics(self) -> bool: """Check if the service can generate processing metrics. @@ -231,24 +252,6 @@ class FalSTTService(SegmentedSTTService): """ return language_to_fal_language(language) - async def set_language(self, language: Language): - """Set the transcription language. 
- - Args: - language: The language to use for speech-to-text transcription. - """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = self.language_to_service_language(language) - - async def set_model(self, model: str): - """Set the STT model. - - Args: - model: The model name to use for transcription. - """ - await super().set_model(model) - logger.info(f"Switching STT model to: [{model}]") - @traced_stt async def _handle_transcription( self, transcript: str, is_final: bool, language: Optional[str] = None @@ -276,19 +279,19 @@ class FalSTTService(SegmentedSTTService): data_uri = fal_client.encode(audio, "audio/x-wav") response = await self._fal_client.run( "fal-ai/wizper", - arguments={"audio_url": data_uri, **self._settings}, + arguments={"audio_url": data_uri, **self._settings.given_fields()}, ) if response and "text" in response: text = response["text"].strip() if text: # Only yield non-empty text - await self._handle_transcription(text, True, self._settings["language"]) + await self._handle_transcription(text, True, self._settings.language) logger.debug(f"Transcription: [{text}]") yield TranscriptionFrame( text, self._user_id, time_now_iso8601(), - Language(self._settings["language"]), + Language(self._settings.language), result=response, ) diff --git a/src/pipecat/services/fireworks/llm.py b/src/pipecat/services/fireworks/llm.py index d7bf57908..92deb00b9 100644 --- a/src/pipecat/services/fireworks/llm.py +++ b/src/pipecat/services/fireworks/llm.py @@ -66,17 +66,17 @@ class FireworksLLMService(OpenAILLMService): Dictionary of parameters for the chat completion request. 
""" params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, - "frequency_penalty": self._settings["frequency_penalty"], - "presence_penalty": self._settings["presence_penalty"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], + "frequency_penalty": self._settings.frequency_penalty, + "presence_penalty": self._settings.presence_penalty, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params diff --git a/src/pipecat/services/fish/tts.py b/src/pipecat/services/fish/tts.py index 93a718429..9f9d753de 100644 --- a/src/pipecat/services/fish/tts.py +++ b/src/pipecat/services/fish/tts.py @@ -11,7 +11,8 @@ for streaming text-to-speech synthesis with customizable voice parameters. """ import uuid -from typing import AsyncGenerator, Literal, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, ClassVar, Dict, Literal, Mapping, Optional from loguru import logger from pydantic import BaseModel @@ -28,6 +29,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import InterruptibleTTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -45,6 +47,41 @@ except ModuleNotFoundError as e: FishAudioOutputFormat = Literal["opus", "mp3", "pcm", "wav"] +@dataclass +class FishAudioTTSSettings(TTSSettings): + """Settings for Fish Audio TTS service. + + Parameters: + fish_sample_rate: Audio sample rate sent to the API. + latency: Latency mode ("normal" or "balanced"). 
Defaults to "normal". + format: Audio output format. + normalize: Whether to normalize audio output. Defaults to True. + prosody_speed: Speech speed multiplier (0.5-2.0). Defaults to 1.0. + prosody_volume: Volume adjustment in dB. Defaults to 0. + reference_id: Reference ID of the voice model. + """ + + fish_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + latency: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + normalize: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prosody_speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prosody_volume: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + reference_id: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice", "sample_rate": "fish_sample_rate"} + + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "FishAudioTTSSettings": + """Construct settings from a plain dict, destructuring legacy nested ``prosody``.""" + flat = dict(settings) + nested = flat.pop("prosody", None) + if isinstance(nested, dict): + flat.setdefault("prosody_speed", nested.get("speed")) + flat.setdefault("prosody_volume", nested.get("volume")) + return super().from_mapping(flat) + + class FishAudioTTSService(InterruptibleTTSService): """Fish Audio text-to-speech service with WebSocket streaming. @@ -53,6 +90,8 @@ class FishAudioTTSService(InterruptibleTTSService): audio generation with interruption handling. """ + _settings: FishAudioTTSSettings + class InputParams(BaseModel): """Input parameters for Fish Audio TTS configuration. @@ -99,13 +138,6 @@ class FishAudioTTSService(InterruptibleTTSService): params: Additional input parameters for voice customization. **kwargs: Additional arguments passed to the parent service. 
""" - super().__init__( - push_stop_frames=True, - pause_frame_processing=True, - sample_rate=sample_rate, - **kwargs, - ) - params = params or FishAudioTTSService.InputParams() # Validation for model and reference_id parameters @@ -130,26 +162,30 @@ class FishAudioTTSService(InterruptibleTTSService): ) reference_id = model + super().__init__( + push_stop_frames=True, + pause_frame_processing=True, + sample_rate=sample_rate, + settings=FishAudioTTSSettings( + model=model_id, + voice=reference_id, + fish_sample_rate=0, + latency=params.latency, + format=output_format, + normalize=params.normalize, + prosody_speed=params.prosody_speed, + prosody_volume=params.prosody_volume, + reference_id=reference_id, + ), + **kwargs, + ) + self._api_key = api_key self._base_url = "wss://api.fish.audio/v1/tts/live" self._websocket = None self._receive_task = None self._request_id = None - self._settings = { - "sample_rate": 0, - "latency": params.latency, - "format": output_format, - "normalize": params.normalize, - "prosody": { - "speed": params.prosody_speed, - "volume": params.prosody_volume, - }, - "reference_id": reference_id, - } - - self.set_model_name(model_id) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -158,16 +194,24 @@ class FishAudioTTSService(InterruptibleTTSService): """ return True - async def set_model(self, model: str): - """Set the TTS model and reconnect. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if needed. + + Any change to voice or model triggers a WebSocket reconnect. Args: - model: The model name to use for synthesis. + delta: A :class:`TTSSettings` (or ``FishAudioTTSSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. 
""" - await super().set_model(model) - logger.info(f"Switching TTS model to: [{model}]") - await self._disconnect() - await self._connect() + changed = await super()._update_settings(delta) + + if changed: + await self._disconnect() + await self._connect() + + return changed async def start(self, frame: StartFrame): """Start the Fish Audio TTS service. @@ -176,7 +220,7 @@ class FishAudioTTSService(InterruptibleTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.fish_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -221,11 +265,22 @@ class FishAudioTTSService(InterruptibleTTSService): logger.debug("Connecting to Fish Audio") headers = {"Authorization": f"Bearer {self._api_key}"} - headers["model"] = self.model_name + headers["model"] = self._settings.model self._websocket = await websocket_connect(self._base_url, additional_headers=headers) # Send initial start message with ormsgpack - start_message = {"event": "start", "request": {"text": "", **self._settings}} + request_settings = { + "sample_rate": self._settings.fish_sample_rate, + "latency": self._settings.latency, + "format": self._settings.format, + "normalize": self._settings.normalize, + "prosody": { + "speed": self._settings.prosody_speed, + "volume": self._settings.prosody_volume, + }, + "reference_id": self._settings.reference_id, + } + start_message = {"event": "start", "request": {"text": "", **request_settings}} await self._websocket.send(ormsgpack.packb(start_message)) logger.debug("Sent start message to Fish Audio") diff --git a/src/pipecat/services/gladia/stt.py b/src/pipecat/services/gladia/stt.py index 475a7213e..bba554b4a 100644 --- a/src/pipecat/services/gladia/stt.py +++ b/src/pipecat/services/gladia/stt.py @@ -14,6 +14,7 @@ import asyncio import base64 import json import warnings +from dataclasses import dataclass, field from 
typing import Any, AsyncGenerator, Dict, Literal, Optional import aiohttp @@ -31,7 +32,14 @@ from pipecat.frames.frames import ( UserStartedSpeakingFrame, UserStoppedSpeakingFrame, ) -from pipecat.services.gladia.config import GladiaInputParams +from pipecat.services.gladia.config import ( + GladiaInputParams, + LanguageConfig, + MessagesConfig, + PreProcessingConfig, + RealtimeProcessingConfig, +) +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import GLADIA_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -178,6 +186,43 @@ class _InputParamsDescriptor: return GladiaInputParams +@dataclass +class GladiaSTTSettings(STTSettings): + """Settings for Gladia STT service. + + Parameters: + encoding: Audio encoding format. + bit_depth: Audio bit depth. + channels: Number of audio channels. + custom_metadata: Additional metadata to include with requests. + endpointing: Silence duration in seconds to mark end of speech. + maximum_duration_without_endpointing: Maximum utterance duration without silence. + language_config: Detailed language configuration. + pre_processing: Audio pre-processing options. + realtime_processing: Real-time processing features. + messages_config: WebSocket message filtering options. + enable_vad: Enable VAD to trigger end of utterance detection. 
+ """ + + encoding: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + bit_depth: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + channels: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + custom_metadata: Dict[str, Any] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + endpointing: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + maximum_duration_without_endpointing: int | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + language_config: LanguageConfig | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pre_processing: PreProcessingConfig | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + realtime_processing: RealtimeProcessingConfig | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + messages_config: MessagesConfig | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_vad: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class GladiaSTTService(WebsocketSTTService): """Speech-to-Text service using Gladia's API. @@ -191,6 +236,8 @@ class GladiaSTTService(WebsocketSTTService): Use :class:`~pipecat.services.gladia.config.GladiaInputParams` directly instead. """ + _settings: GladiaSTTSettings + # Maintain backward compatibility InputParams = _InputParamsDescriptor() @@ -231,14 +278,6 @@ class GladiaSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to the STTService parent class. 
""" - super().__init__( - sample_rate=sample_rate, - ttfs_p99_latency=ttfs_p99_latency, - keepalive_timeout=20, - keepalive_interval=5, - **kwargs, - ) - params = params or GladiaInputParams() if params.language is not None: @@ -261,13 +300,40 @@ class GladiaSTTService(WebsocketSTTService): stacklevel=2, ) + # Resolve deprecated language → language_config at init time + language_config = params.language_config + if not language_config and params.language: + language_code = self.language_to_service_language(params.language) + if language_code: + language_config = LanguageConfig(languages=[language_code], code_switching=False) + + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + keepalive_timeout=20, + keepalive_interval=5, + settings=GladiaSTTSettings( + model=model, + language=None, + encoding=params.encoding, + bit_depth=params.bit_depth, + channels=params.channels, + custom_metadata=params.custom_metadata, + endpointing=params.endpointing, + maximum_duration_without_endpointing=params.maximum_duration_without_endpointing, + language_config=language_config, + pre_processing=params.pre_processing, + realtime_processing=params.realtime_processing, + messages_config=params.messages_config, + enable_vad=params.enable_vad, + ), + **kwargs, + ) + self._api_key = api_key self._region = region self._url = url - self.set_model_name(model) - self._params = params self._receive_task = None - self._settings = {} # Session management self._session_url = None @@ -307,53 +373,43 @@ class GladiaSTTService(WebsocketSTTService): return language_to_gladia_language(language) def _prepare_settings(self) -> Dict[str, Any]: + s = self._settings + settings = { - "encoding": self._params.encoding or "wav/pcm", - "bit_depth": self._params.bit_depth or 16, + "encoding": s.encoding or "wav/pcm", + "bit_depth": s.bit_depth or 16, "sample_rate": self.sample_rate, - "channels": self._params.channels or 1, - "model": self._model_name, + "channels": s.channels 
or 1, + "model": s.model, } # Add custom_metadata if provided - settings["custom_metadata"] = dict(self._params.custom_metadata or {}) + settings["custom_metadata"] = dict(s.custom_metadata or {}) settings["custom_metadata"]["pipecat"] = pipecat_version() # Add endpointing parameters if provided - if self._params.endpointing is not None: - settings["endpointing"] = self._params.endpointing - if self._params.maximum_duration_without_endpointing is not None: + if s.endpointing is not None: + settings["endpointing"] = s.endpointing + if s.maximum_duration_without_endpointing is not None: settings["maximum_duration_without_endpointing"] = ( - self._params.maximum_duration_without_endpointing + s.maximum_duration_without_endpointing ) - # Add language configuration (prioritize language_config over deprecated language) - if self._params.language_config: - settings["language_config"] = self._params.language_config.model_dump(exclude_none=True) - elif self._params.language: # Backward compatibility for deprecated parameter - language_code = self.language_to_service_language(self._params.language) - if language_code: - settings["language_config"] = { - "languages": [language_code], - "code_switching": False, - } + # Add language configuration + if s.language_config: + settings["language_config"] = s.language_config.model_dump(exclude_none=True) # Add pre_processing configuration if provided - if self._params.pre_processing: - settings["pre_processing"] = self._params.pre_processing.model_dump(exclude_none=True) + if s.pre_processing: + settings["pre_processing"] = s.pre_processing.model_dump(exclude_none=True) # Add realtime_processing configuration if provided - if self._params.realtime_processing: - settings["realtime_processing"] = self._params.realtime_processing.model_dump( - exclude_none=True - ) + if s.realtime_processing: + settings["realtime_processing"] = s.realtime_processing.model_dump(exclude_none=True) # Add messages_config if provided - if 
self._params.messages_config: - settings["messages_config"] = self._params.messages_config.model_dump(exclude_none=True) - - # Store settings for tracing - self._settings = settings + if s.messages_config: + settings["messages_config"] = s.messages_config.model_dump(exclude_none=True) return settings @@ -366,6 +422,33 @@ class GladiaSTTService(WebsocketSTTService): await super().start(frame) await self._connect() + async def _update_settings(self, delta: GladiaSTTSettings) -> dict[str, Any]: + """Apply settings delta. + + Settings are stored but not applied to the active session. + + Args: + delta: A settings delta. + + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # self._session_url = None + # self._session_id = None + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def stop(self, frame: EndFrame): """Stop the Gladia STT websocket connection. @@ -522,7 +605,7 @@ class GladiaSTTService(WebsocketSTTService): Broadcasts UserStartedSpeakingFrame and optionally triggers interruption when VAD is enabled. """ - if not self._params.enable_vad or self._is_speaking: + if not self._settings.enable_vad or self._is_speaking: return logger.debug(f"{self} User started speaking") @@ -530,14 +613,14 @@ class GladiaSTTService(WebsocketSTTService): await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _on_speech_ended(self): """Handle speech end event from Gladia. Broadcasts UserStoppedSpeakingFrame when VAD is enabled. 
""" - if not self._params.enable_vad or not self._is_speaking: + if not self._settings.enable_vad or not self._is_speaking: return self._is_speaking = False await self.broadcast_frame(UserStoppedSpeakingFrame) diff --git a/src/pipecat/services/google/gemini_live/llm.py b/src/pipecat/services/google/gemini_live/llm.py index e209f3d0a..2ed11c739 100644 --- a/src/pipecat/services/google/gemini_live/llm.py +++ b/src/pipecat/services/google/gemini_live/llm.py @@ -17,9 +17,9 @@ import io import time import uuid import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Any, ClassVar, Dict, List, Optional, Union from loguru import logger from PIL import Image @@ -47,7 +47,6 @@ from pipecat.frames.frames import ( LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, - LLMUpdateSettingsFrame, StartFrame, TranscriptionFrame, TTSAudioRawFrame, @@ -77,6 +76,7 @@ from pipecat.services.openai.llm import ( OpenAIAssistantContextAggregator, OpenAIUserContextAggregator, ) +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.string import match_endofsentence from pipecat.utils.time import time_now_iso8601 @@ -602,6 +602,33 @@ class InputParams(BaseModel): extra: Optional[Dict[str, Any]] = Field(default_factory=dict) +@dataclass +class GeminiLiveLLMSettings(LLMSettings): + """Settings for Gemini Live LLM services. + + Parameters: + modalities: Response modalities. + language: Language for generation. + media_resolution: Media resolution setting. + vad: Voice activity detection parameters. + context_window_compression: Context window compression configuration. + thinking: Thinking configuration. + enable_affective_dialog: Whether to enable affective dialog. + proactivity: Proactivity configuration. 
+ """ + + modalities: GeminiModalities | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language: Language | str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + media_resolution: GeminiMediaResolution | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad: GeminiVADParams | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + context_window_compression: ContextWindowCompressionParams | dict | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + thinking: ThinkingConfig | dict | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_affective_dialog: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + proactivity: ProactivityConfig | dict | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class GeminiLiveLLMService(LLMService): """Provides access to Google's Gemini Live API. @@ -610,6 +637,8 @@ class GeminiLiveLLMService(LLMService): responses, and tool usage. """ + _settings: GeminiLiveLLMSettings + # Overriding the default adapter to use the Gemini one. 
adapter_class = GeminiLLMAdapter @@ -666,13 +695,40 @@ class GeminiLiveLLMService(LLMService): stacklevel=2, ) - super().__init__(base_url=base_url, **kwargs) - params = params or InputParams() + super().__init__( + base_url=base_url, + settings=GeminiLiveLLMSettings( + model=model, + frequency_penalty=params.frequency_penalty, + max_tokens=params.max_tokens, + presence_penalty=params.presence_penalty, + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + modalities=params.modalities, + language=language_to_gemini_language(params.language) + if params.language + else "en-US", + media_resolution=params.media_resolution, + vad=params.vad, + context_window_compression=params.context_window_compression.model_dump() + if params.context_window_compression + else {}, + thinking=params.thinking or {}, + enable_affective_dialog=params.enable_affective_dialog or False, + proactivity=params.proactivity or {}, + extra=params.extra if isinstance(params.extra, dict) else {}, + ), + **kwargs, + ) + self._last_sent_time = 0 self._base_url = base_url - self.set_model_name(model) self._voice_id = voice_id self._language_code = params.language @@ -714,26 +770,6 @@ class GeminiLiveLLMService(LLMService): self._consecutive_failures = 0 self._connection_start_time = None - self._settings = { - "frequency_penalty": params.frequency_penalty, - "max_tokens": params.max_tokens, - "presence_penalty": params.presence_penalty, - "temperature": params.temperature, - "top_k": params.top_k, - "top_p": params.top_p, - "modalities": params.modalities, - "language": self._language_code, - "media_resolution": params.media_resolution, - "vad": params.vad, - "context_window_compression": params.context_window_compression.model_dump() - if params.context_window_compression - else {}, - "thinking": params.thinking or {}, - "enable_affective_dialog": params.enable_affective_dialog or False, - 
"proactivity": params.proactivity or {}, - "extra": params.extra if isinstance(params.extra, dict) else {}, - } - self._file_api_base_url = file_api_base_url self._file_api: Optional[GeminiFileAPI] = None @@ -776,6 +812,25 @@ class GeminiLiveLLMService(LLMService): """ return True + async def _update_settings(self, delta: LLMSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + def set_audio_input_paused(self, paused: bool): """Set the audio input pause state. @@ -798,7 +853,7 @@ class GeminiLiveLLMService(LLMService): Args: modalities: The modalities to use for responses. """ - self._settings["modalities"] = modalities + self._settings.modalities = modalities def set_language(self, language: Language): """Set the language for generation. 
@@ -808,7 +863,7 @@ class GeminiLiveLLMService(LLMService): """ self._language = language self._language_code = language_to_gemini_language(language) or "en-US" - self._settings["language"] = self._language_code + self._settings.language = self._language_code logger.info(f"Set Gemini language to: {self._language_code}") async def set_context(self, context: OpenAILLMContext): @@ -866,7 +921,7 @@ class GeminiLiveLLMService(LLMService): async def _handle_interruption(self): if self._bot_is_responding: await self._set_bot_is_responding(False) - if self._settings.get("modalities") == GeminiModalities.AUDIO: + if self._settings.modalities == GeminiModalities.AUDIO: await self.push_frame(TTSStoppedFrame()) # Do not send LLMFullResponseEndFrame here - an interruption # already tells the assistant context aggregator that the response @@ -947,10 +1002,9 @@ class GeminiLiveLLMService(LLMService): # uses this frame *without* a user context aggregator still works # (we have an example that does just that, actually). await self._create_single_response(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) elif isinstance(frame, LLMSetToolsFrame): - await self._update_settings() + # TODO: implement runtime tool updates for Gemini Live. 
+ pass else: await self.push_frame(frame, direction) @@ -1074,20 +1128,20 @@ class GeminiLiveLLMService(LLMService): # Assemble basic configuration config = LiveConnectConfig( generation_config=GenerationConfig( - frequency_penalty=self._settings["frequency_penalty"], - max_output_tokens=self._settings["max_tokens"], - presence_penalty=self._settings["presence_penalty"], - temperature=self._settings["temperature"], - top_k=self._settings["top_k"], - top_p=self._settings["top_p"], - response_modalities=[Modality(self._settings["modalities"].value)], + frequency_penalty=self._settings.frequency_penalty, + max_output_tokens=self._settings.max_tokens, + presence_penalty=self._settings.presence_penalty, + temperature=self._settings.temperature, + top_k=self._settings.top_k, + top_p=self._settings.top_p, + response_modalities=[Modality(self._settings.modalities.value)], speech_config=SpeechConfig( voice_config=VoiceConfig( prebuilt_voice_config={"voice_name": self._voice_id} ), - language_code=self._settings["language"], + language_code=self._settings.language, ), - media_resolution=MediaResolution(self._settings["media_resolution"].value), + media_resolution=MediaResolution(self._settings.media_resolution.value), ), input_audio_transcription=AudioTranscriptionConfig(), output_audio_transcription=AudioTranscriptionConfig(), @@ -1095,37 +1149,36 @@ class GeminiLiveLLMService(LLMService): ) # Add context window compression to configuration, if enabled - if self._settings.get("context_window_compression", {}).get("enabled", False): + cwc = self._settings.context_window_compression or {} + if cwc.get("enabled", False): compression_config = ContextWindowCompressionConfig() # Add sliding window (always true if compression is enabled) compression_config.sliding_window = SlidingWindow() # Add trigger_tokens if specified - trigger_tokens = self._settings.get("context_window_compression", {}).get( - "trigger_tokens" - ) + trigger_tokens = cwc.get("trigger_tokens") if 
trigger_tokens is not None: compression_config.trigger_tokens = trigger_tokens config.context_window_compression = compression_config # Add thinking configuration to configuration, if provided - if self._settings.get("thinking"): - config.thinking_config = self._settings["thinking"] + if self._settings.thinking: + config.thinking_config = self._settings.thinking # Add affective dialog setting, if provided - if self._settings.get("enable_affective_dialog", False): - config.enable_affective_dialog = self._settings["enable_affective_dialog"] + if self._settings.enable_affective_dialog: + config.enable_affective_dialog = self._settings.enable_affective_dialog # Add proactivity configuration to configuration, if provided - if self._settings.get("proactivity"): - config.proactivity = self._settings["proactivity"] + if self._settings.proactivity: + config.proactivity = self._settings.proactivity # Add VAD configuration to configuration, if provided - if self._settings.get("vad"): + if self._settings.vad: vad_config = AutomaticActivityDetection() - vad_params = self._settings["vad"] + vad_params = self._settings.vad has_vad_settings = False # Only add parameters that are explicitly set @@ -1183,7 +1236,9 @@ class GeminiLiveLLMService(LLMService): await self.push_error(error_msg=f"Initialization error: {e}", exception=e) async def _connection_task_handler(self, config: LiveConnectConfig): - async with self._client.aio.live.connect(model=self._model_name, config=config) as session: + async with self._client.aio.live.connect( + model=self._settings.model, config=config + ) as session: logger.info("Connected to Gemini service") # Mark connection start time @@ -1210,7 +1265,7 @@ class GeminiLiveLLMService(LLMService): # combination with the context aggregator default # turn strategies. 
logger.debug("Gemini VAD: interrupted signal received") - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() elif message.server_content and message.server_content.model_turn: await self._handle_msg_model_turn(message) elif ( @@ -1604,7 +1659,7 @@ class GeminiLiveLLMService(LLMService): text: The transcription text to push result: Optional LiveServerMessage that triggered this transcription """ - await self._handle_user_transcription(text, True, self._settings["language"]) + await self._handle_user_transcription(text, True, self._settings.language) await self.push_frame( TranscriptionFrame( text=text, diff --git a/src/pipecat/services/google/image.py b/src/pipecat/services/google/image.py index fcc8e41d0..e69faf65e 100644 --- a/src/pipecat/services/google/image.py +++ b/src/pipecat/services/google/image.py @@ -16,6 +16,7 @@ import os # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" +from dataclasses import dataclass from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -25,6 +26,7 @@ from pydantic import BaseModel, Field from pipecat.frames.frames import ErrorFrame, Frame, URLImageRawFrame from pipecat.services.google.utils import update_google_client_http_options from pipecat.services.image_service import ImageGenService +from pipecat.services.settings import ImageGenSettings try: from google import genai @@ -35,6 +37,15 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class GoogleImageGenSettings(ImageGenSettings): + """Settings for the Google image generation service. + + Parameters: + model: Google Imagen model identifier. + """ + + class GoogleImageGenService(ImageGenService): """Google AI image generation service using Imagen models. @@ -72,14 +83,14 @@ class GoogleImageGenService(ImageGenService): http_options: HTTP options for the client. **kwargs: Additional arguments passed to the parent ImageGenService. 
""" - super().__init__(**kwargs) - self._params = params or GoogleImageGenService.InputParams() + params = params or GoogleImageGenService.InputParams() + super().__init__(settings=GoogleImageGenSettings(model=params.model), **kwargs) + self._params = params # Add client header http_options = update_google_client_http_options(http_options) self._client = genai.Client(api_key=api_key, http_options=http_options) - self.set_model_name(self._params.model) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. diff --git a/src/pipecat/services/google/llm.py b/src/pipecat/services/google/llm.py index 563acadb3..37ccfae9a 100644 --- a/src/pipecat/services/google/llm.py +++ b/src/pipecat/services/google/llm.py @@ -15,8 +15,8 @@ import io import json import os import uuid -from dataclasses import dataclass -from typing import Any, AsyncIterator, Dict, List, Literal, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncIterator, ClassVar, Dict, List, Literal, Optional from loguru import logger from PIL import Image @@ -39,7 +39,6 @@ from pipecat.frames.frames import ( LLMThoughtEndFrame, LLMThoughtStartFrame, LLMThoughtTextFrame, - LLMUpdateSettingsFrame, ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext @@ -59,6 +58,7 @@ from pipecat.services.openai.llm import ( OpenAIAssistantContextAggregator, OpenAIUserContextAggregator, ) +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven, is_given from pipecat.utils.tracing.service_decorators import traced_llm # Suppress gRPC fork warnings @@ -673,6 +673,62 @@ class GoogleLLMContext(OpenAILLMContext): self._messages = [m for m in self._messages if m.parts] +class GoogleThinkingConfig(BaseModel): + """Configuration for controlling the model's internal "thinking" process used before generating a response. + + Gemini 2.5 and 3 series models have this thinking process. 
+ + Parameters: + thinking_level: Thinking level for Gemini 3 models. + For Gemini 3 Pro, this can be "low" or "high". + For Gemini 3 Flash, this can be "minimal", "low", "medium", or "high". + If not provided, Gemini 3 models default to "high". + Note: Gemini 2.5 series must use thinking_budget instead. + thinking_budget: Token budget for thinking, for Gemini 2.5 series. + -1 for dynamic thinking (model decides), 0 to disable thinking, + or a specific token count (e.g., 128-32768 for 2.5 Pro). + If not provided, most models today default to dynamic thinking. + See https://ai.google.dev/gemini-api/docs/thinking#set-budget + for default values and allowed ranges. + Note: Gemini 3 models must use thinking_level instead. + include_thoughts: Whether to include thought summaries in the response. + Today's models default to not including thoughts (False). + """ + + thinking_budget: Optional[int] = Field(default=None) + + # Why `| str` here? To not break compatibility in case Google adds more + # levels in the future. + thinking_level: Optional[Literal["low", "high", "medium", "minimal"] | str] = Field( + default=None + ) + + include_thoughts: Optional[bool] = Field(default=None) + + +@dataclass +class GoogleLLMSettings(LLMSettings): + """Settings for Google LLM services. + + Parameters: + thinking: Thinking configuration. + """ + + thinking: GoogleThinkingConfig | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + @classmethod + def from_mapping(cls, settings): + """Convert a plain dict to settings, coercing thinking dicts. + + For backward compatibility, a ``thinking`` value that is a plain dict + is converted to a :class:`GoogleThinkingConfig`. + """ + instance = super().from_mapping(settings) + if is_given(instance.thinking) and isinstance(instance.thinking, dict): + instance.thinking = GoogleThinkingConfig(**instance.thinking) + return instance + + class GoogleLLMService(LLMService): """Google AI (Gemini) LLM service implementation. 
@@ -681,40 +737,13 @@ class GoogleLLMService(LLMService): expected by the Google AI model. """ + _settings: GoogleLLMSettings + # Overriding the default adapter to use the Gemini one. adapter_class = GeminiLLMAdapter - class ThinkingConfig(BaseModel): - """Configuration for controlling the model's internal "thinking" process used before generating a response. - - Gemini 2.5 and 3 series models have this thinking process. - - Parameters: - thinking_level: Thinking level for Gemini 3 models. - For Gemini 3 Pro, this can be "low" or "high". - For Gemini 3 Flash, this can be "minimal", "low", "medium", or "high". - If not provided, Gemini 3 models default to "high". - Note: Gemini 2.5 series must use thinking_budget instead. - thinking_budget: Token budget for thinking, for Gemini 2.5 series. - -1 for dynamic thinking (model decides), 0 to disable thinking, - or a specific token count (e.g., 128-32768 for 2.5 Pro). - If not provided, most models today default to dynamic thinking. - See https://ai.google.dev/gemini-api/docs/thinking#set-budget - for default values and allowed ranges. - Note: Gemini 3 models must use thinking_level instead. - include_thoughts: Whether to include thought summaries in the response. - Today's models default to not including thoughts (False). - """ - - thinking_budget: Optional[int] = Field(default=None) - - # Why `| str` here? To not break compatibility in case Google adds more - # levels in the future. - thinking_level: Optional[Literal["low", "high", "medium", "minimal"] | str] = Field( - default=None - ) - - include_thoughts: Optional[bool] = Field(default=None) + # Backward compatibility: ThinkingConfig used to be defined inline here. + ThinkingConfig = GoogleThinkingConfig class InputParams(BaseModel): """Input parameters for Google AI models. 
@@ -737,7 +766,7 @@ class GoogleLLMService(LLMService): temperature: Optional[float] = Field(default=None, ge=0.0, le=2.0) top_k: Optional[int] = Field(default=None, ge=0) top_p: Optional[float] = Field(default=None, ge=0.0, le=1.0) - thinking: Optional["GoogleLLMService.ThinkingConfig"] = Field(default=None) + thinking: Optional[GoogleThinkingConfig] = Field(default=None) extra: Optional[Dict[str, Any]] = Field(default_factory=dict) def __init__( @@ -764,23 +793,29 @@ class GoogleLLMService(LLMService): http_options: HTTP options for the client. **kwargs: Additional arguments passed to parent class. """ - super().__init__(**kwargs) - params = params or GoogleLLMService.InputParams() - self.set_model_name(model) + super().__init__( + settings=GoogleLLMSettings( + model=model, + max_tokens=params.max_tokens, + temperature=params.temperature, + top_k=params.top_k, + top_p=params.top_p, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + thinking=params.thinking, + extra=params.extra if isinstance(params.extra, dict) else {}, + ), + **kwargs, + ) + self._api_key = api_key self._system_instruction = system_instruction self._http_options = update_google_client_http_options(http_options) - - self._settings = { - "max_tokens": params.max_tokens, - "temperature": params.temperature, - "top_k": params.top_k, - "top_p": params.top_p, - "thinking": params.thinking, - "extra": params.extra if isinstance(params.extra, dict) else {}, - } self._tools = tools self._tool_config = tool_config @@ -840,7 +875,7 @@ class GoogleLLMService(LLMService): # Use the new google-genai client's async method response = await self._client.aio.models.generate_content( - model=self._model_name, + model=self._settings.model, contents=messages, config=generation_config, ) @@ -874,10 +909,10 @@ class GoogleLLMService(LLMService): k: v for k, v in { "system_instruction": system_instruction, - "temperature": 
self._settings["temperature"], - "top_p": self._settings["top_p"], - "top_k": self._settings["top_k"], - "max_output_tokens": self._settings["max_tokens"], + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "top_k": self._settings.top_k, + "max_output_tokens": self._settings.max_tokens, "tools": tools, "tool_config": tool_config, }.items() @@ -885,13 +920,13 @@ class GoogleLLMService(LLMService): } # Add thinking parameters if configured - if self._settings["thinking"]: - generation_params["thinking_config"] = self._settings["thinking"].model_dump( + if self._settings.thinking: + generation_params["thinking_config"] = self._settings.thinking.model_dump( exclude_unset=True ) - if self._settings["extra"]: - generation_params.update(self._settings["extra"]) + if self._settings.extra: + generation_params.update(self._settings.extra) return generation_params @@ -900,10 +935,10 @@ class GoogleLLMService(LLMService): # There's no way to introspect on model capabilities, so # to check for models that we know default to thinkin on # and can be configured to turn it off. - if not self._model_name.startswith("gemini-2.5-flash"): + if not self._settings.model.startswith("gemini-2.5-flash"): return # If we have an image model, we don't use a budget either. - if "image" in self._model_name: + if "image" in self._settings.model: return # If thinking_config is already set, don't override it. 
if "thinking_config" in generation_params: @@ -944,7 +979,7 @@ class GoogleLLMService(LLMService): await self.start_ttfb_metrics() return await self._client.aio.models.generate_content_stream( - model=self._model_name, + model=self._settings.model, contents=messages, config=generation_config, ) @@ -1190,8 +1225,6 @@ class GoogleLLMService(LLMService): # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal # LLMContext with it context = GoogleLLMContext(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) else: await self.push_frame(frame, direction) @@ -1215,14 +1248,6 @@ class GoogleLLMService(LLMService): # Do nothing - we're shutting down anyway pass - async def _update_settings(self, settings): - """Override to handle ThinkingConfig validation.""" - # Convert thinking dict to ThinkingConfig if needed - if "thinking" in settings and isinstance(settings["thinking"], dict): - settings = dict(settings) # Make a copy to avoid modifying the original - settings["thinking"] = self.ThinkingConfig(**settings["thinking"]) - await super()._update_settings(settings) - def create_context_aggregator( self, context: OpenAILLMContext, diff --git a/src/pipecat/services/google/stt.py b/src/pipecat/services/google/stt.py index 23396b0b8..95d91d462 100644 --- a/src/pipecat/services/google/stt.py +++ b/src/pipecat/services/google/stt.py @@ -15,13 +15,15 @@ import asyncio import json import os import time +import warnings +from dataclasses import dataclass, field from pipecat.utils.tracing.service_decorators import traced_stt # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" -from typing import AsyncGenerator, List, Optional, Union +from typing import Any, AsyncGenerator, List, Optional, Union from loguru import logger from pydantic import BaseModel, Field, field_validator @@ -34,6 +36,7 @@ from pipecat.frames.frames import ( StartFrame, TranscriptionFrame, ) +from 
pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import GOOGLE_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -355,6 +358,46 @@ def language_to_google_stt_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class GoogleSTTSettings(STTSettings): + """Settings for Google Cloud Speech-to-Text V2. + + Parameters: + languages: List of ``Language`` enums for recognition + (e.g. ``[Language.EN_US]``). Preferred over ``language_codes``. + language_codes: List of Google STT language code strings + (e.g. ``["en-US"]``). + + .. deprecated:: 0.0.104 + Use ``languages`` instead. If both are provided, ``languages`` + takes precedence. This field is here just for backward + compatibility with dict-based settings updates. + use_separate_recognition_per_channel: Process each audio channel separately. + enable_automatic_punctuation: Add punctuation to transcripts. + enable_spoken_punctuation: Include spoken punctuation in transcript. + enable_spoken_emojis: Include spoken emojis in transcript. + profanity_filter: Filter profanity from transcript. + enable_word_time_offsets: Include timing information for each word. + enable_word_confidence: Include confidence scores for each word. + enable_interim_results: Stream partial recognition results. + enable_voice_activity_events: Detect voice activity in audio. 
+ """ + + languages: List[Language] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_codes: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + use_separate_recognition_per_channel: bool | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + enable_automatic_punctuation: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_spoken_punctuation: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_spoken_emojis: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + profanity_filter: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_word_time_offsets: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_word_confidence: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_interim_results: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_voice_activity_events: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class GoogleSTTService(STTService): """Google Cloud Speech-to-Text V2 service implementation. @@ -371,6 +414,8 @@ class GoogleSTTService(STTService): ValueError: If project ID is not found in credentials. """ + _settings: GoogleSTTSettings + # Google Cloud's STT service has a connection time limit of 5 minutes per stream. # They've shared an "endless streaming" example that guided this implementation: # https://cloud.google.com/speech-to-text/docs/transcribe-streaming-audio#endless-streaming @@ -454,10 +499,29 @@ class GoogleSTTService(STTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to STTService. 
""" - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - params = params or GoogleSTTService.InputParams() + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=GoogleSTTSettings( + language=None, + languages=list(params.language_list), + language_codes=None, + model=params.model, + use_separate_recognition_per_channel=params.use_separate_recognition_per_channel, + enable_automatic_punctuation=params.enable_automatic_punctuation, + enable_spoken_punctuation=params.enable_spoken_punctuation, + enable_spoken_emojis=params.enable_spoken_emojis, + profanity_filter=params.profanity_filter, + enable_word_time_offsets=params.enable_word_time_offsets, + enable_word_confidence=params.enable_word_confidence, + enable_interim_results=params.enable_interim_results, + enable_voice_activity_events=params.enable_voice_activity_events, + ), + **kwargs, + ) + self._location = location self._stream = None self._config = None @@ -508,22 +572,6 @@ class GoogleSTTService(STTService): self._client = speech_v2.SpeechAsyncClient(credentials=creds, client_options=client_options) - self._settings = { - "language_codes": [ - self.language_to_service_language(lang) for lang in params.language_list - ], - "model": params.model, - "use_separate_recognition_per_channel": params.use_separate_recognition_per_channel, - "enable_automatic_punctuation": params.enable_automatic_punctuation, - "enable_spoken_punctuation": params.enable_spoken_punctuation, - "enable_spoken_emojis": params.enable_spoken_emojis, - "profanity_filter": params.profanity_filter, - "enable_word_time_offsets": params.enable_word_time_offsets, - "enable_word_confidence": params.enable_word_confidence, - "enable_interim_results": params.enable_interim_results, - "enable_voice_activity_events": params.enable_voice_activity_events, - } - def can_generate_metrics(self) -> bool: """Check if the service can generate metrics. 
@@ -545,6 +593,21 @@ class GoogleSTTService(STTService): return [language_to_google_stt_language(lang) or "en-US" for lang in language] return language_to_google_stt_language(language) or "en-US" + def _get_language_codes(self) -> List[str]: + """Resolve the current language settings to Google STT language code strings. + + Prefers ``languages`` (``Language`` enums) over the deprecated + ``language_codes`` (raw strings). Falls back to ``["en-US"]``. + + Returns: + List[str]: Google STT language code strings. + """ + if self._settings.languages: + return [self.language_to_service_language(lang) for lang in self._settings.languages] + if self._settings.language_codes: + return list(self._settings.language_codes) + return ["en-US"] + async def _reconnect_if_needed(self): """Reconnect the stream if it's currently active.""" if self._streaming_task: @@ -552,41 +615,65 @@ class GoogleSTTService(STTService): await self._disconnect() await self._connect() - async def set_language(self, language: Language): - """Update the service's recognition language. - - A convenience method for setting a single language. - - Args: - language: New language for recognition. - """ - logger.debug(f"Switching STT language to: {language}") - await self.set_languages([language]) - async def set_languages(self, languages: List[Language]): """Update the service's recognition languages. + .. deprecated:: + Use ``STTUpdateSettingsFrame`` with ``GoogleSTTSettings(languages=...)`` + instead. + Args: languages: List of languages for recognition. First language is primary. """ + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "set_languages() is deprecated. Use STTUpdateSettingsFrame with " + "GoogleSTTSettings(languages=...) 
instead.", + DeprecationWarning, + ) logger.debug(f"Switching STT languages to: {languages}") - self._settings["language_codes"] = [ - self.language_to_service_language(lang) for lang in languages - ] - # Recreate stream with new languages - await self._reconnect_if_needed() + await self._update_settings(GoogleSTTSettings(languages=list(languages))) - async def set_model(self, model: str): - """Update the service's recognition model. + async def _update_settings(self, delta: GoogleSTTSettings) -> dict[str, Any]: + """Apply settings delta and reconnect if anything changed. + + Handles ``language`` from base ``set_language`` by converting it to + ``languages``. Emits a deprecation warning if ``language_codes`` is + used. All other fields (model, boolean flags) are applied directly. + Reconnects the stream on any change. Args: - model: The new recognition model to use. + delta: A settings delta. + + Returns: + Dict mapping changed field names to their previous values. """ - logger.debug(f"Switching STT model to: {model}") - await super().set_model(model) - self._settings["model"] = model - # Recreate stream with new model - await self._reconnect_if_needed() + from pipecat.services.settings import is_given + + # If base set_language sent a Language value, convert to languages list + if is_given(delta.language): + delta.languages = [delta.language] + # Clear language so the base class doesn't try to store it + delta.language = NOT_GIVEN + + # Warn on deprecated language_codes usage + if is_given(delta.language_codes): + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "GoogleSTTSettings.language_codes is deprecated. " + "Use GoogleSTTSettings.languages (List[Language]) instead.", + DeprecationWarning, + stacklevel=2, + ) + + changed = await super()._update_settings(delta) + + if changed: + await self._reconnect_if_needed() + + return changed async def start(self, frame: StartFrame): """Start the STT service and establish connection. 
@@ -632,6 +719,10 @@ class GoogleSTTService(STTService): ) -> None: """Update service options dynamically. + .. deprecated:: + Use ``STTUpdateSettingsFrame`` with ``GoogleSTTSettings(...)`` + instead. + Args: languages: New list of recognition languages. model: New recognition model. @@ -649,55 +740,42 @@ class GoogleSTTService(STTService): Changes that affect the streaming configuration will cause the stream to be reconnected. """ - # Update settings with new values + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "update_options() is deprecated. Use STTUpdateSettingsFrame with " + "GoogleSTTSettings(...) instead.", + DeprecationWarning, + ) + # Build a settings delta from the provided options + delta = GoogleSTTSettings() + if languages is not None: - logger.debug(f"Updating language to: {languages}") - self._settings["language_codes"] = [ - self.language_to_service_language(lang) for lang in languages - ] - + delta.languages = list(languages) if model is not None: - logger.debug(f"Updating model to: {model}") - self._settings["model"] = model - + delta.model = model if enable_automatic_punctuation is not None: - logger.debug(f"Updating automatic punctuation to: {enable_automatic_punctuation}") - self._settings["enable_automatic_punctuation"] = enable_automatic_punctuation - + delta.enable_automatic_punctuation = enable_automatic_punctuation if enable_spoken_punctuation is not None: - logger.debug(f"Updating spoken punctuation to: {enable_spoken_punctuation}") - self._settings["enable_spoken_punctuation"] = enable_spoken_punctuation - + delta.enable_spoken_punctuation = enable_spoken_punctuation if enable_spoken_emojis is not None: - logger.debug(f"Updating spoken emojis to: {enable_spoken_emojis}") - self._settings["enable_spoken_emojis"] = enable_spoken_emojis - + delta.enable_spoken_emojis = enable_spoken_emojis if profanity_filter is not None: - logger.debug(f"Updating profanity filter to: {profanity_filter}") - 
self._settings["profanity_filter"] = profanity_filter - + delta.profanity_filter = profanity_filter if enable_word_time_offsets is not None: - logger.debug(f"Updating word time offsets to: {enable_word_time_offsets}") - self._settings["enable_word_time_offsets"] = enable_word_time_offsets - + delta.enable_word_time_offsets = enable_word_time_offsets if enable_word_confidence is not None: - logger.debug(f"Updating word confidence to: {enable_word_confidence}") - self._settings["enable_word_confidence"] = enable_word_confidence - + delta.enable_word_confidence = enable_word_confidence if enable_interim_results is not None: - logger.debug(f"Updating interim results to: {enable_interim_results}") - self._settings["enable_interim_results"] = enable_interim_results - + delta.enable_interim_results = enable_interim_results if enable_voice_activity_events is not None: - logger.debug(f"Updating voice activity events to: {enable_voice_activity_events}") - self._settings["enable_voice_activity_events"] = enable_voice_activity_events + delta.enable_voice_activity_events = enable_voice_activity_events if location is not None: logger.debug(f"Updating location to: {location}") self._location = location - # Reconnect the stream for updates - await self._reconnect_if_needed() + await self._update_settings(delta) async def _connect(self): """Initialize streaming recognition config and stream.""" @@ -714,20 +792,20 @@ class GoogleSTTService(STTService): sample_rate_hertz=self.sample_rate, audio_channel_count=1, ), - language_codes=self._settings["language_codes"], - model=self._settings["model"], + language_codes=self._get_language_codes(), + model=self._settings.model, features=cloud_speech.RecognitionFeatures( - enable_automatic_punctuation=self._settings["enable_automatic_punctuation"], - enable_spoken_punctuation=self._settings["enable_spoken_punctuation"], - enable_spoken_emojis=self._settings["enable_spoken_emojis"], - profanity_filter=self._settings["profanity_filter"], - 
enable_word_time_offsets=self._settings["enable_word_time_offsets"], - enable_word_confidence=self._settings["enable_word_confidence"], + enable_automatic_punctuation=self._settings.enable_automatic_punctuation, + enable_spoken_punctuation=self._settings.enable_spoken_punctuation, + enable_spoken_emojis=self._settings.enable_spoken_emojis, + profanity_filter=self._settings.profanity_filter, + enable_word_time_offsets=self._settings.enable_word_time_offsets, + enable_word_confidence=self._settings.enable_word_confidence, ), ), streaming_features=cloud_speech.StreamingRecognitionFeatures( - enable_voice_activity_events=self._settings["enable_voice_activity_events"], - interim_results=self._settings["enable_interim_results"], + enable_voice_activity_events=self._settings.enable_voice_activity_events, + interim_results=self._settings.enable_interim_results, ), ) @@ -857,7 +935,7 @@ class GoogleSTTService(STTService): if not transcript: continue - primary_language = self._settings["language_codes"][0] + primary_language = self._get_language_codes()[0] if result.is_final: self._last_transcript_was_final = True diff --git a/src/pipecat/services/google/tts.py b/src/pipecat/services/google/tts.py index 4016286df..6c71977a0 100644 --- a/src/pipecat/services/google/tts.py +++ b/src/pipecat/services/google/tts.py @@ -23,7 +23,8 @@ from pipecat.utils.tracing.service_decorators import traced_tts # Suppress gRPC fork warnings os.environ["GRPC_ENABLE_FORK_SUPPORT"] = "false" -from typing import Any, AsyncGenerator, List, Literal, Mapping, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Dict, List, Literal, Optional from loguru import logger from pydantic import BaseModel @@ -36,6 +37,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven, is_given from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import 
Language, resolve_language @@ -474,6 +476,71 @@ def language_to_gemini_tts_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class GoogleHttpTTSSettings(TTSSettings): + """Settings for Google HTTP TTS service. + + Parameters: + pitch: Voice pitch adjustment (e.g., "+2st", "-50%"). + rate: Speaking rate adjustment (e.g., "slow", "fast", "125%"). Used for + SSML prosody tags (non-Chirp voices). + speaking_rate: Speaking rate for AudioConfig (Chirp/Journey voices). + Range [0.25, 2.0]. + volume: Volume adjustment (e.g., "loud", "soft", "+6dB"). + emphasis: Emphasis level for the text. + language: Language for synthesis. Defaults to English. + gender: Voice gender preference. + google_style: Google-specific voice style. + """ + + pitch: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + rate: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + volume: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + emphasis: Literal["strong", "moderate", "reduced", "none"] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + language: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + gender: Literal["male", "female", "neutral"] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + google_style: ( + Literal["apologetic", "calm", "empathetic", "firm", "lively"] | None | _NotGiven + ) = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class GoogleStreamTTSSettings(TTSSettings): + """Settings for Google streaming TTS service. + + Parameters: + language: Language for synthesis. Defaults to English. + speaking_rate: The speaking rate, in the range [0.25, 2.0]. 
+ """ + + language: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class GeminiTTSSettings(TTSSettings): + """Settings for Gemini TTS service. + + Parameters: + language: Language for synthesis. Defaults to English. + prompt: Optional style instructions for how to synthesize the content. + multi_speaker: Whether to enable multi-speaker support. + speaker_configs: List of speaker configurations for multi-speaker mode. + """ + + language: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prompt: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + multi_speaker: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker_configs: list[dict[str, Any]] | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + class GoogleHttpTTSService(TTSService): """Google Cloud Text-to-Speech HTTP service with SSML support. @@ -488,6 +555,8 @@ class GoogleHttpTTSService(TTSService): Chirp and Journey voices don't support SSML and will use plain text input. """ + _settings: GoogleHttpTTSSettings + class InputParams(BaseModel): """Input parameters for Google HTTP TTS voice customization. @@ -533,24 +602,28 @@ class GoogleHttpTTSService(TTSService): params: Voice customization parameters including pitch, rate, volume, etc. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or GoogleHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=GoogleHttpTTSSettings( + model=None, + pitch=params.pitch, + rate=params.rate, + speaking_rate=params.speaking_rate, + volume=params.volume, + emphasis=params.emphasis, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + gender=params.gender, + google_style=params.google_style, + voice=voice_id, + ), + **kwargs, + ) + self._location = location - self._settings = { - "pitch": params.pitch, - "rate": params.rate, - "speaking_rate": params.speaking_rate, - "volume": params.volume, - "emphasis": params.emphasis, - "language": self.language_to_service_language(params.language) - if params.language - else "en-US", - "gender": params.gender, - "google_style": params.google_style, - } - self.set_voice(voice_id) self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) @@ -619,61 +692,60 @@ class GoogleHttpTTSService(TTSService): """ return language_to_google_tts_language(language) - async def _update_settings(self, settings: Mapping[str, Any]): - """Override to handle speaking_rate updates for Chirp/Journey voices. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Override to handle speaking_rate validation. Args: - settings: Dictionary of settings to update. Can include 'speaking_rate' (float) + delta: Settings delta. Can include 'speaking_rate' (float). """ - if "speaking_rate" in settings: - rate_value = float(settings["speaking_rate"]) - if 0.25 <= rate_value <= 2.0: - self._settings["speaking_rate"] = rate_value - else: + if isinstance(delta, GoogleHttpTTSSettings) and is_given(delta.speaking_rate): + rate_value = float(delta.speaking_rate) + if not (0.25 <= rate_value <= 2.0): logger.warning( f"Invalid speaking_rate value: {rate_value}. 
Must be between 0.25 and 2.0" ) - await super()._update_settings(settings) + delta.speaking_rate = NOT_GIVEN + return await super()._update_settings(delta) def _construct_ssml(self, text: str) -> str: ssml = "" # Voice tag - voice_attrs = [f"name='{self._voice_id}'"] + voice_attrs = [f"name='{self._settings.voice}'"] - language = self._settings["language"] + language = self._settings.language voice_attrs.append(f"language='{language}'") - if self._settings["gender"]: - voice_attrs.append(f"gender='{self._settings['gender']}'") + if self._settings.gender: + voice_attrs.append(f"gender='{self._settings.gender}'") ssml += f"" # Prosody tag prosody_attrs = [] - if self._settings["pitch"]: - prosody_attrs.append(f"pitch='{self._settings['pitch']}'") - if self._settings["rate"]: - prosody_attrs.append(f"rate='{self._settings['rate']}'") - if self._settings["volume"]: - prosody_attrs.append(f"volume='{self._settings['volume']}'") + if self._settings.pitch: + prosody_attrs.append(f"pitch='{self._settings.pitch}'") + if self._settings.rate: + prosody_attrs.append(f"rate='{self._settings.rate}'") + if self._settings.volume: + prosody_attrs.append(f"volume='{self._settings.volume}'") if prosody_attrs: ssml += f"" # Emphasis tag - if self._settings["emphasis"]: - ssml += f"" + if self._settings.emphasis: + ssml += f"" # Google style tag - if self._settings["google_style"]: - ssml += f"" + if self._settings.google_style: + ssml += f"" ssml += text # Close tags - if self._settings["google_style"]: + if self._settings.google_style: ssml += "" - if self._settings["emphasis"]: + if self._settings.emphasis: ssml += "" if prosody_attrs: ssml += "" @@ -698,8 +770,8 @@ class GoogleHttpTTSService(TTSService): await self.start_ttfb_metrics() # Check if the voice is a Chirp voice (including Chirp 3) or Journey voice - is_chirp_voice = "chirp" in self._voice_id.lower() - is_journey_voice = "journey" in self._voice_id.lower() + is_chirp_voice = "chirp" in self._settings.voice.lower() + 
is_journey_voice = "journey" in self._settings.voice.lower() # Create synthesis input based on voice_id if is_chirp_voice or is_journey_voice: @@ -710,7 +782,7 @@ class GoogleHttpTTSService(TTSService): synthesis_input = texttospeech_v1.SynthesisInput(ssml=ssml) voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], name=self._voice_id + language_code=self._settings.language, name=self._settings.voice ) # Build audio config with conditional speaking_rate audio_config_params = { @@ -719,8 +791,8 @@ class GoogleHttpTTSService(TTSService): } # For Chirp and Journey voices, include speaking_rate in AudioConfig - if (is_chirp_voice or is_journey_voice) and self._settings["speaking_rate"] is not None: - audio_config_params["speaking_rate"] = self._settings["speaking_rate"] + if (is_chirp_voice or is_journey_voice) and self._settings.speaking_rate is not None: + audio_config_params["speaking_rate"] = self._settings.speaking_rate audio_config = texttospeech_v1.AudioConfig(**audio_config_params) @@ -910,6 +982,8 @@ class GoogleTTSService(GoogleBaseTTSService): ) """ + _settings: GoogleStreamTTSSettings + class InputParams(BaseModel): """Input parameters for Google streaming TTS configuration. @@ -945,38 +1019,41 @@ class GoogleTTSService(GoogleBaseTTSService): params: Language configuration parameters. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or GoogleTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=GoogleStreamTTSSettings( + model=None, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + speaking_rate=params.speaking_rate, + voice=voice_id, + ), + **kwargs, + ) + self._location = location - self._settings = { - "language": self.language_to_service_language(params.language) - if params.language - else "en-US", - "speaking_rate": params.speaking_rate, - } - self.set_voice(voice_id) self._voice_cloning_key = voice_cloning_key self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) - async def _update_settings(self, settings: Mapping[str, Any]): - """Override to handle speaking_rate updates for streaming API. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Override to handle speaking_rate validation. Args: - settings: Dictionary of settings to update. Can include 'speaking_rate' (float) + delta: Settings delta. Can include 'speaking_rate' (float). """ - if "speaking_rate" in settings: - rate_value = float(settings["speaking_rate"]) - if 0.25 <= rate_value <= 2.0: - self._settings["speaking_rate"] = rate_value - else: + if isinstance(delta, GoogleStreamTTSSettings) and is_given(delta.speaking_rate): + rate_value = float(delta.speaking_rate) + if not (0.25 <= rate_value <= 2.0): logger.warning( f"Invalid speaking_rate value: {rate_value}. 
Must be between 0.25 and 2.0" ) - await super()._update_settings(settings) + delta.speaking_rate = NOT_GIVEN + return await super()._update_settings(delta) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -1000,11 +1077,11 @@ class GoogleTTSService(GoogleBaseTTSService): voice_cloning_key=self._voice_cloning_key ) voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], voice_clone=voice_clone_params + language_code=self._settings.language, voice_clone=voice_clone_params ) else: voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], name=self._voice_id + language_code=self._settings.language, name=self._settings.voice ) # Create streaming config @@ -1013,7 +1090,7 @@ class GoogleTTSService(GoogleBaseTTSService): streaming_audio_config=texttospeech_v1.StreamingAudioConfig( audio_encoding=texttospeech_v1.AudioEncoding.PCM, sample_rate_hertz=self.sample_rate, - speaking_rate=self._settings["speaking_rate"], + speaking_rate=self._settings.speaking_rate, ), ) @@ -1052,6 +1129,8 @@ class GeminiTTSService(GoogleBaseTTSService): ) """ + _settings: GeminiTTSSettings + GOOGLE_SAMPLE_RATE = 24000 # Google TTS always outputs at 24kHz # List of available Gemini TTS voices @@ -1149,25 +1228,27 @@ class GeminiTTSService(GoogleBaseTTSService): f"Google TTS only supports {self.GOOGLE_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." ) - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or GeminiTTSService.InputParams() if voice_id not in self.AVAILABLE_VOICES: logger.warning(f"Voice '{voice_id}' not in known voices list. 
Using anyway.") - self._location = location - self._model = model - self._voice_id = voice_id - self._settings = { - "language": self.language_to_service_language(params.language) - if params.language - else "en-US", - "prompt": params.prompt, - "multi_speaker": params.multi_speaker, - "speaker_configs": params.speaker_configs, - } + super().__init__( + sample_rate=sample_rate, + settings=GeminiTTSSettings( + model=model, + language=self.language_to_service_language(params.language) + if params.language + else "en-US", + prompt=params.prompt, + multi_speaker=params.multi_speaker, + speaker_configs=params.speaker_configs, + voice=voice_id, + ), + **kwargs, + ) + self._location = location self._client: texttospeech_v1.TextToSpeechAsyncClient = self._create_client( credentials, credentials_path ) @@ -1183,16 +1264,6 @@ class GeminiTTSService(GoogleBaseTTSService): """ return language_to_gemini_tts_language(language) - def set_voice(self, voice_id: str): - """Set the voice for TTS generation. - - Args: - voice_id: Name of the voice to use from AVAILABLE_VOICES. - """ - if voice_id not in self.AVAILABLE_VOICES: - logger.warning(f"Voice '{voice_id}' not in known voices list. Using anyway.") - self._voice_id = voice_id - async def start(self, frame: StartFrame): """Start the Gemini TTS service. @@ -1206,15 +1277,19 @@ class GeminiTTSService(GoogleBaseTTSService): f"Current rate of {self.sample_rate}Hz may cause issues." ) - async def _update_settings(self, settings: Mapping[str, Any]): - """Override to handle prompt updates. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta with voice validation. Args: - settings: Dictionary of settings to update. Can include 'prompt' (str) + delta: Settings delta. Can include 'voice', 'prompt', etc. + + Returns: + Dict mapping changed field names to their previous values. 
""" - if "prompt" in settings: - self._settings["prompt"] = settings["prompt"] - await super()._update_settings(settings) + if is_given(delta.voice) and delta.voice not in self.AVAILABLE_VOICES: + logger.warning(f"Voice '{delta.voice}' not in known voices list. Using anyway.") + + return await super()._update_settings(delta) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -1234,14 +1309,14 @@ class GeminiTTSService(GoogleBaseTTSService): await self.start_ttfb_metrics() # Build voice selection params - if self._settings["multi_speaker"] and self._settings["speaker_configs"]: + if self._settings.multi_speaker and self._settings.speaker_configs: # Multi-speaker mode speaker_voice_configs = [] - for speaker_config in self._settings["speaker_configs"]: + for speaker_config in self._settings.speaker_configs: speaker_voice_configs.append( texttospeech_v1.MultispeakerPrebuiltVoice( speaker_alias=speaker_config["speaker_alias"], - speaker_id=speaker_config.get("speaker_id", self._voice_id), + speaker_id=speaker_config.get("speaker_id", self._settings.voice), ) ) @@ -1250,16 +1325,16 @@ class GeminiTTSService(GoogleBaseTTSService): ) voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], - model_name=self._model, + language_code=self._settings.language, + model_name=self._settings.model, multi_speaker_voice_config=multi_speaker_voice_config, ) else: # Single speaker mode voice = texttospeech_v1.VoiceSelectionParams( - language_code=self._settings["language"], - name=self._voice_id, - model_name=self._model, + language_code=self._settings.language, + name=self._settings.voice, + model_name=self._settings.model, ) # Create streaming config @@ -1273,7 +1348,7 @@ class GeminiTTSService(GoogleBaseTTSService): # Use base class streaming logic with prompt support async for frame in self._stream_tts( - streaming_config, text, context_id, self._settings["prompt"] + streaming_config, text, 
context_id, self._settings.prompt ): yield frame diff --git a/src/pipecat/services/gradium/stt.py b/src/pipecat/services/gradium/stt.py index 7433c2549..ac35c6e52 100644 --- a/src/pipecat/services/gradium/stt.py +++ b/src/pipecat/services/gradium/stt.py @@ -12,7 +12,8 @@ WebSocket API for streaming audio transcription. import base64 import json -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel @@ -27,6 +28,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import GRADIUM_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -64,6 +66,18 @@ def language_to_gradium_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class GradiumSTTSettings(STTSettings): + """Settings for the Gradium STT service. + + Parameters: + delay_in_frames: Delay in audio frames (80ms each) before text is + generated. Higher delays allow more context but increase latency. + """ + + delay_in_frames: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class GradiumSTTService(WebsocketSTTService): """Gradium real-time speech-to-text service. @@ -72,6 +86,8 @@ class GradiumSTTService(WebsocketSTTService): for audio processing and connection management. """ + _settings: GradiumSTTSettings + class InputParams(BaseModel): """Configuration parameters for Gradium STT API. @@ -113,8 +129,6 @@ class GradiumSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to parent STTService class. 
""" - super().__init__(sample_rate=SAMPLE_RATE, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - if json_config is not None: import warnings @@ -124,10 +138,22 @@ class GradiumSTTService(WebsocketSTTService): stacklevel=2, ) + params = params or GradiumSTTService.InputParams() + + super().__init__( + sample_rate=SAMPLE_RATE, + ttfs_p99_latency=ttfs_p99_latency, + settings=GradiumSTTSettings( + model=None, + language=params.language, + delay_in_frames=params.delay_in_frames or None, + ), + **kwargs, + ) + self._api_key = api_key self._api_endpoint_base_url = api_endpoint_base_url self._websocket = None - self._params = params or GradiumSTTService.InputParams() self._json_config = json_config self._receive_task = None @@ -149,16 +175,22 @@ class GradiumSTTService(WebsocketSTTService): """ return True - async def set_language(self, language: Language): - """Set the recognition language and reconnect. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta, sync params, and reconnect. Args: - language: The language to use for speech recognition. + delta: A :class:`STTSettings` (or ``GradiumSTTSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. """ - logger.info(f"Switching STT language to: [{language}]") - self._params.language = language + changed = await super()._update_settings(delta) + if not changed: + return changed + await self._disconnect() await self._connect() + return changed async def start(self, frame: StartFrame): """Start the speech-to-text service. 
@@ -298,12 +330,12 @@ class GradiumSTTService(WebsocketSTTService): json_config = {} if self._json_config: json_config = json.loads(self._json_config) - if self._params.language: - gradium_language = language_to_gradium_language(self._params.language) + if self._settings.language: + gradium_language = language_to_gradium_language(self._settings.language) if gradium_language: json_config["language"] = gradium_language - if self._params.delay_in_frames: - json_config["delay_in_frames"] = self._params.delay_in_frames + if self._settings.delay_in_frames: + json_config["delay_in_frames"] = self._settings.delay_in_frames if json_config: setup_msg["json_config"] = json_config await self._websocket.send(json.dumps(setup_msg)) diff --git a/src/pipecat/services/gradium/tts.py b/src/pipecat/services/gradium/tts.py index 98e08a9d3..c8a83a7f2 100644 --- a/src/pipecat/services/gradium/tts.py +++ b/src/pipecat/services/gradium/tts.py @@ -6,7 +6,8 @@ import base64 import json -from typing import Any, AsyncGenerator, Mapping, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger from pydantic import BaseModel @@ -16,14 +17,13 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, StartFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import AudioContextWordTTSService +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven +from pipecat.services.tts_service import AudioContextTTSService from pipecat.utils.tracing.service_decorators import traced_tts try: @@ -38,9 +38,22 @@ except ModuleNotFoundError as e: SAMPLE_RATE = 48000 -class GradiumTTSService(AudioContextWordTTSService): +@dataclass +class GradiumTTSSettings(TTSSettings): + """Settings for the Gradium TTS service. + + Parameters: + output_format: Audio output format. 
+ """ + + output_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + +class GradiumTTSService(AudioContextTTSService): """Text-to-Speech service using Gradium's websocket API.""" + _settings: GradiumTTSSettings + class InputParams(BaseModel): """Configuration parameters for Gradium TTS service. @@ -72,27 +85,27 @@ class GradiumTTSService(AudioContextWordTTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent class. """ + params = params or GradiumTTSService.InputParams() + super().__init__( push_stop_frames=True, push_text_frames=False, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=SAMPLE_RATE, + settings=GradiumTTSSettings( + model=model, + voice=voice_id, + language=None, + output_format="pcm", + ), **kwargs, ) - params = params or GradiumTTSService.InputParams() - # Store service configuration self._api_key = api_key self._url = url - self._voice_id = voice_id self._json_config = json_config - self._model = model - self._settings = { - "voice_id": voice_id, - "model_name": model, - "output_format": "pcm", - } # State tracking self._receive_task = None @@ -105,24 +118,22 @@ class GradiumTTSService(AudioContextWordTTSService): """ return True - async def set_model(self, model: str): - """Update the TTS model. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if voice changed. Args: - model: The model name to use for synthesis. - """ - self._model = model - await super().set_model(model) + delta: A :class:`TTSSettings` (or ``GradiumTTSSettings``) delta. 
- async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if voice changed.""" - prev_voice = self._voice_id - await super()._update_settings(settings) - if not prev_voice == self._voice_id: - self._settings["voice_id"] = self._voice_id - logger.info(f"Switching TTS voice to: [{self._voice_id}]") + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(delta) + if "voice" in changed: await self._disconnect() await self._connect() + else: + self._warn_unhandled_updated_settings(changed) + return changed def _build_msg(self, text: str = "") -> dict: """Build JSON message for Gradium API.""" @@ -200,7 +211,7 @@ class GradiumTTSService(AudioContextWordTTSService): setup_msg = { "type": "setup", "output_format": "pcm", - "voice_id": self._voice_id, + "voice_id": self._settings.voice, "close_ws_on_eos": False, } if self._json_config is not None: @@ -252,21 +263,24 @@ class GradiumTTSService(AudioContextWordTTSService): except Exception as e: logger.error(f"{self} exception: {e}") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by resetting context state. + async def on_audio_context_interrupted(self, context_id: str): + """Called when an audio context is cancelled due to an interruption. - The parent AudioContextTTSService._handle_interruption() cancels the audio context - task and creates a new one. We reset _context_id so the next run_tts() creates a - fresh context. No websocket reconnection needed — audio from the old client_req_id - will be silently dropped since the audio context no longer exists. - - Args: - frame: The interruption frame. - direction: The direction of the frame. + No WebSocket message is needed — audio from the interrupted + ``client_req_id`` will be silently dropped by the base class once the + audio context no longer exists. 
""" - await super()._handle_interruption(frame, direction) await self.stop_all_metrics() + async def on_audio_context_completed(self, context_id: str): + """Called after an audio context has finished playing all of its audio. + + No close message is needed: Gradium signals completion with an + ``end_of_stream`` message (handled in ``_receive_messages``), after + which the server-side context is already closed. + """ + pass + async def _receive_messages(self): """Process incoming websocket messages, demultiplexing by client_req_id.""" # TODO(laurent): This should not be necessary as it should happen when diff --git a/src/pipecat/services/grok/realtime/llm.py b/src/pipecat/services/grok/realtime/llm.py index e1355ce31..7a4e73806 100644 --- a/src/pipecat/services/grok/realtime/llm.py +++ b/src/pipecat/services/grok/realtime/llm.py @@ -13,8 +13,8 @@ https://docs.x.ai/docs/guides/voice/agent import base64 import json import time -from dataclasses import dataclass -from typing import Optional +from dataclasses import dataclass, field +from typing import Any, Optional from loguru import logger @@ -56,6 +56,7 @@ from pipecat.processors.aggregators.llm_response_universal import ( from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.utils.time import time_now_iso8601 from . import events @@ -85,6 +86,19 @@ class CurrentAudioResponse: total_size: int = 0 +@dataclass +class GrokRealtimeLLMSettings(LLMSettings): + """Settings for Grok Realtime LLM services. + + Parameters: + session_properties: Grok Realtime session configuration. 
+ """ + + session_properties: events.SessionProperties | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + class GrokRealtimeLLMService(LLMService): """Grok Realtime Voice Agent LLM service providing real-time audio and text communication. @@ -101,6 +115,8 @@ class GrokRealtimeLLMService(LLMService): - Server-side VAD (Voice Activity Detection) """ + _settings: GrokRealtimeLLMSettings + # Use the Grok-specific adapter adapter_class = GrokRealtimeLLMAdapter @@ -129,16 +145,27 @@ class GrokRealtimeLLMService(LLMService): start_audio_paused: Whether to start with audio input paused. Defaults to False. **kwargs: Additional arguments passed to parent LLMService. """ - super().__init__(base_url=base_url, **kwargs) + super().__init__( + base_url=base_url, + settings=GrokRealtimeLLMSettings( + model=None, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + session_properties=session_properties or events.SessionProperties(), + ), + **kwargs, + ) self.api_key = api_key self.base_url = base_url - # Initialize session_properties - self._session_properties: events.SessionProperties = ( - session_properties or events.SessionProperties() - ) - self._audio_input_paused = start_audio_paused self._websocket = None self._receive_task = None @@ -186,13 +213,13 @@ class GrokRealtimeLLMService(LLMService): Configured sample rate or None if not manually configured. For PCMU/PCMA formats, returns 8000 Hz (G.711 standard). 
""" - if not self._session_properties.audio: + if not self._settings.session_properties.audio: return None audio_config = ( - self._session_properties.audio.input + self._settings.session_properties.audio.input if direction == "input" - else self._session_properties.audio.output + else self._settings.session_properties.audio.output ) if audio_config and audio_config.format: @@ -222,8 +249,8 @@ class GrokRealtimeLLMService(LLMService): def _is_turn_detection_enabled(self) -> bool: """Check if server-side VAD is enabled.""" - if self._session_properties.turn_detection: - return self._session_properties.turn_detection.type == "server_vad" + if self._settings.session_properties.turn_detection: + return self._settings.session_properties.turn_detection.type == "server_vad" return False async def _handle_interruption(self): @@ -281,6 +308,27 @@ class GrokRealtimeLLMService(LLMService): # Standard AIService frame handling # + def _ensure_audio_config(self, input_sample_rate: int, output_sample_rate: int): + """Ensure session_properties.audio has input and output configs. + + Fills in any missing audio configuration using the given sample rates. + + Args: + input_sample_rate: Sample rate for audio input (Hz). + output_sample_rate: Sample rate for audio output (Hz). + """ + props = self._settings.session_properties + if not props.audio: + props.audio = events.AudioConfiguration() + if not props.audio.input: + props.audio.input = events.AudioInput( + format=events.PCMAudioFormat(rate=input_sample_rate) + ) + if not props.audio.output: + props.audio.output = events.AudioOutput( + format=events.PCMAudioFormat(rate=output_sample_rate) + ) + async def start(self, frame: StartFrame): """Start the service and establish WebSocket connection. @@ -288,23 +336,7 @@ class GrokRealtimeLLMService(LLMService): frame: The start frame triggering service initialization. 
""" await super().start(frame) - - # Ensure audio configuration exists with both input and output - if not self._session_properties.audio: - self._session_properties.audio = events.AudioConfiguration() - - # Fill in missing input configuration - if not self._session_properties.audio.input: - self._session_properties.audio.input = events.AudioInput( - format=events.PCMAudioFormat(rate=frame.audio_in_sample_rate) - ) - - # Fill in missing output configuration - if not self._session_properties.audio.output: - self._session_properties.audio.output = events.AudioOutput( - format=events.PCMAudioFormat(rate=frame.audio_out_sample_rate) - ) - + self._ensure_audio_config(frame.audio_in_sample_rate, frame.audio_out_sample_rate) await self._connect() async def stop(self, frame: EndFrame): @@ -336,6 +368,16 @@ class GrokRealtimeLLMService(LLMService): frame: The frame to process. direction: The direction of frame flow in the pipeline. """ + # Backward-compatible dict path: frame.settings contains SessionProperties + # fields, not our Settings fields, so we construct SessionProperties + # directly. The frame.delta path falls through to super, which calls + # _update_settings → our override handles the rest. 
+ if isinstance(frame, LLMUpdateSettingsFrame) and frame.delta is None: + self._settings.session_properties = events.SessionProperties(**frame.settings) + await self._send_session_update() + await self.push_frame(frame, direction) + return + await super().process_frame(frame, direction) if isinstance(frame, TranscriptionFrame): @@ -355,11 +397,8 @@ class GrokRealtimeLLMService(LLMService): await self._handle_bot_stopped_speaking() elif isinstance(frame, LLMMessagesAppendFrame): await self._handle_messages_append(frame) - elif isinstance(frame, LLMUpdateSettingsFrame): - self._session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() elif isinstance(frame, LLMSetToolsFrame): - await self._update_settings() + await self._send_session_update() await self.push_frame(frame, direction) @@ -436,9 +475,30 @@ class GrokRealtimeLLMService(LLMService): return await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings(self): + async def _update_settings(self, delta): + """Apply a settings delta, sending a session update if needed.""" + # Capture current sample rates before the update replaces them. + input_rate = self._get_configured_sample_rate("input") + output_rate = self._get_configured_sample_rate("output") + + changed = await super()._update_settings(delta) + + if "session_properties" in changed: + if input_rate and output_rate: + self._ensure_audio_config(input_rate, output_rate) + else: + logger.warning( + "Attempting to apply session properties update without configured sample rates. " + "Audio configuration may be incomplete." 
+ ) + await self._send_session_update() + + self._warn_unhandled_updated_settings(changed.keys() - {"session_properties"}) + return changed + + async def _send_session_update(self): """Update session settings on the server.""" - settings = self._session_properties + settings = self._settings.session_properties adapter: GrokRealtimeLLMAdapter = self.get_llm_adapter() if self._context: @@ -511,12 +571,15 @@ class GrokRealtimeLLMService(LLMService): elif evt.type == "response.function_call_arguments.done": await self._handle_evt_function_call_arguments_done(evt) elif evt.type == "error": - await self._handle_evt_error(evt) - return + if evt.error.code == "response_cancel_not_active": + logger.debug(f"{self} {evt.error.message}") + else: + await self._handle_evt_error(evt) + return async def _handle_evt_conversation_created(self, evt): """Handle conversation.created event - first event after connecting.""" - await self._update_settings() + await self._send_session_update() async def _handle_evt_response_created(self, evt): """Handle response.created event - response generation started.""" @@ -671,7 +734,7 @@ class GrokRealtimeLLMService(LLMService): """Handle speech started event from VAD.""" await self._truncate_current_audio_response() await self.broadcast_frame(UserStartedSpeakingFrame) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _handle_evt_speech_stopped(self, evt): """Handle speech stopped event from VAD.""" @@ -719,7 +782,7 @@ class GrokRealtimeLLMService(LLMService): self._messages_added_manually[evt.item.id] = True await self.send_client_event(evt) - await self._update_settings() + await self._send_session_update() self._llm_needs_conversation_setup = False logger.debug("Creating Grok response") diff --git a/src/pipecat/services/groq/stt.py b/src/pipecat/services/groq/stt.py index 52cb0a7cc..d51e93c68 100644 --- a/src/pipecat/services/groq/stt.py +++ b/src/pipecat/services/groq/stt.py @@ -62,7 +62,7 @@ 
class GroqSTTService(BaseWhisperSTTService): # Build kwargs dict with only set parameters kwargs = { "file": ("audio.wav", audio, "audio/wav"), - "model": self.model_name, + "model": self._settings.model, # Use verbose_json to get probability metrics "response_format": "verbose_json" if self._include_prob_metrics else "json", "language": self._language, diff --git a/src/pipecat/services/groq/tts.py b/src/pipecat/services/groq/tts.py index 331af8eb7..901b786c0 100644 --- a/src/pipecat/services/groq/tts.py +++ b/src/pipecat/services/groq/tts.py @@ -8,7 +8,8 @@ import io import wave -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, ClassVar, Dict, Optional from loguru import logger from pydantic import BaseModel @@ -20,6 +21,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language from pipecat.utils.tracing.service_decorators import traced_tts @@ -32,6 +34,23 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class GroqTTSSettings(TTSSettings): + """Settings for the Groq TTS service. + + Parameters: + output_format: Audio output format. + speed: Speech speed multiplier. Defaults to 1.0. + groq_sample_rate: Audio sample rate. + """ + + output_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + groq_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice", "sample_rate": "groq_sample_rate"} + + class GroqTTSService(TTSService): """Groq text-to-speech service implementation. @@ -40,6 +59,8 @@ class GroqTTSService(TTSService): and output formats. 
""" + _settings: GroqTTSSettings + class InputParams(BaseModel): """Input parameters for Groq TTS configuration. @@ -78,28 +99,24 @@ class GroqTTSService(TTSService): if sample_rate != self.GROQ_SAMPLE_RATE: logger.warning(f"Groq TTS only supports {self.GROQ_SAMPLE_RATE}Hz sample rate. ") + params = params or GroqTTSService.InputParams() + super().__init__( pause_frame_processing=True, sample_rate=sample_rate, + settings=GroqTTSSettings( + model=model_name, + voice=voice_id, + language=str(params.language) if params.language else "en", + output_format=output_format, + speed=params.speed, + groq_sample_rate=sample_rate, + ), **kwargs, ) - params = params or GroqTTSService.InputParams() - self._api_key = api_key - self._model_name = model_name self._output_format = output_format - self._voice_id = voice_id - self._params = params - - self._settings = { - "model": model_name, - "voice_id": voice_id, - "output_format": output_format, - "language": str(params.language) if params.language else "en", - "speed": params.speed, - "sample_rate": sample_rate, - } self._client = AsyncGroq(api_key=self._api_key) @@ -129,9 +146,12 @@ class GroqTTSService(TTSService): try: response = await self._client.audio.speech.create( - model=self._model_name, - voice=self._voice_id, + model=self._settings.model, + voice=self._settings.voice, response_format=self._output_format, + # Note: as of 2026-02-25, only a speed of 1.0 is supported, but + # here we pass it for completeness and future-proofing + speed=self._settings.speed, input=text, ) diff --git a/src/pipecat/services/hathora/stt.py b/src/pipecat/services/hathora/stt.py index defdc355d..27f1aebfb 100644 --- a/src/pipecat/services/hathora/stt.py +++ b/src/pipecat/services/hathora/stt.py @@ -8,6 +8,7 @@ import base64 import os +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional import aiohttp @@ -18,6 +19,7 @@ from pipecat.frames.frames import ( Frame, TranscriptionFrame, ) +from 
pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import HATHORA_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language @@ -27,12 +29,27 @@ from pipecat.utils.tracing.service_decorators import traced_stt from .utils import ConfigOption +@dataclass +class HathoraSTTSettings(STTSettings): + """Settings for the Hathora STT service. + + Parameters: + config: Some models support additional config, refer to + `docs `_ for each model to see + what is supported. + """ + + config: list[ConfigOption] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class HathoraSTTService(SegmentedSTTService): """This service supports several different speech-to-text models hosted by Hathora. [Documentation](https://models.hathora.dev) """ + _settings: HathoraSTTSettings + class InputParams(BaseModel): """Optional input parameters for Hathora STT configuration. @@ -72,24 +89,21 @@ class HathoraSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to the parent class. """ + params = params or HathoraSTTService.InputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, + settings=HathoraSTTSettings( + model=model, + language=params.language, + config=params.config, + ), **kwargs, ) - self._model = model self._api_key = api_key or os.getenv("HATHORA_API_KEY") self._base_url = base_url - params = params or HathoraSTTService.InputParams() - - self._settings = { - "language": params.language, - "config": params.config, - } - - self.set_model_name(model) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -120,15 +134,14 @@ class HathoraSTTService(SegmentedSTTService): url = f"{self._base_url}" payload = { - "model": self._model, + "model": self._settings.model, } - if self._settings["language"] is not None: - payload["language"] = self._settings["language"] - if self._settings["config"] is not None: + if self._settings.language is not None: + payload["language"] = self._settings.language + if self._settings.config is not None: payload["model_config"] = [ - {"name": option.name, "value": option.value} - for option in self._settings["config"] + {"name": option.name, "value": option.value} for option in self._settings.config ] base64_audio = base64.b64encode(audio).decode("utf-8") @@ -147,7 +160,7 @@ class HathoraSTTService(SegmentedSTTService): if text: # Only yield non-empty text # Hathora's API currently doesn't return language info # so we default to the requested language or "en" - response_language = self._settings["language"] or "en" + response_language = self._settings.language or "en" await self._handle_transcription(text, True, response_language) yield TranscriptionFrame( text, diff --git a/src/pipecat/services/hathora/tts.py b/src/pipecat/services/hathora/tts.py index 80cbd4fe8..3fb9e747b 100644 --- a/src/pipecat/services/hathora/tts.py +++ b/src/pipecat/services/hathora/tts.py @@ -9,6 +9,7 @@ import io import os import wave +from dataclasses import dataclass, field from typing import AsyncGenerator, Optional, Tuple import aiohttp @@ -21,6 +22,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -45,12 +47,29 @@ def _decode_audio_payload( return audio_bytes, fallback_sample_rate, fallback_channels +@dataclass +class HathoraTTSSettings(TTSSettings): + """Settings for Hathora TTS service. 
+ + Parameters: + speed: Speech speed multiplier (if supported by model). + config: Some models support additional config, refer to + [docs](https://models.hathora.dev) for each model to see + what is supported. + """ + + speed: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + config: list[ConfigOption] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class HathoraTTSService(TTSService): """This service supports several different text-to-speech models hosted by Hathora. [Documentation](https://models.hathora.dev) """ + _settings: HathoraTTSSettings + class InputParams(BaseModel): """Optional input parameters for Hathora TTS configuration. @@ -88,23 +107,21 @@ class HathoraTTSService(TTSService): params: Configuration parameters. **kwargs: Additional arguments passed to the parent class. """ - super().__init__( - sample_rate=sample_rate, - **kwargs, - ) - self._model = model - self._api_key = api_key or os.getenv("HATHORA_API_KEY") - self._base_url = base_url - params = params or HathoraTTSService.InputParams() - self._settings = { - "speed": params.speed, - "config": params.config, - } - - self.set_model_name(model) - self.set_voice(voice_id) + super().__init__( + sample_rate=sample_rate, + settings=HathoraTTSSettings( + model=model, + voice=voice_id, + language=None, # Not applicable here + speed=params.speed, + config=params.config, + ), + **kwargs, + ) + self._api_key = api_key or os.getenv("HATHORA_API_KEY") + self._base_url = base_url def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -131,16 +148,15 @@ class HathoraTTSService(TTSService): url = f"{self._base_url}" - payload = {"model": self._model, "text": text} + payload = {"model": self._settings.model, "text": text} - if self._voice_id is not None: - payload["voice"] = self._voice_id - if self._settings["speed"] is not None: - payload["speed"] = self._settings["speed"] - if self._settings["config"] is not None: + if self._settings.voice is not None: + payload["voice"] = self._settings.voice + if self._settings.speed is not None: + payload["speed"] = self._settings.speed + if self._settings.config is not None: payload["model_config"] = [ - {"name": option.name, "value": option.value} - for option in self._settings["config"] + {"name": option.name, "value": option.value} for option in self._settings.config ] yield TTSStartedFrame(context_id=context_id) diff --git a/src/pipecat/services/heygen/client.py b/src/pipecat/services/heygen/client.py index 4018d3858..6d45d6114 100644 --- a/src/pipecat/services/heygen/client.py +++ b/src/pipecat/services/heygen/client.py @@ -62,10 +62,12 @@ class HeyGenCallbacks(BaseModel): """Callback handlers for HeyGen events. Parameters: - on_participant_connected: Called when a participant connects - on_participant_disconnected: Called when a participant disconnects + on_connected: Called when the bot connects to the LiveKit room. + on_participant_connected: Called when a participant connects. + on_participant_disconnected: Called when a participant disconnects. """ + on_connected: Callable[[], Awaitable[None]] on_participant_connected: Callable[[str], Awaitable[None]] on_participant_disconnected: Callable[[str], Awaitable[None]] @@ -251,6 +253,7 @@ class HeyGenClient: logger.debug(f"HeyGenClient send_interval: {self._send_interval}") await self._ws_connect() await self._livekit_connect() + self._call_event_callback(self._callbacks.on_connected) async def stop(self) -> None: """Stop the client and terminate all connections. 
diff --git a/src/pipecat/services/heygen/video.py b/src/pipecat/services/heygen/video.py index b97f4a5ed..7f3624f35 100644 --- a/src/pipecat/services/heygen/video.py +++ b/src/pipecat/services/heygen/video.py @@ -128,6 +128,7 @@ class HeyGenVideoService(AIService): session_request=self._session_request, service_type=self._service_type, callbacks=HeyGenCallbacks( + on_connected=self._on_connected, on_participant_connected=self._on_participant_connected, on_participant_disconnected=self._on_participant_disconnected, ), @@ -144,6 +145,10 @@ class HeyGenVideoService(AIService): await self._client.cleanup() self._client = None + async def _on_connected(self): + """Handle bot connected to LiveKit room.""" + logger.info("HeyGen bot connected to LiveKit room") + async def _on_participant_connected(self, participant_id: str): """Handle participant connected events.""" logger.info(f"Participant connected {participant_id}") diff --git a/src/pipecat/services/hume/tts.py b/src/pipecat/services/hume/tts.py index 2d98e1f8c..2a075ab36 100644 --- a/src/pipecat/services/hume/tts.py +++ b/src/pipecat/services/hume/tts.py @@ -6,6 +6,8 @@ import base64 import os +import warnings +from dataclasses import dataclass, field from typing import Any, AsyncGenerator, Optional import httpx @@ -24,7 +26,8 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import WordTTSService +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven +from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts try: @@ -46,7 +49,22 @@ DEFAULT_HEADERS = { } -class HumeTTSService(WordTTSService): +@dataclass +class HumeTTSSettings(TTSSettings): + """Settings for Hume TTS service. + + Parameters: + description: Natural-language acting directions (up to 100 characters). + speed: Speaking-rate multiplier (0.5-2.0). 
+ trailing_silence: Seconds of silence to append at the end (0-5). + """ + + description: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + trailing_silence: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + +class HumeTTSService(TTSService): """Hume Octave Text-to-Speech service. Streams PCM audio via Hume's HTTP output streaming (JSON chunks) endpoint @@ -61,6 +79,8 @@ class HumeTTSService(WordTTSService): - Provides metrics for Time To First Byte (TTFB) and TTS usage. """ + _settings: HumeTTSSettings + class InputParams(BaseModel): """Optional synthesis parameters for Hume TTS. @@ -101,11 +121,21 @@ class HumeTTSService(WordTTSService): f"Hume TTS streams at {HUME_SAMPLE_RATE} Hz; configured sample_rate={sample_rate}" ) - # WordTTSService sets push_text_frames=False by default, which we want + params = params or HumeTTSService.InputParams() + super().__init__( sample_rate=sample_rate, push_text_frames=False, push_stop_frames=True, + supports_word_timestamps=True, + settings=HumeTTSSettings( + model=None, + voice=voice_id, + language=None, # Not applicable here + description=params.description, + speed=params.speed, + trailing_silence=params.trailing_silence, + ), **kwargs, ) @@ -114,10 +144,6 @@ class HumeTTSService(WordTTSService): self._http_client = httpx.AsyncClient(headers=DEFAULT_HEADERS) self._client = AsyncHumeClient(api_key=api_key, httpx_client=self._http_client) - self._params = params or HumeTTSService.InputParams() - - # Store voice in the base class (mirrors other services) - self.set_voice(voice_id) self._audio_bytes = b"" @@ -183,7 +209,10 @@ class HumeTTSService(WordTTSService): await self.add_word_timestamps([("Reset", 0)]) async def update_setting(self, key: str, value: Any) -> None: - """Runtime updates via `TTSUpdateSettingsFrame`. + """Runtime updates via key/value pair. + + .. 
deprecated:: 0.0.104 + Use ``TTSUpdateSettingsFrame(delta=HumeTTSSettings(...))`` instead. Args: key: The name of the setting to update. Recognized keys are: @@ -193,20 +222,29 @@ class HumeTTSService(WordTTSService): - "trailing_silence" value: The new value for the setting. """ - key_l = (key or "").lower() + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'update_setting' is deprecated, use " + "'TTSUpdateSettingsFrame(delta=HumeTTSSettings(...))' instead.", + DeprecationWarning, + stacklevel=2, + ) - if key_l == "voice_id": - self.set_voice(str(value)) - logger.debug(f"HumeTTSService voice_id set to: {self.voice}") - elif key_l == "description": - self._params.description = None if value is None else str(value) - elif key_l == "speed": - self._params.speed = None if value is None else float(value) - elif key_l == "trailing_silence": - self._params.trailing_silence = None if value is None else float(value) - else: - # Defer unknown keys to the base class - await super().update_setting(key, value) + key_l = (key or "").lower() + known_keys = {"voice_id", "voice", "description", "speed", "trailing_silence"} + + if key_l in known_keys: + kwargs: dict[str, Any] = {} + if key_l in ("voice_id", "voice"): + kwargs["voice"] = str(value) + elif key_l == "description": + kwargs["description"] = None if value is None else str(value) + elif key_l == "speed": + kwargs["speed"] = None if value is None else float(value) + elif key_l == "trailing_silence": + kwargs["trailing_silence"] = None if value is None else float(value) + await self._update_settings(HumeTTSSettings(**kwargs)) @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -226,14 +264,14 @@ class HumeTTSService(WordTTSService): # Build the request payload utterance_kwargs: dict[str, Any] = { "text": text, - "voice": PostedUtteranceVoiceWithId(id=self._voice_id), + "voice": PostedUtteranceVoiceWithId(id=self._settings.voice), } - if 
self._params.description is not None: - utterance_kwargs["description"] = self._params.description - if self._params.speed is not None: - utterance_kwargs["speed"] = self._params.speed - if self._params.trailing_silence is not None: - utterance_kwargs["trailing_silence"] = self._params.trailing_silence + if self._settings.description is not None: + utterance_kwargs["description"] = self._settings.description + if self._settings.speed is not None: + utterance_kwargs["speed"] = self._settings.speed + if self._settings.trailing_silence is not None: + utterance_kwargs["trailing_silence"] = self._settings.trailing_silence utterance = PostedUtterance(**utterance_kwargs) @@ -257,7 +295,7 @@ class HumeTTSService(WordTTSService): # Use version "2" by default if no description is provided # Version "1" is needed when description is used - version = "1" if self._params.description is not None else "2" + version = "1" if self._settings.description is not None else "2" # Track the duration of this utterance based on the last timestamp utterance_duration = 0.0 diff --git a/src/pipecat/services/image_service.py b/src/pipecat/services/image_service.py index 58ab58fa4..f99909444 100644 --- a/src/pipecat/services/image_service.py +++ b/src/pipecat/services/image_service.py @@ -11,11 +11,12 @@ text prompts into images. """ from abc import abstractmethod -from typing import AsyncGenerator +from typing import AsyncGenerator, Optional from pipecat.frames.frames import Frame, TextFrame from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import ImageGenSettings class ImageGenService(AIService): @@ -26,13 +27,20 @@ class ImageGenService(AIService): generation functionality using their specific AI service. """ - def __init__(self, **kwargs): + def __init__(self, *, settings: Optional[ImageGenSettings] = None, **kwargs): """Initialize the image generation service. 
Args: + settings: The runtime-updatable settings for the image generation service. **kwargs: Additional arguments passed to the parent AIService. """ - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or ImageGenSettings(), + **kwargs, + ) # Renders the image. Returns an Image object. @abstractmethod diff --git a/src/pipecat/services/inworld/tts.py b/src/pipecat/services/inworld/tts.py index adf72ce9f..d3f64c16f 100644 --- a/src/pipecat/services/inworld/tts.py +++ b/src/pipecat/services/inworld/tts.py @@ -17,7 +17,8 @@ import asyncio import base64 import json import uuid -from typing import Any, AsyncGenerator, Dict, List, Literal, Optional, Tuple +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, ClassVar, Dict, List, Literal, Mapping, Optional, Tuple import aiohttp import websockets @@ -28,6 +29,8 @@ from pipecat import version as pipecat_version USER_AGENT = f"pipecat/{pipecat_version()}" from pydantic import BaseModel +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven + try: from websockets.asyncio.client import connect as websocket_connect from websockets.protocol import State @@ -48,17 +51,66 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import AudioContextWordTTSService, WordTTSService +from pipecat.services.tts_service import AudioContextTTSService, TextAggregationMode, TTSService from pipecat.utils.tracing.service_decorators import traced_tts -class InworldHttpTTSService(WordTTSService): +@dataclass +class InworldTTSSettings(TTSSettings): + """Settings for Inworld TTS services. + + Parameters: + audio_encoding: Audio encoding format (e.g. LINEAR16). + audio_sample_rate: Audio sample rate in Hz. + speaking_rate: Speaking rate for speech synthesis. 
+ temperature: Temperature for speech synthesis. + auto_mode: Whether to use auto mode. Recommended when texts are sent + in full sentences/phrases. When enabled, the server controls + flushing of buffered text to achieve minimal latency while + maintaining high quality audio output. If None (default), + automatically set based on aggregate_sentences. + apply_text_normalization: Whether to apply text normalization. + timestamp_transport_strategy: Strategy for timestamp transport ("ASYNC" or "SYNC"). + """ + + audio_encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaking_rate: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + auto_mode: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + apply_text_normalization: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + timestamp_transport_strategy: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = { + "voice_id": "voice", + "voiceId": "voice", + "modelId": "model", + "applyTextNormalization": "apply_text_normalization", + "autoMode": "auto_mode", + "timestampTransportStrategy": "timestamp_transport_strategy", + } + + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "InworldTTSSettings": + """Construct settings from a plain dict, destructuring legacy nested ``audioConfig``.""" + flat = dict(settings) + nested = flat.pop("audioConfig", None) + if isinstance(nested, dict): + flat.setdefault("audio_encoding", nested.get("audioEncoding")) + flat.setdefault("audio_sample_rate", nested.get("sampleRateHertz")) + flat.setdefault("speaking_rate", nested.get("speakingRate")) + return super().from_mapping(flat) + + +class InworldHttpTTSService(TTSService): """Inworld AI HTTP-based TTS service. 
Supports both streaming and non-streaming modes via the `streaming` parameter. Outputs LINEAR16 audio at configurable sample rates with word-level timestamps. """ + _settings: InworldTTSSettings + class InputParams(BaseModel): """Input parameters for Inworld TTS configuration. @@ -98,15 +150,28 @@ class InworldHttpTTSService(WordTTSService): params: Input parameters for Inworld TTS configuration. **kwargs: Additional arguments passed to the parent class. """ + params = params or InworldHttpTTSService.InputParams() + super().__init__( push_text_frames=False, push_stop_frames=True, + supports_word_timestamps=True, sample_rate=sample_rate, + settings=InworldTTSSettings( + model=model, + voice=voice_id, + language=None, + audio_encoding=encoding, + audio_sample_rate=0, + speaking_rate=params.speaking_rate, + temperature=params.temperature, + timestamp_transport_strategy=params.timestamp_transport_strategy, + auto_mode=None, # Not applicable for HTTP TTS + apply_text_normalization=None, # Not applicable for HTTP TTS + ), **kwargs, ) - params = params or InworldHttpTTSService.InputParams() - self._api_key = api_key self._session = aiohttp_session self._streaming = streaming @@ -117,27 +182,8 @@ class InworldHttpTTSService(WordTTSService): else: self._base_url = "https://api.inworld.ai/tts/v1/voice" - self._settings = { - "voiceId": voice_id, - "modelId": model, - "audioConfig": { - "audioEncoding": encoding, - "sampleRateHertz": 0, - }, - } - - if params.temperature is not None: - self._settings["temperature"] = params.temperature - if params.speaking_rate is not None: - self._settings["audioConfig"]["speakingRate"] = params.speaking_rate - if params.timestamp_transport_strategy is not None: - self._settings["timestampTransportStrategy"] = params.timestamp_transport_strategy - self._cumulative_time = 0.0 - self.set_voice(voice_id) - self.set_model_name(model) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -153,7 +199,7 @@ class InworldHttpTTSService(WordTTSService): frame: The start frame. """ await super().start(frame) - self._settings["audioConfig"]["sampleRateHertz"] = self.sample_rate + self._settings.audio_sample_rate = self.sample_rate async def stop(self, frame: EndFrame): """Stop the Inworld TTS service. @@ -232,20 +278,27 @@ class InworldHttpTTSService(WordTTSService): """ logger.debug(f"{self}: Generating TTS [{text}] (streaming={self._streaming})") + audio_config = { + "audioEncoding": self._settings.audio_encoding, + "sampleRateHertz": self._settings.audio_sample_rate, + } + if self._settings.speaking_rate is not None: + audio_config["speakingRate"] = self._settings.speaking_rate + payload = { "text": text, - "voiceId": self._settings["voiceId"], - "modelId": self._settings["modelId"], - "audioConfig": self._settings["audioConfig"], + "voiceId": self._settings.voice, + "modelId": self._settings.model, + "audioConfig": audio_config, } - if "temperature" in self._settings: - payload["temperature"] = self._settings["temperature"] + if self._settings.temperature is not None: + payload["temperature"] = self._settings.temperature # Use WORD timestamps for simplicity and correct spacing/capitalization payload["timestampType"] = self._timestamp_type - if "timestampTransportStrategy" in self._settings: - payload["timestampTransportStrategy"] = self._settings["timestampTransportStrategy"] + if self._settings.timestamp_transport_strategy is not None: + payload["timestampTransportStrategy"] = self._settings.timestamp_transport_strategy request_id = str(uuid.uuid4()) headers = { @@ -411,7 +464,7 @@ class InworldHttpTTSService(WordTTSService): ) -class InworldTTSService(AudioContextWordTTSService): +class InworldTTSService(AudioContextTTSService): """Inworld AI WebSocket-based TTS service. Uses bidirectional WebSocket for lower latency streaming. Supports multiple @@ -419,6 +472,8 @@ class InworldTTSService(AudioContextWordTTSService): with word-level timestamps. 
""" + _settings: InworldTTSSettings + class InputParams(BaseModel): """Input parameters for Inworld WebSocket TTS configuration. @@ -454,7 +509,8 @@ class InworldTTSService(AudioContextWordTTSService): sample_rate: Optional[int] = None, encoding: str = "LINEAR16", params: InputParams = None, - aggregate_sentences: bool = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, append_trailing_space: bool = True, **kwargs: Any, ): @@ -468,48 +524,45 @@ class InworldTTSService(AudioContextWordTTSService): sample_rate: Audio sample rate in Hz. encoding: Audio encoding format. params: Input parameters for Inworld WebSocket TTS configuration. - aggregate_sentences: Whether to aggregate sentences before synthesis. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. append_trailing_space: Whether to append a trailing space to text before sending to TTS. **kwargs: Additional arguments passed to the parent class. 
""" + params = params or InworldTTSService.InputParams() + super().__init__( push_text_frames=False, push_stop_frames=True, pause_frame_processing=True, + supports_word_timestamps=True, sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, append_trailing_space=append_trailing_space, + settings=InworldTTSSettings( + model=model, + voice=voice_id, + language=None, + audio_encoding=encoding, + audio_sample_rate=0, + speaking_rate=params.speaking_rate, + temperature=params.temperature, + apply_text_normalization=params.apply_text_normalization, + timestamp_transport_strategy=params.timestamp_transport_strategy, + auto_mode=params.auto_mode if params.auto_mode is not None else aggregate_sentences, + ), **kwargs, ) - params = params or InworldTTSService.InputParams() - self._api_key = api_key self._url = url - self._settings: Dict[str, Any] = { - "voiceId": voice_id, - "modelId": model, - "audioConfig": { - "audioEncoding": encoding, - "sampleRateHertz": 0, - }, - } self._timestamp_type = "WORD" - if params.temperature is not None: - self._settings["temperature"] = params.temperature - if params.speaking_rate is not None: - self._settings["audioConfig"]["speakingRate"] = params.speaking_rate - if params.apply_text_normalization is not None: - self._settings["applyTextNormalization"] = params.apply_text_normalization - if params.timestamp_transport_strategy is not None: - self._settings["timestampTransportStrategy"] = params.timestamp_transport_strategy - - if params.auto_mode is not None: - self._settings["autoMode"] = params.auto_mode - else: - self._settings["autoMode"] = aggregate_sentences - self._buffer_settings = { "maxBufferDelayMs": params.max_buffer_delay_ms, "bufferCharThreshold": params.buffer_char_threshold, @@ -526,9 +579,6 @@ class InworldTTSService(AudioContextWordTTSService): # Track the end time of the last word in the current generation self._generation_end_time = 0.0 - self.set_voice(voice_id) 
- self.set_model_name(model) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -544,7 +594,7 @@ class InworldTTSService(AudioContextWordTTSService): frame: The start frame. """ await super().start(frame) - self._settings["audioConfig"]["sampleRateHertz"] = self.sample_rate + self._settings.audio_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -633,28 +683,23 @@ class InworldTTSService(AudioContextWordTTSService): return word_times - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle an interruption from the Inworld WebSocket TTS service. - - Args: - frame: The interruption frame. - direction: The direction of the interruption. - """ - old_context_id = self.get_active_audio_context_id() - logger.trace(f"{self}: Handling interruption, old context: {old_context_id}") - - await super()._handle_interruption(frame, direction) - - if old_context_id and self._websocket: - logger.trace(f"{self}: Closing context {old_context_id} due to interruption") + async def _close_context(self, context_id: str): + if context_id and self._websocket: + logger.info(f"{self}: Closing context {context_id} due to interruption or completion") try: - await self._send_close_context(old_context_id) + await self._send_close_context(context_id) except Exception as e: await self.push_error(error_msg=f"Unknown error occurred: {e}", exception=e) - self._cumulative_time = 0.0 self._generation_end_time = 0.0 - logger.trace(f"{self}: Interruption handled, context reset to None") + + async def on_audio_context_interrupted(self, context_id: str): + """Callback invoked when an audio context has been interrupted.""" + await self._close_context(context_id) + + async def on_audio_context_completed(self, context_id: str): + """Callback invoked when an audio context has been completed.""" + await self._close_context(context_id) def _get_websocket(self): """Get 
the websocket for the Inworld WebSocket TTS service. @@ -700,6 +745,21 @@ class InworldTTSService(AudioContextWordTTSService): await self._disconnect_websocket() + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. + + If any setting changed, the service disconnects and reconnects. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + await self._disconnect() + await self._connect() + + return changed + async def _connect_websocket(self): """Connect to the Inworld WebSocket TTS service. @@ -883,22 +943,29 @@ class InworldTTSService(AudioContextWordTTSService): Args: context_id: The context ID. """ + audio_config = { + "audioEncoding": self._settings.audio_encoding, + "sampleRateHertz": self._settings.audio_sample_rate, + } + if self._settings.speaking_rate is not None: + audio_config["speakingRate"] = self._settings.speaking_rate + create_config: Dict[str, Any] = { - "voiceId": self._settings["voiceId"], - "modelId": self._settings["modelId"], - "audioConfig": self._settings["audioConfig"], + "voiceId": self._settings.voice, + "modelId": self._settings.model, + "audioConfig": audio_config, } - if "temperature" in self._settings: - create_config["temperature"] = self._settings["temperature"] - if "applyTextNormalization" in self._settings: - create_config["applyTextNormalization"] = self._settings["applyTextNormalization"] - if "autoMode" in self._settings: - create_config["autoMode"] = self._settings["autoMode"] - if "timestampTransportStrategy" in self._settings: - create_config["timestampTransportStrategy"] = self._settings[ - "timestampTransportStrategy" - ] + if self._settings.temperature is not None: + create_config["temperature"] = self._settings.temperature + if self._settings.apply_text_normalization is not None: + create_config["applyTextNormalization"] = self._settings.apply_text_normalization + if self._settings.auto_mode is not None: + create_config["autoMode"] =
self._settings.auto_mode + if self._settings.timestamp_transport_strategy is not None: + create_config["timestampTransportStrategy"] = ( + self._settings.timestamp_transport_strategy + ) # Set buffer settings for timely audio generation. # Use provided values or defaults that work well for streaming LLM output. diff --git a/src/pipecat/services/kokoro/tts.py b/src/pipecat/services/kokoro/tts.py index 49ede2409..4b35fa46d 100644 --- a/src/pipecat/services/kokoro/tts.py +++ b/src/pipecat/services/kokoro/tts.py @@ -7,6 +7,7 @@ """Kokoro TTS service implementation using kokoro-onnx.""" import os +from dataclasses import dataclass, field from pathlib import Path from typing import AsyncGenerator, Optional @@ -22,6 +23,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -87,6 +89,17 @@ def language_to_kokoro_language(language: Language) -> str: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class KokoroTTSSettings(TTSSettings): + """Settings for the Kokoro TTS service. + + Parameters: + lang_code: Kokoro language code for synthesis. + """ + + lang_code: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class KokoroTTSService(TTSService): """Kokoro TTS service implementation. @@ -94,6 +107,8 @@ class KokoroTTSService(TTSService): Automatically downloads model files on first use. """ + _settings: KokoroTTSSettings + class InputParams(BaseModel): """Input parameters for Kokoro TTS configuration. @@ -122,11 +137,18 @@ class KokoroTTSService(TTSService): **kwargs: Additional arguments passed to parent `TTSService`. 
""" - super().__init__(**kwargs) - params = params or KokoroTTSService.InputParams() - self._voice_id = voice_id + super().__init__( + settings=KokoroTTSSettings( + model=None, + voice=voice_id, + language=language_to_kokoro_language(params.language), + lang_code=language_to_kokoro_language(params.language), + ), + **kwargs, + ) + self._lang_code = language_to_kokoro_language(params.language) model = Path(model_path) if model_path else KOKORO_CACHE_DIR / "kokoro-v1.0.onnx" @@ -161,7 +183,7 @@ class KokoroTTSService(TTSService): yield TTSStartedFrame(context_id=context_id) stream = self._kokoro.create_stream( - text, voice=self._voice_id, lang=self._lang_code, speed=1.0 + text, voice=self._settings.voice, lang=self._lang_code, speed=1.0 ) async for samples, sample_rate in stream: diff --git a/src/pipecat/services/llm_service.py b/src/pipecat/services/llm_service.py index c8af00b80..da0d57d66 100644 --- a/src/pipecat/services/llm_service.py +++ b/src/pipecat/services/llm_service.py @@ -44,6 +44,7 @@ from pipecat.frames.frames import ( LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMTextFrame, + LLMUpdateSettingsFrame, StartFrame, UserImageRequestFrame, ) @@ -58,8 +59,10 @@ from pipecat.processors.aggregators.llm_response import ( from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import LLMSettings from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionLLMServiceMixin from pipecat.utils.context.llm_context_summarization import ( + DEFAULT_SUMMARIZATION_TIMEOUT, LLMContextSummarizationUtil, ) @@ -172,12 +175,18 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): logger.info(f"Starting {len(function_calls)} function calls") """ + _settings: LLMSettings + # OpenAILLMAdapter is used as the default adapter since it aligns with most LLM implementations. 
# However, subclasses should override this with a more specific adapter when necessary. adapter_class: Type[BaseLLMAdapter] = OpenAILLMAdapter def __init__( - self, run_in_parallel: bool = True, function_call_timeout_secs: float = 10.0, **kwargs + self, + run_in_parallel: bool = True, + function_call_timeout_secs: float = 10.0, + settings: Optional[LLMSettings] = None, + **kwargs, ): """Initialize the LLM service. @@ -186,10 +195,17 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): Defaults to True. function_call_timeout_secs: Timeout in seconds for deferred function calls. Defaults to 10.0 seconds. + settings: The runtime-updatable settings for the LLM service. **kwargs: Additional arguments passed to the parent AIService. """ - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or LLMSettings(), + **kwargs, + ) self._run_in_parallel = run_in_parallel self._function_call_timeout_secs = function_call_timeout_secs self._filter_incomplete_user_turns: bool = False @@ -307,34 +323,30 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): await self._cancel_sequential_runner_task() await self._cancel_summary_task() - async def _update_settings(self, settings: Mapping[str, Any]): - """Update LLM service settings. - - Handles turn completion settings specially since they are not model - parameters and should not be passed to the underlying LLM API. + async def _update_settings(self, delta: LLMSettings) -> dict[str, Any]: + """Apply a settings delta, handling turn-completion fields. Args: - settings: Dictionary of settings to update. - """ - # Turn completion settings to extract (not model parameters) - turn_completion_keys = {"filter_incomplete_user_turns", "user_turn_completion_config"} + delta: An LLM settings delta. 
- # Handle turn completion settings - if "filter_incomplete_user_turns" in settings: - self._filter_incomplete_user_turns = settings["filter_incomplete_user_turns"] + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(delta) + + if "filter_incomplete_user_turns" in changed: + self._filter_incomplete_user_turns = ( + self._settings.filter_incomplete_user_turns or False + ) logger.info( - f"{self}: Incomplete turn filtering {'enabled' if self._filter_incomplete_user_turns else 'disabled'}" + f"{self}: Incomplete turn filtering " + f"{'enabled' if self._filter_incomplete_user_turns else 'disabled'}" ) - # Configure the mixin with config object - if self._filter_incomplete_user_turns and "user_turn_completion_config" in settings: - self.set_user_turn_completion_config(settings["user_turn_completion_config"]) + if "user_turn_completion_config" in changed and self._filter_incomplete_user_turns: + self.set_user_turn_completion_config(self._settings.user_turn_completion_config) - # Remove turn completion settings before passing to parent - settings = {k: v for k, v in settings.items() if k not in turn_completion_keys} - - # Let the parent handle remaining model parameters - await super()._update_settings(settings) + return changed async def process_frame(self, frame: Frame, direction: FrameDirection): """Process a frame. @@ -349,6 +361,21 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): await self._handle_interruptions(frame) elif isinstance(frame, LLMConfigureOutputFrame): self._skip_tts = frame.skip_tts + elif isinstance(frame, LLMUpdateSettingsFrame): + if frame.delta is not None: + await self._update_settings(frame.delta) + elif frame.settings: + # Backward-compatible path: convert legacy dict to settings object. 
+ with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Passing a dict via LLMUpdateSettingsFrame(settings={...}) is deprecated " + "since 0.0.104, use LLMUpdateSettingsFrame(delta=LLMSettings(...)) instead.", + DeprecationWarning, + stacklevel=2, + ) + delta = type(self._settings).from_mapping(frame.settings) + await self._update_settings(delta) elif isinstance(frame, LLMContextSummaryRequestFrame): await self._handle_summary_request(frame) @@ -410,8 +437,15 @@ class LLMService(UserTurnCompletionLLMServiceMixin, AIService): last_index = -1 error = None + timeout = frame.summarization_timeout or DEFAULT_SUMMARIZATION_TIMEOUT + try: - summary, last_index = await self._generate_summary(frame) + summary, last_index = await asyncio.wait_for( + self._generate_summary(frame), + timeout=timeout, + ) + except asyncio.TimeoutError: + await self.push_error(error_msg=f"Context summarization timed out after {timeout}s") except Exception as e: error = f"Error generating context summary: {e}" await self.push_error(error, exception=e) diff --git a/src/pipecat/services/lmnt/tts.py b/src/pipecat/services/lmnt/tts.py index 4c34e28d5..a2c500ca2 100644 --- a/src/pipecat/services/lmnt/tts.py +++ b/src/pipecat/services/lmnt/tts.py @@ -7,7 +7,8 @@ """LMNT text-to-speech service implementation.""" import json -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger @@ -23,6 +24,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import InterruptibleTTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -71,6 +73,17 @@ def language_to_lmnt_language(language: Language) -> Optional[str]: 
return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class LmntTTSSettings(TTSSettings): + """Settings for LMNT TTS service. + + Parameters: + format: Audio output format. Defaults to "raw". + """ + + format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class LmntTTSService(InterruptibleTTSService): """LMNT real-time text-to-speech service. @@ -79,6 +92,8 @@ class LmntTTSService(InterruptibleTTSService): language settings. """ + _settings: LmntTTSSettings + def __init__( self, *, @@ -103,16 +118,16 @@ class LmntTTSService(InterruptibleTTSService): push_stop_frames=True, pause_frame_processing=True, sample_rate=sample_rate, + settings=LmntTTSSettings( + model=model, + voice=voice_id, + language=self.language_to_service_language(language), + format="raw", + ), **kwargs, ) self._api_key = api_key - self.set_voice(voice_id) - self.set_model_name(model) - self._settings = { - "language": self.language_to_service_language(language), - "format": "raw", # Use raw format for direct PCM data - } self._receive_task = None self._context_id: Optional[str] = None @@ -190,6 +205,23 @@ class LmntTTSService(InterruptibleTTSService): await self._disconnect_websocket() + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta. + + Args: + delta: A :class:`TTSSettings` (or ``LmntTTSSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. 
+ """ + changed = await super()._update_settings(delta) + + if changed: + await self._disconnect() + await self._connect() + + return changed + async def _connect_websocket(self): """Connect to LMNT websocket.""" try: @@ -201,11 +233,11 @@ class LmntTTSService(InterruptibleTTSService): # Build initial connection message init_msg = { "X-API-Key": self._api_key, - "voice": self._voice_id, - "format": self._settings["format"], + "voice": self._settings.voice, + "format": self._settings.format, "sample_rate": self.sample_rate, - "language": self._settings["language"], - "model": self.model_name, + "language": self._settings.language, + "model": self._settings.model, } # Connect to LMNT's websocket directly diff --git a/src/pipecat/services/minimax/tts.py b/src/pipecat/services/minimax/tts.py index 7284d9630..116d24a34 100644 --- a/src/pipecat/services/minimax/tts.py +++ b/src/pipecat/services/minimax/tts.py @@ -11,7 +11,8 @@ for streaming text-to-speech synthesis. """ import json -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, ClassVar, Dict, Mapping, Optional import aiohttp from loguru import logger @@ -25,6 +26,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -85,6 +87,69 @@ def language_to_minimax_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class MiniMaxTTSSettings(TTSSettings): + """Settings for MiniMax TTS service. + + Parameters: + stream: Whether to use streaming mode. + speed: Speech speed (range: 0.5 to 2.0). + volume: Speech volume (range: 0 to 10). + pitch: Pitch adjustment (range: -12 to 12). 
+ emotion: Emotional tone (options: "happy", "sad", "angry", "fearful", + "disgusted", "surprised", "calm", "fluent"). + text_normalization: Enable text normalization (Chinese/English). + latex_read: Enable LaTeX formula reading. + audio_bitrate: Audio bitrate in bps. + audio_format: Audio output format. + audio_channel: Number of audio channels. + audio_sample_rate: Audio sample rate in Hz. + language_boost: Language boost string for multilingual support. + """ + + stream: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + volume: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + emotion: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + text_normalization: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + latex_read: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_bitrate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_channel: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + audio_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_boost: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + + @classmethod + def from_mapping(cls, settings: Mapping[str, Any]) -> "MiniMaxTTSSettings": + """Construct settings from a plain dict, destructuring legacy nested dicts. + + Handles ``voice_setting`` (with ``vol`` → ``volume`` rename) and + ``audio_setting`` (with prefixed field mapping). 
+ """ + flat = dict(settings) + + voice = flat.pop("voice_setting", None) + if isinstance(voice, dict): + flat.setdefault("speed", voice.get("speed")) + flat.setdefault("volume", voice.get("vol")) + flat.setdefault("pitch", voice.get("pitch")) + flat.setdefault("emotion", voice.get("emotion")) + flat.setdefault("text_normalization", voice.get("text_normalization")) + flat.setdefault("latex_read", voice.get("latex_read")) + + audio = flat.pop("audio_setting", None) + if isinstance(audio, dict): + flat.setdefault("audio_bitrate", audio.get("bitrate")) + flat.setdefault("audio_format", audio.get("format")) + flat.setdefault("audio_channel", audio.get("channel")) + flat.setdefault("audio_sample_rate", audio.get("sample_rate")) + + return super().from_mapping(flat) + + class MiniMaxHttpTTSService(TTSService): """Text-to-speech service using MiniMax's T2A (Text-to-Audio) API. @@ -96,6 +161,8 @@ class MiniMaxHttpTTSService(TTSService): https://www.minimax.io/platform/document/T2A%20V2?key=66719005a427f0c8a5701643 """ + _settings: MiniMaxTTSSettings + class InputParams(BaseModel): """Configuration parameters for MiniMax TTS. @@ -160,41 +227,40 @@ class MiniMaxHttpTTSService(TTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or MiniMaxHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=MiniMaxTTSSettings( + model=model, + voice=voice_id, + language=None, + stream=True, + speed=params.speed, + volume=params.volume, + pitch=params.pitch, + language_boost=None, + emotion=None, + text_normalization=None, + latex_read=None, + audio_bitrate=128000, + audio_format="pcm", + audio_channel=1, + audio_sample_rate=0, + ), + **kwargs, + ) + self._api_key = api_key self._group_id = group_id self._base_url = f"{base_url}?GroupId={group_id}" self._session = aiohttp_session - self._model_name = model - self._voice_id = voice_id - - # Create voice settings - self._settings = { - "stream": True, - "voice_setting": { - "speed": params.speed, - "vol": params.volume, - "pitch": params.pitch, - }, - "audio_setting": { - "bitrate": 128000, - "format": "pcm", - "channel": 1, - }, - } - - # Set voice and model - self.set_voice(voice_id) - self.set_model_name(model) # Add language boost if provided if params.language: service_lang = self.language_to_service_language(params.language) if service_lang: - self._settings["language_boost"] = service_lang + self._settings.language_boost = service_lang # Add optional emotion if provided if params.emotion: @@ -210,7 +276,7 @@ class MiniMaxHttpTTSService(TTSService): "fluent", ] if params.emotion in supported_emotions: - self._settings["voice_setting"]["emotion"] = params.emotion + self._settings.emotion = params.emotion else: logger.warning( f"Unsupported emotion: {params.emotion}. Supported emotions: {supported_emotions}" @@ -226,15 +292,15 @@ class MiniMaxHttpTTSService(TTSService): "Parameter `english_normalization` is deprecated and will be removed in a future version. 
Use `text_normalization` instead.", DeprecationWarning, ) - self._settings["voice_setting"]["text_normalization"] = params.english_normalization + self._settings.text_normalization = params.english_normalization # Add text_normalization if provided (corrected parameter name) if params.text_normalization is not None: - self._settings["voice_setting"]["text_normalization"] = params.text_normalization + self._settings.text_normalization = params.text_normalization # Add latex_read if provided if params.latex_read is not None: - self._settings["voice_setting"]["latex_read"] = params.latex_read + self._settings.latex_read = params.latex_read def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -255,24 +321,6 @@ class MiniMaxHttpTTSService(TTSService): """ return language_to_minimax_language(language) - def set_model_name(self, model: str): - """Set the TTS model to use. - - Args: - model: The model name to use for synthesis. - """ - self._model_name = model - - def set_voice(self, voice: str): - """Set the voice to use. - - Args: - voice: The voice identifier to use for synthesis. - """ - self._voice_id = voice - if "voice_setting" in self._settings: - self._settings["voice_setting"]["voice_id"] = voice - async def start(self, frame: StartFrame): """Start the MiniMax TTS service. @@ -280,7 +328,7 @@ class MiniMaxHttpTTSService(TTSService): frame: The start frame containing initialization parameters. 
""" await super().start(frame) - self._settings["audio_setting"]["sample_rate"] = self.sample_rate + self._settings.audio_sample_rate = self.sample_rate logger.debug(f"MiniMax TTS initialized with sample_rate: {self.sample_rate}") @traced_tts @@ -302,10 +350,38 @@ class MiniMaxHttpTTSService(TTSService): "Authorization": f"Bearer {self._api_key}", } + # Build voice_setting dict for API + voice_setting = { + "voice_id": self._settings.voice, + "speed": self._settings.speed, + "vol": self._settings.volume, + "pitch": self._settings.pitch, + } + if self._settings.emotion is not None: + voice_setting["emotion"] = self._settings.emotion + if self._settings.text_normalization is not None: + voice_setting["text_normalization"] = self._settings.text_normalization + if self._settings.latex_read is not None: + voice_setting["latex_read"] = self._settings.latex_read + + # Build audio_setting dict for API + audio_setting = { + "bitrate": self._settings.audio_bitrate, + "format": self._settings.audio_format, + "channel": self._settings.audio_channel, + "sample_rate": self._settings.audio_sample_rate, + } + # Create payload from settings - payload = self._settings.copy() - payload["model"] = self._model_name - payload["text"] = text + payload = { + "stream": self._settings.stream, + "voice_setting": voice_setting, + "audio_setting": audio_setting, + "model": self._settings.model, + "text": text, + } + if self._settings.language_boost is not None: + payload["language_boost"] = self._settings.language_boost try: await self.start_ttfb_metrics() diff --git a/src/pipecat/services/mistral/llm.py b/src/pipecat/services/mistral/llm.py index 54361ef28..984ffb7dd 100644 --- a/src/pipecat/services/mistral/llm.py +++ b/src/pipecat/services/mistral/llm.py @@ -180,24 +180,24 @@ class MistralLLMService(OpenAILLMService): fixed_messages = self._apply_mistral_fixups(params_from_context["messages"]) params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, 
"messages": fixed_messages, "tools": params_from_context["tools"], "tool_choice": params_from_context["tool_choice"], - "frequency_penalty": self._settings["frequency_penalty"], - "presence_penalty": self._settings["presence_penalty"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], + "frequency_penalty": self._settings.frequency_penalty, + "presence_penalty": self._settings.presence_penalty, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, } # Handle Mistral-specific parameter mapping # Mistral uses "random_seed" instead of "seed" - if self._settings["seed"]: - params["random_seed"] = self._settings["seed"] + if self._settings.seed: + params["random_seed"] = self._settings.seed # Add any extra parameters - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params diff --git a/src/pipecat/services/moondream/vision.py b/src/pipecat/services/moondream/vision.py index 6a180b4cb..53b98b77a 100644 --- a/src/pipecat/services/moondream/vision.py +++ b/src/pipecat/services/moondream/vision.py @@ -11,6 +11,7 @@ for image analysis and description generation. """ import asyncio +from dataclasses import dataclass from typing import AsyncGenerator, Optional from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( VisionFullResponseStartFrame, VisionTextFrame, ) +from pipecat.services.settings import VisionSettings from pipecat.services.vision_service import VisionService try: @@ -60,6 +62,15 @@ def detect_device(): return torch.device("cpu"), torch.float32 +@dataclass +class MoondreamSettings(VisionSettings): + """Settings for the Moondream vision service. + + Parameters: + model: Moondream model identifier. + """ + + class MoondreamService(VisionService): """Moondream vision-language model service. 
@@ -79,9 +90,7 @@ class MoondreamService(VisionService): use_cpu: Whether to force CPU usage instead of hardware acceleration. **kwargs: Additional arguments passed to the parent VisionService. """ - super().__init__(**kwargs) - - self.set_model_name(model) + super().__init__(settings=MoondreamSettings(model=model), **kwargs) if not use_cpu: device, dtype = detect_device() diff --git a/src/pipecat/services/neuphonic/tts.py b/src/pipecat/services/neuphonic/tts.py index 24eb05bd3..63411c3eb 100644 --- a/src/pipecat/services/neuphonic/tts.py +++ b/src/pipecat/services/neuphonic/tts.py @@ -13,7 +13,8 @@ text-to-speech API for real-time audio synthesis. import asyncio import base64 import json -from typing import Any, AsyncGenerator, Mapping, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional import aiohttp from loguru import logger @@ -34,7 +35,8 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import InterruptibleTTSService, TTSService +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven +from pipecat.services.tts_service import InterruptibleTTSService, TextAggregationMode, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -72,6 +74,21 @@ def language_to_neuphonic_lang_code(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class NeuphonicTTSSettings(TTSSettings): + """Settings for Neuphonic TTS service. + + Parameters: + speed: Speech speed multiplier. Defaults to 1.0. + encoding: Audio encoding format. + sampling_rate: Audio sample rate. 
+ """ + + speed: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + encoding: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + sampling_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class NeuphonicTTSService(InterruptibleTTSService): """Neuphonic real-time text-to-speech service using WebSocket streaming. @@ -80,6 +97,8 @@ class NeuphonicTTSService(InterruptibleTTSService): parameters for high-quality speech generation. """ + _settings: NeuphonicTTSSettings + class InputParams(BaseModel): """Input parameters for Neuphonic TTS configuration. @@ -100,7 +119,8 @@ class NeuphonicTTSService(InterruptibleTTSService): sample_rate: Optional[int] = 22050, encoding: str = "pcm_linear", params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, **kwargs, ): """Initialize the Neuphonic TTS service. @@ -112,28 +132,35 @@ class NeuphonicTTSService(InterruptibleTTSService): sample_rate: Audio sample rate in Hz. Defaults to 22050. encoding: Audio encoding format. Defaults to "pcm_linear". params: Additional input parameters for TTS configuration. - aggregate_sentences: Whether to aggregate sentences within the TTSService. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. **kwargs: Additional arguments passed to parent InterruptibleTTSService. 
""" + params = params or NeuphonicTTSService.InputParams() + super().__init__( aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, push_stop_frames=True, stop_frame_timeout_s=2.0, sample_rate=sample_rate, + settings=NeuphonicTTSSettings( + model=None, + language=self.language_to_service_language(params.language), + speed=params.speed, + encoding=encoding, + sampling_rate=sample_rate, + voice=voice_id, + ), **kwargs, ) - params = params or NeuphonicTTSService.InputParams() - self._api_key = api_key self._url = url - self._settings = { - "lang_code": self.language_to_service_language(params.language), - "speed": params.speed, - "encoding": encoding, - "sampling_rate": sample_rate, - } - self.set_voice(voice_id) self._cumulative_time = 0 @@ -160,15 +187,14 @@ class NeuphonicTTSService(InterruptibleTTSService): """ return language_to_neuphonic_lang_code(language) - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect with new configuration.""" - if "voice_id" in settings: - self.set_voice(settings["voice_id"]) - - await super()._update_settings(settings) - await self._disconnect() - await self._connect() - logger.info(f"Switching TTS to settings: [{self._settings}]") + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect with new configuration.""" + changed = await super()._update_settings(delta) + if changed: + await self._disconnect() + await self._connect() + logger.info(f"Switching TTS to settings: [{self._settings}]") + return changed async def start(self, frame: StartFrame): """Start the Neuphonic TTS service. 
@@ -266,8 +292,11 @@ class NeuphonicTTSService(InterruptibleTTSService): logger.debug("Connecting to Neuphonic") tts_config = { - **self._settings, - "voice_id": self._voice_id, + "lang_code": self._settings.language, + "speed": self._settings.speed, + "encoding": self._settings.encoding, + "sampling_rate": self._settings.sampling_rate, + "voice_id": self._settings.voice, } query_params = [] @@ -275,7 +304,7 @@ class NeuphonicTTSService(InterruptibleTTSService): if value is not None: query_params.append(f"{key}={value}") - url = f"{self._url}/speak/{self._settings['lang_code']}" + url = f"{self._url}/speak/{self._settings.language}" if query_params: url += f"?{'&'.join(query_params)}" @@ -384,6 +413,8 @@ class NeuphonicHttpTTSService(TTSService): HTTP-based communication over WebSocket connections. """ + _settings: NeuphonicTTSSettings + class InputParams(BaseModel): """Input parameters for Neuphonic HTTP TTS configuration. @@ -419,17 +450,24 @@ class NeuphonicHttpTTSService(TTSService): params: Additional input parameters for TTS configuration. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or NeuphonicHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=NeuphonicTTSSettings( + model=None, + voice=voice_id, + language=self.language_to_service_language(params.language) or "en", + speed=params.speed, + encoding=encoding, + sampling_rate=sample_rate, + ), + **kwargs, + ) + self._api_key = api_key self._session = aiohttp_session self._base_url = url.rstrip("/") - self._lang_code = self.language_to_service_language(params.language) or "en" - self._speed = params.speed - self._encoding = encoding - self.set_voice(voice_id) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -513,7 +551,7 @@ class NeuphonicHttpTTSService(TTSService): """ logger.debug(f"Generating TTS: [{text}]") - url = f"{self._base_url}/sse/speak/{self._lang_code}" + url = f"{self._base_url}/sse/speak/{self._settings.language}" headers = { "X-API-KEY": self._api_key, @@ -522,14 +560,14 @@ class NeuphonicHttpTTSService(TTSService): payload = { "text": text, - "lang_code": self._lang_code, - "encoding": self._encoding, + "lang_code": self._settings.language, + "encoding": self._settings.encoding, "sampling_rate": self.sample_rate, - "speed": self._speed, + "speed": self._settings.speed, } - if self._voice_id: - payload["voice_id"] = self._voice_id + if self._settings.voice: + payload["voice_id"] = self._settings.voice try: await self.start_ttfb_metrics() diff --git a/src/pipecat/services/nvidia/stt.py b/src/pipecat/services/nvidia/stt.py index 8eb6d7bb5..950515096 100644 --- a/src/pipecat/services/nvidia/stt.py +++ b/src/pipecat/services/nvidia/stt.py @@ -8,7 +8,8 @@ import asyncio from concurrent.futures import CancelledError as FuturesCancelledError -from typing import AsyncGenerator, List, Mapping, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, List, Mapping, Optional from loguru import logger from pydantic import BaseModel @@ -22,6 +23,7 @@ from pipecat.frames.frames import ( StartFrame, TranscriptionFrame, ) +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import NVIDIA_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService, STTService from pipecat.transcriptions.language import Language, resolve_language @@ -89,6 +91,32 @@ def language_to_nvidia_riva_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class NvidiaSTTSettings(STTSettings): + """Settings for the NVIDIA Riva streaming STT service.""" + + pass + + +@dataclass +class NvidiaSegmentedSTTSettings(STTSettings): 
+ """Settings for the NVIDIA Riva segmented STT service. + + Parameters: + profanity_filter: Whether to filter profanity from results. + automatic_punctuation: Whether to add automatic punctuation. + verbatim_transcripts: Whether to return verbatim transcripts. + boosted_lm_words: List of words to boost in language model. + boosted_lm_score: Score boost for specified words. + """ + + profanity_filter: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + automatic_punctuation: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + verbatim_transcripts: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + boosted_lm_words: List[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + boosted_lm_score: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class NvidiaSTTService(STTService): """Real-time speech-to-text service using NVIDIA Riva streaming ASR. @@ -97,6 +125,8 @@ class NvidiaSTTService(STTService): processing for low-latency applications. """ + _settings: NvidiaSTTSettings + class InputParams(BaseModel): """Configuration parameters for NVIDIA Riva STT service. @@ -134,19 +164,21 @@ class NvidiaSTTService(STTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to STTService. 
""" - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - params = params or NvidiaSTTService.InputParams() + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=NvidiaSTTSettings( + model=model_function_map.get("model_name"), + language=params.language, + ), + **kwargs, + ) + self._server = server self._api_key = api_key self._use_ssl = use_ssl - self._profanity_filter = False - self._automatic_punctuation = True - self._no_verbatim_transcripts = False - self._language_code = params.language - self._boosted_lm_words = None - self._boosted_lm_score = 4.0 self._start_history = -1 self._start_threshold = -1.0 self._stop_history = -1 @@ -156,17 +188,6 @@ class NvidiaSTTService(STTService): self._custom_configuration = "" self._function_id = model_function_map.get("function_id") - self._settings = { - "language": str(params.language), - "profanity_filter": self._profanity_filter, - "automatic_punctuation": self._automatic_punctuation, - "verbatim_transcripts": not self._no_verbatim_transcripts, - "boosted_lm_words": self._boosted_lm_words, - "boosted_lm_score": self._boosted_lm_score, - } - - self.set_model_name(model_function_map.get("model_name")) - self._asr_service = None self._queue = None self._config = None @@ -186,22 +207,18 @@ class NvidiaSTTService(STTService): config = riva.client.StreamingRecognitionConfig( config=riva.client.RecognitionConfig( encoding=riva.client.AudioEncoding.LINEAR_PCM, - language_code=self._language_code, + language_code=self._settings.language, model="", max_alternatives=1, - profanity_filter=self._profanity_filter, - enable_automatic_punctuation=self._automatic_punctuation, - verbatim_transcripts=not self._no_verbatim_transcripts, + profanity_filter=False, + enable_automatic_punctuation=True, + verbatim_transcripts=True, sample_rate_hertz=self.sample_rate, audio_channel_count=1, ), interim_results=True, ) - riva.client.add_word_boosting_to_config( - config, 
self._boosted_lm_words, self._boosted_lm_score - ) - riva.client.add_endpoint_parameters_to_config( config, self._start_history, @@ -226,18 +243,31 @@ class NvidiaSTTService(STTService): async def set_model(self, model: str): """Set the ASR model for transcription. + .. deprecated:: 0.0.104 + Model cannot be changed after initialization for NVIDIA Riva streaming STT. + Set model and function id in the constructor instead, e.g.:: + + NvidiaSTTService( + api_key=..., + model_function_map={"function_id": "", "model_name": ""}, + ) + Args: model: Model name to set. - - Note: - Model cannot be changed after initialization. Use model_function_map - parameter in constructor instead. """ - logger.warning(f"Cannot set model after initialization. Set model and function id like so:") - example = {"function_id": "", "model_name": ""} - logger.warning( - f"{self.__class__.__name__}(api_key=, model_function_map={example})" - ) + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_model' is deprecated. Model cannot be changed after initialization" + " for NVIDIA Riva streaming STT. Set model and function id in the" + " constructor instead, e.g.:" + " NvidiaSTTService(api_key=..., model_function_map=" + "{'function_id': '', 'model_name': ''})", + DeprecationWarning, + stacklevel=2, + ) async def start(self, frame: StartFrame): """Start the NVIDIA Riva STT service and initialize streaming configuration. @@ -254,7 +284,7 @@ class NvidiaSTTService(STTService): if not self._thread_task: self._thread_task = self.create_task(self._thread_task_handler()) - logger.debug(f"Initialized NvidiaSTTService with model: {self.model_name}") + logger.debug(f"Initialized NvidiaSTTService with model: {self._settings.model}") async def stop(self, frame: EndFrame): """Stop the NVIDIA Riva STT service and clean up resources. 
@@ -318,14 +348,14 @@ class NvidiaSTTService(STTService): transcript, self._user_id, time_now_iso8601(), - self._language_code, + self._settings.language, result=result, ) ) await self._handle_transcription( transcript=transcript, is_final=result.is_final, - language=self._language_code, + language=self._settings.language, ) else: await self.push_frame( @@ -333,7 +363,7 @@ class NvidiaSTTService(STTService): transcript, self._user_id, time_now_iso8601(), - self._language_code, + self._settings.language, result=result, ) ) @@ -386,6 +416,8 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): audio buffering and speech detection. """ + _settings: NvidiaSegmentedSTTSettings + class InputParams(BaseModel): """Configuration parameters for NVIDIA Riva segmented STT service. @@ -433,30 +465,29 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to SegmentedSTTService """ - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - params = params or NvidiaSegmentedSTTService.InputParams() - # Set model name - self.set_model_name(model_function_map.get("model_name")) + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=NvidiaSegmentedSTTSettings( + model=model_function_map.get("model_name"), + language=self.language_to_service_language(params.language or Language.EN_US) + or "en-US", + profanity_filter=params.profanity_filter, + automatic_punctuation=params.automatic_punctuation, + verbatim_transcripts=params.verbatim_transcripts, + boosted_lm_words=params.boosted_lm_words, + boosted_lm_score=params.boosted_lm_score, + ), + **kwargs, + ) # Initialize NVIDIA Riva settings self._api_key = api_key self._server = server self._use_ssl = use_ssl self._function_id = model_function_map.get("function_id") - self._model_name = model_function_map.get("model_name") - - # Store the language 
as a Language enum and as a string - self._language_enum = params.language or Language.EN_US - self._language = self.language_to_service_language(self._language_enum) or "en-US" - - # Configure transcription parameters - self._profanity_filter = params.profanity_filter - self._automatic_punctuation = params.automatic_punctuation - self._verbatim_transcripts = params.verbatim_transcripts - self._boosted_lm_words = params.boosted_lm_words - self._boosted_lm_score = params.boosted_lm_score # Voice activity detection thresholds (use NVIDIA Riva defaults) self._start_history = -1 @@ -467,10 +498,8 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): self._stop_threshold_eou = -1.0 self._custom_configuration = "" - # Create NVIDIA Riva client self._config = None self._asr_service = None - self._settings = {"language": self._language_enum} def language_to_service_language(self, language: Language) -> Optional[str]: """Convert pipecat Language enum to NVIDIA Riva's language code. @@ -498,21 +527,25 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): auth = riva.client.Auth(None, self._use_ssl, self._server, metadata) self._asr_service = riva.client.ASRService(auth) + def _get_language_code(self) -> str: + """Get the current NVIDIA Riva language code string.""" + return self._settings.language or "en-US" + def _create_recognition_config(self): """Create the NVIDIA Riva ASR recognition configuration.""" # Create base configuration config = riva.client.RecognitionConfig( - language_code=self._language, # Now using the string, not a tuple + language_code=self._get_language_code(), max_alternatives=1, - profanity_filter=self._profanity_filter, - enable_automatic_punctuation=self._automatic_punctuation, - verbatim_transcripts=self._verbatim_transcripts, + profanity_filter=self._settings.profanity_filter, + enable_automatic_punctuation=self._settings.automatic_punctuation, + verbatim_transcripts=self._settings.verbatim_transcripts, ) # Add word boosting if specified - if 
self._boosted_lm_words: + if self._settings.boosted_lm_words: riva.client.add_word_boosting_to_config( - config, self._boosted_lm_words, self._boosted_lm_score + config, self._settings.boosted_lm_words, self._settings.boosted_lm_score ) # Add voice activity detection parameters @@ -540,22 +573,6 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): """ return True - async def set_model(self, model: str): - """Set the ASR model for transcription. - - Args: - model: Model name to set. - - Note: - Model cannot be changed after initialization. Use model_function_map - parameter in constructor instead. - """ - logger.warning(f"Cannot set model after initialization. Set model and function id like so:") - example = {"function_id": "", "model_name": ""} - logger.warning( - f"{self.__class__.__name__}(api_key=, model_function_map={example})" - ) - async def start(self, frame: StartFrame): """Initialize the service when the pipeline starts. @@ -565,22 +582,23 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): await super().start(frame) self._initialize_client() self._config = self._create_recognition_config() - logger.debug(f"Initialized NvidiaSegmentedSTTService with model: {self.model_name}") + logger.debug(f"Initialized NvidiaSegmentedSTTService with model: {self._settings.model}") - async def set_language(self, language: Language): - """Set the language for the STT service. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and sync internal state. Args: - language: Target language for transcription. - """ - logger.info(f"Switching STT language to: [{language}]") - self._language_enum = language - self._language = self.language_to_service_language(language) or "en-US" - self._settings["language"] = language + delta: A :class:`STTSettings` (or ``NvidiaSegmentedSTTSettings``) delta. 
- # Update configuration with new language - if self._config: - self._config.language_code = self._language + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(delta) + + if changed: + self._config = self._create_recognition_config() + + return changed @traced_stt async def _handle_transcription( @@ -633,11 +651,11 @@ class NvidiaSegmentedSTTService(SegmentedSTTService): text, self._user_id, time_now_iso8601(), - self._language_enum, + self._settings.language, ) transcription_found = True - await self._handle_transcription(text, True, self._language_enum) + await self._handle_transcription(text, True, self._settings.language) if not transcription_found: logger.debug(f"{self}: No transcription results found in NVIDIA Riva response") diff --git a/src/pipecat/services/nvidia/tts.py b/src/pipecat/services/nvidia/tts.py index 6bac54e3a..6785e9631 100644 --- a/src/pipecat/services/nvidia/tts.py +++ b/src/pipecat/services/nvidia/tts.py @@ -12,7 +12,8 @@ gRPC API for high-quality speech synthesis. import asyncio import os -from typing import AsyncGenerator, AsyncIterator, Generator, Mapping, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, AsyncIterator, Generator, Mapping, Optional from pipecat.utils.tracing.service_decorators import traced_tts @@ -30,6 +31,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language @@ -42,6 +44,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class NvidiaTTSSettings(TTSSettings): + """Settings for NVIDIA Riva TTS service. + + Parameters: + quality: Audio quality setting (0-100). 
+ """ + + quality: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class NvidiaTTSService(TTSService): """NVIDIA Riva text-to-speech service. @@ -50,6 +63,8 @@ class NvidiaTTSService(TTSService): configurable quality settings. """ + _settings: NvidiaTTSSettings + class InputParams(BaseModel): """Input parameters for Riva TTS configuration. @@ -88,36 +103,66 @@ class NvidiaTTSService(TTSService): use_ssl: Whether to use SSL for the NVIDIA Riva server. Defaults to True. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or NvidiaTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=NvidiaTTSSettings( + model=model_function_map.get("model_name"), + voice=voice_id, + language=params.language, + quality=params.quality, + ), + **kwargs, + ) + self._server = server self._api_key = api_key - self._voice_id = voice_id - self._language_code = params.language - self._quality = params.quality self._function_id = model_function_map.get("function_id") self._use_ssl = use_ssl - self.set_model_name(model_function_map.get("model_name")) - self.set_voice(voice_id) self._service = None self._config = None async def set_model(self, model: str): - """Attempt to set the TTS model. + """Set the TTS model. - Note: Model cannot be changed after initialization for Riva service. + .. deprecated:: 0.0.104 + Model cannot be changed after initialization for NVIDIA Riva TTS. + Set model and function id in the constructor instead, e.g.:: + + NvidiaTTSService( + api_key=..., + model_function_map={"function_id": "", "model_name": ""}, + ) Args: - model: The model name to set (operation not supported). + model: The model name to set. """ - logger.warning(f"Cannot set model after initialization. 
Set model and function id like so:") - example = {"function_id": "", "model_name": ""} - logger.warning( - f"{self.__class__.__name__}(api_key=, model_function_map={example})" - ) + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_model' is deprecated. Model cannot be changed after initialization" + " for NVIDIA Riva TTS. Set model and function id in the constructor" + " instead, e.g.: NvidiaTTSService(api_key=..., model_function_map=" + "{'function_id': '', 'model_name': ''})", + DeprecationWarning, + stacklevel=2, + ) + + async def _update_settings(self, delta: NvidiaTTSSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(delta) + if not changed: + return changed + # TODO: reconnect gRPC client to apply changed settings. + self._warn_unhandled_updated_settings(changed) + return changed def _initialize_client(self): if self._service is not None: @@ -150,7 +195,7 @@ class NvidiaTTSService(TTSService): await super().start(frame) self._initialize_client() self._config = self._create_synthesis_config() - logger.debug(f"Initialized NvidiaTTSService with model: {self.model_name}") + logger.debug(f"Initialized NvidiaTTSService with model: {self._settings.model}") @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -167,11 +212,11 @@ class NvidiaTTSService(TTSService): def read_audio_responses() -> Generator[rtts.SynthesizeSpeechResponse, None, None]: responses = self._service.synthesize_online( text, - self._voice_id, - self._language_code, + self._settings.voice, + self._settings.language, sample_rate_hz=self.sample_rate, zero_shot_audio_prompt_file=None, - zero_shot_quality=self._quality, + zero_shot_quality=self._settings.quality, custom_dictionary={}, ) return responses diff --git a/src/pipecat/services/openai/base_llm.py 
b/src/pipecat/services/openai/base_llm.py index ebe9eda91..40a2672f8 100644 --- a/src/pipecat/services/openai/base_llm.py +++ b/src/pipecat/services/openai/base_llm.py @@ -10,7 +10,8 @@ import asyncio import base64 import json from contextlib import asynccontextmanager -from typing import Any, Dict, List, Mapping, Optional +from dataclasses import dataclass, field +from typing import Any, ClassVar, Dict, List, Mapping, Optional import httpx from loguru import logger @@ -32,7 +33,6 @@ from pipecat.frames.frames import ( LLMFullResponseStartFrame, LLMMessagesFrame, LLMTextFrame, - LLMUpdateSettingsFrame, ) from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext @@ -42,9 +42,24 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN as _NOT_GIVEN +from pipecat.services.settings import LLMSettings, _NotGiven from pipecat.utils.tracing.service_decorators import traced_llm +@dataclass +class OpenAILLMSettings(LLMSettings): + """Settings for OpenAI-compatible LLM services. + + Parameters: + max_completion_tokens: Maximum completion tokens to generate. + service_tier: Service tier to use (e.g., "auto", "flex", "priority"). + """ + + max_completion_tokens: int | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) + service_tier: str | _NotGiven = field(default_factory=lambda: _NOT_GIVEN) + + class BaseOpenAILLMService(LLMService): """Base class for all services that use the AsyncOpenAI client. @@ -55,6 +70,8 @@ class BaseOpenAILLMService(LLMService): configurations. """ + _settings: OpenAILLMSettings + class InputParams(BaseModel): """Input parameters for OpenAI model configuration. @@ -116,24 +133,28 @@ class BaseOpenAILLMService(LLMService): retry_on_timeout: Whether to retry the request once if it times out. 
**kwargs: Additional arguments passed to the parent LLMService. """ - super().__init__(**kwargs) - params = params or BaseOpenAILLMService.InputParams() - self._settings = { - "frequency_penalty": params.frequency_penalty, - "presence_penalty": params.presence_penalty, - "seed": params.seed, - "temperature": params.temperature, - "top_p": params.top_p, - "max_tokens": params.max_tokens, - "max_completion_tokens": params.max_completion_tokens, - "service_tier": params.service_tier, - "extra": params.extra if isinstance(params.extra, dict) else {}, - } + super().__init__( + settings=OpenAILLMSettings( + model=model, + frequency_penalty=params.frequency_penalty, + presence_penalty=params.presence_penalty, + seed=params.seed, + temperature=params.temperature, + top_p=params.top_p, + top_k=None, + max_tokens=params.max_tokens, + max_completion_tokens=params.max_completion_tokens, + service_tier=params.service_tier, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + extra=params.extra if isinstance(params.extra, dict) else {}, + ), + **kwargs, + ) self._retry_timeout_secs = retry_timeout_secs self._retry_on_timeout = retry_on_timeout - self.set_model_name(model) self._full_model_name: str = "" self._client = self.create_client( api_key=api_key, @@ -247,23 +268,23 @@ class BaseOpenAILLMService(LLMService): Dictionary of parameters for the chat completion request. 
""" params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "stream_options": {"include_usage": True}, - "frequency_penalty": self._settings["frequency_penalty"], - "presence_penalty": self._settings["presence_penalty"], - "seed": self._settings["seed"], - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], - "max_completion_tokens": self._settings["max_completion_tokens"], - "service_tier": self._settings["service_tier"], + "frequency_penalty": self._settings.frequency_penalty, + "presence_penalty": self._settings.presence_penalty, + "seed": self._settings.seed, + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, + "max_completion_tokens": self._settings.max_completion_tokens, + "service_tier": self._settings.service_tier, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params async def run_inference( @@ -517,8 +538,6 @@ class BaseOpenAILLMService(LLMService): # NOTE: LLMMessagesFrame is deprecated, so we don't support the newer universal # LLMContext with it context = OpenAILLMContext.from_messages(frame.messages) - elif isinstance(frame, LLMUpdateSettingsFrame): - await self._update_settings(frame.settings) else: await self.push_frame(frame, direction) diff --git a/src/pipecat/services/openai/image.py b/src/pipecat/services/openai/image.py index d6ca51ae7..f35a5ded8 100644 --- a/src/pipecat/services/openai/image.py +++ b/src/pipecat/services/openai/image.py @@ -11,6 +11,7 @@ for creating images from text prompts. 
""" import io +from dataclasses import dataclass from typing import AsyncGenerator, Literal, Optional import aiohttp @@ -24,6 +25,16 @@ from pipecat.frames.frames import ( URLImageRawFrame, ) from pipecat.services.image_service import ImageGenService +from pipecat.services.settings import ImageGenSettings + + +@dataclass +class OpenAIImageGenSettings(ImageGenSettings): + """Settings for the OpenAI image generation service. + + Parameters: + model: DALL-E model identifier. + """ class OpenAIImageGenService(ImageGenService): @@ -52,8 +63,7 @@ class OpenAIImageGenService(ImageGenService): image_size: Target size for generated images. model: DALL-E model to use for generation. Defaults to "dall-e-3". """ - super().__init__() - self.set_model_name(model) + super().__init__(settings=OpenAIImageGenSettings(model=model)) self._image_size = image_size self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) self._aiohttp_session = aiohttp_session @@ -70,7 +80,7 @@ class OpenAIImageGenService(ImageGenService): logger.debug(f"Generating image from prompt: {prompt}") image = await self._client.images.generate( - prompt=prompt, model=self.model_name, n=1, size=self._image_size + prompt=prompt, model=self._settings.model, n=1, size=self._image_size ) image_url = image.data[0].url diff --git a/src/pipecat/services/openai/realtime/llm.py b/src/pipecat/services/openai/realtime/llm.py index cf249408c..07b6aa82b 100644 --- a/src/pipecat/services/openai/realtime/llm.py +++ b/src/pipecat/services/openai/realtime/llm.py @@ -10,8 +10,8 @@ import base64 import io import json import time -from dataclasses import dataclass -from typing import Optional +from dataclasses import dataclass, field +from typing import Any, Optional from loguru import logger from PIL import Image @@ -59,6 +59,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, 
LLMService +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt @@ -90,6 +91,19 @@ class CurrentAudioResponse: total_size: int = 0 +@dataclass +class OpenAIRealtimeLLMSettings(LLMSettings): + """Settings for OpenAI Realtime LLM services. + + Parameters: + session_properties: OpenAI Realtime session configuration. + """ + + session_properties: events.SessionProperties | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + class OpenAIRealtimeLLMService(LLMService): """OpenAI Realtime LLM service providing real-time audio and text communication. @@ -98,6 +112,8 @@ class OpenAIRealtimeLLMService(LLMService): management, and real-time transcription. """ + _settings: OpenAIRealtimeLLMSettings + # Overriding the default adapter to use the OpenAIRealtimeLLMAdapter one. adapter_class = OpenAIRealtimeLLMAdapter @@ -105,7 +121,7 @@ class OpenAIRealtimeLLMService(LLMService): self, *, api_key: str, - model: str = "gpt-realtime", + model: str = "gpt-realtime-1.5", base_url: str = "wss://api.openai.com/v1/realtime", session_properties: Optional[events.SessionProperties] = None, start_audio_paused: bool = False, @@ -155,16 +171,26 @@ class OpenAIRealtimeLLMService(LLMService): # Build WebSocket URL with model query parameter # Source: https://platform.openai.com/docs/guides/realtime-websocket full_url = f"{base_url}?model={model}" - super().__init__(base_url=full_url, **kwargs) + super().__init__( + base_url=full_url, + settings=OpenAIRealtimeLLMSettings( + model=model, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + session_properties=session_properties or events.SessionProperties(), + ), + **kwargs, + ) 
self.api_key = api_key self.base_url = full_url - self.set_model_name(model) - - # Initialize session_properties - self._session_properties: events.SessionProperties = ( - session_properties or events.SessionProperties() - ) self._audio_input_paused = start_audio_paused self._video_input_paused = start_video_paused self._video_frame_detail = video_frame_detail @@ -227,12 +253,12 @@ class OpenAIRealtimeLLMService(LLMService): def _is_modality_enabled(self, modality: str) -> bool: """Check if a specific modality is enabled, "text" or "audio".""" - modalities = self._session_properties.output_modalities or ["audio", "text"] + modalities = self._settings.session_properties.output_modalities or ["audio", "text"] return modality in modalities def _get_enabled_modalities(self) -> list[str]: """Get the list of enabled modalities.""" - modalities = self._session_properties.output_modalities or ["audio", "text"] + modalities = self._settings.session_properties.output_modalities or ["audio", "text"] # API only supports single modality responses: either ["text"] or ["audio"] if "audio" in modalities: return ["audio"] @@ -305,9 +331,9 @@ class OpenAIRealtimeLLMService(LLMService): # None and False are different. Check for False. None means we're using OpenAI's # built-in turn detection defaults. turn_detection_disabled = ( - self._session_properties.audio - and self._session_properties.audio.input - and self._session_properties.audio.input.turn_detection is False + self._settings.session_properties.audio + and self._settings.session_properties.audio.input + and self._settings.session_properties.audio.input.turn_detection is False ) if turn_detection_disabled: await self.send_client_event(events.InputAudioBufferClearEvent()) @@ -327,9 +353,9 @@ class OpenAIRealtimeLLMService(LLMService): # None and False are different. Check for False. None means we're using OpenAI's # built-in turn detection defaults. 
turn_detection_disabled = ( - self._session_properties.audio - and self._session_properties.audio.input - and self._session_properties.audio.input.turn_detection is False + self._settings.session_properties.audio + and self._settings.session_properties.audio.input + and self._settings.session_properties.audio.input.turn_detection is False ) if turn_detection_disabled: await self.send_client_event(events.InputAudioBufferCommitEvent()) @@ -397,6 +423,16 @@ class OpenAIRealtimeLLMService(LLMService): frame: The frame to process. direction: The direction of frame flow in the pipeline. """ + # Backward-compatible dict path: frame.settings contains SessionProperties + # fields, not our Settings fields, so we construct SessionProperties + # directly. The frame.delta path falls through to super, which calls + # _update_settings → our override handles the rest. + if isinstance(frame, LLMUpdateSettingsFrame) and frame.delta is None: + self._settings.session_properties = events.SessionProperties(**frame.settings) + await self._send_session_update() + await self.push_frame(frame, direction) + return + await super().process_frame(frame, direction) if isinstance(frame, TranscriptionFrame): @@ -424,11 +460,8 @@ class OpenAIRealtimeLLMService(LLMService): await self._handle_bot_stopped_speaking() elif isinstance(frame, LLMMessagesAppendFrame): await self._handle_messages_append(frame) - elif isinstance(frame, LLMUpdateSettingsFrame): - self._session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() elif isinstance(frame, LLMSetToolsFrame): - await self._update_settings() + await self._send_session_update() await self.push_frame(frame, direction) @@ -513,8 +546,16 @@ class OpenAIRealtimeLLMService(LLMService): # treat a send-side error as fatal. 
await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings(self): - settings = self._session_properties + async def _update_settings(self, delta): + """Apply a settings delta, sending a session update if needed.""" + changed = await super()._update_settings(delta) + if "session_properties" in changed: + await self._send_session_update() + self._warn_unhandled_updated_settings(changed.keys() - {"session_properties"}) + return changed + + async def _send_session_update(self): + settings = self._settings.session_properties adapter: OpenAIRealtimeLLMAdapter = self.get_llm_adapter() if self._context: @@ -577,15 +618,18 @@ class OpenAIRealtimeLLMService(LLMService): await self._handle_evt_function_call_arguments_done(evt) elif evt.type == "error": if not await self._maybe_handle_evt_retrieve_conversation_item_error(evt): - await self._handle_evt_error(evt) - # errors are fatal, so exit the receive loop - return + if evt.error.code == "response_cancel_not_active": + logger.debug(f"{self} {evt.error.message}") + else: + await self._handle_evt_error(evt) + # errors are fatal, so exit the receive loop + return @traced_openai_realtime(operation="llm_setup") async def _handle_evt_session_created(self, evt): # session.created is received right after connecting. Send a message # to configure the session properties. 
- await self._update_settings() + await self._send_session_update() async def _handle_evt_session_updated(self, evt): # If this is our first context frame, run the LLM @@ -795,7 +839,7 @@ class OpenAIRealtimeLLMService(LLMService): async def _handle_evt_speech_started(self, evt): await self._truncate_current_audio_response() await self.broadcast_frame(UserStartedSpeakingFrame) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _handle_evt_speech_stopped(self, evt): await self.start_ttfb_metrics() @@ -868,7 +912,7 @@ class OpenAIRealtimeLLMService(LLMService): await self.send_client_event(evt) # Send new settings if needed - await self._update_settings() + await self._send_session_update() # We're done configuring the LLM for this session self._llm_needs_conversation_setup = False diff --git a/src/pipecat/services/openai/stt.py b/src/pipecat/services/openai/stt.py index 4dd16be6e..32895f8b5 100644 --- a/src/pipecat/services/openai/stt.py +++ b/src/pipecat/services/openai/stt.py @@ -16,7 +16,8 @@ Provides two STT services: import base64 import json -from typing import AsyncGenerator, Literal, Optional, Union +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Literal, Optional, Union from loguru import logger @@ -34,6 +35,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import OPENAI_REALTIME_TTFS_P99, OPENAI_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.services.whisper.base_stt import BaseWhisperSTTService, Transcription @@ -98,24 +100,24 @@ class OpenAISTTService(BaseWhisperSTTService): # Build kwargs dict with only set parameters kwargs = { "file": ("audio.wav", audio, "audio/wav"), - "model": self.model_name, - "language": self._language, + "model": 
self._settings.model, + "language": self._settings.language, } if self._include_prob_metrics: # GPT-4o-transcribe models only support logprobs (not verbose_json) - if self.model_name in ("gpt-4o-transcribe", "gpt-4o-mini-transcribe"): + if self._settings.model in ("gpt-4o-transcribe", "gpt-4o-mini-transcribe"): kwargs["response_format"] = "json" kwargs["include"] = ["logprobs"] else: # Whisper models support verbose_json kwargs["response_format"] = "verbose_json" - if self._prompt is not None: - kwargs["prompt"] = self._prompt + if self._settings.prompt is not None: + kwargs["prompt"] = self._settings.prompt - if self._temperature is not None: - kwargs["temperature"] = self._temperature + if self._settings.temperature is not None: + kwargs["temperature"] = self._settings.temperature return await self._client.audio.transcriptions.create(**kwargs) @@ -123,6 +125,17 @@ class OpenAISTTService(BaseWhisperSTTService): _OPENAI_SAMPLE_RATE = 24000 +@dataclass +class OpenAIRealtimeSTTSettings(STTSettings): + """Settings for the OpenAI Realtime STT service. + + Parameters: + prompt: Optional prompt text to guide transcription style. + """ + + prompt: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class OpenAIRealtimeSTTService(WebsocketSTTService): """OpenAI Realtime Speech-to-Text service using WebSocket transcription sessions. 
@@ -156,6 +169,8 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): ) """ + _settings: OpenAIRealtimeSTTSettings + def __init__( self, *, @@ -206,14 +221,17 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): super().__init__( ttfs_p99_latency=ttfs_p99_latency, + settings=OpenAIRealtimeSTTSettings( + model=model, + language=language, + prompt=prompt, + ), **kwargs, ) self._api_key = api_key self._base_url = base_url - self.set_model_name(model) - self._language_code = self._language_to_code(language) if language else None self._prompt = prompt self._turn_detection = turn_detection self._noise_reduction = noise_reduction @@ -248,19 +266,31 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): """ return True - async def set_language(self, language: Language): - """Set the language for speech recognition. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta and send session update if needed. - If the session is already active, sends an updated configuration - to the server. + Keeps ``_language_code`` and ``_prompt`` in sync with settings + and sends a ``session.update`` to the server when the session is active. Args: - language: The language to use for speech recognition. + delta: A :class:`STTSettings` (or ``OpenAIRealtimeSTTSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. """ - self._language_code = self._language_to_code(language) + changed = await super()._update_settings(delta) + + if not changed: + return changed + + if "prompt" in changed and isinstance(self._settings, OpenAIRealtimeSTTSettings): + self._prompt = self._settings.prompt + if self._session_ready: await self._send_session_update() + return changed + async def start(self, frame: StartFrame): """Start the service and establish WebSocket connection. 
@@ -405,10 +435,13 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): async def _send_session_update(self): """Send ``session.update`` to configure the transcription session.""" - transcription: dict = {"model": self.model_name} + transcription: dict = {"model": self._settings.model} - if self._language_code: - transcription["language"] = self._language_code + language_code = ( + self._language_to_code(self._settings.language) if self._settings.language else None + ) + if language_code: + transcription["language"] = language_code if self._prompt: transcription["prompt"] = self._prompt @@ -606,7 +639,7 @@ class OpenAIRealtimeSTTService(WebsocketSTTService): logger.debug("Server VAD: speech started") await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self.start_processing_metrics() async def _handle_speech_stopped(self, evt: dict): diff --git a/src/pipecat/services/openai/tts.py b/src/pipecat/services/openai/tts.py index f59f0b31b..f95d79134 100644 --- a/src/pipecat/services/openai/tts.py +++ b/src/pipecat/services/openai/tts.py @@ -10,6 +10,7 @@ This module provides integration with OpenAI's text-to-speech API for generating high-quality synthetic speech from text input. """ +from dataclasses import dataclass, field from typing import AsyncGenerator, Dict, Literal, Optional from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -60,6 +62,19 @@ VALID_VOICES: Dict[str, ValidVoice] = { } +@dataclass +class OpenAITTSSettings(TTSSettings): + """Settings for OpenAI TTS service. + + Parameters: + instructions: Instructions to guide voice synthesis behavior. 
+ speed: Voice speed control (0.25 to 4.0, default 1.0). + """ + + instructions: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speed: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class OpenAITTSService(TTSService): """OpenAI Text-to-Speech service that generates audio from text. @@ -68,6 +83,8 @@ class OpenAITTSService(TTSService): speech synthesis with streaming audio output. """ + _settings: OpenAITTSSettings + OPENAI_SAMPLE_RATE = 24000 # OpenAI TTS always outputs at 24kHz class InputParams(BaseModel): @@ -115,12 +132,6 @@ class OpenAITTSService(TTSService): f"OpenAI TTS only supports {self.OPENAI_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." ) - super().__init__(sample_rate=sample_rate, **kwargs) - - self.set_model_name(model) - self.set_voice(voice) - self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) - if instructions or speed: import warnings @@ -132,10 +143,18 @@ class OpenAITTSService(TTSService): stacklevel=2, ) - self._settings = { - "instructions": params.instructions if params else instructions, - "speed": params.speed if params else speed, - } + super().__init__( + sample_rate=sample_rate, + settings=OpenAITTSSettings( + model=model, + voice=voice, + instructions=params.instructions if params else instructions, + speed=params.speed if params else speed, + ), + **kwargs, + ) + + self._client = AsyncOpenAI(api_key=api_key, base_url=base_url) def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -145,15 +164,6 @@ class OpenAITTSService(TTSService): """ return True - async def set_model(self, model: str): - """Set the TTS model to use. - - Args: - model: The model name to use for text-to-speech synthesis. - """ - logger.info(f"Switching TTS model to: [{model}]") - self.set_model_name(model) - async def start(self, frame: StartFrame): """Start the OpenAI TTS service. 
@@ -185,16 +195,16 @@ class OpenAITTSService(TTSService): # Setup API parameters create_params = { "input": text, - "model": self.model_name, - "voice": VALID_VOICES[self._voice_id], + "model": self._settings.model, + "voice": VALID_VOICES[self._settings.voice], "response_format": "pcm", } - if self._settings["instructions"]: - create_params["instructions"] = self._settings["instructions"] + if self._settings.instructions: + create_params["instructions"] = self._settings.instructions - if self._settings["speed"]: - create_params["speed"] = self._settings["speed"] + if self._settings.speed: + create_params["speed"] = self._settings.speed async with self._client.audio.speech.with_streaming_response.create( **create_params diff --git a/src/pipecat/services/openai_realtime_beta/openai.py b/src/pipecat/services/openai_realtime_beta/openai.py index 1199d8556..c912ed45c 100644 --- a/src/pipecat/services/openai_realtime_beta/openai.py +++ b/src/pipecat/services/openai_realtime_beta/openai.py @@ -10,7 +10,7 @@ import base64 import json import time import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Optional from loguru import logger @@ -54,6 +54,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService from pipecat.services.openai.llm import OpenAIContextAggregatorPair +from pipecat.services.settings import NOT_GIVEN, LLMSettings, _NotGiven from pipecat.transcriptions.language import Language from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_openai_realtime, traced_stt @@ -91,6 +92,19 @@ class CurrentAudioResponse: total_size: int = 0 +@dataclass +class OpenAIRealtimeBetaLLMSettings(LLMSettings): + """Settings for OpenAI Realtime Beta LLM services. 
+ + Parameters: + session_properties: OpenAI Realtime session configuration. + """ + + session_properties: events.SessionProperties | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + class OpenAIRealtimeBetaLLMService(LLMService): """OpenAI Realtime Beta LLM service providing real-time audio and text communication. @@ -103,6 +117,8 @@ class OpenAIRealtimeBetaLLMService(LLMService): management, and real-time transcription. """ + _settings: OpenAIRealtimeBetaLLMSettings + # Overriding the default adapter to use the OpenAIRealtimeLLMAdapter one. adapter_class = OpenAIRealtimeLLMAdapter @@ -140,15 +156,26 @@ class OpenAIRealtimeBetaLLMService(LLMService): ) full_url = f"{base_url}?model={model}" - super().__init__(base_url=full_url, **kwargs) + super().__init__( + base_url=full_url, + settings=OpenAIRealtimeBetaLLMSettings( + model=model, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + session_properties=session_properties or events.SessionProperties(), + ), + **kwargs, + ) self.api_key = api_key self.base_url = full_url - self.set_model_name(model) - - self._session_properties: events.SessionProperties = ( - session_properties or events.SessionProperties() - ) self._audio_input_paused = start_audio_paused self._send_transcription_frames = send_transcription_frames self._websocket = None @@ -187,12 +214,12 @@ class OpenAIRealtimeBetaLLMService(LLMService): def _is_modality_enabled(self, modality: str) -> bool: """Check if a specific modality is enabled, "text" or "audio".""" - modalities = self._session_properties.modalities or ["audio", "text"] + modalities = self._settings.session_properties.modalities or ["audio", "text"] return modality in modalities def _get_enabled_modalities(self) -> list[str]: """Get the list of enabled modalities.""" - return self._session_properties.modalities or ["audio", 
"text"] + return self._settings.session_properties.modalities or ["audio", "text"] async def retrieve_conversation_item(self, item_id: str): """Retrieve a conversation item by ID from the server. @@ -259,7 +286,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): async def _handle_interruption(self): # None and False are different. Check for False. None means we're using OpenAI's # built-in turn detection defaults. - if self._session_properties.turn_detection is False: + if self._settings.session_properties.turn_detection is False: await self.send_client_event(events.InputAudioBufferClearEvent()) await self.send_client_event(events.ResponseCancelEvent()) await self._truncate_current_audio_response() @@ -276,7 +303,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): async def _handle_user_stopped_speaking(self, frame): # None and False are different. Check for False. None means we're using OpenAI's # built-in turn detection defaults. - if self._session_properties.turn_detection is False: + if self._settings.session_properties.turn_detection is False: await self.send_client_event(events.InputAudioBufferCommitEvent()) await self.send_client_event(events.ResponseCreateEvent()) @@ -342,6 +369,16 @@ class OpenAIRealtimeBetaLLMService(LLMService): frame: The frame to process. direction: The direction of frame flow in the pipeline. """ + # Backward-compatible dict path: frame.settings contains SessionProperties + # fields, not our Settings fields, so we construct SessionProperties + # directly. The frame.delta path falls through to super, which calls + # _update_settings → our override handles the rest. 
+ if isinstance(frame, LLMUpdateSettingsFrame) and frame.delta is None: + self._settings.session_properties = events.SessionProperties(**frame.settings) + await self._send_session_update() + await self.push_frame(frame, direction) + return + await super().process_frame(frame, direction) if isinstance(frame, TranscriptionFrame): @@ -377,11 +414,8 @@ class OpenAIRealtimeBetaLLMService(LLMService): await self._handle_messages_append(frame) elif isinstance(frame, RealtimeMessagesUpdateFrame): self._context = frame.context - elif isinstance(frame, LLMUpdateSettingsFrame): - self._session_properties = events.SessionProperties(**frame.settings) - await self._update_settings() elif isinstance(frame, LLMSetToolsFrame): - await self._update_settings() + await self._send_session_update() elif isinstance(frame, RealtimeFunctionCallResultFrame): await self._handle_function_call_result(frame.result_frame) @@ -456,8 +490,15 @@ class OpenAIRealtimeBetaLLMService(LLMService): # treat a send-side error as fatal. 
await self.push_error(error_msg=f"Error sending client event: {e}", exception=e) - async def _update_settings(self): - settings = self._session_properties + async def _update_settings(self, delta): + """Apply a settings delta, sending a session update if needed.""" + changed = await super()._update_settings(delta) + if "session_properties" in changed: + await self._send_session_update() + return changed + + async def _send_session_update(self): + settings = self._settings.session_properties # tools given in the context override the tools in the session properties if self._context and self._context.tools: settings.tools = self._context.tools @@ -503,15 +544,18 @@ class OpenAIRealtimeBetaLLMService(LLMService): await self._handle_evt_audio_transcript_delta(evt) elif evt.type == "error": if not await self._maybe_handle_evt_retrieve_conversation_item_error(evt): - await self._handle_evt_error(evt) - # errors are fatal, so exit the receive loop - return + if evt.error.code == "response_cancel_not_active": + logger.debug(f"{self} {evt.error.message}") + else: + await self._handle_evt_error(evt) + # errors are fatal, so exit the receive loop + return @traced_openai_realtime(operation="llm_setup") async def _handle_evt_session_created(self, evt): # session.created is received right after connecting. Send a message # to configure the session properties. 
- await self._update_settings() + await self._send_session_update() async def _handle_evt_session_updated(self, evt): # If this is our first context frame, run the LLM @@ -665,7 +709,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): async def _handle_evt_speech_started(self, evt): await self._truncate_current_audio_response() await self.broadcast_frame(UserStartedSpeakingFrame) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _handle_evt_speech_stopped(self, evt): await self.start_ttfb_metrics() @@ -750,7 +794,7 @@ class OpenAIRealtimeBetaLLMService(LLMService): self._context.llm_needs_initial_messages = False if self._context.llm_needs_settings_update: - await self._update_settings() + await self._send_session_update() self._context.llm_needs_settings_update = False logger.debug(f"Creating response: {self._context.get_messages_for_logging()}") diff --git a/src/pipecat/services/openrouter/llm.py b/src/pipecat/services/openrouter/llm.py index a86b18573..c33fda2fc 100644 --- a/src/pipecat/services/openrouter/llm.py +++ b/src/pipecat/services/openrouter/llm.py @@ -72,8 +72,7 @@ class OpenRouterLLMService(OpenAILLMService): Transformed parameters ready for the API call. """ params = super().build_chat_completion_params(params_from_context) - model = getattr(self, "model_name", getattr(self, "model", "")).lower() - if "gemini" in model: + if "gemini" in self._settings.model.lower(): messages = params.get("messages", []) if not messages: return params diff --git a/src/pipecat/services/perplexity/llm.py b/src/pipecat/services/perplexity/llm.py index 4ea23aa82..e03bace8d 100644 --- a/src/pipecat/services/perplexity/llm.py +++ b/src/pipecat/services/perplexity/llm.py @@ -11,8 +11,6 @@ an OpenAI-compatible interface. It handles Perplexity's unique token usage reporting patterns while maintaining compatibility with the Pipecat framework. 
""" -from openai import NOT_GIVEN - from pipecat.adapters.services.open_ai_adapter import OpenAILLMInvocationParams from pipecat.metrics.metrics import LLMTokenUsage from pipecat.processors.aggregators.llm_context import LLMContext @@ -66,22 +64,22 @@ class PerplexityLLMService(OpenAILLMService): Dictionary of parameters for the chat completion request. """ params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "messages": params_from_context["messages"], } # Add OpenAI-compatible parameters if they're set - if self._settings["frequency_penalty"] is not NOT_GIVEN: - params["frequency_penalty"] = self._settings["frequency_penalty"] - if self._settings["presence_penalty"] is not NOT_GIVEN: - params["presence_penalty"] = self._settings["presence_penalty"] - if self._settings["temperature"] is not NOT_GIVEN: - params["temperature"] = self._settings["temperature"] - if self._settings["top_p"] is not NOT_GIVEN: - params["top_p"] = self._settings["top_p"] - if self._settings["max_tokens"] is not NOT_GIVEN: - params["max_tokens"] = self._settings["max_tokens"] + if self._settings.frequency_penalty is not None: + params["frequency_penalty"] = self._settings.frequency_penalty + if self._settings.presence_penalty is not None: + params["presence_penalty"] = self._settings.presence_penalty + if self._settings.temperature is not None: + params["temperature"] = self._settings.temperature + if self._settings.top_p is not None: + params["top_p"] = self._settings.top_p + if self._settings.max_tokens is not None: + params["max_tokens"] = self._settings.max_tokens return params diff --git a/src/pipecat/services/piper/tts.py b/src/pipecat/services/piper/tts.py index a1a038826..c4831b839 100644 --- a/src/pipecat/services/piper/tts.py +++ b/src/pipecat/services/piper/tts.py @@ -7,8 +7,9 @@ """Piper TTS service implementation.""" import asyncio +from dataclasses import dataclass from pathlib import Path -from typing import AsyncGenerator, AsyncIterator, 
Optional +from typing import Any, AsyncGenerator, AsyncIterator, Optional import aiohttp from loguru import logger @@ -19,6 +20,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import TTSSettings from pipecat.services.tts_service import TTSService from pipecat.utils.tracing.service_decorators import traced_tts @@ -31,6 +33,13 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class PiperTTSSettings(TTSSettings): + """Settings for Piper TTS service.""" + + pass + + class PiperTTSService(TTSService): """Piper TTS service implementation. @@ -39,6 +48,8 @@ class PiperTTSService(TTSService): match the configured sample rate. """ + _settings: PiperTTSSettings + def __init__( self, *, @@ -58,9 +69,10 @@ class PiperTTSService(TTSService): use_cuda: Use CUDA for GPU-accelerated inference. **kwargs: Additional arguments passed to the parent `TTSService`. """ - super().__init__(**kwargs) - - self._voice_id = voice_id + super().__init__( + settings=PiperTTSSettings(model=None, voice=voice_id, language=None), + **kwargs, + ) download_dir = download_dir or Path.cwd() @@ -85,6 +97,18 @@ class PiperTTSService(TTSService): """ return True + async def _update_settings(self, delta: PiperTTSSettings) -> dict[str, Any]: + """Apply a settings delta. + + Settings are stored but not applied to the active connection. + """ + changed = await super()._update_settings(delta) + if not changed: + return changed + # TODO: voice changes would require re-downloading and loading the model. + self._warn_unhandled_updated_settings(changed) + return changed + @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: """Generate speech from text using Piper. 
@@ -143,6 +167,13 @@ class PiperTTSService(TTSService): # $ uv pip install "piper-tts[http]" # $ uv run python -m piper.http_server -m en_US-ryan-high # +@dataclass +class PiperHttpTTSSettings(TTSSettings): + """Settings for Piper HTTP TTS service.""" + + pass + + class PiperHttpTTSService(TTSService): """Piper HTTP TTS service implementation. @@ -151,6 +182,8 @@ class PiperHttpTTSService(TTSService): rates and automatic WAV header removal. """ + _settings: PiperHttpTTSSettings + def __init__( self, *, @@ -167,7 +200,10 @@ class PiperHttpTTSService(TTSService): voice_id: Piper voice model identifier (e.g. `en_US-ryan-high`). **kwargs: Additional arguments passed to the parent TTSService. """ - super().__init__(**kwargs) + super().__init__( + settings=PiperHttpTTSSettings(model=None, voice=voice_id, language=None), + **kwargs, + ) if base_url.endswith("/"): logger.warning("Base URL ends with a slash, this is not allowed.") @@ -175,7 +211,6 @@ class PiperHttpTTSService(TTSService): self._base_url = base_url self._session = aiohttp_session - self._model_id = voice_id def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -205,7 +240,7 @@ class PiperHttpTTSService(TTSService): data = { "text": text, - "voice": self._model_id, + "voice": self._settings.voice, } async with self._session.post(self._base_url, json=data, headers=headers) as response: diff --git a/src/pipecat/services/playht/__init__.py b/src/pipecat/services/playht/__init__.py deleted file mode 100644 index 500ea0fdc..000000000 --- a/src/pipecat/services/playht/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -import sys - -from pipecat.services import DeprecatedModuleProxy - -from .tts import * - -sys.modules[__name__] = DeprecatedModuleProxy(globals(), "playht", "playht.tts") diff --git a/src/pipecat/services/playht/tts.py b/src/pipecat/services/playht/tts.py deleted file mode 100644 index 287463186..000000000 --- a/src/pipecat/services/playht/tts.py +++ /dev/null @@ -1,651 +0,0 @@ -# -# Copyright (c) 2024-2026, Daily -# -# SPDX-License-Identifier: BSD 2-Clause License -# - -"""PlayHT text-to-speech service implementations. - -This module provides integration with PlayHT's text-to-speech API -supporting both WebSocket streaming and HTTP-based synthesis. 
-""" - -import io -import json -import struct -import uuid -import warnings -from typing import AsyncGenerator, Optional - -import aiohttp -from loguru import logger -from pydantic import BaseModel - -from pipecat.frames.frames import ( - CancelFrame, - EndFrame, - ErrorFrame, - Frame, - InterruptionFrame, - StartFrame, - TTSAudioRawFrame, - TTSStartedFrame, - TTSStoppedFrame, -) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import InterruptibleTTSService, TTSService -from pipecat.transcriptions.language import Language, resolve_language -from pipecat.utils.tracing.service_decorators import traced_tts - -try: - from websockets.asyncio.client import connect as websocket_connect - from websockets.protocol import State -except ModuleNotFoundError as e: - logger.error(f"Exception: {e}") - logger.error("In order to use PlayHTTTSService, you need to `pip install pipecat-ai[playht]`.") - raise Exception(f"Missing module: {e}") - - -def language_to_playht_language(language: Language) -> Optional[str]: - """Convert a Language enum to PlayHT language code. - - Args: - language: The Language enum value to convert. - - Returns: - The corresponding PlayHT language code, or None if not supported. 
- """ - LANGUAGE_MAP = { - Language.AF: "afrikans", - Language.AM: "amharic", - Language.AR: "arabic", - Language.BN: "bengali", - Language.BG: "bulgarian", - Language.CA: "catalan", - Language.CS: "czech", - Language.DA: "danish", - Language.DE: "german", - Language.EL: "greek", - Language.EN: "english", - Language.ES: "spanish", - Language.FR: "french", - Language.GL: "galician", - Language.HE: "hebrew", - Language.HI: "hindi", - Language.HR: "croatian", - Language.HU: "hungarian", - Language.ID: "indonesian", - Language.IT: "italian", - Language.JA: "japanese", - Language.KO: "korean", - Language.MS: "malay", - Language.NL: "dutch", - Language.PL: "polish", - Language.PT: "portuguese", - Language.RU: "russian", - Language.SQ: "albanian", - Language.SR: "serbian", - Language.SV: "swedish", - Language.TH: "thai", - Language.TL: "tagalog", - Language.TR: "turkish", - Language.UK: "ukrainian", - Language.UR: "urdu", - Language.XH: "xhosa", - Language.ZH: "mandarin", - } - - return resolve_language(language, LANGUAGE_MAP, use_base_code=False) - - -class PlayHTTTSService(InterruptibleTTSService): - """PlayHT WebSocket-based text-to-speech service. - - .. deprecated:: 0.0.88 - - This class is deprecated and will be removed in a future version. - PlayHT is shutting down their API on December 31st, 2025. - - Provides real-time text-to-speech synthesis using PlayHT's WebSocket API. - Supports streaming audio generation with configurable voice engines and - language settings. - """ - - class InputParams(BaseModel): - """Input parameters for PlayHT TTS configuration. - - Parameters: - language: Language for synthesis. Defaults to English. - speed: Speech speed multiplier. Defaults to 1.0. - seed: Random seed for voice consistency. 
- """ - - language: Optional[Language] = Language.EN - speed: Optional[float] = 1.0 - seed: Optional[int] = None - - def __init__( - self, - *, - api_key: str, - user_id: str, - voice_url: str, - voice_engine: str = "Play3.0-mini", - sample_rate: Optional[int] = None, - output_format: str = "wav", - params: Optional[InputParams] = None, - **kwargs, - ): - """Initialize the PlayHT WebSocket TTS service. - - Args: - api_key: PlayHT API key for authentication. - user_id: PlayHT user ID for authentication. - voice_url: URL of the voice to use for synthesis. - voice_engine: Voice engine to use. Defaults to "Play3.0-mini". - sample_rate: Audio sample rate. If None, uses default. - output_format: Audio output format. Defaults to "wav". - params: Additional input parameters for voice customization. - **kwargs: Additional arguments passed to parent InterruptibleTTSService. - """ - super().__init__( - pause_frame_processing=True, - sample_rate=sample_rate, - **kwargs, - ) - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "PlayHT is shutting down their API on December 31st, 2025. " - "'PlayHTTTSService' is deprecated and will be removed in a future version.", - DeprecationWarning, - stacklevel=2, - ) - - params = params or PlayHTTTSService.InputParams() - - self._api_key = api_key - self._user_id = user_id - self._websocket_url = None - self._receive_task = None - self._context_id = None - - self._settings = { - "language": self.language_to_service_language(params.language) - if params.language - else "english", - "output_format": output_format, - "voice_engine": voice_engine, - "speed": params.speed, - "seed": params.seed, - } - self.set_model_name(voice_engine) - self.set_voice(voice_url) - - def can_generate_metrics(self) -> bool: - """Check if this service can generate processing metrics. - - Returns: - True, as PlayHT service supports metrics generation. 
- """ - return True - - def language_to_service_language(self, language: Language) -> Optional[str]: - """Convert a Language enum to PlayHT service language format. - - Args: - language: The language to convert. - - Returns: - The PlayHT-specific language code, or None if not supported. - """ - return language_to_playht_language(language) - - async def start(self, frame: StartFrame): - """Start the PlayHT TTS service. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - await self._connect() - - async def stop(self, frame: EndFrame): - """Stop the PlayHT TTS service. - - Args: - frame: The end frame. - """ - await super().stop(frame) - await self._disconnect() - - async def cancel(self, frame: CancelFrame): - """Cancel the PlayHT TTS service. - - Args: - frame: The cancel frame. - """ - await super().cancel(frame) - await self._disconnect() - - async def _connect(self): - """Connect to PlayHT WebSocket and start receive task.""" - await super()._connect() - - await self._connect_websocket() - - if self._websocket and not self._receive_task: - self._receive_task = self.create_task(self._receive_task_handler(self._report_error)) - - async def _disconnect(self): - """Disconnect from PlayHT WebSocket and clean up tasks.""" - await super()._disconnect() - - if self._receive_task: - await self.cancel_task(self._receive_task) - self._receive_task = None - - await self._disconnect_websocket() - - async def _connect_websocket(self): - """Connect to PlayHT websocket.""" - try: - if self._websocket and self._websocket.state is State.OPEN: - return - - logger.debug("Connecting to PlayHT") - - if not self._websocket_url: - await self._get_websocket_url() - - if not isinstance(self._websocket_url, str): - raise ValueError("WebSocket URL is not a string") - - self._websocket = await websocket_connect(self._websocket_url) - - await self._call_event_handler("on_connected") - except ValueError as e: - logger.error(f"{self} 
initialization error: {e}") - self._websocket = None - await self._call_event_handler("on_connection_error", f"{e}") - except Exception as e: - await self.push_error(error_msg=f"Error connecting: {e}", exception=e) - self._websocket = None - await self._call_event_handler("on_connection_error", f"{e}") - - async def _disconnect_websocket(self): - """Disconnect from PlayHT websocket.""" - try: - await self.stop_all_metrics() - - if self._websocket: - logger.debug("Disconnecting from PlayHT") - await self._websocket.close() - except Exception as e: - await self.push_error(error_msg=f"Error disconnecting: {e}", exception=e) - finally: - self._context_id = None - self._websocket = None - await self._call_event_handler("on_disconnected") - - async def _get_websocket_url(self): - """Retrieve WebSocket URL from PlayHT API.""" - async with aiohttp.ClientSession() as session: - async with session.post( - "https://api.play.ht/api/v4/websocket-auth", - headers={ - "Authorization": f"Bearer {self._api_key}", - "X-User-Id": self._user_id, - "Content-Type": "application/json", - }, - ) as response: - if response.status in (200, 201): - data = await response.json() - # Handle the new response format with multiple URLs - if "websocket_urls" in data: - # Select URL based on voice_engine - if self._settings["voice_engine"] in data["websocket_urls"]: - self._websocket_url = data["websocket_urls"][ - self._settings["voice_engine"] - ] - else: - raise ValueError( - f"Unsupported voice engine: {self._settings['voice_engine']}" - ) - else: - raise ValueError("Invalid response: missing websocket_urls") - else: - raise Exception(f"Failed to get WebSocket URL: {response.status}") - - def _get_websocket(self): - """Get the WebSocket connection if available.""" - if self._websocket: - return self._websocket - raise Exception("Websocket not connected") - - def create_context_id(self) -> str: - """Generate a unique context ID for a TTS request in case we don't have one already in progress. 
- - Returns: - A unique string identifier for the TTS context. - """ - # If a context ID does not exist, create a new one. - # If an ID exists, continue using the current ID. - # When interruptions happen, user speech results in - # an interruption, which resets the context ID. - if not self._context_id: - return str(uuid.uuid4()) - return self._context_id - - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by stopping metrics and clearing request ID.""" - await super()._handle_interruption(frame, direction) - await self.stop_all_metrics() - self._context_id = None - - async def _receive_messages(self): - """Receive messages from PlayHT websocket.""" - async for message in self._get_websocket(): - if isinstance(message, bytes): - # Skip the WAV header message - if message.startswith(b"RIFF"): - continue - await self.stop_ttfb_metrics() - frame = TTSAudioRawFrame(message, self.sample_rate, 1, context_id=self._context_id) - await self.push_frame(frame) - else: - logger.debug(f"Received text message: {message}") - try: - msg = json.loads(message) - if msg.get("type") == "start": - # Handle start of stream - logger.debug(f"Started processing request: {msg.get('request_id')}") - elif msg.get("type") == "end": - # Handle end of stream - if "request_id" in msg and msg["request_id"] == self._context_id: - await self.push_frame(TTSStoppedFrame(context_id=self._context_id)) - self._context_id = None - elif "error" in msg: - await self.push_error(error_msg=f"Error: {msg['error']}") - except json.JSONDecodeError: - logger.error(f"Invalid JSON message: {message}") - - @traced_tts - async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: - """Generate TTS audio from text using PlayHT's WebSocket API. - - Args: - text: The text to synthesize into speech. - context_id: The context ID for tracking audio frames. - - Yields: - Frame: Audio frames containing the synthesized speech. 
- """ - logger.debug(f"{self}: Generating TTS [{text}]") - - try: - # Reconnect if the websocket is closed - if not self._websocket or self._websocket.state is State.CLOSED: - await self._connect() - - if not self._context_id: - await self.start_ttfb_metrics() - yield TTSStartedFrame(context_id=context_id) - self._context_id = context_id - - tts_command = { - "text": text, - "voice": self._voice_id, - "voice_engine": self._settings["voice_engine"], - "output_format": self._settings["output_format"], - "sample_rate": self.sample_rate, - "language": self._settings["language"], - "speed": self._settings["speed"], - "seed": self._settings["seed"], - "request_id": self._context_id, - } - - try: - await self._get_websocket().send(json.dumps(tts_command)) - await self.start_tts_usage_metrics(text) - except Exception as e: - yield ErrorFrame(error=f"Unknown error occurred: {e}") - yield TTSStoppedFrame(context_id=context_id) - await self._disconnect() - await self._connect() - return - - # The actual audio frames will be handled in _receive_task_handler - yield None - - except Exception as e: - yield ErrorFrame(error=f"Unknown error occurred: {e}") - - -class PlayHTHttpTTSService(TTSService): - """PlayHT HTTP-based text-to-speech service. - - .. deprecated:: 0.0.88 - - This class is deprecated and will be removed in a future version. - PlayHT is shutting down their API on December 31st, 2025. - - Provides text-to-speech synthesis using PlayHT's HTTP API for simpler, - non-streaming synthesis. Suitable for use cases where streaming is not - required and simpler integration is preferred. - """ - - class InputParams(BaseModel): - """Input parameters for PlayHT HTTP TTS configuration. - - Parameters: - language: Language for synthesis. Defaults to English. - speed: Speech speed multiplier. Defaults to 1.0. - seed: Random seed for voice consistency. 
- """ - - language: Optional[Language] = Language.EN - speed: Optional[float] = 1.0 - seed: Optional[int] = None - - def __init__( - self, - *, - api_key: str, - user_id: str, - voice_url: str, - voice_engine: str = "Play3.0-mini", - protocol: Optional[str] = None, - output_format: str = "wav", - sample_rate: Optional[int] = None, - params: Optional[InputParams] = None, - **kwargs, - ): - """Initialize the PlayHT HTTP TTS service. - - Args: - api_key: PlayHT API key for authentication. - user_id: PlayHT user ID for authentication. - voice_url: URL of the voice to use for synthesis. - voice_engine: Voice engine to use. Defaults to "Play3.0-mini". - protocol: Protocol to use ("http" or "ws"). - - .. deprecated:: 0.0.80 - This parameter no longer has any effect and will be removed in a future version. - Use PlayHTTTSService for WebSocket or PlayHTHttpTTSService for HTTP. - - output_format: Audio output format. Defaults to "wav". - sample_rate: Audio sample rate. If None, uses default. - params: Additional input parameters for voice customization. - **kwargs: Additional arguments passed to parent TTSService. - """ - super().__init__(sample_rate=sample_rate, **kwargs) - - # Warn about deprecated protocol parameter if explicitly provided - if protocol: - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "The 'protocol' parameter is deprecated and will be removed in a future version.", - DeprecationWarning, - stacklevel=2, - ) - - with warnings.catch_warnings(): - warnings.simplefilter("always") - warnings.warn( - "PlayHT is shutting down their API on December 31st, 2025. 
" - "'PlayHTHttpTTSService' is deprecated and will be removed in a future version.", - DeprecationWarning, - stacklevel=2, - ) - - params = params or PlayHTHttpTTSService.InputParams() - - self._user_id = user_id - self._api_key = api_key - - # Check if voice_engine contains protocol information (backward compatibility) - if "-http" in voice_engine: - # Extract the base engine name - voice_engine = voice_engine.replace("-http", "") - elif "-ws" in voice_engine: - # Extract the base engine name - voice_engine = voice_engine.replace("-ws", "") - - self._settings = { - "language": self.language_to_service_language(params.language) - if params.language - else "english", - "output_format": output_format, - "voice_engine": voice_engine, - "speed": params.speed, - "seed": params.seed, - } - self.set_model_name(voice_engine) - self.set_voice(voice_url) - - async def start(self, frame: StartFrame): - """Start the PlayHT HTTP TTS service. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - self._settings["sample_rate"] = self.sample_rate - - def can_generate_metrics(self) -> bool: - """Check if this service can generate processing metrics. - - Returns: - True, as PlayHT HTTP service supports metrics generation. - """ - return True - - def language_to_service_language(self, language: Language) -> Optional[str]: - """Convert a Language enum to PlayHT service language format. - - Args: - language: The language to convert. - - Returns: - The PlayHT-specific language code, or None if not supported. - """ - return language_to_playht_language(language) - - @traced_tts - async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: - """Generate TTS audio from text using PlayHT's HTTP API. - - Args: - text: The text to synthesize into speech. - context_id: The context ID for tracking audio frames. - - Yields: - Frame: Audio frames containing the synthesized speech. 
- """ - logger.debug(f"{self}: Generating TTS [{text}]") - - try: - await self.start_ttfb_metrics() - - # Prepare the request payload - payload = { - "text": text, - "voice": self._voice_id, - "voice_engine": self._settings["voice_engine"], - "output_format": self._settings["output_format"], - "sample_rate": self.sample_rate, - "language": self._settings["language"], - } - - # Add optional parameters if they exist - if self._settings["speed"] is not None: - payload["speed"] = self._settings["speed"] - if self._settings["seed"] is not None: - payload["seed"] = self._settings["seed"] - - headers = { - "Authorization": f"Bearer {self._api_key}", - "X-User-Id": self._user_id, - "Content-Type": "application/json", - "Accept": "*/*", - } - - await self.start_tts_usage_metrics(text) - - yield TTSStartedFrame(context_id=context_id) - - async with aiohttp.ClientSession() as session: - async with session.post( - "https://api.play.ht/api/v2/tts/stream", - headers=headers, - json=payload, - ) as response: - if response.status not in (200, 201): - error_text = await response.text() - raise Exception(f"PlayHT API error {response.status}: {error_text}") - - in_header = True - buffer = b"" - - CHUNK_SIZE = self.chunk_size - - async for chunk in response.content.iter_chunked(CHUNK_SIZE): - if len(chunk) == 0: - continue - - # Skip the RIFF header - if in_header: - buffer += chunk - if len(buffer) <= 36: - continue - else: - fh = io.BytesIO(buffer) - fh.seek(36) - (data, size) = struct.unpack("<4sI", fh.read(8)) - while data != b"data": - fh.read(size) - (data, size) = struct.unpack("<4sI", fh.read(8)) - # Extract audio data after header - audio_data = buffer[fh.tell() :] - if len(audio_data) > 0: - await self.stop_ttfb_metrics() - frame = TTSAudioRawFrame( - audio_data, self.sample_rate, 1, context_id=context_id - ) - yield frame - in_header = False - elif len(chunk) > 0: - await self.stop_ttfb_metrics() - frame = TTSAudioRawFrame( - chunk, self.sample_rate, 1, 
context_id=context_id - ) - yield frame - - except Exception as e: - yield ErrorFrame(error=f"Unknown error occurred: {e}") - finally: - await self.stop_ttfb_metrics() - yield TTSStoppedFrame(context_id=context_id) diff --git a/src/pipecat/services/resembleai/tts.py b/src/pipecat/services/resembleai/tts.py index c51ccc07b..1c2953b72 100644 --- a/src/pipecat/services/resembleai/tts.py +++ b/src/pipecat/services/resembleai/tts.py @@ -8,7 +8,8 @@ import base64 import json -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, ClassVar, Dict, Optional from loguru import logger @@ -17,14 +18,13 @@ from pipecat.frames.frames import ( EndFrame, ErrorFrame, Frame, - InterruptionFrame, StartFrame, TTSAudioRawFrame, TTSStartedFrame, TTSStoppedFrame, ) -from pipecat.processors.frame_processor import FrameDirection -from pipecat.services.tts_service import AudioContextWordTTSService +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven +from pipecat.services.tts_service import AudioContextTTSService from pipecat.utils.tracing.service_decorators import traced_tts try: @@ -36,7 +36,27 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") -class ResembleAITTSService(AudioContextWordTTSService): +@dataclass +class ResembleAITTSSettings(TTSSettings): + """Settings for Resemble AI TTS service. + + Parameters: + precision: PCM bit depth (PCM_32, PCM_24, PCM_16, or MULAW). + output_format: Audio format (wav or mp3). + resemble_sample_rate: Audio sample rate sent to the API. 
+ """ + + precision: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_format: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + resemble_sample_rate: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = { + "voice_id": "voice", + "sample_rate": "resemble_sample_rate", + } + + +class ResembleAITTSService(AudioContextTTSService): """Resemble AI TTS service with WebSocket streaming and word timestamps. Provides text-to-speech using Resemble AI's streaming WebSocket API. @@ -44,6 +64,8 @@ class ResembleAITTSService(AudioContextWordTTSService): multiple simultaneous synthesis requests with proper interruption support. """ + _settings: ResembleAITTSSettings + def __init__( self, *, @@ -69,17 +91,20 @@ class ResembleAITTSService(AudioContextWordTTSService): super().__init__( sample_rate=sample_rate, reuse_context_id_within_turn=False, + supports_word_timestamps=True, + settings=ResembleAITTSSettings( + model=None, + voice=voice_id, + language=None, + precision=precision, + output_format=output_format, + resemble_sample_rate=sample_rate, + ), **kwargs, ) self._api_key = api_key - self._voice_id = voice_id self._url = url - self._settings = { - "precision": precision, - "output_format": output_format, - "sample_rate": sample_rate, - } self._websocket = None self._request_id_counter = 0 @@ -100,8 +125,6 @@ class ResembleAITTSService(AudioContextWordTTSService): self._jitter_buffer_bytes = 44100 # ~1000ms at 22050Hz to handle 400ms+ network gaps self._playback_started: dict[str, bool] = {} # Track if we've started playback per request - self.set_voice(voice_id) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -120,13 +143,13 @@ class ResembleAITTSService(AudioContextWordTTSService): JSON string containing the request payload. 
""" msg = { - "voice_uuid": self._voice_id, + "voice_uuid": self._settings.voice, "data": text, "binary_response": False, # Use JSON frames to get timestamps "request_id": self._request_id_counter, # ResembleAI only accepts number - "output_format": self._settings["output_format"], - "sample_rate": self._settings["sample_rate"], - "precision": self._settings["precision"], + "output_format": self._settings.output_format, + "sample_rate": self._settings.resemble_sample_rate, + "precision": self._settings.precision, "no_audio_header": True, } @@ -140,7 +163,7 @@ class ResembleAITTSService(AudioContextWordTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.resemble_sample_rate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -222,16 +245,19 @@ class ResembleAITTSService(AudioContextWordTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by stopping current synthesis. - - Args: - frame: The interruption frame. - direction: The direction of frame processing. - """ - await super()._handle_interruption(frame, direction) + async def on_audio_context_interrupted(self, context_id: str): + """Stop metrics when the bot is interrupted.""" await self.stop_all_metrics() + async def on_audio_context_completed(self, context_id: str): + """Stop metrics after the Resemble AI context finishes playing. + + No close message is needed: Resemble AI signals completion with an + ``audio_end`` message (handled in ``_process_messages``), after which + the server-side context is already closed. 
+ """ + pass + async def flush_audio(self): """Flush any pending audio and finalize the current context.""" logger.trace(f"{self}: flushing audio") diff --git a/src/pipecat/services/rime/tts.py b/src/pipecat/services/rime/tts.py index c4b1c870a..944ff4e58 100644 --- a/src/pipecat/services/rime/tts.py +++ b/src/pipecat/services/rime/tts.py @@ -12,7 +12,8 @@ using Rime's API for streaming and batch audio synthesis. import base64 import json -from typing import Any, AsyncGenerator, Mapping, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, ClassVar, Dict, Optional import aiohttp from loguru import logger @@ -30,9 +31,11 @@ from pipecat.frames.frames import ( TTSStoppedFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import ( - AudioContextWordTTSService, + AudioContextTTSService, InterruptibleTTSService, + TextAggregationMode, TTSService, ) from pipecat.transcriptions.language import Language, resolve_language @@ -68,7 +71,67 @@ def language_to_rime_language(language: Language) -> str: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) -class RimeTTSService(AudioContextWordTTSService): +@dataclass +class RimeTTSSettings(TTSSettings): + """Settings for Rime WS JSON and HTTP TTS services. + + Parameters: + audioFormat: Audio output format. + samplingRate: Audio sample rate. + segment: Text segmentation mode ("immediate", "bySentence", "never"). + speedAlpha: Speech speed multiplier (mistv2 only). + reduceLatency: Whether to reduce latency at potential quality cost (mistv2 only). + pauseBetweenBrackets: Whether to add pauses between bracketed content (mistv2 only). + phonemizeBetweenBrackets: Whether to phonemize bracketed content (mistv2 only). + noTextNormalization: Whether to disable text normalization (mistv2 only). + saveOovs: Whether to save out-of-vocabulary words (mistv2 only). 
+ inlineSpeedAlpha: Inline speed control markup. + repetition_penalty: Token repetition penalty (arcana only, 1.0-2.0). + temperature: Sampling temperature (arcana only, 0.0-1.0). + top_p: Cumulative probability threshold (arcana only, 0.0-1.0). + """ + + audioFormat: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + samplingRate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + segment: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speedAlpha: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + reduceLatency: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pauseBetweenBrackets: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + phonemizeBetweenBrackets: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + noTextNormalization: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + saveOovs: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + inlineSpeedAlpha: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + repetition_penalty: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} + + +@dataclass +class RimeNonJsonTTSSettings(TTSSettings): + """Settings for Rime non-JSON WS TTS service. + + Parameters: + audioFormat: Audio output format. + samplingRate: Audio sample rate. + segment: Text segmentation mode ("immediate", "bySentence", "never"). + repetition_penalty: Token repetition penalty (1.0-2.0). + temperature: Sampling temperature (0.0-1.0). + top_p: Cumulative probability threshold (0.0-1.0). 
+ """ + + audioFormat: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + samplingRate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + segment: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + repetition_penalty: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = {"speaker": "voice"} + + +class RimeTTSService(AudioContextTTSService): """Text-to-Speech service using Rime's websocket API. Uses Rime's websocket JSON API to convert text to speech with word-level timing @@ -76,16 +139,18 @@ class RimeTTSService(AudioContextWordTTSService): within a turn. """ + _settings: RimeTTSSettings + class InputParams(BaseModel): """Configuration parameters for Rime TTS service. Parameters: language: Language for synthesis. Defaults to English. segment: Text segmentation mode ("immediate", "bySentence", "never"). + speed_alpha: Speech speed multiplier. repetition_penalty: Token repetition penalty (arcana only). temperature: Sampling temperature (arcana only). top_p: Cumulative probability threshold (arcana only). - speed_alpha: Speech speed multiplier (mistv2 only). reduce_latency: Whether to reduce latency at potential quality cost (mistv2 only). pause_between_brackets: Whether to add pauses between bracketed content (mistv2 only). phonemize_between_brackets: Whether to phonemize bracketed content (mistv2 only). 
@@ -95,12 +160,12 @@ class RimeTTSService(AudioContextWordTTSService): language: Optional[Language] = Language.EN segment: Optional[str] = None + speed_alpha: Optional[float] = None # Arcana params repetition_penalty: Optional[float] = None temperature: Optional[float] = None top_p: Optional[float] = None # Mistv2 params - speed_alpha: Optional[float] = None reduce_latency: Optional[bool] = None pause_between_brackets: Optional[bool] = None phonemize_between_brackets: Optional[bool] = None @@ -117,7 +182,8 @@ class RimeTTSService(AudioContextWordTTSService): sample_rate: Optional[int] = None, params: Optional[InputParams] = None, text_aggregator: Optional[BaseTextAggregator] = None, - aggregate_sentences: Optional[bool] = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, **kwargs, ): """Initialize Rime TTS service. @@ -134,17 +200,48 @@ class RimeTTSService(AudioContextWordTTSService): .. deprecated:: 0.0.95 Use an LLMTextProcessor before the TTSService for custom text aggregation. - aggregate_sentences: Whether to aggregate sentences within the TTSService. + text_aggregation_mode: How to aggregate incoming text before synthesis. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + **kwargs: Additional arguments passed to parent class. 
""" # Initialize with parent class settings for proper frame handling + params = params or RimeTTSService.InputParams() + super().__init__( + text_aggregation_mode=text_aggregation_mode, aggregate_sentences=aggregate_sentences, push_text_frames=False, push_stop_frames=True, pause_frame_processing=True, + supports_word_timestamps=True, append_trailing_space=True, sample_rate=sample_rate, + settings=RimeTTSSettings( + model=model, + voice=voice_id, + audioFormat="pcm", + samplingRate=0, # updated in start() + language=self.language_to_service_language(params.language) + if params.language + else None, + segment=params.segment, + inlineSpeedAlpha=None, # Not applicable here + speedAlpha=params.speed_alpha, + # Arcana params + repetition_penalty=params.repetition_penalty, + temperature=params.temperature, + top_p=params.top_p, + # Mistv2 params + reduceLatency=params.reduce_latency, + pauseBetweenBrackets=params.pause_between_brackets, + phonemizeBetweenBrackets=params.phonemize_between_brackets, + noTextNormalization=params.no_text_normalization, + saveOovs=params.save_oovs, + ), **kwargs, ) @@ -154,16 +251,13 @@ class RimeTTSService(AudioContextWordTTSService): # The preferred way of taking advantage of Rime spelling is # to use an LLMTextProcessor and/or a text_transformer to identify # and insert these tags for the purpose of the TTS service alone. 
- self._text_aggregator = SkipTagsAggregator([("spell(", ")")]) - - self._params = params or RimeTTSService.InputParams() + self._text_aggregator = SkipTagsAggregator( + [("spell(", ")")], aggregation_type=self._text_aggregation_mode + ) # Store service configuration self._api_key = api_key self._url = url - self._voice_id = voice_id - self._model = model - self._settings = self._build_settings() # State tracking self._receive_task = None @@ -189,60 +283,49 @@ class RimeTTSService(AudioContextWordTTSService): """ return language_to_rime_language(language) - def _build_settings(self) -> dict: - """Build query params for the WebSocket URL based on the current model and params. + def _build_ws_params(self) -> dict[str, Any]: + """Build query params for the WebSocket URL from current settings. Returns: - Dictionary of query parameters. Only explicitly-set values are included. + Dictionary of query parameters for the WebSocket URL. + Only explicitly-set values are included. Boolean mistv2 params + are serialized with ``json.dumps()`` for the wire format. 
""" - settings = { - "speaker": self._voice_id, - "modelId": self._model, - "audioFormat": "pcm", - "samplingRate": self.sample_rate or 0, + params: dict[str, Any] = { + "speaker": self._settings.voice, + "modelId": self._settings.model, + "audioFormat": self._settings.audioFormat, + "samplingRate": self._settings.samplingRate, } - if self._params.language: - settings["lang"] = self.language_to_service_language(self._params.language) or "eng" - if self._params.segment is not None: - settings["segment"] = self._params.segment + if self._settings.language is not None: + params["lang"] = self._settings.language + if self._settings.segment is not None: + params["segment"] = self._settings.segment + if self._settings.speedAlpha is not None: + params["speedAlpha"] = self._settings.speedAlpha - if self._model == "arcana": - if self._params.repetition_penalty is not None: - settings["repetition_penalty"] = self._params.repetition_penalty - if self._params.temperature is not None: - settings["temperature"] = self._params.temperature - if self._params.top_p is not None: - settings["top_p"] = self._params.top_p + if self._settings.model == "arcana": + if self._settings.repetition_penalty is not None: + params["repetition_penalty"] = self._settings.repetition_penalty + if self._settings.temperature is not None: + params["temperature"] = self._settings.temperature + if self._settings.top_p is not None: + params["top_p"] = self._settings.top_p else: # mistv2/mist - if self._params.speed_alpha is not None: - settings["speedAlpha"] = self._params.speed_alpha - if self._params.reduce_latency is not None: - settings["reduceLatency"] = self._params.reduce_latency - if self._params.pause_between_brackets is not None: - settings["pauseBetweenBrackets"] = json.dumps(self._params.pause_between_brackets) - if self._params.phonemize_between_brackets is not None: - settings["phonemizeBetweenBrackets"] = json.dumps( - self._params.phonemize_between_brackets + if self._settings.reduceLatency 
is not None: + params["reduceLatency"] = self._settings.reduceLatency + if self._settings.pauseBetweenBrackets is not None: + params["pauseBetweenBrackets"] = json.dumps(self._settings.pauseBetweenBrackets) + if self._settings.phonemizeBetweenBrackets is not None: + params["phonemizeBetweenBrackets"] = json.dumps( + self._settings.phonemizeBetweenBrackets ) - if self._params.no_text_normalization is not None: - settings["noTextNormalization"] = json.dumps(self._params.no_text_normalization) - if self._params.save_oovs is not None: - settings["saveOovs"] = json.dumps(self._params.save_oovs) + if self._settings.noTextNormalization is not None: + params["noTextNormalization"] = json.dumps(self._settings.noTextNormalization) + if self._settings.saveOovs is not None: + params["saveOovs"] = json.dumps(self._settings.saveOovs) - return settings - - async def set_model(self, model: str): - """Update the TTS model and reconnect. - - Args: - model: The model name to use for synthesis. - """ - self._model = model - self._settings = self._build_settings() - await super().set_model(model) - if self._websocket: - await self._disconnect() - await self._connect() + return params # A set of Rime-specific helpers for text transformations def SPELL(text: str) -> str: @@ -269,72 +352,20 @@ class RimeTTSService(AudioContextWordTTSService): self._extra_msg_fields["inlineSpeedAlpha"] = ",".join(speed_vals + [str(speed)]) return f"[{text}]" - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if necessary. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if necessary. Since all settings are WebSocket URL query parameters, any setting change requires reconnecting to apply the new values. 
""" - prev_settings = self._settings.copy() - await super()._update_settings(settings) + changed = await super()._update_settings(delta) - needs_reconnect = False - - if "voice" in settings or "voice_id" in settings: - self._settings["speaker"] = self._voice_id - if prev_settings.get("speaker") != self._voice_id: - logger.info(f"Switching TTS voice to: [{self._voice_id}]") - needs_reconnect = True - - if "model" in settings: - self._settings = self._build_settings() - needs_reconnect = True - - if "language" in settings: - new_lang = self.language_to_service_language(settings["language"]) - if new_lang and new_lang != prev_settings.get("lang"): - logger.info(f"Updating language to: [{new_lang}]") - self._settings["lang"] = new_lang - needs_reconnect = True - - # Arcana params - for key, settings_key in [ - ("repetition_penalty", "repetition_penalty"), - ("temperature", "temperature"), - ("top_p", "top_p"), - ]: - if key in settings and settings[key] != prev_settings.get(settings_key): - self._settings[settings_key] = settings[key] - needs_reconnect = True - - # Mistv2 params - for key, settings_key in [ - ("speed_alpha", "speedAlpha"), - ("reduce_latency", "reduceLatency"), - ]: - if key in settings and settings[key] != prev_settings.get(settings_key): - self._settings[settings_key] = settings[key] - needs_reconnect = True - - # Mistv2 boolean params (need json.dumps) - for key, settings_key in [ - ("pause_between_brackets", "pauseBetweenBrackets"), - ("phonemize_between_brackets", "phonemizeBetweenBrackets"), - ("no_text_normalization", "noTextNormalization"), - ("save_oovs", "saveOovs"), - ]: - if key in settings and json.dumps(settings[key]) != prev_settings.get(settings_key): - self._settings[settings_key] = json.dumps(settings[key]) - needs_reconnect = True - - if "segment" in settings and settings["segment"] != prev_settings.get("segment"): - self._settings["segment"] = settings["segment"] - needs_reconnect = True - - if needs_reconnect and self._websocket: + 
if changed and self._websocket: await self._disconnect() await self._connect() + return changed + def _build_msg(self, text: str = "") -> dict: """Build JSON message for Rime API.""" msg = {"text": text, "contextId": self.get_active_audio_context_id()} @@ -358,7 +389,7 @@ class RimeTTSService(AudioContextWordTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings = self._build_settings() + self._settings.samplingRate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -404,7 +435,8 @@ class RimeTTSService(AudioContextWordTTSService): if self._websocket and self._websocket.state is State.OPEN: return - params = "&".join(f"{k}={v}" for k, v in self._settings.items() if v is not None) + ws_params = self._build_ws_params() + params = "&".join(f"{k}={v}" for k, v in ws_params.items() if v is not None) url = f"{self._url}?{params}" headers = {"Authorization": f"Bearer {self._api_key}"} self._websocket = await websocket_connect(url, additional_headers=headers) @@ -435,14 +467,25 @@ class RimeTTSService(AudioContextWordTTSService): return self._websocket raise Exception("Websocket not connected") - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - """Handle interruption by clearing current context.""" - context_id = self.get_active_audio_context_id() - await super()._handle_interruption(frame, direction) + async def _close_context(self, context_id: str): + """Clear the Rime speech queue and stop metrics.""" await self.stop_all_metrics() if context_id: await self._get_websocket().send(json.dumps(self._build_clear_msg())) + async def on_audio_context_interrupted(self, context_id: str): + """Clear the Rime speech queue and stop metrics when the bot is interrupted.""" + await self._close_context(context_id) + + async def on_audio_context_completed(self, context_id: str): + """Clear server-side state and stop metrics after the Rime context 
finishes playing. + + Rime does not send a server-side completion signal (e.g. ``done`` / ``end_of_stream`` / + ``audio_end``), so we explicitly send a ``clear`` message to clean up + any residual server-side state once all audio has been delivered. + """ + await self._close_context(context_id) + def _calculate_word_times(self, words: list, starts: list, ends: list) -> list: """Calculate word timing pairs with proper spacing and punctuation. @@ -580,6 +623,8 @@ class RimeHttpTTSService(TTSService): Suitable for use cases where streaming is not required. """ + _settings: RimeTTSSettings + class InputParams(BaseModel): """Configuration parameters for Rime HTTP TTS service. @@ -621,27 +666,36 @@ class RimeHttpTTSService(TTSService): params: Additional configuration parameters. **kwargs: Additional arguments passed to parent TTSService. """ - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or RimeHttpTTSService.InputParams() + super().__init__( + sample_rate=sample_rate, + settings=RimeTTSSettings( + model=model, + language=self.language_to_service_language(params.language) + if params.language + else "eng", + audioFormat="pcm", + samplingRate=0, + segment=None, + speedAlpha=params.speed_alpha, + reduceLatency=params.reduce_latency, + pauseBetweenBrackets=params.pause_between_brackets, + phonemizeBetweenBrackets=params.phonemize_between_brackets, + noTextNormalization=None, + saveOovs=None, + inlineSpeedAlpha=params.inline_speed_alpha if params.inline_speed_alpha else None, + repetition_penalty=None, + temperature=None, + top_p=None, + voice=voice_id, + ), + **kwargs, + ) + self._api_key = api_key self._session = aiohttp_session self._base_url = "https://users.rime.ai/v1/rime-tts" - self._settings = { - "lang": self.language_to_service_language(params.language) - if params.language - else "eng", - "speedAlpha": params.speed_alpha, - "reduceLatency": params.reduce_latency, - "pauseBetweenBrackets": params.pause_between_brackets, - 
"phonemizeBetweenBrackets": params.phonemize_between_brackets, - } - self.set_voice(voice_id) - self.set_model_name(model) - - if params.inline_speed_alpha: - self._settings["inlineSpeedAlpha"] = params.inline_speed_alpha def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -681,10 +735,18 @@ class RimeHttpTTSService(TTSService): "Content-Type": "application/json", } - payload = self._settings.copy() + payload = { + "lang": self._settings.language, + "speedAlpha": self._settings.speedAlpha, + "reduceLatency": self._settings.reduceLatency, + "pauseBetweenBrackets": self._settings.pauseBetweenBrackets, + "phonemizeBetweenBrackets": self._settings.phonemizeBetweenBrackets, + } + if self._settings.inlineSpeedAlpha is not None: + payload["inlineSpeedAlpha"] = self._settings.inlineSpeedAlpha payload["text"] = text - payload["speaker"] = self._voice_id - payload["modelId"] = self._model_name + payload["speaker"] = self._settings.voice + payload["modelId"] = self._settings.model payload["samplingRate"] = self.sample_rate # Arcana does not support PCM audio @@ -743,6 +805,8 @@ class RimeNonJsonTTSService(InterruptibleTTSService): accepts and returns non-JSON messages. """ + _settings: RimeNonJsonTTSSettings + class InputParams(BaseModel): """Configuration parameters for Rime Non-JSON WebSocket TTS service. @@ -772,7 +836,8 @@ class RimeNonJsonTTSService(InterruptibleTTSService): audio_format: str = "pcm", sample_rate: Optional[int] = None, params: Optional[InputParams] = None, - aggregate_sentences: Optional[bool] = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, **kwargs, ): """Initialize Rime Non-JSON WebSocket TTS service. @@ -785,41 +850,44 @@ class RimeNonJsonTTSService(InterruptibleTTSService): audio_format: Audio format to use. sample_rate: Audio sample rate in Hz. params: Additional configuration parameters. 
- aggregate_sentences: Whether to aggregate sentences within the TTSService. + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. Set to ``TextAggregationMode.SENTENCE`` + to aggregate text into sentences before synthesis, or + ``TextAggregationMode.TOKEN`` to stream tokens directly for lower latency. + + text_aggregation_mode: How to aggregate text before synthesis. **kwargs: Additional arguments passed to parent class. """ + params = params or RimeNonJsonTTSService.InputParams() super().__init__( sample_rate=sample_rate, aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, push_stop_frames=True, pause_frame_processing=True, + append_trailing_space=True, + settings=RimeNonJsonTTSSettings( + voice=voice_id, + model=model, + audioFormat=audio_format, + samplingRate=sample_rate, + language=self.language_to_service_language(params.language) + if params.language + else None, + segment=params.segment, + repetition_penalty=params.repetition_penalty, + temperature=params.temperature, + top_p=params.top_p, + ), **kwargs, ) - params = params or RimeNonJsonTTSService.InputParams() self._api_key = api_key self._url = url - self._voice_id = voice_id - self._model = model - self._settings = { - "speaker": voice_id, - "modelId": model, - "audioFormat": audio_format, - "samplingRate": sample_rate, - } - - if params.language: - self._settings["lang"] = self.language_to_service_language(params.language) - if params.segment is not None: - self._settings["segment"] = params.segment - if params.repetition_penalty is not None: - self._settings["repetition_penalty"] = params.repetition_penalty - if params.temperature is not None: - self._settings["temperature"] = params.temperature - if params.top_p is not None: - self._settings["top_p"] = params.top_p # Add any extra parameters for future compatibility if params.extra: - self._settings.update(params.extra) + 
self._settings.extra.update(params.extra) self._receive_task = None self._context_id: Optional[str] = None @@ -851,7 +919,7 @@ class RimeNonJsonTTSService(InterruptibleTTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["samplingRate"] = self.sample_rate + self._settings.samplingRate = self.sample_rate await self._connect() async def stop(self, frame: EndFrame): @@ -895,8 +963,26 @@ class RimeNonJsonTTSService(InterruptibleTTSService): try: if self._websocket and self._websocket.state is State.OPEN: return - # Build URL with query parameters (only non-None values) - params = "&".join(f"{k}={v}" for k, v in self._settings.items() if v is not None) + # Build URL with query parameters (only given, non-None values) + settings_dict = { + "speaker": self._settings.voice, + "modelId": self._settings.model, + "audioFormat": self._settings.audioFormat, + "samplingRate": self._settings.samplingRate, + } + if self._settings.language is not None: + settings_dict["lang"] = self._settings.language + if self._settings.segment is not None: + settings_dict["segment"] = self._settings.segment + if self._settings.repetition_penalty is not None: + settings_dict["repetition_penalty"] = self._settings.repetition_penalty + if self._settings.temperature is not None: + settings_dict["temperature"] = self._settings.temperature + if self._settings.top_p is not None: + settings_dict["top_p"] = self._settings.top_p + # Include extras + settings_dict.update(self._settings.extra) + params = "&".join(f"{k}={v}" for k, v in settings_dict.items() if v is not None) url = f"{self._url}?{params}" headers = {"Authorization": f"Bearer {self._api_key}"} self._websocket = await websocket_connect( @@ -990,68 +1076,17 @@ class RimeNonJsonTTSService(InterruptibleTTSService): except Exception as e: yield ErrorFrame(error=f"Unknown error occurred: {e}") - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service 
settings and reconnect if necessary. + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and reconnect if necessary. Since all settings are WebSocket URL query parameters, any setting change requires reconnecting to apply the new values. """ - needs_reconnect = False + changed = await super()._update_settings(delta) - # Track previous values from self._settings only - prev_settings = self._settings.copy() - - # Let parent class handle standard settings (voice, model, language) - await super()._update_settings(settings) - - # Check if voice changed and update settings dict - if "voice" in settings or "voice_id" in settings: - self._settings["speaker"] = self._voice_id - if prev_settings.get("speaker") != self._voice_id: - logger.info(f"Switching TTS voice to: [{self._voice_id}]") - needs_reconnect = True - - # Check if model changed and update settings dict - if "model" in settings: - self._settings["modelId"] = self._model - if prev_settings.get("modelId") != self._model: - logger.info(f"Switching TTS model to: [{self._model}]") - needs_reconnect = True - - # Handle language explicitly - if "language" in settings: - new_lang = self.language_to_service_language(settings["language"]) - if new_lang and new_lang != prev_settings.get("lang"): - logger.info(f"Updating language to: [{new_lang}]") - self._settings["lang"] = new_lang - needs_reconnect = True - - # Check other parameters - for key in ["segment", "repetition_penalty", "temperature", "top_p"]: - if key in settings and settings[key] != prev_settings.get(key): - logger.info(f"Updating {key} to: [{settings[key]}]") - self._settings[key] = settings[key] - needs_reconnect = True - - # Handle extra parameters - for key, value in settings.items(): - if key not in [ - "voice", - "voice_id", - "model", - "language", - "segment", - "repetition_penalty", - "temperature", - "top_p", - ]: - if value != prev_settings.get(key): - logger.info(f"Updating extra parameter {key} 
to: [{value}]") - self._settings[key] = value - needs_reconnect = True - - # Reconnect if any setting changed - if needs_reconnect: + if changed: logger.debug("Settings changed, reconnecting WebSocket with new parameters") await self._disconnect() await self._connect() + + return changed diff --git a/src/pipecat/services/sambanova/llm.py b/src/pipecat/services/sambanova/llm.py index 047ce0e6c..016e1740d 100644 --- a/src/pipecat/services/sambanova/llm.py +++ b/src/pipecat/services/sambanova/llm.py @@ -84,19 +84,19 @@ class SambaNovaLLMService(OpenAILLMService): # type: ignore Dictionary of parameters for the chat completion request. """ params = { - "model": self.model_name, + "model": self._settings.model, "stream": True, "stream_options": {"include_usage": True}, - "temperature": self._settings["temperature"], - "top_p": self._settings["top_p"], - "max_tokens": self._settings["max_tokens"], - "max_completion_tokens": self._settings["max_completion_tokens"], + "temperature": self._settings.temperature, + "top_p": self._settings.top_p, + "max_tokens": self._settings.max_tokens, + "max_completion_tokens": self._settings.max_completion_tokens, } # Messages, tools, tool_choice params.update(params_from_context) - params.update(self._settings["extra"]) + params.update(self._settings.extra) return params @traced_llm # type: ignore diff --git a/src/pipecat/services/sambanova/stt.py b/src/pipecat/services/sambanova/stt.py index a1cbe8a22..f313f0d7b 100644 --- a/src/pipecat/services/sambanova/stt.py +++ b/src/pipecat/services/sambanova/stt.py @@ -72,7 +72,7 @@ class SambaNovaSTTService(BaseWhisperSTTService): # type: ignore # Build kwargs dict with only set parameters kwargs = { "file": ("audio.wav", audio, "audio/wav"), - "model": self.model_name, + "model": self._settings.model, "response_format": "json", "language": self._language, } diff --git a/src/pipecat/services/sarvam/stt.py b/src/pipecat/services/sarvam/stt.py index e8f2d55ce..e368ceb02 100644 --- 
a/src/pipecat/services/sarvam/stt.py +++ b/src/pipecat/services/sarvam/stt.py @@ -12,8 +12,8 @@ can handle multiple audio formats for Indian language speech recognition. """ import base64 -from dataclasses import dataclass -from typing import AsyncGenerator, Dict, Literal, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Dict, Literal, Optional from loguru import logger from pydantic import BaseModel @@ -32,6 +32,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.sarvam._sdk import sdk_headers +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven, is_given from pipecat.services.stt_latency import SARVAM_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -130,6 +131,23 @@ MODEL_CONFIGS: Dict[str, ModelConfig] = { } +@dataclass +class SarvamSTTSettings(STTSettings): + """Settings for the Sarvam STT service. + + Parameters: + prompt: Optional prompt to guide transcription/translation style. + mode: Mode of operation (transcribe, translate, verbatim, etc.). + vad_signals: Enable VAD signals in response. + high_vad_sensitivity: Enable high VAD sensitivity. + """ + + prompt: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + mode: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + vad_signals: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + high_vad_sensitivity: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class SarvamSTTService(STTService): """Sarvam speech-to-text service. @@ -148,6 +166,8 @@ class SarvamSTTService(STTService): ... """ + _settings: SarvamSTTSettings + class InputParams(BaseModel): """Configuration parameters for Sarvam STT service. 
@@ -220,50 +240,41 @@ class SarvamSTTService(STTService): f"Model '{model}' does not support language parameter (auto-detects language)." ) + # Resolve mode default from model config + mode = params.mode if params.mode is not None else self._config.default_mode + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, keepalive_timeout=keepalive_timeout, keepalive_interval=keepalive_interval, + settings=SarvamSTTSettings( + model=model, + language=params.language, + prompt=params.prompt, + mode=mode, + vad_signals=params.vad_signals, + high_vad_sensitivity=params.high_vad_sensitivity, + ), **kwargs, ) - self.set_model_name(model) self._api_key = api_key - self._language_code: Optional[Language] = params.language - - # Set language string: use provided language or model's default - if params.language: - self._language_string = language_to_sarvam_language(params.language) - else: - self._language_string = self._config.default_language - - self._prompt = params.prompt - - # Set mode: use provided mode or model's default - self._mode = params.mode if params.mode is not None else self._config.default_mode # Store connection parameters - self._vad_signals = params.vad_signals - self._high_vad_sensitivity = params.high_vad_sensitivity self._input_audio_codec = input_audio_codec # Initialize Sarvam SDK client self._sdk_headers = sdk_headers() - # NOTE: We avoid passing non-standard kwargs here because different sarvamai - # versions expose different constructor signatures (static type checkers - # complain otherwise). We instead inject headers best-effort below. 
- self._sarvam_client = AsyncSarvamAI(api_subscription_key=api_key) - for attr in ("default_headers", "_default_headers", "headers", "_headers"): - d = getattr(self._sarvam_client, attr, None) - if isinstance(d, dict): - d.update(self._sdk_headers) - break + # Pass Pipecat SDK headers directly at client construction time so they are + # merged by the Sarvam SDK's client wrapper and consistently applied to + # WebSocket handshake requests. + self._sarvam_client = AsyncSarvamAI(api_subscription_key=api_key, headers=self._sdk_headers) self._websocket_context = None self._socket_client = None self._receive_task = None - if self._vad_signals: + if params.vad_signals: self._register_event_handler("on_speech_started") self._register_event_handler("on_speech_stopped") self._register_event_handler("on_utterance_end") @@ -281,6 +292,12 @@ class SarvamSTTService(STTService): """ return language_to_sarvam_language(language) + def _get_language_string(self) -> Optional[str]: + """Resolve the current language setting to a Sarvam language code string.""" + if self._settings.language: + return language_to_sarvam_language(self._settings.language) + return self._config.default_language + def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -298,50 +315,91 @@ class SarvamSTTService(STTService): await super().process_frame(frame, direction) # Only handle VAD frames when not using Sarvam's VAD signals - if not self._vad_signals: + if not self._settings.vad_signals: if isinstance(frame, VADUserStartedSpeakingFrame): await self._start_metrics() elif isinstance(frame, VADUserStoppedSpeakingFrame): if self._socket_client: await self._socket_client.flush() - async def set_language(self, language: Language): - """Set the recognition language and reconnect. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Validate and apply a settings delta (no reconnect is performed yet). 
Args: - language: The language to use for speech recognition. + delta: A :class:`STTSettings` (or ``SarvamSTTSettings``) delta. + + Returns: + Dict mapping changed field names to their previous values. Raises: - ValueError: If called on a model that auto-detects language. + ValueError: If a setting is not supported by the current model. """ - if not self._config.supports_language: - raise ValueError( - f"Model '{self.model_name}' does not support language parameter " - "(auto-detects language)." - ) + # Validate against model capabilities before applying + if is_given(delta.language) and delta.language is not None: + if not self._config.supports_language: + raise ValueError( + f"Model '{self._settings.model}' does not support language parameter " + "(auto-detects language)." + ) - logger.info(f"Switching STT language to: [{language}]") - self._language_code = language - self._language_string = language_to_sarvam_language(language) - await self._disconnect() - await self._connect() + if isinstance(delta, SarvamSTTSettings): + if is_given(delta.prompt) and delta.prompt is not None: + if not self._config.supports_prompt: + raise ValueError( + f"Model '{self._settings.model}' does not support prompt parameter." + ) + if is_given(delta.mode) and delta.mode is not None: + if not self._config.supports_mode: + raise ValueError( + f"Model '{self._settings.model}' does not support mode parameter." + ) + + changed = await super()._update_settings(delta) + + # TODO: someday we could reconnect here to apply updated settings. + # Code might look something like the below: + # if not changed: + # return changed + + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed async def set_prompt(self, prompt: Optional[str]): """Set the transcription/translation prompt and reconnect. + .. deprecated:: + Use ``STTUpdateSettingsFrame(SarvamSTTSettings(prompt=...))`` instead. 
+ Args: prompt: Prompt text to guide transcription/translation style/context. Pass None to clear/disable prompt. Only applicable to models that support prompts. """ + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + f"{self.__class__.__name__}.set_prompt() is deprecated. " + "Use STTUpdateSettingsFrame(SarvamSTTSettings(prompt=...)) instead.", + DeprecationWarning, + stacklevel=2, + ) + if not self._config.supports_prompt: if prompt is not None: - raise ValueError(f"Model '{self.model_name}' does not support prompt parameter.") + raise ValueError( + f"Model '{self._settings.model}' does not support prompt parameter." + ) # If prompt is None and model doesn't support prompts, silently return (no-op) return - logger.info(f"Updating {self.model_name} prompt.") - self._prompt = prompt + logger.info(f"Updating {self._settings.model} prompt.") + self._settings.prompt = prompt await self._disconnect() await self._connect() @@ -422,51 +480,58 @@ class SarvamSTTService(STTService): try: # Build common connection parameters connect_kwargs = { - "model": self.model_name, + "model": self._settings.model, "sample_rate": str(self.sample_rate), } # Enable flush signal when using Pipecat's VAD (not Sarvam's) so that # the flush() call on user-stopped-speaking is honored by the server. 
- if not self._vad_signals: + if not self._settings.vad_signals: connect_kwargs["flush_signal"] = "true" # Only send vad parameters when explicitly set (avoid overriding server defaults) - if self._vad_signals is not None: - connect_kwargs["vad_signals"] = "true" if self._vad_signals else "false" - if self._high_vad_sensitivity is not None: + if self._settings.vad_signals is not None: + connect_kwargs["vad_signals"] = "true" if self._settings.vad_signals else "false" + if self._settings.high_vad_sensitivity is not None: connect_kwargs["high_vad_sensitivity"] = ( - "true" if self._high_vad_sensitivity else "false" + "true" if self._settings.high_vad_sensitivity else "false" ) # Add language_code for models that support it - if self._language_string is not None: - connect_kwargs["language_code"] = self._language_string + language_string = self._get_language_string() + if language_string is not None: + connect_kwargs["language_code"] = language_string # Add mode for models that support it - if self._config.supports_mode and self._mode is not None: - connect_kwargs["mode"] = self._mode + if self._config.supports_mode and self._settings.mode is not None: + connect_kwargs["mode"] = self._settings.mode # Prompt support differs across sarvamai versions. Prefer connect-time prompt # when available and gracefully degrade if the SDK doesn't accept it. - if self._prompt is not None and self._config.supports_prompt: - connect_kwargs["prompt"] = self._prompt + if self._settings.prompt is not None and self._config.supports_prompt: + connect_kwargs["prompt"] = self._settings.prompt def _connect_with_sdk_headers(connect_fn, **kwargs): - # Different SDK versions may use different kwarg names. # If prompt is unsupported at connect-time, retry without it. + # Headers are supplied through request_options because this is a + # documented SDK parameter that survives SDK signature changes. 
+ request_options = {"additional_headers": self._sdk_headers} + attempts = [kwargs] if "prompt" in kwargs: attempts.append({k: v for k, v in kwargs.items() if k != "prompt"}) last_type_error = None for attempt_kwargs in attempts: - for header_kw in ("headers", "additional_headers", "extra_headers"): - try: - return connect_fn(**attempt_kwargs, **{header_kw: self._sdk_headers}) - except TypeError as e: - last_type_error = e try: + return connect_fn( + **attempt_kwargs, + request_options=request_options, + ) + except TypeError as e: + last_type_error = e + try: + # Fallback for SDK builds that don't expose request_options. return connect_fn(**attempt_kwargs) except TypeError as e: last_type_error = e @@ -491,10 +556,10 @@ class SarvamSTTService(STTService): self._socket_client = await self._websocket_context.__aenter__() # Fallback for SDKs that support runtime prompt updates. - if self._prompt is not None and self._config.supports_prompt: + if self._settings.prompt is not None and self._config.supports_prompt: prompt_setter = getattr(self._socket_client, "set_prompt", None) if callable(prompt_setter): - await prompt_setter(self._prompt) + await prompt_setter(self._settings.prompt) # Register event handler for incoming messages def _message_handler(message): @@ -579,7 +644,7 @@ class SarvamSTTService(STTService): logger.debug("User started speaking") await self._call_event_handler("on_speech_started") await self.broadcast_frame(UserStartedSpeakingFrame) - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() elif signal == "END_SPEECH": logger.debug("User stopped speaking") @@ -592,10 +657,12 @@ class SarvamSTTService(STTService): # Prefer language from message (auto-detected for translate models). Fallback to configured. 
if language_code: language = self._map_language_code_to_enum(language_code) - elif self._language_string: - language = self._map_language_code_to_enum(self._language_string) else: - language = Language.HI_IN + language_string = self._get_language_string() + if language_string: + language = self._map_language_code_to_enum(language_string) + else: + language = Language.HI_IN # Emit utterance end event await self._call_event_handler("on_utterance_end") diff --git a/src/pipecat/services/sarvam/tts.py b/src/pipecat/services/sarvam/tts.py index 753293c75..c18933407 100644 --- a/src/pipecat/services/sarvam/tts.py +++ b/src/pipecat/services/sarvam/tts.py @@ -40,9 +40,9 @@ See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for full API import asyncio import base64 import json -from dataclasses import dataclass +from dataclasses import dataclass, field from enum import Enum -from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional, Tuple +from typing import Any, AsyncGenerator, ClassVar, Dict, List, Optional, Tuple import aiohttp from loguru import logger @@ -62,7 +62,8 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.sarvam._sdk import sdk_headers -from pipecat.services.tts_service import InterruptibleTTSService, TTSService +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven +from pipecat.services.tts_service import InterruptibleTTSService, TextAggregationMode, TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -244,6 +245,80 @@ def language_to_sarvam_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=False) +@dataclass +class SarvamHttpTTSSettings(TTSSettings): + """Settings for Sarvam HTTP TTS service. + + Parameters: + language: Sarvam language code. 
+ enable_preprocessing: Whether to enable text preprocessing. Defaults to False. + **Note:** Always enabled for bulbul:v3-beta (cannot be disabled). + pace: Speech pace multiplier. Defaults to 1.0. + - bulbul:v2: Range 0.3 to 3.0 + - bulbul:v3-beta: Range 0.5 to 2.0 + pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0. + **Note:** Only supported for bulbul:v2. Ignored for v3 models. + loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0. + **Note:** Only supported for bulbul:v2. Ignored for v3 models. + temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0). + Lower values = more deterministic, higher = more random. Defaults to 0.6. + **Note:** Only supported for bulbul:v3-beta. Ignored for v2. + sarvam_sample_rate: Audio sample rate. + """ + + language: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_preprocessing: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pace: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + loudness: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + sarvam_sample_rate: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class SarvamTTSSettings(TTSSettings): + """Settings for Sarvam WebSocket TTS service. + + Parameters: + language: Sarvam language code (e.g. ``"hi-IN"``). Uses the standard + ``TTSSettings.language`` field. + speech_sample_rate: Audio sample rate as string. + enable_preprocessing: Enable text preprocessing. Defaults to False. + **Note:** Always enabled for bulbul:v3-beta. + min_buffer_size: Minimum characters to buffer before generating audio. + Lower values reduce latency but may affect quality. Defaults to 50. + max_chunk_length: Maximum characters processed in a single chunk. 
+ Controls memory usage and processing efficiency. Defaults to 150. + output_audio_codec: Audio codec format. Options: linear16, mulaw, alaw, + opus, flac, aac, wav, mp3. Defaults to "linear16". + output_audio_bitrate: Audio bitrate (32k, 64k, 96k, 128k, 192k). + Defaults to "128k". + pace: Speech pace multiplier. Defaults to 1.0. + - bulbul:v2: Range 0.3 to 3.0 + - bulbul:v3-beta: Range 0.5 to 2.0 + pitch: Voice pitch adjustment (-0.75 to 0.75). Defaults to 0.0. + **Note:** Only supported for bulbul:v2. Ignored for v3 models. + loudness: Volume multiplier (0.3 to 3.0). Defaults to 1.0. + **Note:** Only supported for bulbul:v2. Ignored for v3 models. + temperature: Controls output randomness for bulbul:v3-beta (0.01 to 1.0). + Lower = more deterministic, higher = more random. Defaults to 0.6. + **Note:** Only supported for bulbul:v3-beta. Ignored for v2. + """ + + _aliases: ClassVar[Dict[str, str]] = {"target_language_code": "language"} + + speech_sample_rate: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_preprocessing: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + min_buffer_size: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_chunk_length: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_audio_codec: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + output_audio_bitrate: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pace: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + pitch: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + loudness: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class SarvamHttpTTSService(TTSService): """Text-to-Speech service using Sarvam AI's API. 
@@ -296,6 +371,8 @@ class SarvamHttpTTSService(TTSService): ) """ + _settings: SarvamHttpTTSSettings + class InputParams(BaseModel): """Input parameters for Sarvam TTS configuration. @@ -383,18 +460,12 @@ class SarvamHttpTTSService(TTSService): if sample_rate is None: sample_rate = self._config.default_sample_rate - super().__init__(sample_rate=sample_rate, **kwargs) - params = params or SarvamHttpTTSService.InputParams() # Set default voice based on model if not specified if voice_id is None: voice_id = self._config.default_speaker - self._api_key = api_key - self._base_url = base_url - self._session = aiohttp_session - # Validate and clamp pace to model's valid range pace = params.pace pace_min, pace_max = self._config.pace_range @@ -402,37 +473,49 @@ class SarvamHttpTTSService(TTSService): logger.warning(f"Pace {pace} is outside model range ({pace_min}-{pace_max}). Clamping.") pace = max(pace_min, min(pace_max, pace)) - # Build base settings - self._settings = { - "language": ( - self.language_to_service_language(params.language) if params.language else "en-IN" + super().__init__( + sample_rate=sample_rate, + settings=SarvamHttpTTSSettings( + language=( + self.language_to_service_language(params.language) + if params.language + else "en-IN" + ), + enable_preprocessing=( + True + if self._config.preprocessing_always_enabled + else params.enable_preprocessing + ), + pace=pace, + pitch=None, + loudness=None, + temperature=None, + model=model, + voice=voice_id, ), - "enable_preprocessing": ( - True if self._config.preprocessing_always_enabled else params.enable_preprocessing - ), - "pace": pace, - "model": model, - } + **kwargs, + ) + + self._api_key = api_key + self._base_url = base_url + self._session = aiohttp_session # Add parameters based on model support if self._config.supports_pitch: - self._settings["pitch"] = params.pitch + self._settings.pitch = params.pitch elif params.pitch != 0.0: logger.warning(f"pitch parameter is ignored for {model}") if 
self._config.supports_loudness: - self._settings["loudness"] = params.loudness + self._settings.loudness = params.loudness elif params.loudness != 1.0: logger.warning(f"loudness parameter is ignored for {model}") if self._config.supports_temperature: - self._settings["temperature"] = params.temperature + self._settings.temperature = params.temperature elif params.temperature != 0.6: logger.warning(f"temperature parameter is ignored for {model}") - self.set_model_name(model) - self.set_voice(voice_id) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. @@ -459,7 +542,7 @@ class SarvamHttpTTSService(TTSService): frame: The start frame containing initialization parameters. """ await super().start(frame) - self._settings["sample_rate"] = self.sample_rate + self._settings.sarvam_sample_rate = self.sample_rate @traced_tts async def run_tts(self, text: str, context_id: str) -> AsyncGenerator[Frame, None]: @@ -480,21 +563,25 @@ class SarvamHttpTTSService(TTSService): # Build payload with common parameters payload = { "text": text, - "target_language_code": self._settings["language"], - "speaker": self._voice_id, + "target_language_code": self._settings.language, + "speaker": self._settings.voice, "sample_rate": self.sample_rate, - "enable_preprocessing": self._settings["enable_preprocessing"], - "model": self._model_name, - "pace": self._settings.get("pace", 1.0), + "enable_preprocessing": self._settings.enable_preprocessing, + "model": self._settings.model, + "pace": self._settings.pace if self._settings.pace is not None else 1.0, } # Add model-specific parameters based on config if self._config.supports_pitch: - payload["pitch"] = self._settings.get("pitch", 0.0) + payload["pitch"] = self._settings.pitch if self._settings.pitch is not None else 0.0 if self._config.supports_loudness: - payload["loudness"] = self._settings.get("loudness", 1.0) + payload["loudness"] = ( + self._settings.loudness if self._settings.loudness is 
not None else 1.0 + ) if self._config.supports_temperature: - payload["temperature"] = self._settings.get("temperature", 0.6) + payload["temperature"] = ( + self._settings.temperature if self._settings.temperature is not None else 0.6 + ) headers = { "api-subscription-key": self._api_key, @@ -605,6 +692,8 @@ class SarvamTTSService(InterruptibleTTSService): See https://docs.sarvam.ai/api-reference-docs/text-to-speech/stream for API details. """ + _settings: SarvamTTSSettings + class InputParams(BaseModel): """Configuration parameters for Sarvam TTS WebSocket service. @@ -696,7 +785,8 @@ class SarvamTTSService(InterruptibleTTSService): model: str = "bulbul:v2", voice_id: Optional[str] = None, url: str = "wss://api.sarvam.ai/text-to-speech/ws", - aggregate_sentences: Optional[bool] = True, + aggregate_sentences: Optional[bool] = None, + text_aggregation_mode: Optional[TextAggregationMode] = None, sample_rate: Optional[int] = None, params: Optional[InputParams] = None, **kwargs, @@ -710,7 +800,12 @@ class SarvamTTSService(InterruptibleTTSService): - "bulbul:v3-beta": Advanced model with temperature control voice_id: Speaker voice ID. If None, uses model-appropriate default. url: WebSocket URL for the TTS backend (default production URL). - aggregate_sentences: Merge multiple sentences into one audio chunk (default True). + aggregate_sentences: Deprecated. Use text_aggregation_mode instead. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. + + text_aggregation_mode: How to aggregate text before synthesis. sample_rate: Output audio sample rate in Hz (8000, 16000, 22050, 24000). If None, uses model-specific default. params: Optional input parameters to override defaults. 
@@ -729,26 +824,11 @@ class SarvamTTSService(InterruptibleTTSService): if sample_rate is None: sample_rate = self._config.default_sample_rate - # Initialize parent class first - super().__init__( - aggregate_sentences=aggregate_sentences, - push_text_frames=True, - pause_frame_processing=True, - push_stop_frames=True, - sample_rate=sample_rate, - **kwargs, - ) - params = params or SarvamTTSService.InputParams() - # Set default voice based on model if not specified if voice_id is None: voice_id = self._config.default_speaker - # WebSocket endpoint URL with model query parameter - self._websocket_url = f"{url}?model={model}" - self._api_key = api_key - self.set_model_name(model) - self.set_voice(voice_id) + params = params or SarvamTTSService.InputParams() # Validate and clamp pace to model's valid range pace = params.pace @@ -757,37 +837,57 @@ class SarvamTTSService(InterruptibleTTSService): logger.warning(f"Pace {pace} is outside model range ({pace_min}-{pace_max}). Clamping.") pace = max(pace_min, min(pace_max, pace)) - # Build base settings - self._settings = { - "target_language_code": ( - self.language_to_service_language(params.language) if params.language else "en-IN" + # Initialize parent class first + super().__init__( + aggregate_sentences=aggregate_sentences, + text_aggregation_mode=text_aggregation_mode, + push_text_frames=True, + pause_frame_processing=True, + push_stop_frames=True, + sample_rate=sample_rate, + settings=SarvamTTSSettings( + language=( + self.language_to_service_language(params.language) + if params.language + else "en-IN" + ), + speech_sample_rate=str(sample_rate), + enable_preprocessing=( + True + if self._config.preprocessing_always_enabled + else params.enable_preprocessing + ), + min_buffer_size=params.min_buffer_size, + max_chunk_length=params.max_chunk_length, + output_audio_codec=params.output_audio_codec, + output_audio_bitrate=params.output_audio_bitrate, + pace=pace, + pitch=None, + loudness=None, + temperature=None, + 
model=model, + voice=voice_id, ), - "speaker": voice_id, - "speech_sample_rate": str(sample_rate), - "enable_preprocessing": ( - True if self._config.preprocessing_always_enabled else params.enable_preprocessing - ), - "min_buffer_size": params.min_buffer_size, - "max_chunk_length": params.max_chunk_length, - "output_audio_codec": params.output_audio_codec, - "output_audio_bitrate": params.output_audio_bitrate, - "pace": pace, - "model": model, - } + **kwargs, + ) + + # WebSocket endpoint URL with model query parameter + self._websocket_url = f"{url}?model={model}" + self._api_key = api_key # Add parameters based on model support if self._config.supports_pitch: - self._settings["pitch"] = params.pitch + self._settings.pitch = params.pitch elif params.pitch != 0.0: logger.warning(f"pitch parameter is ignored for {model}") if self._config.supports_loudness: - self._settings["loudness"] = params.loudness + self._settings.loudness = params.loudness elif params.loudness != 1.0: logger.warning(f"loudness parameter is ignored for {model}") if self._config.supports_temperature: - self._settings["temperature"] = params.temperature + self._settings.temperature = params.temperature elif params.temperature != 0.6: logger.warning(f"temperature parameter is ignored for {model}") @@ -823,7 +923,7 @@ class SarvamTTSService(InterruptibleTTSService): await super().start(frame) # WebSocket API expects sample rate as string - self._settings["speech_sample_rate"] = str(self.sample_rate) + self._settings.speech_sample_rate = str(self.sample_rate) await self._connect() async def stop(self, frame: EndFrame): @@ -870,14 +970,15 @@ class SarvamTTSService(InterruptibleTTSService): if isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): await self.flush_audio() - async def _update_settings(self, settings: Mapping[str, Any]): - """Update service settings and reconnect if voice changed.""" - prev_voice = self._voice_id - await super()._update_settings(settings) - if not prev_voice == 
self._voice_id: - logger.info(f"Switching TTS voice to: [{self._voice_id}]") + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a settings delta and resend config if any setting changed.""" + changed = await super()._update_settings(delta) + + if changed: await self._send_config() + return changed + async def _connect(self): """Connect to Sarvam WebSocket and start background tasks.""" await super()._connect() @@ -912,12 +1013,14 @@ class SarvamTTSService(InterruptibleTTSService): if self._websocket and self._websocket.state is State.OPEN: return + ws_additional_headers = { + "api-subscription-key": self._api_key, + **sdk_headers(), + } + self._websocket = await websocket_connect( self._websocket_url, - additional_headers={ - "api-subscription-key": self._api_key, - **sdk_headers(), - }, + additional_headers=ws_additional_headers, ) logger.debug("Connected to Sarvam TTS Websocket") await self._send_config() @@ -934,9 +1037,27 @@ class SarvamTTSService(InterruptibleTTSService): """Send initial configuration message.""" if not self._websocket: raise Exception("WebSocket not connected") - self._settings["speaker"] = self._voice_id - logger.debug(f"Config being sent is {self._settings}") - config_message = {"type": "config", "data": self._settings} + # Build config dict for the API + config_data = { + "target_language_code": self._settings.language, + "speaker": self._settings.voice, + "speech_sample_rate": self._settings.speech_sample_rate, + "enable_preprocessing": self._settings.enable_preprocessing, + "min_buffer_size": self._settings.min_buffer_size, + "max_chunk_length": self._settings.max_chunk_length, + "output_audio_codec": self._settings.output_audio_codec, + "output_audio_bitrate": self._settings.output_audio_bitrate, + "pace": self._settings.pace, + "model": self._settings.model, + } + if self._settings.pitch is not None: + config_data["pitch"] = self._settings.pitch + if self._settings.loudness is not None: +
config_data["loudness"] = self._settings.loudness + if self._settings.temperature is not None: + config_data["temperature"] = self._settings.temperature + logger.debug(f"Config being sent is {config_data}") + config_message = {"type": "config", "data": config_data} try: await self._websocket.send(json.dumps(config_message)) diff --git a/src/pipecat/services/settings.py b/src/pipecat/services/settings.py new file mode 100644 index 000000000..5d215273f --- /dev/null +++ b/src/pipecat/services/settings.py @@ -0,0 +1,433 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Settings infrastructure for Pipecat AI services. + +Each service type has a settings dataclass (``LLMSettings``, ``TTSSettings``, +``STTSettings``, or a service-specific subclass). The same class is used in +two distinct modes: + +**Store mode** — the service's ``self._settings`` object that holds the full +current state. Every field must have a real value; ``NOT_GIVEN`` is never +valid here. Services that don't support an inherited field should set it to +``None``. ``validate_complete()`` (called automatically in +``AIService.start()``) checks this invariant and logs an error if it is violated. + +**Delta mode** — a sparse update object carried by an +``*UpdateSettingsFrame``. Only the fields the caller wants to change are set; +all others remain at their default of ``NOT_GIVEN``. ``apply_update()`` +merges a delta into a store, skipping any ``NOT_GIVEN`` fields. + +Key helpers: + +- ``NOT_GIVEN`` / ``is_given()`` — sentinel and check for "field not provided + in this delta". +- ``apply_update(delta)`` — merge a delta into a store, returning changed + fields. +- ``from_mapping(dict)`` — build a delta from a plain dict (for backward + compatibility with dict-based ``*UpdateSettingsFrame``). +- ``validate_complete()`` — check that a store has no ``NOT_GIVEN`` fields (logs an error otherwise). +- ``extra`` dict — overflow for service-specific keys that don't map to a + declared field.
+""" + +from __future__ import annotations + +import copy +from dataclasses import dataclass, field, fields +from typing import TYPE_CHECKING, Any, ClassVar, Dict, Mapping, Optional, Type, TypeVar + +from loguru import logger + +from pipecat.transcriptions.language import Language + +if TYPE_CHECKING: + from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionConfig + +# --------------------------------------------------------------------------- +# NOT_GIVEN sentinel +# --------------------------------------------------------------------------- + + +class _NotGiven: + """Sentinel meaning "this field was not included in the delta". + + ``NOT_GIVEN`` is distinct from ``None`` (which is a valid stored value, + typically meaning "this service doesn't support this field"). Every + settings field defaults to ``NOT_GIVEN`` so that delta-mode objects are + sparse by default and ``apply_update`` can skip untouched fields. + + ``NOT_GIVEN`` must never appear in a store-mode object — see + ``validate_complete()``. + """ + + _instance: Optional[_NotGiven] = None + + def __new__(cls) -> _NotGiven: + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __repr__(self) -> str: + return "NOT_GIVEN" + + def __bool__(self) -> bool: + return False + + +NOT_GIVEN: _NotGiven = _NotGiven() +"""Singleton sentinel meaning "this field was not included in the delta". + +Valid only in delta-mode settings objects. Must never appear in a service's +``self._settings`` (store mode) — use ``None`` instead for unsupported fields. +""" + + +def is_given(value: Any) -> bool: + """Check whether a delta field was explicitly provided. + + Typically used when processing a delta to decide whether a field + should be applied:: + + if is_given(delta.voice): + # caller wants to change the voice + ... + + For store-mode objects this always returns ``True`` (since + ``validate_complete`` ensures no ``NOT_GIVEN`` fields remain). 
+ + Args: + value: The value to check. + + Returns: + ``True`` if *value* is anything other than ``NOT_GIVEN``. + """ + return not isinstance(value, _NotGiven) + + +# --------------------------------------------------------------------------- +# Base ServiceSettings +# --------------------------------------------------------------------------- + +_S = TypeVar("_S", bound="ServiceSettings") + + +@dataclass +class ServiceSettings: + """Base class for runtime-updatable service settings. + + These settings capture the subset of a service's configuration that can + be changed **while the pipeline is running** (e.g. switching the model or + changing the voice). They are *not* meant to capture every constructor + parameter — only those that support live updates via + ``*UpdateSettingsFrame``. + + Every AI service type (LLM, TTS, STT) extends this with its own fields. + Each instance operates in one of two modes (see module docstring): + + - **Store mode** (``self._settings``): holds the full current state. + Every field must be a real value — ``NOT_GIVEN`` is never valid. + Use ``None`` for inherited fields the service doesn't support. + Enforced at runtime by ``validate_complete()``. + - **Delta mode** (``*UpdateSettingsFrame``): a sparse update. + Only fields the caller wants to change are set; all others stay at + the default ``NOT_GIVEN`` and are skipped by ``apply_update()``. + + Parameters: + model: The model identifier used by the service. Set to ``None`` + in store mode if the service has no model concept. + extra: Overflow dict for service-specific keys that don't map to a + declared field. + """ + + # -- common fields ------------------------------------------------------- + + model: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + """AI model identifier (e.g. ``"gpt-4o"``, ``"eleven_turbo_v2_5"``). + + Defaults to ``NOT_GIVEN`` for delta mode. In store mode, set to a + model string or ``None`` if the service has no model concept. 
+ """ + + extra: Dict[str, Any] = field(default_factory=dict) + """Catch-all for service-specific keys that have no declared field.""" + + # -- class-level configuration ------------------------------------------- + + _aliases: ClassVar[Dict[str, str]] = {} + """Map of alternative key names to canonical field names. + + For example ``{"voice_id": "voice"}`` lets callers use either spelling. + Subclasses should override this as needed. + """ + + # -- public API ---------------------------------------------------------- + + def given_fields(self) -> Dict[str, Any]: + """Return a dict of only the fields that are not ``NOT_GIVEN``. + + Primarily useful for delta-mode objects to inspect which fields were + set. For a store-mode object this returns all declared fields (since + none should be ``NOT_GIVEN``). + + Skips the ``extra`` field itself but merges its entries into the + returned dict at the top level. + + Returns: + Dictionary mapping field names to their provided values. + """ + result: Dict[str, Any] = {} + for f in fields(self): + if f.name == "extra": + continue + val = getattr(self, f.name) + if is_given(val): + result[f.name] = val + result.update(self.extra) + return result + + def apply_update(self: _S, delta: _S) -> Dict[str, Any]: + """Merge a delta-mode object into this store-mode object. + + Only fields in *delta* that are **given** (i.e. not ``NOT_GIVEN``) + are considered. A field is "changed" if its new value differs from + the current value. + + The ``extra`` dicts are merged: keys present in the delta overwrite + keys in the target. + + Args: + delta: A delta-mode settings object of the same type. + + Returns: + A dict mapping each changed field name to its **pre-update** value. + Use ``changed.keys()`` for the set of names, or index with + ``changed["field"]`` to inspect the old value. 
+ + Examples:: + + # store-mode object (all fields given) + current = TTSSettings(voice="alice", language="en") + # delta-mode object (only voice is set) + delta = TTSSettings(voice="bob") + changed = current.apply_update(delta) + # changed == {"voice": "alice"} + # current.voice == "bob", current.language == "en" + """ + changed: Dict[str, Any] = {} + for f in fields(self): + if f.name == "extra": + continue + new_val = getattr(delta, f.name, NOT_GIVEN) + if not is_given(new_val): + continue + old_val = getattr(self, f.name) + if old_val != new_val: + setattr(self, f.name, new_val) + changed[f.name] = old_val + + # Merge extra + for key, new_val in delta.extra.items(): + old_val = self.extra.get(key, NOT_GIVEN) + if old_val != new_val: + self.extra[key] = new_val + changed[key] = old_val + + return changed + + @classmethod + def from_mapping(cls: Type[_S], settings: Mapping[str, Any]) -> _S: + """Build a **delta-mode** settings object from a plain dictionary. + + This exists for backward compatibility with code that passes plain + dicts via ``*UpdateSettingsFrame(settings={...})``. The returned + object is a delta: only the keys present in *settings* are set; + all other fields remain ``NOT_GIVEN``. + + Keys are matched to dataclass fields by name. Keys listed in + ``_aliases`` are translated to their canonical name first. Any + remaining unrecognized keys are placed into ``extra``. + + Args: + settings: A dictionary of setting names to values. + + Returns: + A new delta-mode settings instance. 
+ + Examples:: + + delta = TTSSettings.from_mapping({"voice_id": "alice", "speed": 1.2}) + # delta.voice == "alice" (via alias) + # delta.language is NOT_GIVEN (not in the dict) + # delta.extra == {"speed": 1.2} + """ + field_names = {f.name for f in fields(cls)} - {"extra"} + kwargs: Dict[str, Any] = {} + extra: Dict[str, Any] = {} + + for key, value in settings.items(): + # Resolve aliases first + canonical = cls._aliases.get(key, key) + if canonical in field_names: + kwargs[canonical] = value + else: + extra[key] = value + + instance = cls(**kwargs) + instance.extra = extra + return instance + + def validate_complete(self) -> None: + """Check that this is a valid store-mode object (no ``NOT_GIVEN`` fields). + + Called automatically by ``AIService.start()`` to catch fields that a + service forgot to initialize in its ``__init__``. Can also be called + manually after constructing a store-mode settings object. + + Logs a single error naming every uninitialized field. Failure to initialize + all fields may or may not cause runtime issues — it depends on + whether and how the service actually reads the field — but it indicates + a deviation from expectations and should be fixed. + """ + missing = [ + f.name + for f in fields(self) + if f.name != "extra" and isinstance(getattr(self, f.name), _NotGiven) + ] + if missing: + names = ", ".join(missing) + logger.error( + f"{type(self).__name__}: the following fields are NOT_GIVEN: {names}. " + f"All settings fields should be initialized in the service's " + f"__init__ (use None for unsupported fields)." + ) + + def copy(self: _S) -> _S: + """Return a deep copy of this settings instance. + + Returns: + A new settings object with the same field values.
+ """ + return copy.deepcopy(self) + + +# --------------------------------------------------------------------------- +# Service-specific settings +# --------------------------------------------------------------------------- + + +@dataclass +class ImageGenSettings(ServiceSettings): + """Runtime-updatable settings for image generation services. + + Used in both store and delta mode — see ``ServiceSettings``. + + Parameters: + model: Image generation model identifier. + """ + + +@dataclass +class VisionSettings(ServiceSettings): + """Runtime-updatable settings for vision services. + + Used in both store and delta mode — see ``ServiceSettings``. + + Parameters: + model: Vision model identifier. + """ + + +@dataclass +class LLMSettings(ServiceSettings): + """Runtime-updatable settings for LLM services. + + Used in both store and delta mode — see ``ServiceSettings``. + + These fields are common across LLM providers. Not every provider supports + every field; in store mode, set unsupported fields to ``None`` (e.g. a + service that doesn't support ``seed`` should initialize it as + ``seed=None``). + + Parameters: + model: LLM model identifier. + temperature: Sampling temperature. + max_tokens: Maximum tokens to generate. + top_p: Nucleus sampling probability. + top_k: Top-k sampling parameter. + frequency_penalty: Frequency penalty. + presence_penalty: Presence penalty. + seed: Random seed for reproducibility. + filter_incomplete_user_turns: Enable LLM-based turn completion detection + to suppress bot responses when the user was cut off mid-thought. + See ``examples/foundational/22-filter-incomplete-turns.py`` and + ``UserTurnCompletionLLMServiceMixin``. + user_turn_completion_config: Configuration for turn completion behavior + when ``filter_incomplete_user_turns`` is enabled. Controls timeouts + and prompts for incomplete turns. 
+ """ + + temperature: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_tokens: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_p: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + top_k: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + frequency_penalty: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + presence_penalty: float | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + seed: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + filter_incomplete_user_turns: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + user_turn_completion_config: UserTurnCompletionConfig | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + + +@dataclass +class TTSSettings(ServiceSettings): + """Runtime-updatable settings for TTS services. + + Used in both store and delta mode — see ``ServiceSettings``. + + In store mode, set unsupported fields to ``None`` (e.g. ``language=None`` + if the service doesn't expose a language setting). + + Parameters: + model: TTS model identifier. + voice: Voice identifier or name. + language: Language for speech synthesis. The union type reflects the + *input* side: callers may pass a ``Language`` enum or a raw string + in a delta. However, the **stored** value (in store mode) is + always a service-specific string or ``None`` — + ``TTSService._update_settings`` converts ``Language`` enums via + ``language_to_service_language()`` before writing, and + ``__init__`` methods do the same at construction time. + """ + + voice: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language: Language | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + _aliases: ClassVar[Dict[str, str]] = {"voice_id": "voice"} + + +@dataclass +class STTSettings(ServiceSettings): + """Runtime-updatable settings for STT services. 
+ + Used in both store and delta mode — see ``ServiceSettings``. + + In store mode, set unsupported fields to ``None`` (e.g. ``language=None`` + if the service auto-detects language). + + Parameters: + model: STT model identifier. + language: Language for speech recognition. The union type reflects the + *input* side: callers may pass a ``Language`` enum or a raw string + in a delta. However, the **stored** value (in store mode) is + always a service-specific string or ``None`` — + ``STTService._update_settings`` converts ``Language`` enums via + ``language_to_service_language()`` before writing, and + ``__init__`` methods do the same at construction time. + """ + + language: Language | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) diff --git a/src/pipecat/services/soniox/stt.py b/src/pipecat/services/soniox/stt.py index c9184ba4c..32cbee1f4 100644 --- a/src/pipecat/services/soniox/stt.py +++ b/src/pipecat/services/soniox/stt.py @@ -8,7 +8,8 @@ import json import time -from typing import AsyncGenerator, List, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, List, Optional from loguru import logger from pydantic import BaseModel @@ -23,6 +24,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import SONIOX_TTFS_P99 from pipecat.services.stt_service import WebsocketSTTService from pipecat.transcriptions.language import Language @@ -134,6 +136,35 @@ def _prepare_language_hints( return list(set(prepared_languages)) +@dataclass +class SonioxSTTSettings(STTSettings): + """Settings for Soniox STT service. + + Parameters: + audio_format: Audio format to use for transcription. + num_channels: Number of channels to use for transcription. + language_hints: List of language hints to use for transcription. 
+ language_hints_strict: If true, strictly enforce language hints. + context: Customization for transcription. String for models with + context_version 1 and SonioxContextObject for models with + context_version 2. + enable_speaker_diarization: Whether to enable speaker diarization. + enable_language_identification: Whether to enable language identification. + client_reference_id: Client reference ID to use for transcription. + """ + + audio_format: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + num_channels: int | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_hints: List[Language] | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + language_hints_strict: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + context: SonioxContextObject | str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_speaker_diarization: bool | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_language_identification: bool | None | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + client_reference_id: str | None | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class SonioxSTTService(WebsocketSTTService): """Speech-to-Text service using Soniox's WebSocket API. @@ -144,6 +175,8 @@ class SonioxSTTService(WebsocketSTTService): For complete API documentation, see: https://soniox.com/docs/speech-to-text/api-reference/websocket-api """ + _settings: SonioxSTTSettings + def __init__( self, *, @@ -169,19 +202,30 @@ class SonioxSTTService(WebsocketSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to the STTService. 
""" + params = params or SonioxInputParams() + super().__init__( sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, keepalive_timeout=1, keepalive_interval=5, + settings=SonioxSTTSettings( + model=params.model, + language=None, + audio_format=params.audio_format, + num_channels=params.num_channels, + language_hints=params.language_hints, + language_hints_strict=params.language_hints_strict, + context=params.context, + enable_speaker_diarization=params.enable_speaker_diarization, + enable_language_identification=params.enable_language_identification, + client_reference_id=params.client_reference_id, + ), **kwargs, ) - params = params or SonioxInputParams() self._api_key = api_key self._url = url - self.set_model_name(params.model) - self._params = params self._vad_force_turn_endpoint = vad_force_turn_endpoint self._final_transcription_buffer = [] @@ -189,6 +233,14 @@ class SonioxSTTService(WebsocketSTTService): self._receive_task = None + def can_generate_metrics(self) -> bool: + """Check if this service can generate processing metrics. + + Returns: + True, as Soniox STT supports metrics generation. + """ + return True + async def start(self, frame: StartFrame): """Start the Soniox STT websocket connection. @@ -198,6 +250,31 @@ class SonioxSTTService(WebsocketSTTService): await super().start(frame) await self._connect() + async def _update_settings(self, delta: SonioxSTTSettings) -> dict[str, Any]: + """Apply settings delta. + + Settings are stored but not applied to the active connection. + + Args: + delta: A settings delta. + + Returns: + Dict mapping changed field names to their previous values. + """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + # TODO: someday we could reconnect here to apply updated settings. 
+ # Code might look something like the below: + # await self._disconnect() + # await self._connect() + + self._warn_unhandled_updated_settings(changed) + + return changed + async def stop(self, frame: EndFrame): """Stop the Soniox STT websocket connection. @@ -233,10 +310,8 @@ class SonioxSTTService(WebsocketSTTService): Yields: Frame: None (transcription results come via WebSocket callbacks). """ - await self.start_processing_metrics() if self._websocket and self._websocket.state is State.OPEN: await self._websocket.send(audio) - await self.stop_processing_metrics() yield None @@ -311,24 +386,26 @@ class SonioxSTTService(WebsocketSTTService): # Either one or the other is required. enable_endpoint_detection = not self._vad_force_turn_endpoint - context = self._params.context + s = self._settings + + context = s.context if isinstance(context, SonioxContextObject): context = context.model_dump() # Send the initial configuration message. config = { "api_key": self._api_key, - "model": self._model_name, - "audio_format": self._params.audio_format, - "num_channels": self._params.num_channels or 1, + "model": s.model, + "audio_format": s.audio_format, + "num_channels": s.num_channels or 1, "enable_endpoint_detection": enable_endpoint_detection, "sample_rate": self.sample_rate, - "language_hints": _prepare_language_hints(self._params.language_hints), - "language_hints_strict": self._params.language_hints_strict, + "language_hints": _prepare_language_hints(s.language_hints), + "language_hints_strict": s.language_hints_strict, "context": context, - "enable_speaker_diarization": self._params.enable_speaker_diarization, - "enable_language_identification": self._params.enable_language_identification, - "client_reference_id": self._params.client_reference_id, + "enable_speaker_diarization": s.enable_speaker_diarization, + "enable_language_identification": s.enable_language_identification, + "client_reference_id": s.client_reference_id, } # Send the configuration message. 
@@ -415,6 +492,8 @@ class SonioxSTTService(WebsocketSTTService): # the rest will be sent as interim tokens (even final tokens). await send_endpoint_transcript() else: + if not self._final_transcription_buffer: + await self.start_processing_metrics() self._final_transcription_buffer.append(token) else: non_final_transcription.append(token) diff --git a/src/pipecat/services/speechmatics/stt.py b/src/pipecat/services/speechmatics/stt.py index 72f3f3990..bdeb3b249 100644 --- a/src/pipecat/services/speechmatics/stt.py +++ b/src/pipecat/services/speechmatics/stt.py @@ -8,8 +8,10 @@ import asyncio import os +import warnings +from dataclasses import dataclass, field from enum import Enum -from typing import Any, AsyncGenerator +from typing import Any, AsyncGenerator, ClassVar from dotenv import load_dotenv from loguru import logger @@ -31,6 +33,7 @@ from pipecat.frames.frames import ( VADUserStoppedSpeakingFrame, ) from pipecat.processors.frame_processor import FrameDirection +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import SPEECHMATICS_TTFS_P99 from pipecat.services.stt_service import STTService from pipecat.transcriptions.language import Language, resolve_language @@ -80,6 +83,83 @@ class TurnDetectionMode(str, Enum): SMART_TURN = "smart_turn" +@dataclass +class SpeechmaticsSTTSettings(STTSettings): + """Settings for Speechmatics STT service. + + See ``SpeechmaticsSTTService.InputParams`` for detailed descriptions of each field. + + Parameters: + model: The operating point / model name. + domain: Domain for Speechmatics API. + turn_detection_mode: Endpoint handling mode. + speaker_active_format: Formatter for active speaker ID. + speaker_passive_format: Formatter for passive speaker ID. + focus_speakers: List of speaker IDs to focus on. + ignore_speakers: List of speaker IDs to ignore. + focus_mode: Speaker focus mode for diarization. + known_speakers: List of known speaker labels and identifiers. 
+ additional_vocab: List of additional vocabulary entries. + audio_encoding: Audio encoding format. + operating_point: Operating point for accuracy vs. latency. + max_delay: Maximum delay in seconds for transcription. + end_of_utterance_silence_trigger: Maximum delay for end of utterance trigger. + end_of_utterance_max_delay: Maximum delay for end of utterance. + punctuation_overrides: Punctuation overrides. + include_partials: Include partial segment fragments. + split_sentences: Emit finalized sentences mid-turn. + enable_diarization: Enable speaker diarization. + speaker_sensitivity: Diarization sensitivity. + max_speakers: Maximum number of speakers to detect. + prefer_current_speaker: Prefer current speaker ID. + extra_params: Extra parameters for the STT engine. + """ + + domain: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + turn_detection_mode: TurnDetectionMode | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker_active_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker_passive_format: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + focus_speakers: list[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + ignore_speakers: list[str] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + focus_mode: SpeakerFocusMode | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + known_speakers: list[SpeakerIdentifier] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + additional_vocab: list[AdditionalVocabEntry] | _NotGiven = field( + default_factory=lambda: NOT_GIVEN + ) + audio_encoding: AudioEncoding | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + operating_point: OperatingPoint | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_delay: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + end_of_utterance_silence_trigger: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + end_of_utterance_max_delay: float | _NotGiven = 
field(default_factory=lambda: NOT_GIVEN) + punctuation_overrides: dict[str, Any] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + include_partials: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + split_sentences: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + enable_diarization: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + speaker_sensitivity: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + max_speakers: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prefer_current_speaker: bool | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + extra_params: dict[str, Any] | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + #: Fields that can be updated on a live connection via the Speechmatics + #: diarization-config API — no reconnect needed. + HOT_FIELDS: ClassVar[frozenset[str]] = frozenset( + { + "focus_speakers", + "ignore_speakers", + "focus_mode", + } + ) + + #: Fields that are purely local (formatting templates) — no reconnect + #: and no API call needed. + LOCAL_FIELDS: ClassVar[frozenset[str]] = frozenset( + { + "speaker_active_format", + "speaker_passive_format", + } + ) + + class SpeechmaticsSTTService(STTService): """Speechmatics STT service implementation. @@ -98,6 +178,8 @@ class SpeechmaticsSTTService(STTService): ... """ + _settings: SpeechmaticsSTTSettings + # Export related classes as class attributes TurnDetectionMode = TurnDetectionMode AudioEncoding = AudioEncoding @@ -316,8 +398,6 @@ class SpeechmaticsSTTService(STTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to STTService. 
""" - super().__init__(sample_rate=sample_rate, ttfs_p99_latency=ttfs_p99_latency, **kwargs) - # Service parameters self._api_key: str = api_key or os.getenv("SPEECHMATICS_API_KEY") self._base_url: str = ( @@ -337,31 +417,62 @@ class SpeechmaticsSTTService(STTService): # Deprecation check self._check_deprecated_args(kwargs, params) - # Voice agent + # Output formatting defaults + speaker_active_format = params.speaker_active_format + if speaker_active_format is None: + speaker_active_format = ( + "@{speaker_id}: {text}" if params.enable_diarization else "{text}" + ) + speaker_passive_format = params.speaker_passive_format or speaker_active_format + + # Settings — seeded from InputParams + settings = SpeechmaticsSTTSettings( + model=None, # Will be resolved from operating_point after config is built + language=params.language, + domain=params.domain, + turn_detection_mode=params.turn_detection_mode, + speaker_active_format=speaker_active_format, + speaker_passive_format=speaker_passive_format, + focus_speakers=params.focus_speakers, + ignore_speakers=params.ignore_speakers, + focus_mode=params.focus_mode, + known_speakers=params.known_speakers, + additional_vocab=params.additional_vocab, + audio_encoding=params.audio_encoding, + operating_point=params.operating_point, + max_delay=params.max_delay, + end_of_utterance_silence_trigger=params.end_of_utterance_silence_trigger, + end_of_utterance_max_delay=params.end_of_utterance_max_delay, + punctuation_overrides=params.punctuation_overrides, + include_partials=params.include_partials, + split_sentences=params.split_sentences, + enable_diarization=params.enable_diarization, + speaker_sensitivity=params.speaker_sensitivity, + max_speakers=params.max_speakers, + prefer_current_speaker=params.prefer_current_speaker, + extra_params=params.extra_params, + ) + + # Build SDK config from settings, then resolve model from operating_point self._client: VoiceAgentClient | None = None - self._config: VoiceAgentConfig = 
self._prepare_config(params) + self._config: VoiceAgentConfig = self._build_config(settings) + settings.model = self._config.operating_point.value + + super().__init__( + sample_rate=sample_rate, + ttfs_p99_latency=ttfs_p99_latency, + settings=settings, + **kwargs, + ) # Outbound frame queue self._outbound_frames: asyncio.Queue[Frame] = asyncio.Queue() - # Output formatting - if params.speaker_active_format is None: - params.speaker_active_format = ( - "@{speaker_id}: {text}" if params.enable_diarization else "{text}" - ) - # Framework options self._enable_vad: bool = self._config.end_of_utterance_mode not in [ EndOfUtteranceMode.FIXED, EndOfUtteranceMode.EXTERNAL, ] - self._speaker_active_format: str = params.speaker_active_format - self._speaker_passive_format: str = ( - params.speaker_passive_format or params.speaker_active_format - ) - - # Model + metrics - self.set_model_name(self._config.operating_point.value) # Message queue self._stt_msg_queue: asyncio.Queue[dict[str, Any]] = asyncio.Queue() @@ -384,6 +495,64 @@ class SpeechmaticsSTTService(STTService): await super().start(frame) await self._connect() + async def _update_settings(self, delta: SpeechmaticsSTTSettings) -> dict[str, Any]: + """Apply settings delta, reconnecting only when necessary. + + Fields are classified into three categories (see + ``SpeechmaticsSTTSettings``): + + * **HOT_FIELDS** – diarization speaker settings that can be pushed + to a live Speechmatics connection without reconnecting. + * **LOCAL_FIELDS** – formatting templates evaluated locally; no + reconnect or API call needed. + * Everything else – baked into ``VoiceAgentConfig`` at connection + time and therefore require a full disconnect / reconnect. + + Args: + delta: A settings delta. + + Returns: + Dict mapping changed field names to their previous values. 
+ """ + changed = await super()._update_settings(delta) + + if not changed: + return changed + + no_reconnect = SpeechmaticsSTTSettings.HOT_FIELDS | SpeechmaticsSTTSettings.LOCAL_FIELDS + needs_reconnect = bool(changed.keys() - no_reconnect) + + if needs_reconnect: + logger.debug(f"{self} settings update requires reconnect: {changed.keys()}") + # Connection-level fields changed — rebuild the SDK config + # from the now-updated self._settings, then reconnect. + self._config = self._build_config(self._settings) + await self._disconnect() + await self._connect() + elif changed.keys() & SpeechmaticsSTTSettings.HOT_FIELDS: + logger.debug(f"{self} applying hot settings update: {changed.keys()}") + if self._config.enable_diarization: + # Only hot-updatable fields changed — push to the live session. + self._config.speaker_config.focus_speakers = self._settings.focus_speakers + self._config.speaker_config.ignore_speakers = self._settings.ignore_speakers + self._config.speaker_config.focus_mode = self._settings.focus_mode + if self._client: + self._client.update_diarization_config(self._config.speaker_config) + else: + logger.debug( + f"{self} hot settings updated but diarization not enabled: {changed.keys()}. ignoring." + ) + # Diarization not enabled — the new settings will take effect + # if/when diarization is enabled, which does require a reconnect. + elif changed.keys() & SpeechmaticsSTTSettings.LOCAL_FIELDS: + logger.debug( + f"{self} local settings update, no special action required: {changed.keys()}" + ) + # Only local fields changed — no need to push to the STT engine, + # the new settings will take effect immediately. 
+ + return changed + async def stop(self, frame: EndFrame): """Called when the session ends.""" await super().stop(frame) @@ -494,28 +663,39 @@ class SpeechmaticsSTTService(STTService): # CONFIGURATION # ============================================================================ - def _prepare_config(self, params: InputParams) -> VoiceAgentConfig: - """Parse the InputParams into VoiceAgentConfig.""" - # Preset - config = VoiceAgentConfigPreset.load(params.turn_detection_mode.value) + def _build_config(self, settings: SpeechmaticsSTTSettings) -> VoiceAgentConfig: + """Build a ``VoiceAgentConfig`` from the given settings. + + Used both at init time (with explicit settings, before + ``super().__init__`` has run) and before reconnecting so the + connection always reflects the latest settings. + + Args: + settings: Settings to build from. + """ + s = settings + + # Preset from turn detection mode + config = VoiceAgentConfigPreset.load(s.turn_detection_mode.value) # Language + domain - config.language = self._language_to_speechmatics_language(params.language) - config.domain = params.domain - config.output_locale = self._locale_to_speechmatics_locale(config.language, params.language) + language = s.language + config.language = self._language_to_speechmatics_language(language) + config.domain = s.domain if s.domain is not None else None + config.output_locale = self._locale_to_speechmatics_locale(config.language, language) # Speaker config config.speaker_config = SpeakerFocusConfig( - focus_speakers=params.focus_speakers, - ignore_speakers=params.ignore_speakers, - focus_mode=params.focus_mode, + focus_speakers=s.focus_speakers if s.focus_speakers is not None else [], + ignore_speakers=s.ignore_speakers if s.ignore_speakers is not None else [], + focus_mode=s.focus_mode if s.focus_mode is not None else SpeakerFocusMode.RETAIN, ) - config.known_speakers = params.known_speakers + config.known_speakers = s.known_speakers if s.known_speakers is not None else [] # Custom 
dictionary - config.additional_vocab = params.additional_vocab + config.additional_vocab = s.additional_vocab if s.additional_vocab is not None else [] - # Advanced parameters + # Advanced parameters — only set if not None for param in [ "operating_point", "max_delay", @@ -529,21 +709,20 @@ class SpeechmaticsSTTService(STTService): "max_speakers", "prefer_current_speaker", ]: - if getattr(params, param) is not None: - setattr(config, param, getattr(params, param)) + val = getattr(s, param) + if val is not None: + setattr(config, param, val) # Extra parameters - if isinstance(params.extra_params, dict): - for key, value in params.extra_params.items(): + if isinstance(s.extra_params, dict): + for key, value in s.extra_params.items(): if hasattr(config, key): setattr(config, key, value) # Enable sentences - config.speech_segment_config = SpeechSegmentConfig( - emit_sentences=params.split_sentences or False - ) + split = s.split_sentences if s.split_sentences is not None else False + config.speech_segment_config = SpeechSegmentConfig(emit_sentences=split or False) - # Return the complete config return config def update_params( @@ -552,12 +731,23 @@ class SpeechmaticsSTTService(STTService): ) -> None: """Updates the speaker configuration. + .. deprecated:: + Use ``STTUpdateSettingsFrame`` with + ``SpeechmaticsSTTSettings(...)`` instead. + This can update the speakers to listen to or ignore during an in-flight transcription. Only available if diarization is enabled. Args: params: Update parameters for the service. """ + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "update_params() is deprecated. Use STTUpdateSettingsFrame with " + "SpeechmaticsSTTSettings(...) 
instead.", + DeprecationWarning, + ) # Check possible if not self._config.enable_diarization: raise ValueError("Diarization is not enabled") @@ -646,7 +836,7 @@ class SpeechmaticsSTTService(STTService): # await self.start_processing_metrics() await self.broadcast_frame(UserStartedSpeakingFrame) if self._should_interrupt: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() async def _handle_end_of_turn(self, message: dict[str, Any]) -> None: """Handle EndOfTurn events. @@ -727,9 +917,9 @@ class SpeechmaticsSTTService(STTService): def attr_from_segment(segment: dict[str, Any]) -> dict[str, Any]: # Formats the output text based on the speaker and defined formats from the config. text = ( - self._speaker_active_format + self._settings.speaker_active_format if segment.get("is_active", True) - else self._speaker_passive_format + else self._settings.speaker_passive_format ).format( **{ "speaker_id": segment.get("speaker_id", "UU"), diff --git a/src/pipecat/services/speechmatics/tts.py b/src/pipecat/services/speechmatics/tts.py index 0f3ff0cb6..1ddb895aa 100644 --- a/src/pipecat/services/speechmatics/tts.py +++ b/src/pipecat/services/speechmatics/tts.py @@ -7,7 +7,8 @@ """Speechmatics TTS service integration.""" import asyncio -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from urllib.parse import urlencode import aiohttp @@ -21,6 +22,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.utils.network import exponential_backoff_time from pipecat.utils.tracing.service_decorators import traced_tts @@ -35,6 +37,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class SpeechmaticsTTSSettings(TTSSettings): + """Settings for Speechmatics TTS service. 
+ + Parameters: + max_retries: Maximum number of retries for HTTP requests. + """ + + max_retries: int | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class SpeechmaticsTTSService(TTSService): """Speechmatics TTS service implementation. @@ -42,6 +55,8 @@ class SpeechmaticsTTSService(TTSService): It converts text to speech and returns raw PCM audio data for real-time playback. """ + _settings: SpeechmaticsTTSSettings + SPEECHMATICS_SAMPLE_RATE = 16000 class InputParams(BaseModel): @@ -80,7 +95,18 @@ class SpeechmaticsTTSService(TTSService): f"Speechmatics TTS only supports {self.SPEECHMATICS_SAMPLE_RATE}Hz sample rate. " f"Current rate of {sample_rate}Hz may cause issues." ) - super().__init__(sample_rate=sample_rate, **kwargs) + params = params or SpeechmaticsTTSService.InputParams() + + super().__init__( + sample_rate=sample_rate, + settings=SpeechmaticsTTSSettings( + model=None, + voice=voice_id, + language=None, + max_retries=params.max_retries, + ), + **kwargs, + ) # Service parameters self._api_key: str = api_key @@ -91,12 +117,6 @@ class SpeechmaticsTTSService(TTSService): if not self._api_key: raise ValueError("Missing Speechmatics API key") - # Default parameters - self._params = params or SpeechmaticsTTSService.InputParams() - - # Set voice from constructor parameter - self.set_voice(voice_id) - def can_generate_metrics(self) -> bool: """Check if this service can generate processing metrics. 
@@ -131,7 +151,7 @@ class SpeechmaticsTTSService(TTSService): } # Complete HTTP URL - url = _get_endpoint_url(self._base_url, self._voice_id, self.sample_rate) + url = _get_endpoint_url(self._base_url, self._settings.voice, self.sample_rate) try: # Start TTS TTFB metrics @@ -159,7 +179,7 @@ class SpeechmaticsTTSService(TTSService): attempt += 1 # Check if we've exceeded the maximum number of attempts - if attempt >= self._params.max_retries: + if attempt >= self._settings.max_retries: raise ValueError() # Report error frame diff --git a/src/pipecat/services/stt_service.py b/src/pipecat/services/stt_service.py index bad8e2e28..ebf007f6f 100644 --- a/src/pipecat/services/stt_service.py +++ b/src/pipecat/services/stt_service.py @@ -9,9 +9,10 @@ import asyncio import io import time +import warnings import wave from abc import abstractmethod -from typing import Any, AsyncGenerator, Dict, Mapping, Optional +from typing import Any, AsyncGenerator, Optional from loguru import logger from websockets.protocol import State @@ -32,6 +33,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import STTSettings, is_given from pipecat.services.stt_latency import DEFAULT_TTFS_P99 from pipecat.services.websocket_service import WebsocketService from pipecat.transcriptions.language import Language @@ -73,6 +75,8 @@ class STTService(AIService): logger.error(f"STT connection error: {error}") """ + _settings: STTSettings + def __init__( self, *, @@ -82,6 +86,7 @@ class STTService(AIService): ttfs_p99_latency: Optional[float] = None, keepalive_timeout: Optional[float] = None, keepalive_interval: float = 5.0, + settings: Optional[STTSettings] = None, **kwargs, ): """Initialize the STT service. @@ -105,13 +110,20 @@ class STTService(AIService): connection alive. None disables keepalive. Useful for services that close idle connections (e.g. 
behind a ServiceSwitcher). keepalive_interval: Seconds between idle checks when keepalive is enabled. + settings: The runtime-updatable settings for the STT service. **kwargs: Additional arguments passed to the parent AIService. """ - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or STTSettings(), + **kwargs, + ) self._audio_passthrough = audio_passthrough self._init_sample_rate = sample_rate self._sample_rate = 0 - self._settings: Dict[str, Any] = {} + self._muted: bool = False self._user_id: str = "" self._ttfs_p99_latency = ttfs_p99_latency @@ -122,6 +134,7 @@ class STTService(AIService): self._user_speaking: bool = False self._finalize_pending: bool = False self._finalize_requested: bool = False + self._last_transcript_time: float = 0 # Keepalive state self._keepalive_timeout = keepalive_timeout @@ -179,18 +192,53 @@ class STTService(AIService): async def set_model(self, model: str): """Set the speech recognition model. + .. deprecated:: 0.0.104 + Use ``STTUpdateSettingsFrame(model=...)`` instead. + Args: model: The name of the model to use for speech recognition. """ - self.set_model_name(model) + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_model' is deprecated, use 'STTUpdateSettingsFrame(model=...)' instead.", + DeprecationWarning, + stacklevel=2, + ) + logger.info(f"Switching STT model to: [{model}]") + settings_cls = type(self._settings) + await self._update_settings(settings_cls(model=model)) async def set_language(self, language: Language): """Set the language for speech recognition. + .. deprecated:: 0.0.104 + Use ``STTUpdateSettingsFrame(language=...)`` instead. + Args: language: The language to use for speech recognition. 
""" - pass + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_language' is deprecated, use 'STTUpdateSettingsFrame(language=...)' instead.", + DeprecationWarning, + stacklevel=2, + ) + logger.info(f"Switching STT language to: [{language}]") + settings_cls = type(self._settings) + await self._update_settings(settings_cls(language=language)) + + def language_to_service_language(self, language: Language) -> Optional[str]: + """Convert a language to the service-specific language format. + + Args: + language: The language to convert. + + Returns: + The service-specific language identifier, or None if not supported. + """ + return Language(language) @abstractmethod async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]: @@ -222,20 +270,29 @@ class STTService(AIService): await self._cancel_ttfb_timeout() await self._cancel_keepalive_task() - async def _update_settings(self, settings: Mapping[str, Any]): - logger.info(f"Updating STT settings: {self._settings}") - for key, value in settings.items(): - if key in self._settings: - logger.info(f"Updating STT setting {key} to: [{value}]") - self._settings[key] = value - if key == "language": - await self.set_language(value) - elif key == "language": - await self.set_language(value) - elif key == "model": - self.set_model_name(value) - else: - logger.warning(f"Unknown setting for STT service: {key}") + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply an STT settings delta. + + Handles ``model`` (via parent). Translates ``Language`` enum values + before applying so the stored value is a service-specific string. + Concrete services should override this method and handle language + changes (including any reconnect logic) based on the returned + changed-field dict. + + Args: + delta: An STT settings delta. + + Returns: + Dict mapping changed field names to their previous values. 
+ """ + # Translate language *before* applying so the stored value is canonical + if is_given(delta.language) and isinstance(delta.language, Language): + converted = self.language_to_service_language(delta.language) + if converted is not None: + delta.language = converted + + changed = await super()._update_settings(delta) + return changed async def process_audio_frame(self, frame: AudioRawFrame, direction: FrameDirection): """Process an audio frame for speech recognition. @@ -300,7 +357,20 @@ class STTService(AIService): await self._handle_vad_user_stopped_speaking(frame) await self.push_frame(frame, direction) elif isinstance(frame, STTUpdateSettingsFrame): - await self._update_settings(frame.settings) + if frame.delta is not None: + await self._update_settings(frame.delta) + elif frame.settings: + # Backward-compatible path: convert legacy dict to settings object. + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Passing a dict via STTUpdateSettingsFrame(settings={...}) is deprecated " + "since 0.0.104, use STTUpdateSettingsFrame(delta=STTSettings(...)) instead.", + DeprecationWarning, + stacklevel=2, + ) + delta = type(self._settings).from_mapping(frame.settings) + await self._update_settings(delta) elif isinstance(frame, STTMuteFrame): self._muted = frame.mute logger.debug(f"STT service {'muted' if frame.mute else 'unmuted'}") @@ -323,6 +393,9 @@ class STTService(AIService): direction: The direction to push the frame. 
""" if isinstance(frame, TranscriptionFrame): + # Store the transcript time for TTFB calculation + self._last_transcript_time = time.time() + # Set finalized from pending state and auto-reset if self._finalize_pending: frame.finalized = True @@ -376,6 +449,7 @@ class STTService(AIService): self._user_speaking = True self._finalize_requested = False self._finalize_pending = False + self._last_transcript_time = 0 async def _handle_vad_user_stopped_speaking(self, frame: VADUserStoppedSpeakingFrame): """Handle VAD user stopped speaking frame. @@ -405,14 +479,17 @@ class STTService(AIService): ) async def _ttfb_timeout_handler(self): - """Wait for timeout then report TTFB. + """Wait for timeout then report TTFB using the last transcript timestamp. This timeout allows the final transcription to arrive before we calculate - and report TTFB. If no transcription arrived, no TTFB is reported. + and report TTFB. Uses _last_transcript_time as the end time so we measure + to when the transcript actually arrived, not when the timeout fired. + If no transcription arrived, no TTFB is reported. 
""" try: await asyncio.sleep(self._stt_ttfb_timeout) - await self.stop_ttfb_metrics() + if self._last_transcript_time > 0: + await self.stop_ttfb_metrics(end_time=self._last_transcript_time) except asyncio.CancelledError: # Task was cancelled (new utterance or interruption), which is expected behavior pass diff --git a/src/pipecat/services/tavus/video.py b/src/pipecat/services/tavus/video.py index d9f259797..8c63ff354 100644 --- a/src/pipecat/services/tavus/video.py +++ b/src/pipecat/services/tavus/video.py @@ -94,6 +94,7 @@ class TavusVideoService(AIService): """ await super().setup(setup) callbacks = TavusCallbacks( + on_joined=self._on_joined, on_participant_joined=self._on_participant_joined, on_participant_left=self._on_participant_left, ) @@ -119,6 +120,10 @@ class TavusVideoService(AIService): await self._client.cleanup() self._client = None + async def _on_joined(self, data): + """Handle bot joined the Daily room.""" + logger.info("Tavus bot joined Daily room") + async def _on_participant_left(self, participant, reason): """Handle participant leaving the session.""" participant_id = participant["id"] diff --git a/src/pipecat/services/tts_service.py b/src/pipecat/services/tts_service.py index 1e5bdf73f..4285e14f9 100644 --- a/src/pipecat/services/tts_service.py +++ b/src/pipecat/services/tts_service.py @@ -8,8 +8,10 @@ import asyncio import uuid +import warnings from abc import abstractmethod from dataclasses import dataclass +from enum import Enum from typing import ( Any, AsyncGenerator, @@ -18,7 +20,6 @@ from typing import ( Callable, Dict, List, - Mapping, Optional, Sequence, Tuple, @@ -38,6 +39,7 @@ from pipecat.frames.frames import ( Frame, InterimTranscriptionFrame, InterruptionFrame, + LLMAssistantPushAggregationFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, StartFrame, @@ -52,6 +54,7 @@ from pipecat.frames.frames import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService 
+from pipecat.services.settings import TTSSettings, is_given from pipecat.services.websocket_service import WebsocketService from pipecat.transcriptions.language import Language from pipecat.utils.text.base_text_aggregator import BaseTextAggregator @@ -65,10 +68,33 @@ class TTSContext: """Context information for a TTS request. Attributes: - append_to_context: Whether this TTS output should be appended to the conversation context. + append_to_context: Whether this TTS output should be appended to the + conversation context after it is spoken. + push_assistant_aggregation: Whether to push an + ``LLMAssistantPushAggregationFrame`` after the TTS has finished + speaking, forcing the assistant aggregator to commit its current + text buffer to the conversation context. """ append_to_context: bool = True + push_assistant_aggregation: Optional[bool] = False + + +class TextAggregationMode(str, Enum): + """Controls how incoming text is aggregated before TTS synthesis. + + Parameters: + SENTENCE: Buffer text until sentence boundaries are detected before synthesis. + Produces more natural speech but adds latency (~200-300ms per sentence). + TOKEN: Stream text tokens directly to TTS as they arrive. + Reduces latency but may affect speech quality depending on the TTS provider. 
+ """ + + SENTENCE = "sentence" + TOKEN = "token" + + def __str__(self): + return self.value class TTSService(AIService): @@ -103,10 +129,13 @@ class TTSService(AIService): logger.debug(f"TTS request: {context_id} - {text}") """ + _settings: TTSSettings + def __init__( self, *, - aggregate_sentences: bool = True, + text_aggregation_mode: Optional[TextAggregationMode] = None, + aggregate_sentences: Optional[bool] = None, # if True, TTSService will push TextFrames and LLMFullResponseEndFrames, # otherwise subclass must do it push_text_frames: bool = True, @@ -125,6 +154,8 @@ class TTSService(AIService): append_trailing_space: bool = False, # TTS output sample rate sample_rate: Optional[int] = None, + # if True, enables word-level timestamp tracking and synchronization + supports_word_timestamps: bool = False, # Text aggregator to aggregate incoming tokens and decide when to push to the TTS. text_aggregator: Optional[BaseTextAggregator] = None, # Types of text aggregations that should not be spoken. @@ -142,12 +173,22 @@ class TTSService(AIService): text_filter: Optional[BaseTextFilter] = None, # Audio transport destination of the generated frames. transport_destination: Optional[str] = None, + settings: Optional[TTSSettings] = None, **kwargs, ): """Initialize the TTS service. Args: + text_aggregation_mode: How to aggregate incoming text before synthesis. + TextAggregationMode.SENTENCE (default) buffers until sentence boundaries, + TextAggregationMode.TOKEN streams tokens directly for lower latency. aggregate_sentences: Whether to aggregate text into sentences before synthesis. + + .. deprecated:: 0.0.104 + Use ``text_aggregation_mode`` instead. Set to ``TextAggregationMode.SENTENCE`` + to aggregate text into sentences before synthesis, or + ``TextAggregationMode.TOKEN`` to stream tokens directly for lower latency. + push_text_frames: Whether to push TextFrames and LLMFullResponseEndFrames. push_stop_frames: Whether to automatically push TTSStoppedFrames. 
stop_frame_timeout_s: Idle time before pushing TTSStoppedFrame when push_stop_frames is True. @@ -157,6 +198,9 @@ class TTSService(AIService): append_trailing_space: Whether to append a trailing space to text before sending to TTS. This helps prevent some TTS services from vocalizing trailing punctuation (e.g., "dot"). sample_rate: Output sample rate for generated audio. + supports_word_timestamps: Whether this service supports word-level timestamp tracking. + When True, enables synchronization of audio with spoken words so only spoken words + are added to the conversation context. text_aggregator: Custom text aggregator for processing incoming text. .. deprecated:: 0.0.95 @@ -175,10 +219,41 @@ class TTSService(AIService): Use `text_filters` instead, which allows multiple filters. transport_destination: Destination for generated audio frames. + settings: The runtime-updatable settings for the TTS service. **kwargs: Additional arguments passed to the parent AIService. """ - super().__init__(**kwargs) - self._aggregate_sentences: bool = aggregate_sentences + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or TTSSettings(), + **kwargs, + ) + + # Resolve text_aggregation_mode from the new param or deprecated aggregate_sentences + if aggregate_sentences is not None: + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Parameter 'aggregate_sentences' is deprecated. 
" + "Use 'text_aggregation_mode=TextAggregationMode.SENTENCE' or " + "'text_aggregation_mode=TextAggregationMode.TOKEN' instead.", + DeprecationWarning, + stacklevel=2, + ) + if text_aggregation_mode is None: + text_aggregation_mode = ( + TextAggregationMode.SENTENCE + if aggregate_sentences + else TextAggregationMode.TOKEN + ) + + if text_aggregation_mode is None: + text_aggregation_mode = TextAggregationMode.SENTENCE + + self._text_aggregation_mode: TextAggregationMode = text_aggregation_mode self._push_text_frames: bool = push_text_frames self._push_stop_frames: bool = push_stop_frames self._stop_frame_timeout_s: float = stop_frame_timeout_s @@ -188,9 +263,9 @@ class TTSService(AIService): self._append_trailing_space: bool = append_trailing_space self._init_sample_rate = sample_rate self._sample_rate = 0 - self._voice_id: str = "" - self._settings: Dict[str, Any] = {} - self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator() + self._text_aggregator: BaseTextAggregator = text_aggregator or SimpleTextAggregator( + aggregation_type=self._text_aggregation_mode + ) if text_aggregator: import warnings @@ -226,12 +301,55 @@ class TTSService(AIService): self._processing_text: bool = False self._tts_contexts: Dict[str, TTSContext] = {} + self._streamed_text: str = "" + self._text_aggregation_metrics_started: bool = False + + # Word timestamp state (active when supports_word_timestamps=True) + self._supports_word_timestamps: bool = supports_word_timestamps + self._initial_word_timestamp: int = -1 + self._initial_word_times: List[Tuple[str, float, Optional[str]]] = [] + self._words_task: Optional[asyncio.Task] = None + self._llm_response_started: bool = False self._register_event_handler("on_connected") self._register_event_handler("on_disconnected") self._register_event_handler("on_connection_error") self._register_event_handler("on_tts_request") + @property + def _is_streaming_tokens(self) -> bool: + """Whether the service is streaming 
tokens directly without sentence aggregation.""" + return self._text_aggregation_mode == TextAggregationMode.TOKEN + + async def start_tts_usage_metrics(self, text: str): + """Record TTS usage metrics. + + When streaming tokens, usage metrics are aggregated and reported at + flush time instead of per token, so individual calls are skipped. + + Args: + text: The text being processed by TTS. + """ + if self._is_streaming_tokens: + return + await super().start_tts_usage_metrics(text) + + async def start_text_aggregation_metrics(self): + """Start text aggregation metrics if not already started. + + Only starts the metric once per LLM response. Skipped when streaming + tokens since per-token aggregation time is not meaningful. + """ + if self._is_streaming_tokens or self._text_aggregation_metrics_started: + return + self._text_aggregation_metrics_started = True + await super().start_text_aggregation_metrics() + + async def stop_text_aggregation_metrics(self): + """Stop text aggregation metrics and reset the started flag.""" + self._text_aggregation_metrics_started = False + await super().stop_text_aggregation_metrics() + @property def sample_rate(self) -> int: """Get the current sample rate for audio output. @@ -261,18 +379,42 @@ class TTSService(AIService): async def set_model(self, model: str): """Set the TTS model to use. + .. deprecated:: 0.0.104 + Use ``TTSUpdateSettingsFrame(model=...)`` instead. + Args: model: The name of the TTS model. """ - self.set_model_name(model) + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_model' is deprecated, use 'TTSUpdateSettingsFrame(model=...)' instead.", + DeprecationWarning, + stacklevel=2, + ) + logger.info(f"Switching TTS model to: [{model}]") + settings_cls = type(self._settings) + await self._update_settings(settings_cls(model=model)) - def set_voice(self, voice: str): + async def set_voice(self, voice: str): """Set the voice for speech synthesis. + .. 
deprecated:: 0.0.104 + Use ``TTSUpdateSettingsFrame(voice=...)`` instead. + Args: voice: The voice identifier or name. """ - self._voice_id = voice + with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "'set_voice' is deprecated, use 'TTSUpdateSettingsFrame(voice=...)' instead.", + DeprecationWarning, + stacklevel=2, + ) + logger.info(f"Switching TTS voice to: [{voice}]") + settings_cls = type(self._settings) + await self._update_settings(settings_cls(voice=voice)) def create_context_id(self) -> str: """Generate a unique context ID for a TTS request. @@ -324,15 +466,6 @@ class TTSService(AIService): return text + " " return text - async def update_setting(self, key: str, value: Any): - """Update a service-specific setting. - - Args: - key: The setting key to update. - value: The new value for the setting. - """ - pass - async def flush_audio(self): """Flush any buffered audio data.""" pass @@ -347,6 +480,8 @@ class TTSService(AIService): self._sample_rate = self._init_sample_rate or frame.audio_out_sample_rate if self._push_stop_frames and not self._stop_frame_task: self._stop_frame_task = self.create_task(self._stop_frame_handler()) + if self._supports_word_timestamps: + self._create_words_task() async def stop(self, frame: EndFrame): """Stop the TTS service. @@ -358,6 +493,8 @@ class TTSService(AIService): if self._stop_frame_task: await self.cancel_task(self._stop_frame_task) self._stop_frame_task = None + if self._words_task: + await self._stop_words_task() async def cancel(self, frame: CancelFrame): """Cancel the TTS service. 
@@ -369,6 +506,8 @@ class TTSService(AIService): if self._stop_frame_task: await self.cancel_task(self._stop_frame_task) self._stop_frame_task = None + if self._words_task: + await self._stop_words_task() def add_text_transformer( self, @@ -403,22 +542,26 @@ class TTSService(AIService): if not (agg_type == aggregation_type and func == transform_function) ] - async def _update_settings(self, settings: Mapping[str, Any]): - for key, value in settings.items(): - if key in self._settings: - logger.info(f"Updating TTS setting {key} to: [{value}]") - self._settings[key] = value - if key == "language": - self._settings[key] = self.language_to_service_language(value) - elif key == "model": - self.set_model_name(value) - elif key == "voice" or key == "voice_id": - self.set_voice(value) - elif key == "text_filter": - for filter in self._text_filters: - await filter.update_settings(value) - else: - logger.warning(f"Unknown setting for TTS service: {key}") + async def _update_settings(self, delta: TTSSettings) -> dict[str, Any]: + """Apply a TTS settings delta. + + Translates language to service-specific value before applying. + + Args: + delta: A TTS settings delta. + + Returns: + Dict mapping changed field names to their previous values. + """ + # Translate language *before* applying so the stored value is canonical + if is_given(delta.language) and isinstance(delta.language, Language): + converted = self.language_to_service_language(delta.language) + if converted is not None: + delta.language = converted + + changed = await super()._update_settings(delta) + + return changed async def say(self, text: str): """Immediately speak the provided text. 
@@ -465,10 +608,14 @@ class TTSService(AIService): and not isinstance(frame, InterimTranscriptionFrame) and not isinstance(frame, TranscriptionFrame) ): + await self.start_text_aggregation_metrics() await self._process_text_frame(frame) elif isinstance(frame, InterruptionFrame): await self._handle_interruption(frame, direction) await self.push_frame(frame, direction) + elif isinstance(frame, LLMFullResponseStartFrame): + self._llm_response_started = True + await self.push_frame(frame, direction) elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): # We pause processing incoming frames if the LLM response included # text (it might be that it's only a function calling response). We @@ -477,9 +624,17 @@ class TTSService(AIService): # Flush any remaining text (including text waiting for lookahead) remaining = await self._text_aggregator.flush() + # Stop the aggregation metric (no-op if already stopped on first sentence). + await self.stop_text_aggregation_metrics() if remaining: await self._push_tts_frames(AggregatedTextFrame(remaining.text, remaining.type)) + # Log accumulated streamed text and emit aggregated usage metric. + if self._streamed_text: + logger.debug(f"{self}: Generating TTS [{self._streamed_text}]") + await super().start_tts_usage_metrics(self._streamed_text) + self._streamed_text = "" + # Reset aggregator state self._processing_text = False if isinstance(frame, LLMFullResponseEndFrame): @@ -487,13 +642,19 @@ class TTSService(AIService): await self.push_frame(frame, direction) else: await self.push_frame(frame, direction) + # Flush any pending audio so the TTS service closes the current context. + if self._supports_word_timestamps: + await self.flush_audio() elif isinstance(frame, TTSSpeakFrame): # Store if we were processing text or not so we can set it back. 
processing_text = self._processing_text + # If we are not receiving text from the LLM, we can assume that the SpeakFrame should be automatically added to the context + push_assistant_aggregation = frame.append_to_context and not self._llm_response_started # Assumption: text in TTSSpeakFrame does not include inter-frame spaces await self._push_tts_frames( AggregatedTextFrame(frame.text, AggregationType.SENTENCE), append_tts_text_to_context=frame.append_to_context, + push_assistant_aggregation=push_assistant_aggregation, ) # We pause processing incoming frames because we are sending data to # the TTS. We pause to avoid audio overlapping. @@ -501,7 +662,20 @@ class TTSService(AIService): await self.flush_audio() self._processing_text = processing_text elif isinstance(frame, TTSUpdateSettingsFrame): - await self._update_settings(frame.settings) + if frame.delta is not None: + await self._update_settings(frame.delta) + elif frame.settings: + # Backward-compatible path: convert legacy dict to settings object. 
+ with warnings.catch_warnings(): + warnings.simplefilter("always") + warnings.warn( + "Passing a dict via TTSUpdateSettingsFrame(settings={...}) is deprecated " + "since 0.0.104, use TTSUpdateSettingsFrame(delta=TTSSettings(...)) instead.", + DeprecationWarning, + stacklevel=2, + ) + delta = type(self._settings).from_mapping(frame.settings) + await self._update_settings(delta) elif isinstance(frame, BotStoppedSpeakingFrame): await self._maybe_resume_frame_processing() await self.push_frame(frame, direction) @@ -612,6 +786,12 @@ class TTSService(AIService): for filter in self._text_filters: await filter.handle_interruption() + self._llm_response_started = False + self._streamed_text = "" + self._text_aggregation_metrics_started = False + if self._supports_word_timestamps: + await self.reset_word_timestamps() + async def _maybe_pause_frame_processing(self): if self._processing_text and self._pause_frame_processing: await self.pause_processing_frames() @@ -621,32 +801,25 @@ class TTSService(AIService): await self.resume_processing_frames() async def _process_text_frame(self, frame: TextFrame): - text: Optional[str] = None - includes_inter_frame_spaces: bool = False - if not self._aggregate_sentences: - text = frame.text - includes_inter_frame_spaces = frame.includes_inter_frame_spaces - aggregated_by = "token" - - if text: - logger.trace(f"Pushing TTS frames for text: {text}, {aggregated_by}") - await self._push_tts_frames( - AggregatedTextFrame(text, aggregated_by), includes_inter_frame_spaces - ) - else: - async for aggregate in self._text_aggregator.aggregate(frame.text): - text = aggregate.text - aggregated_by = aggregate.type - logger.trace(f"Pushing TTS frames for text: {text}, {aggregated_by}") - await self._push_tts_frames( - AggregatedTextFrame(text, aggregated_by), includes_inter_frame_spaces - ) + async for aggregate in self._text_aggregator.aggregate(frame.text): + includes_inter_frame_spaces = ( + frame.includes_inter_frame_spaces + if aggregate.type == 
AggregationType.TOKEN + else False + ) + if aggregate.type != AggregationType.TOKEN: + # Stop the aggregation metric on the first sentence only. + await self.stop_text_aggregation_metrics() + await self._push_tts_frames( + AggregatedTextFrame(aggregate.text, aggregate.type), includes_inter_frame_spaces + ) async def _push_tts_frames( self, src_frame: AggregatedTextFrame, includes_inter_frame_spaces: Optional[bool] = False, append_tts_text_to_context: Optional[bool] = True, + push_assistant_aggregation: Optional[bool] = False, ): type = src_frame.aggregated_by text = src_frame.text @@ -670,7 +843,15 @@ class TTSService(AIService): # or when we received an LLMFullResponseEndFrame self._processing_text = True - await self.start_processing_metrics() + # Accumulate text for a single debug log at flush time when streaming tokens. + if self._is_streaming_tokens: + self._streamed_text += text + + # Skip per-token processing metrics when streaming. The per-token + # processing time is just websocket send overhead (~0.1ms) and not + # meaningful. TTFB captures the important timing for streaming TTS. + if not self._is_streaming_tokens: + await self.start_processing_metrics() # Process all filters. 
for filter in self._text_filters: @@ -678,7 +859,8 @@ class TTSService(AIService): text = await filter.filter(text) if not text.strip(): - await self.stop_processing_metrics() + if not self._is_streaming_tokens: + await self.stop_processing_metrics() return # Create context ID and store metadata @@ -705,7 +887,8 @@ class TTSService(AIService): self._tts_contexts[context_id] = TTSContext( append_to_context=append_tts_text_to_context if append_tts_text_to_context is not None - else True + else True, + push_assistant_aggregation=push_assistant_aggregation, ) # Apply any final text preparation (e.g., trailing space) @@ -716,7 +899,8 @@ class TTSService(AIService): await self.process_generator(self.run_tts(prepared_text, context_id)) - await self.stop_processing_metrics() + if not self._is_streaming_tokens: + await self.stop_processing_metrics() if self._push_text_frames: # In TTS services that support word timestamps, the TTSTextFrames @@ -733,6 +917,8 @@ class TTSService(AIService): if append_tts_text_to_context is not None: frame.append_to_context = append_tts_text_to_context await self.push_frame(frame) + if push_assistant_aggregation: + await self.push_frame(LLMAssistantPushAggregationFrame()) async def _stop_frame_handler(self): has_started = False @@ -750,25 +936,9 @@ class TTSService(AIService): await self.push_frame(TTSStoppedFrame()) has_started = False - -class WordTTSService(TTSService): - """Base class for TTS services that support word timestamps. - - Word timestamps are useful to synchronize audio with text of the spoken - words. This way only the spoken words are added to the conversation context. - """ - - def __init__(self, **kwargs): - """Initialize the Word TTS service. - - Args: - **kwargs: Additional arguments passed to the parent TTSService. 
- """ - super().__init__(**kwargs) - self._initial_word_timestamp = -1 - self._initial_word_times = [] - self._words_task = None - self._llm_response_started: bool = False + # + # Word timestamp methods (active when supports_word_timestamps=True) + # async def start_word_timestamps(self): """Start tracking word timestamps from the current time.""" @@ -803,55 +973,9 @@ class WordTTSService(TTSService): else: await self._add_word_timestamps(word_times_with_context) - async def start(self, frame: StartFrame): - """Start the word TTS service. - - Args: - frame: The start frame containing initialization parameters. - """ - await super().start(frame) - self._create_words_task() - - async def stop(self, frame: EndFrame): - """Stop the word TTS service. - - Args: - frame: The end frame. - """ - await super().stop(frame) - await self._stop_words_task() - - async def cancel(self, frame: CancelFrame): - """Cancel the word TTS service. - - Args: - frame: The cancel frame. - """ - await super().cancel(frame) - await self._stop_words_task() - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames with word timestamp awareness. - - Args: - frame: The frame to process. - direction: The direction of frame processing. 
- """ - await super().process_frame(frame, direction) - - if isinstance(frame, LLMFullResponseStartFrame): - self._llm_response_started = True - elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)): - await self.flush_audio() - - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - await super()._handle_interruption(frame, direction) - self._llm_response_started = False - await self.reset_word_timestamps() - def _create_words_task(self): if not self._words_task: - self._words_queue = asyncio.Queue() + self._words_queue: asyncio.Queue = asyncio.Queue() self._words_task = self.create_task(self._words_task_handler()) async def _stop_words_task(self): @@ -878,6 +1002,9 @@ class WordTTSService(TTSService): frame = TTSStoppedFrame() frame.pts = last_pts frame.context_id = context_id + if context_id in self._tts_contexts: + if self._tts_contexts[context_id].push_assistant_aggregation: + await self.push_frame(LLMAssistantPushAggregationFrame()) else: # Assumption: word-by-word text frames don't include spaces, so # we can rely on the default includes_inter_frame_spaces=False @@ -893,6 +1020,23 @@ class WordTTSService(TTSService): self._words_queue.task_done() +class WordTTSService(TTSService): + """Deprecated. Use TTSService with supports_word_timestamps=True instead. + + .. deprecated:: 0.0.104 + Word timestamp functionality has been moved to TTSService. Pass + ``supports_word_timestamps=True`` to TTSService (or any subclass) instead. + """ + + def __init__(self, **kwargs): + """Initialize the Word TTS service. + + Args: + **kwargs: Additional arguments passed to the parent TTSService. + """ + super().__init__(supports_word_timestamps=True, **kwargs) + + class WebsocketTTSService(TTSService, WebsocketService): """Base class for websocket-based TTS services. 
@@ -965,10 +1109,12 @@ class InterruptibleTTSService(WebsocketTTSService): self._bot_speaking = False -class WebsocketWordTTSService(WordTTSService, WebsocketService): - """Base class for websocket-based TTS services that support word timestamps. +class WebsocketWordTTSService(WebsocketTTSService): + """Deprecated. Use WebsocketTTSService with supports_word_timestamps=True instead. - Combines word timestamp functionality with websocket connectivity. + .. deprecated:: 0.0.104 + Word timestamp functionality has been moved to TTSService. Pass + ``supports_word_timestamps=True`` to WebsocketTTSService instead. """ def __init__(self, *, reconnect_on_error: bool = True, **kwargs): @@ -978,53 +1124,26 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService): reconnect_on_error: Whether to automatically reconnect on websocket errors. **kwargs: Additional arguments passed to parent classes. """ - WordTTSService.__init__(self, **kwargs) - WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs) - - async def _report_error(self, error: ErrorFrame): - await self._call_event_handler("on_connection_error", error.error) - await self.push_error_frame(error) + super().__init__( + supports_word_timestamps=True, reconnect_on_error=reconnect_on_error, **kwargs + ) -class InterruptibleWordTTSService(WebsocketWordTTSService): - """Websocket-based TTS service with word timestamps that handles interruptions. +class InterruptibleWordTTSService(InterruptibleTTSService): + """Deprecated. Use InterruptibleTTSService with supports_word_timestamps=True instead. - For TTS services that support word timestamps but can't correlate generated - audio with requested text. Handles interruptions by reconnecting when needed. + .. deprecated:: 0.0.104 + Word timestamp functionality has been moved to TTSService. Pass + ``supports_word_timestamps=True`` to InterruptibleTTSService instead. """ def __init__(self, **kwargs): """Initialize the Interruptible Word TTS service. 
Args: - **kwargs: Additional arguments passed to the parent WebsocketWordTTSService. + **kwargs: Additional arguments passed to the parent InterruptibleTTSService. """ - super().__init__(**kwargs) - - # Indicates if the bot is speaking. If the bot is not speaking we don't - # need to reconnect when the user speaks. If the bot is speaking and the - # user interrupts we need to reconnect. - self._bot_speaking = False - - async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): - await super()._handle_interruption(frame, direction) - if self._bot_speaking: - await self._disconnect() - await self._connect() - - async def process_frame(self, frame: Frame, direction: FrameDirection): - """Process frames with bot speaking state tracking. - - Args: - frame: The frame to process. - direction: The direction of frame processing. - """ - await super().process_frame(frame, direction) - - if isinstance(frame, BotStartedSpeakingFrame): - self._bot_speaking = True - elif isinstance(frame, BotStoppedSpeakingFrame): - self._bot_speaking = False + super().__init__(supports_word_timestamps=True, **kwargs) class AudioContextTTSService(WebsocketTTSService): @@ -1198,6 +1317,7 @@ class AudioContextTTSService(WebsocketTTSService): async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection): await super()._handle_interruption(frame, direction) await self._stop_audio_context_task() + await self.on_audio_context_interrupted(context_id=self._context_id) self.reset_active_audio_context() self._create_audio_context_task() @@ -1226,6 +1346,7 @@ class AudioContextTTSService(WebsocketTTSService): # We just finished processing the context, so we can safely remove it. del self._contexts[context_id] + await self.on_audio_context_completed(context_id=context_id) self.reset_active_audio_context() # Append some silence between sentences. 
@@ -1262,16 +1383,42 @@ class AudioContextTTSService(WebsocketTTSService): logger.trace(f"{self} time out on audio context {context_id}") break + async def on_audio_context_interrupted(self, context_id: str): + """Called when an audio context is cancelled due to an interruption. -class AudioContextWordTTSService(AudioContextTTSService, WebsocketWordTTSService): - """Websocket-based TTS service with word timestamps and audio context management. + Override this in a subclass to perform provider-specific cleanup (e.g. + sending a cancel/close message over the WebSocket) when the bot is + interrupted mid-speech. The audio context task has already been stopped + and the active context has **not** yet been reset when this is called, + so ``context_id`` reflects the context that was cut short. - This is a base class for websocket-based TTS services that support word - timestamps and also allow correlating the generated audio with the requested - text through audio contexts. + Args: + context_id: The ID of the audio context that was interrupted, or + ``None`` if no context was active at the time. + """ + pass - Combines the audio context management capabilities of AudioContextTTSService - with the word timestamp functionality of WebsocketWordTTSService. + async def on_audio_context_completed(self, context_id: str): + """Called after an audio context has finished playing all of its audio. + + Override this in a subclass to perform provider-specific cleanup (e.g. + sending a close-context message to free server-side resources) once an + audio context has been fully processed. The context entry has already + been removed from the internal context map, and the active context has + **not** yet been reset when this is called. + + Args: + context_id: The ID of the audio context that finished processing. + """ + pass + + +class AudioContextWordTTSService(AudioContextTTSService): + """Deprecated. Use AudioContextTTSService with supports_word_timestamps=True instead. + + .. 
deprecated:: 0.0.104 + Word timestamp functionality has been moved to TTSService. Pass + ``supports_word_timestamps=True`` to AudioContextTTSService instead. """ def __init__(self, *, reconnect_on_error: bool = True, **kwargs): @@ -1281,5 +1428,6 @@ class AudioContextWordTTSService(AudioContextTTSService, WebsocketWordTTSService reconnect_on_error: Whether to automatically reconnect on websocket errors. **kwargs: Additional arguments passed to parent classes. """ - AudioContextTTSService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs) - WebsocketWordTTSService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs) + super().__init__( + supports_word_timestamps=True, reconnect_on_error=reconnect_on_error, **kwargs + ) diff --git a/src/pipecat/services/ultravox/llm.py b/src/pipecat/services/ultravox/llm.py index d549b11e5..07c3c34fe 100644 --- a/src/pipecat/services/ultravox/llm.py +++ b/src/pipecat/services/ultravox/llm.py @@ -15,6 +15,7 @@ import asyncio import datetime import json import uuid +from dataclasses import dataclass, field from typing import Any, Dict, List, Literal, Optional, Union import aiohttp @@ -30,11 +31,11 @@ from pipecat.frames.frames import ( Frame, InputAudioRawFrame, InputTextRawFrame, + InterruptionFrame, LLMContextFrame, LLMFullResponseEndFrame, LLMFullResponseStartFrame, LLMTextFrame, - LLMUpdateSettingsFrame, StartFrame, TranscriptionFrame, TTSAudioRawFrame, @@ -42,7 +43,7 @@ from pipecat.frames.frames import ( TTSStoppedFrame, TTSTextFrame, UserAudioRawFrame, - UserStoppedSpeakingFrame, + VADUserStoppedSpeakingFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext from pipecat.processors.aggregators.llm_response import ( @@ -56,6 +57,7 @@ from pipecat.processors.aggregators.openai_llm_context import ( ) from pipecat.processors.frame_processor import FrameDirection from pipecat.services.llm_service import FunctionCallFromLLM, LLMService +from pipecat.services.settings import NOT_GIVEN, 
LLMSettings, _NotGiven from pipecat.utils.time import time_now_iso8601 try: @@ -66,6 +68,17 @@ except ModuleNotFoundError as e: raise Exception(f"Missing module: {e}") +@dataclass +class UltravoxRealtimeLLMSettings(LLMSettings): + """Settings for UltravoxRealtimeLLMService. + + Parameters: + output_medium: The output medium for the model ("voice" or "text"). + """ + + output_medium: str | _NotGiven = field(default=NOT_GIVEN) + + class AgentInputParams(BaseModel): """Input parameters for Ultravox Realtime generation using a pre-defined Agent. @@ -78,6 +91,9 @@ class AgentInputParams(BaseModel): template_context: Context variables to use when instantiating a call with the agent. Defaults to an empty dict. metadata: Metadata to attach to the call. Default to an empty dict. + output_medium: The initial output medium for the agent. Use "text" for text + responses or "voice" for audio responses. Defaults to None, which uses the + agent's default. max_duration: The maximum duration of the call. Defaults to None, which will use the agent's default maximum duration. extra: Extra parameters to include in the agent call creation request. Defaults @@ -89,6 +105,7 @@ class AgentInputParams(BaseModel): agent_id: uuid.UUID template_context: Dict[str, Any] = Field(default_factory=dict) metadata: Dict[str, str] = Field(default_factory=dict) + output_medium: Optional[Literal["text", "voice"]] = None max_duration: Optional[datetime.timedelta] = Field( default=None, ge=datetime.timedelta(seconds=10), le=datetime.timedelta(hours=1) ) @@ -105,6 +122,8 @@ class OneShotInputParams(BaseModel): model: Model identifier to use. Defaults to "fixie-ai/ultravox". voice: Voice identifier for speech generation. Defaults to None. metadata: Metadata to attach to the call. Default to an empty dict. + output_medium: The initial output medium for the agent. Use "text" for text + responses or "voice" for audio responses. Defaults to None (voice). max_duration: The maximum duration of the call. 
Defaults to one hour. extra: Extra parameters to include in the call creation request. Defaults to an empty dict. See the Ultravox API documentation for valid arguments: @@ -117,6 +136,7 @@ class OneShotInputParams(BaseModel): model: Optional[str] = None voice: Optional[uuid.UUID] = None metadata: Dict[str, str] = Field(default_factory=dict) + output_medium: Optional[Literal["text", "voice"]] = None max_duration: datetime.timedelta = Field( default=datetime.timedelta(hours=1), ge=datetime.timedelta(seconds=10), @@ -146,6 +166,8 @@ class UltravoxRealtimeLLMService(LLMService): by the model and may not always align with its understanding of user input. """ + _settings: UltravoxRealtimeLLMSettings + def __init__( self, *, @@ -162,7 +184,22 @@ class UltravoxRealtimeLLMService(LLMService): May only be set with OneShotInputParams. **kwargs: Additional arguments passed to parent LLMService. """ - super().__init__(**kwargs) + super().__init__( + settings=UltravoxRealtimeLLMSettings( + model=None, + temperature=None, + max_tokens=None, + top_p=None, + top_k=None, + frequency_penalty=None, + presence_penalty=None, + seed=None, + filter_incomplete_user_turns=False, + user_turn_completion_config=None, + output_medium=None, + ), + **kwargs, + ) self._params = params if one_shot_selected_tools: if not isinstance(self._params, OneShotInputParams): @@ -181,6 +218,14 @@ class UltravoxRealtimeLLMService(LLMService): self._sample_rate = 48000 self._resampler = create_stream_resampler() + def can_generate_metrics(self) -> bool: + """Check if the service can generate usage metrics. + + Returns: + True if metrics generation is supported. 
+ """ + return True + # # standard AIService frame handling # @@ -208,6 +253,14 @@ class UltravoxRealtimeLLMService(LLMService): except Exception as e: await self.push_error("Failed to connect to Ultravox", e, fatal=True) + @staticmethod + def _output_medium_to_api(medium: Optional[Literal["text", "voice"]]) -> Optional[str]: + if medium == "text": + return "MESSAGE_MEDIUM_TEXT" + elif medium == "voice": + return "MESSAGE_MEDIUM_VOICE" + return None + async def _start_agent_call(self, params: AgentInputParams) -> str: request_body = { "templateContext": params.template_context, @@ -218,6 +271,9 @@ class UltravoxRealtimeLLMService(LLMService): } }, } + initial_output_medium = self._output_medium_to_api(params.output_medium) + if initial_output_medium: + request_body["initialOutputMedium"] = initial_output_medium if params.max_duration: request_body["maxDuration"] = f"{params.max_duration.total_seconds():3f}s" request_body = request_body | params.extra @@ -248,7 +304,11 @@ class UltravoxRealtimeLLMService(LLMService): "inputSampleRate": self._sample_rate, } }, - } | params.extra + } + initial_output_medium = self._output_medium_to_api(params.output_medium) + if initial_output_medium: + request_body["initialOutputMedium"] = initial_output_medium + request_body = request_body | params.extra async with aiohttp.ClientSession() as session: async with session.post( "https://api.ultravox.ai/api/calls", @@ -310,6 +370,13 @@ class UltravoxRealtimeLLMService(LLMService): await self.cancel_task(self._receive_task, timeout=1.0) self._receive_task = None + async def _update_settings(self, delta: UltravoxRealtimeLLMSettings): + changed = await super()._update_settings(delta) + if "output_medium" in changed: + await self._update_output_medium(self._settings.output_medium) + self._warn_unhandled_updated_settings(changed.keys() - {"output_medium"}) + return changed + # # frame processing # StartFrame, StopFrame, CancelFrame implemented in base class @@ -331,21 +398,17 @@ class 
UltravoxRealtimeLLMService(LLMService): else LLMContext.from_openai_context(frame.context) ) await self._handle_context(context) - elif isinstance(frame, LLMUpdateSettingsFrame): - if "output_medium" in frame.settings: - await self._update_output_medium(frame.settings.get("output_medium")) + elif isinstance(frame, InterruptionFrame): + await self.stop_all_metrics() + await self.push_frame(frame, direction) elif isinstance(frame, InputTextRawFrame): await self._send_user_text(frame.text) await self.push_frame(frame, direction) elif isinstance(frame, InputAudioRawFrame): await self._send_user_audio(frame) await self.push_frame(frame, direction) - elif isinstance(frame, UserStoppedSpeakingFrame): - # This may or may not align with Ultravox's end of user speech detection, - # which relies on a more complex endpointing model. In particular it will - # yield a seemingly very slow TTFB in the case of endpointing false - # negatives. It will be close in the majority of cases though. - await self.start_ttfb_metrics() + elif isinstance(frame, VADUserStoppedSpeakingFrame): + await self._handle_vad_user_stopped_speaking(frame) await self.push_frame(frame, direction) else: await self.push_frame(frame, direction) @@ -366,6 +429,25 @@ class UltravoxRealtimeLLMService(LLMService): } await self._send(socket_message) + async def _handle_vad_user_stopped_speaking(self, frame: VADUserStoppedSpeakingFrame): + """Handle VAD user stopped speaking frame. + + Calculates the actual speech end time and starts a timeout task to wait + for the final transcription before reporting TTFB. + + Args: + frame: The VAD user stopped speaking frame. + """ + # Skip TTFB measurement if stop_secs is not set + if frame.stop_secs == 0.0: + return + + # Calculate the actual speech end time (current time minus VAD stop delay). + # This approximates when the last user audio was sent to the Ultravox service, + # which we use to measure against the eventual transcription response. 
+ speech_end_time = frame.timestamp - frame.stop_secs + await self.start_ttfb_metrics(start_time=speech_end_time) + async def _send_user_audio(self, frame: InputAudioRawFrame): """Send user audio frame to Ultravox Realtime.""" if not self._socket: @@ -469,6 +551,7 @@ class UltravoxRealtimeLLMService(LLMService): if not audio: return if not self._bot_responding: + await self.start_processing_metrics() await self.stop_ttfb_metrics() await self.push_frame(LLMFullResponseStartFrame()) await self.push_frame(TTSStartedFrame()) @@ -476,6 +559,7 @@ class UltravoxRealtimeLLMService(LLMService): await self.push_frame(TTSAudioRawFrame(audio, self._sample_rate, 1)) async def _handle_response_end(self): + await self.stop_processing_metrics() if self._bot_responding == "voice": await self.push_frame(TTSStoppedFrame()) await self.push_frame(LLMFullResponseEndFrame()) @@ -509,22 +593,29 @@ class UltravoxRealtimeLLMService(LLMService): async def _handle_agent_transcript( self, medium: str, text: Optional[str], delta: Optional[str], final: bool ): - if text or delta: - frame = LLMTextFrame(text=text or delta) - frame.skip_tts = medium == "voice" - await self.push_frame(frame) - if medium == "text": - if text: - await self.stop_ttfb_metrics() - await self.push_frame(LLMFullResponseStartFrame()) - await self.push_frame(TTSStartedFrame()) - await self.push_frame(TTSTextFrame(text=text, aggregated_by=AggregationType.WORD)) - self._bot_responding = "text" - elif final: + if medium == "voice": + # In voice mode, audio is handled by _handle_audio(). Here we push + # text transcripts of the audio for downstream consumers. 
+ if (text or delta) and not final: + frame = LLMTextFrame(text=text or delta) + frame.append_to_context = False + await self.push_frame(frame) + if delta: + tts_frame = TTSTextFrame(text=delta, aggregated_by=AggregationType.WORD) + tts_frame.includes_inter_frame_spaces = True + await self.push_frame(tts_frame) + elif medium == "text": + if final: + await self.stop_processing_metrics() await self.push_frame(LLMFullResponseEndFrame()) self._bot_responding = None - elif delta: - await self.push_frame(TTSTextFrame(text=delta, aggregated_by=AggregationType.WORD)) + elif text or delta: + if not self._bot_responding: + await self.start_processing_metrics() + await self.stop_ttfb_metrics() + await self.push_frame(LLMFullResponseStartFrame()) + self._bot_responding = "text" + await self.push_frame(LLMTextFrame(text=text or delta)) def create_context_aggregator( self, diff --git a/src/pipecat/services/vision_service.py b/src/pipecat/services/vision_service.py index d12737d84..572f3b423 100644 --- a/src/pipecat/services/vision_service.py +++ b/src/pipecat/services/vision_service.py @@ -12,11 +12,12 @@ visual content. """ from abc import abstractmethod -from typing import AsyncGenerator +from typing import AsyncGenerator, Optional from pipecat.frames.frames import Frame, UserImageRawFrame from pipecat.processors.frame_processor import FrameDirection from pipecat.services.ai_service import AIService +from pipecat.services.settings import VisionSettings class VisionService(AIService): @@ -27,13 +28,20 @@ class VisionService(AIService): with the AI service infrastructure for metrics and lifecycle management. """ - def __init__(self, **kwargs): + def __init__(self, *, settings: Optional[VisionSettings] = None, **kwargs): """Initialize the vision service. Args: + settings: The runtime-updatable settings for the vision service. **kwargs: Additional arguments passed to the parent AIService. 
""" - super().__init__(**kwargs) + super().__init__( + settings=settings + # Here in case subclass doesn't implement more specific settings + # (which hopefully should be rare) + or VisionSettings(), + **kwargs, + ) self._describe_text = None @abstractmethod diff --git a/src/pipecat/services/whisper/base_stt.py b/src/pipecat/services/whisper/base_stt.py index bc999dba4..cf3342f4b 100644 --- a/src/pipecat/services/whisper/base_stt.py +++ b/src/pipecat/services/whisper/base_stt.py @@ -10,13 +10,15 @@ This module provides common functionality for services implementing the Whisper interface, including language mapping, metrics generation, and error handling. """ -from typing import AsyncGenerator, Optional +from dataclasses import dataclass, field +from typing import Any, AsyncGenerator, Optional from loguru import logger from openai import AsyncOpenAI from openai.types.audio import Transcription from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_latency import WHISPER_TTFS_P99 from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language, resolve_language @@ -24,6 +26,22 @@ from pipecat.utils.time import time_now_iso8601 from pipecat.utils.tracing.service_decorators import traced_stt +@dataclass +class BaseWhisperSTTSettings(STTSettings): + """Settings for Whisper API-based STT services. + + Parameters: + base_url: API base URL. + prompt: Optional text to guide the model's style or continue + a previous segment. + temperature: Sampling temperature between 0 and 1. 
+ """ + + base_url: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + prompt: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + def language_to_whisper_language(language: Language) -> Optional[str]: """Maps pipecat Language enum to Whisper API language codes. @@ -106,6 +124,8 @@ class BaseWhisperSTTService(SegmentedSTTService): including metrics generation and error handling. """ + _settings: BaseWhisperSTTSettings + def __init__( self, *, @@ -135,34 +155,45 @@ class BaseWhisperSTTService(SegmentedSTTService): Override for your deployment. See https://github.com/pipecat-ai/stt-benchmark **kwargs: Additional arguments passed to SegmentedSTTService. """ - super().__init__(ttfs_p99_latency=ttfs_p99_latency, **kwargs) - self.set_model_name(model) + super().__init__( + ttfs_p99_latency=ttfs_p99_latency, + settings=BaseWhisperSTTSettings( + model=model, + language=self.language_to_service_language(language or Language.EN), + base_url=base_url, + prompt=prompt, + temperature=temperature, + ), + **kwargs, + ) self._client = self._create_client(api_key, base_url) - self._language = self.language_to_service_language(language or Language.EN) + self._language = self._settings.language self._prompt = prompt self._temperature = temperature self._include_prob_metrics = include_prob_metrics - self._settings = { - "base_url": base_url, - "language": self._language, - "prompt": self._prompt, - "temperature": self._temperature, - } - def _create_client(self, api_key: Optional[str], base_url: Optional[str]): return AsyncOpenAI(api_key=api_key, base_url=base_url) - async def set_model(self, model: str): - """Set the model name for transcription. + async def _update_settings(self, delta: STTSettings) -> dict[str, Any]: + """Apply a settings delta, syncing instance variables. - Args: - model: The name of the model to use. 
+ Keeps ``_language``, ``_prompt``, and ``_temperature`` in sync with + the settings fields. """ - self.set_model_name(model) + changed = await super()._update_settings(delta) + + if "language" in changed: + self._language = self._settings.language + if "prompt" in changed: + self._prompt = self._settings.prompt + if "temperature" in changed: + self._temperature = self._settings.temperature + + return changed def can_generate_metrics(self) -> bool: - """Indicates whether this service can generate metrics. + """Whether this service can generate processing metrics. Returns: bool: True, as this service supports metric generation. @@ -180,15 +211,6 @@ class BaseWhisperSTTService(SegmentedSTTService): """ return language_to_whisper_language(language) - async def set_language(self, language: Language): - """Set the language for transcription. - - Args: - language: The Language enum value to use for transcription. - """ - logger.info(f"Switching STT language to: [{language}]") - self._language = self.language_to_service_language(language) - @traced_stt async def _handle_transcription( self, transcript: str, is_final: bool, language: Optional[Language] = None diff --git a/src/pipecat/services/whisper/stt.py b/src/pipecat/services/whisper/stt.py index f11978cc2..d386d6ed2 100644 --- a/src/pipecat/services/whisper/stt.py +++ b/src/pipecat/services/whisper/stt.py @@ -11,6 +11,7 @@ supporting both Faster Whisper and MLX Whisper backends for efficient inference. 
""" import asyncio +from dataclasses import dataclass, field from enum import Enum from typing import AsyncGenerator, Optional @@ -19,6 +20,7 @@ from loguru import logger from typing_extensions import TYPE_CHECKING, override from pipecat.frames.frames import ErrorFrame, Frame, TranscriptionFrame +from pipecat.services.settings import NOT_GIVEN, STTSettings, _NotGiven from pipecat.services.stt_service import SegmentedSTTService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.time import time_now_iso8601 @@ -172,6 +174,36 @@ def language_to_whisper_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class WhisperSTTSettings(STTSettings): + """Settings for the local Whisper (Faster Whisper) STT service. + + Parameters: + device: Inference device ('cpu', 'cuda', or 'auto'). + compute_type: Compute type for inference ('default', 'int8', etc.). + no_speech_prob: Probability threshold for filtering non-speech segments. + """ + + device: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + compute_type: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + no_speech_prob: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + +@dataclass +class WhisperMLXSTTSettings(STTSettings): + """Settings for the MLX Whisper STT service. + + Parameters: + no_speech_prob: Probability threshold for filtering non-speech segments. + temperature: Sampling temperature (0.0-1.0). + engine: Whisper engine identifier. + """ + + no_speech_prob: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + temperature: float | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + engine: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class WhisperSTTService(SegmentedSTTService): """Class to transcribe audio with a locally-downloaded Whisper model. @@ -179,6 +211,8 @@ class WhisperSTTService(SegmentedSTTService): segments. 
It supports multiple languages and various model sizes. """ + _settings: WhisperSTTSettings + def __init__( self, *, @@ -199,20 +233,21 @@ class WhisperSTTService(SegmentedSTTService): language: The default language for transcription. **kwargs: Additional arguments passed to SegmentedSTTService. """ - super().__init__(**kwargs) + super().__init__( + settings=WhisperSTTSettings( + model=model if isinstance(model, str) else model.value, + language=language, + device=device, + compute_type=compute_type, + no_speech_prob=no_speech_prob, + ), + **kwargs, + ) self._device: str = device self._compute_type = compute_type - self.set_model_name(model if isinstance(model, str) else model.value) self._no_speech_prob = no_speech_prob self._model: Optional[WhisperModel] = None - self._settings = { - "language": language, - "device": self._device, - "compute_type": self._compute_type, - "no_speech_prob": self._no_speech_prob, - } - self._load() def can_generate_metrics(self) -> bool: @@ -234,15 +269,6 @@ class WhisperSTTService(SegmentedSTTService): """ return language_to_whisper_language(language) - async def set_language(self, language: Language): - """Set the language for transcription. - - Args: - language: The Language enum value to use for transcription. - """ - logger.info(f"Switching STT language to: [{language}]") - self._settings["language"] = language - def _load(self): """Loads the Whisper model. @@ -255,7 +281,7 @@ class WhisperSTTService(SegmentedSTTService): logger.debug("Loading Whisper model...") self._model = WhisperModel( - self.model_name, device=self._device, compute_type=self._compute_type + self._settings.model, device=self._device, compute_type=self._compute_type ) logger.debug("Loaded Whisper model") except ModuleNotFoundError as e: @@ -293,9 +319,8 @@ class WhisperSTTService(SegmentedSTTService): # Divide by 32768 because we have signed 16-bit data. 
audio_float = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / 32768.0 - whisper_lang = self.language_to_service_language(self._settings["language"]) segments, _ = await asyncio.to_thread( - self._model.transcribe, audio_float, language=whisper_lang + self._model.transcribe, audio_float, language=self._settings.language ) text: str = "" for segment in segments: @@ -305,13 +330,13 @@ class WhisperSTTService(SegmentedSTTService): await self.stop_processing_metrics() if text: - await self._handle_transcription(text, True, self._settings["language"]) + await self._handle_transcription(text, True, self._settings.language) logger.debug(f"Transcription: [{text}]") yield TranscriptionFrame( text, self._user_id, time_now_iso8601(), - self._settings["language"], + self._settings.language, ) @@ -322,6 +347,8 @@ class WhisperSTTServiceMLX(WhisperSTTService): segments. It's optimized for Apple Silicon and supports multiple languages and quantizations. """ + _settings: WhisperMLXSTTSettings + def __init__( self, *, @@ -341,19 +368,21 @@ class WhisperSTTServiceMLX(WhisperSTTService): **kwargs: Additional arguments passed to SegmentedSTTService. 
""" # Skip WhisperSTTService.__init__ and call its parent directly - SegmentedSTTService.__init__(self, **kwargs) + SegmentedSTTService.__init__( + self, + settings=WhisperMLXSTTSettings( + model=model if isinstance(model, str) else model.value, + language=language, + no_speech_prob=no_speech_prob, + temperature=temperature, + engine="mlx", + ), + **kwargs, + ) - self.set_model_name(model if isinstance(model, str) else model.value) self._no_speech_prob = no_speech_prob self._temperature = temperature - self._settings = { - "language": language, - "no_speech_prob": self._no_speech_prob, - "temperature": self._temperature, - "engine": "mlx", - } - # No need to call _load() as MLX Whisper loads models on demand @override @@ -390,13 +419,12 @@ class WhisperSTTServiceMLX(WhisperSTTService): # Divide by 32768 because we have signed 16-bit data. audio_float = np.frombuffer(audio, dtype=np.int16).astype(np.float32) / 32768.0 - whisper_lang = self.language_to_service_language(self._settings["language"]) chunk = await asyncio.to_thread( mlx_whisper.transcribe, audio_float, - path_or_hf_repo=self.model_name, + path_or_hf_repo=self._settings.model, temperature=self._temperature, - language=whisper_lang, + language=self._settings.language, ) text: str = "" for segment in chunk.get("segments", []): @@ -413,13 +441,13 @@ class WhisperSTTServiceMLX(WhisperSTTService): await self.stop_processing_metrics() if text: - await self._handle_transcription(text, True, self._settings["language"]) + await self._handle_transcription(text, True, self._settings.language) logger.debug(f"Transcription: [{text}]") yield TranscriptionFrame( text, self._user_id, time_now_iso8601(), - self._settings["language"], + self._settings.language, ) except Exception as e: diff --git a/src/pipecat/services/xtts/tts.py b/src/pipecat/services/xtts/tts.py index bf4eb4f03..8817c09b5 100644 --- a/src/pipecat/services/xtts/tts.py +++ b/src/pipecat/services/xtts/tts.py @@ -10,7 +10,8 @@ This module provides 
integration with Coqui XTTS streaming server for text-to-speech synthesis using local Docker deployment. """ -from typing import Any, AsyncGenerator, Dict, Optional +from dataclasses import dataclass, field +from typing import AsyncGenerator, Dict, Optional import aiohttp from loguru import logger @@ -24,6 +25,7 @@ from pipecat.frames.frames import ( TTSStartedFrame, TTSStoppedFrame, ) +from pipecat.services.settings import NOT_GIVEN, TTSSettings, _NotGiven from pipecat.services.tts_service import TTSService from pipecat.transcriptions.language import Language, resolve_language from pipecat.utils.tracing.service_decorators import traced_tts @@ -68,6 +70,17 @@ def language_to_xtts_language(language: Language) -> Optional[str]: return resolve_language(language, LANGUAGE_MAP, use_base_code=True) +@dataclass +class XTTSTTSSettings(TTSSettings): + """Settings for XTTS TTS service. + + Parameters: + base_url: Base URL of the XTTS streaming server. + """ + + base_url: str | _NotGiven = field(default_factory=lambda: NOT_GIVEN) + + class XTTSService(TTSService): """Coqui XTTS text-to-speech service. @@ -76,6 +89,8 @@ class XTTSService(TTSService): studio speakers configuration. """ + _settings: XTTSTTSSettings + def __init__( self, *, @@ -96,13 +111,16 @@ class XTTSService(TTSService): sample_rate: Audio sample rate. If None, uses default. **kwargs: Additional arguments passed to parent TTSService. 
""" - super().__init__(sample_rate=sample_rate, **kwargs) - - self._settings = { - "language": self.language_to_service_language(language), - "base_url": base_url, - } - self.set_voice(voice_id) + super().__init__( + sample_rate=sample_rate, + settings=XTTSTTSSettings( + model=None, + voice=voice_id, + language=self.language_to_service_language(language), + base_url=base_url, + ), + **kwargs, + ) self._studio_speakers: Optional[Dict[str, Any]] = None self._aiohttp_session = aiohttp_session @@ -138,7 +156,7 @@ class XTTSService(TTSService): if self._studio_speakers: return - async with self._aiohttp_session.get(self._settings["base_url"] + "/studio_speakers") as r: + async with self._aiohttp_session.get(self._settings.base_url + "/studio_speakers") as r: if r.status != 200: text = await r.text() await self.push_error( @@ -164,13 +182,13 @@ class XTTSService(TTSService): logger.error(f"{self} no studio speakers available") return - embeddings = self._studio_speakers[self._voice_id] + embeddings = self._studio_speakers[self._settings.voice] - url = self._settings["base_url"] + "/tts_stream" + url = self._settings.base_url + "/tts_stream" payload = { "text": text.replace(".", "").replace("*", ""), - "language": self._settings["language"], + "language": self._settings.language, "speaker_embedding": embeddings["speaker_embedding"], "gpt_cond_latent": embeddings["gpt_cond_latent"], "add_wav_header": False, diff --git a/src/pipecat/transports/base_input.py b/src/pipecat/transports/base_input.py index 77ff61bba..1da672ab7 100644 --- a/src/pipecat/transports/base_input.py +++ b/src/pipecat/transports/base_input.py @@ -424,6 +424,11 @@ class BaseInputTransport(FrameProcessor): if self._params.audio_in_filter: frame.audio = await self._params.audio_in_filter.filter(frame.audio) + # Skip frames with no audio data (e.g. filter is buffering). 
+ if not frame.audio: + self._audio_in_queue.task_done() + continue + ################################################################### # DEPRECATED. # @@ -553,7 +558,7 @@ class BaseInputTransport(FrameProcessor): # Make sure we notify about interruptions quickly out-of-band. if should_push_immediate_interruption and self._allow_interruptions: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() elif self.interruption_strategies and self._bot_speaking: logger.debug( "User started speaking while bot is speaking with interruption config - " diff --git a/src/pipecat/transports/daily/transport.py b/src/pipecat/transports/daily/transport.py index 9575fd51b..dc9868426 100644 --- a/src/pipecat/transports/daily/transport.py +++ b/src/pipecat/transports/daily/transport.py @@ -24,7 +24,9 @@ from pydantic import BaseModel from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams from pipecat.frames.frames import ( + BotConnectedFrame, CancelFrame, + ClientConnectedFrame, DataFrame, EndFrame, Frame, @@ -2070,6 +2072,8 @@ class DailyTransport(BaseTransport): Event handlers available: - on_joined: Called when the bot joins the room. Args: (data: dict) + - on_connected: Called when the bot connects to the room (alias for + on_joined). Args: (data: dict) - on_left: Called when the bot leaves the room. - on_before_leave: [sync] Called just before the bot leaves the room. - on_error: Called when a transport error occurs. Args: (error: str) @@ -2187,6 +2191,7 @@ class DailyTransport(BaseTransport): # Register supported handlers. The user will only be able to register # these handlers. 
self._register_event_handler("on_active_speaker_changed") + self._register_event_handler("on_connected") self._register_event_handler("on_joined") self._register_event_handler("on_left") self._register_event_handler("on_error") @@ -2578,6 +2583,10 @@ class DailyTransport(BaseTransport): if error: await self._on_error(f"Unable to start transcription: {error}") await self._call_event_handler("on_joined", data) + # Also call on_connected for compatibility with other transports + await self._call_event_handler("on_connected", data) + if self._input: + await self._input.push_frame(BotConnectedFrame()) async def _on_left(self): """Handle room left events.""" @@ -2716,6 +2725,8 @@ class DailyTransport(BaseTransport): await self._call_event_handler("on_participant_joined", participant) # Also call on_client_connected for compatibility with other transports await self._call_event_handler("on_client_connected", participant) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_participant_left(self, participant, reason): """Handle participant left events.""" diff --git a/src/pipecat/transports/heygen/transport.py b/src/pipecat/transports/heygen/transport.py index dbeded3e5..d79d0080e 100644 --- a/src/pipecat/transports/heygen/transport.py +++ b/src/pipecat/transports/heygen/transport.py @@ -23,9 +23,11 @@ from loguru import logger from pipecat.frames.frames import ( AudioRawFrame, + BotConnectedFrame, BotStartedSpeakingFrame, BotStoppedSpeakingFrame, CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, @@ -339,6 +341,7 @@ class HeyGenTransport(BaseTransport): session_request=session_request, service_type=service_type, callbacks=HeyGenCallbacks( + on_connected=self._on_connected, on_participant_connected=self._on_participant_connected, on_participant_disconnected=self._on_participant_disconnected, ), @@ -349,9 +352,16 @@ class HeyGenTransport(BaseTransport): # Register supported handlers. 
The user will only be able to register # these handlers. + self._register_event_handler("on_connected") self._register_event_handler("on_client_connected") self._register_event_handler("on_client_disconnected") + async def _on_connected(self): + """Handle bot connected to LiveKit room.""" + await self._call_event_handler("on_connected") + if self._input: + await self._input.push_frame(BotConnectedFrame()) + async def _on_participant_disconnected(self, participant_id: str): logger.debug(f"HeyGen participant {participant_id} disconnected") if participant_id != "heygen": @@ -387,6 +397,8 @@ class HeyGenTransport(BaseTransport): async def _on_client_connected(self, participant: Any): """Handle client connected events.""" await self._call_event_handler("on_client_connected", participant) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_client_disconnected(self, participant: Any): """Handle client disconnected events.""" diff --git a/src/pipecat/transports/livekit/transport.py b/src/pipecat/transports/livekit/transport.py index 1902e7cd3..7e9c1de35 100644 --- a/src/pipecat/transports/livekit/transport.py +++ b/src/pipecat/transports/livekit/transport.py @@ -23,7 +23,9 @@ from pipecat.audio.utils import create_stream_resampler from pipecat.audio.vad.vad_analyzer import VADAnalyzer from pipecat.frames.frames import ( AudioRawFrame, + BotConnectedFrame, CancelFrame, + ClientConnectedFrame, EndFrame, ImageRawFrame, OutputAudioRawFrame, @@ -1131,6 +1133,8 @@ class LiveKitTransport(BaseTransport): async def _on_connected(self): """Handle room connected events.""" await self._call_event_handler("on_connected") + if self._input: + await self._input.push_frame(BotConnectedFrame()) async def _on_disconnected(self): """Handle room disconnected events.""" @@ -1143,6 +1147,8 @@ class LiveKitTransport(BaseTransport): async def _on_participant_connected(self, participant_id: str): """Handle participant connected events.""" await 
self._call_event_handler("on_participant_connected", participant_id) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_participant_disconnected(self, participant_id: str): """Handle participant disconnected events.""" diff --git a/src/pipecat/transports/smallwebrtc/transport.py b/src/pipecat/transports/smallwebrtc/transport.py index dc91588a3..36f883278 100644 --- a/src/pipecat/transports/smallwebrtc/transport.py +++ b/src/pipecat/transports/smallwebrtc/transport.py @@ -23,6 +23,7 @@ from pydantic import BaseModel from pipecat.frames.frames import ( CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, @@ -964,6 +965,8 @@ class SmallWebRTCTransport(BaseTransport): async def _on_client_connected(self, webrtc_connection): """Handle client connection events.""" await self._call_event_handler("on_client_connected", webrtc_connection) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_client_disconnected(self, webrtc_connection): """Handle client disconnection events.""" diff --git a/src/pipecat/transports/tavus/transport.py b/src/pipecat/transports/tavus/transport.py index dd63cb790..cb6844250 100644 --- a/src/pipecat/transports/tavus/transport.py +++ b/src/pipecat/transports/tavus/transport.py @@ -21,7 +21,9 @@ from loguru import logger from pydantic import BaseModel from pipecat.frames.frames import ( + BotConnectedFrame, CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, @@ -132,10 +134,12 @@ class TavusCallbacks(BaseModel): """Callback handlers for Tavus events. Parameters: + on_connected: Called when the bot connects to the room. on_participant_joined: Called when a participant joins the conversation. on_participant_left: Called when a participant leaves the conversation. 
""" + on_connected: Callable[[Mapping[str, Any]], Awaitable[None]] on_participant_joined: Callable[[Mapping[str, Any]], Awaitable[None]] on_participant_left: Callable[[Mapping[str, Any], str], Awaitable[None]] @@ -270,6 +274,7 @@ class TavusTransportClient: async def _on_joined(self, data): """Handle joined event.""" logger.debug("TavusTransportClient joined!") + await self._callbacks.on_connected(data) async def _on_left(self): """Handle left event.""" @@ -664,6 +669,7 @@ class TavusTransport(BaseTransport): Event handlers available: + - on_connected(transport, data): Bot connected to the room - on_client_connected(transport, participant): Participant connected to the session - on_client_disconnected(transport, participant): Participant disconnected from the session @@ -702,6 +708,7 @@ class TavusTransport(BaseTransport): self._params = params callbacks = TavusCallbacks( + on_connected=self._on_joined, on_participant_joined=self._on_participant_joined, on_participant_left=self._on_participant_left, ) @@ -720,9 +727,16 @@ class TavusTransport(BaseTransport): # Register supported handlers. The user will only be able to register # these handlers. 
+ self._register_event_handler("on_connected") self._register_event_handler("on_client_connected") self._register_event_handler("on_client_disconnected") + async def _on_joined(self, data): + """Handle bot joined room event.""" + await self._call_event_handler("on_connected", data) + if self._input: + await self._input.push_frame(BotConnectedFrame()) + async def _on_participant_left(self, participant, reason): """Handle participant left events.""" persona_name = await self._client.get_persona_name() @@ -786,6 +800,8 @@ class TavusTransport(BaseTransport): async def _on_client_connected(self, participant: Any): """Handle client connected events.""" await self._call_event_handler("on_client_connected", participant) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) async def _on_client_disconnected(self, participant: Any): """Handle client disconnected events.""" diff --git a/src/pipecat/transports/websocket/fastapi.py b/src/pipecat/transports/websocket/fastapi.py index f52123e52..0fde2b9ae 100644 --- a/src/pipecat/transports/websocket/fastapi.py +++ b/src/pipecat/transports/websocket/fastapi.py @@ -23,6 +23,7 @@ from pydantic import BaseModel from pipecat.frames.frames import ( CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, @@ -260,6 +261,7 @@ class FastAPIWebsocketInputTransport(BaseInputTransport): if not self._monitor_websocket_task and self._params.session_timeout: self._monitor_websocket_task = self.create_task(self._monitor_websocket()) await self._client.trigger_client_connected() + await self.push_frame(ClientConnectedFrame()) if not self._receive_task: self._receive_task = self.create_task(self._receive_messages()) await self.set_transport_ready(frame) diff --git a/src/pipecat/transports/websocket/server.py b/src/pipecat/transports/websocket/server.py index e5f628fa4..fa3645d37 100644 --- a/src/pipecat/transports/websocket/server.py +++ b/src/pipecat/transports/websocket/server.py @@ -22,11 +22,11 @@ from 
pydantic import BaseModel from pipecat.frames.frames import ( CancelFrame, + ClientConnectedFrame, EndFrame, Frame, InputAudioRawFrame, InputTransportMessageFrame, - InputTransportMessageUrgentFrame, InterruptionFrame, OutputAudioRawFrame, OutputTransportMessageFrame, @@ -504,6 +504,8 @@ class WebsocketServerTransport(BaseTransport): if self._output: await self._output.set_client_connection(websocket) await self._call_event_handler("on_client_connected", websocket) + if self._input: + await self._input.push_frame(ClientConnectedFrame()) else: logger.error("A WebsocketServerTransport output is missing in the pipeline") diff --git a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py index acd4936a3..f141a75b7 100644 --- a/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py +++ b/src/pipecat/turns/user_stop/turn_analyzer_user_turn_stop_strategy.py @@ -115,10 +115,14 @@ class TurnAnalyzerUserTurnStopStrategy(BaseUserTurnStopStrategy): """Handle input audio to check if the turn is completed.""" state = self._turn_analyzer.append_audio(frame.audio, self._vad_user_speaking) - # If at this point the model says the turn is complete it will be due to - # a timeout, so we mark turn as complete and we trigger the user end of - # turn. + # Streaming analyzers (e.g. KrispVivaTurn) detect turn completion + # frame-by-frame inside append_audio, so COMPLETE is returned here + # rather than in analyze_end_of_turn. Batch analyzers (BaseSmartTurn) + # return COMPLETE here only on a silence timeout. In either case we + # consume and push metrics immediately while they're fresh. 
if state == EndOfTurnState.COMPLETE: + _, prediction = await self._turn_analyzer.analyze_end_of_turn() + await self._handle_prediction_result(prediction) self._turn_complete = True await self._maybe_trigger_user_turn_stopped() diff --git a/src/pipecat/turns/user_turn_processor.py b/src/pipecat/turns/user_turn_processor.py index 7f8995202..85bc658dd 100644 --- a/src/pipecat/turns/user_turn_processor.py +++ b/src/pipecat/turns/user_turn_processor.py @@ -182,7 +182,7 @@ class UserTurnProcessor(FrameProcessor): await self._user_idle_controller.process_frame(UserStartedSpeakingFrame()) if params.enable_interruptions and self._allow_interruptions: - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self._call_event_handler("on_user_turn_started", strategy) diff --git a/src/pipecat/utils/context/llm_context_summarization.py b/src/pipecat/utils/context/llm_context_summarization.py index 6865a00d9..e68311942 100644 --- a/src/pipecat/utils/context/llm_context_summarization.py +++ b/src/pipecat/utils/context/llm_context_summarization.py @@ -10,12 +10,19 @@ This module provides reusable functionality for automatically compressing conver context when token limits are reached, enabling efficient long-running conversations. """ -from dataclasses import dataclass -from typing import List, Optional +import warnings +from dataclasses import dataclass, field +from typing import TYPE_CHECKING, List, Optional + +if TYPE_CHECKING: + from pipecat.services.llm_service import LLMService from loguru import logger -from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage + +# Fallback timeout (seconds) used when summarization_timeout is None. +DEFAULT_SUMMARIZATION_TIMEOUT = 120.0 # Token estimation constants CHARS_PER_TOKEN = 4 # Industry-standard heuristic: 1 token ≈ 4 characters @@ -48,31 +55,124 @@ The conversation transcript follows. 
Generate only the summary, no other text."" @dataclass -class LLMContextSummarizationConfig: - """Configuration for context summarization behavior. +class LLMContextSummaryConfig: + """Configuration for summary generation parameters. - Controls when and how conversation context is automatically compressed - to manage token limits in long-running conversations. + Contains settings that control how a summary is generated. Used by both + automatic and manual summarization modes. + + Parameters: + target_context_tokens: Maximum token size for the generated summary. + This value is passed directly to the LLM as the max_tokens parameter + when generating the summary. Should be sized appropriately to allow + the summary plus recent preserved messages to fit within reasonable + context limits. + min_messages_after_summary: Number of recent messages to preserve + uncompressed after each summarization. These messages maintain + immediate conversational context. + summarization_prompt: Custom prompt for the LLM to use when generating + summaries. If None, uses DEFAULT_SUMMARIZATION_PROMPT. + summary_message_template: Template for formatting the summary when + injected into context. Must contain ``{summary}`` as a placeholder + for the generated summary text. Allows applications to wrap the + summary in custom delimiters (e.g., XML tags) so that system + prompts can distinguish summaries from live conversation. + llm: Optional separate LLM service for generating summaries. When set, + summarization requests are sent to this service instead of the + pipeline's primary LLM. Useful for routing summarization to a + cheaper/faster model (e.g., Gemini Flash) while keeping an + expensive model for conversation. If None, uses the pipeline LLM. + summarization_timeout: Maximum time in seconds to wait for the LLM to + generate a summary. If the call exceeds this timeout, summarization + is aborted with an error and future summarizations are unblocked. 
+ """ + + target_context_tokens: int = 6000 + min_messages_after_summary: int = 4 + summarization_prompt: Optional[str] = None + summary_message_template: str = "Conversation summary: {summary}" + llm: Optional["LLMService"] = None + summarization_timeout: float = DEFAULT_SUMMARIZATION_TIMEOUT + + def __post_init__(self): + """Validate configuration parameters.""" + if self.target_context_tokens <= 0: + raise ValueError("target_context_tokens must be positive") + if self.min_messages_after_summary < 0: + raise ValueError("min_messages_after_summary must be non-negative") + + @property + def summary_prompt(self) -> str: + """Get the summarization prompt to use. + + Returns: + The custom prompt if set, otherwise the default summarization prompt. + """ + return self.summarization_prompt or DEFAULT_SUMMARIZATION_PROMPT + + +@dataclass +class LLMAutoContextSummarizationConfig: + """Configuration for automatic context summarization. + + Controls when conversation context is automatically compressed and how + that summary is generated. Summarization is triggered when either the + token limit or the unsummarized message count threshold is exceeded. Parameters: max_context_tokens: Maximum allowed context size in tokens. When this limit is reached, summarization is triggered to compress the context. The tokens are calculated using the industry-standard approximation of 1 token ≈ 4 characters. - target_context_tokens: Maximum token size for the generated summary. - This value is passed directly to the LLM as the max_tokens parameter - when generating the summary. Should be sized appropriately to allow - the summary plus recent preserved messages to fit within reasonable - context limits. max_unsummarized_messages: Maximum number of new messages that can accumulate since the last summary before triggering a new summarization. This ensures regular compression even if token limits are not reached. 
- min_messages_after_summary: Number of recent messages to preserve - uncompressed after each summarization. These messages maintain - immediate conversational context. - summarization_prompt: Custom prompt for the LLM to use when generating - summaries. If None, uses DEFAULT_SUMMARIZATION_PROMPT. + summary_config: Configuration for summary generation parameters + (prompt, token budget, messages to keep). If not provided, uses + default ``LLMContextSummaryConfig`` values. + """ + + max_context_tokens: int = 8000 + max_unsummarized_messages: int = 20 + summary_config: LLMContextSummaryConfig = field(default_factory=LLMContextSummaryConfig) + + def __post_init__(self): + """Validate configuration parameters.""" + if self.max_context_tokens <= 0: + raise ValueError("max_context_tokens must be positive") + if self.max_unsummarized_messages < 1: + raise ValueError("max_unsummarized_messages must be at least 1") + + # Auto-adjust target_context_tokens if it exceeds max_context_tokens + if self.summary_config.target_context_tokens > self.max_context_tokens: + # Use 80% of max_context_tokens as a reasonable default + self.summary_config.target_context_tokens = int(self.max_context_tokens * 0.8) + + +@dataclass +class LLMContextSummarizationConfig: + """Configuration for context summarization behavior. + + .. deprecated:: + Use :class:`LLMAutoContextSummarizationConfig` with a nested + :class:`LLMContextSummaryConfig` instead:: + + LLMAutoContextSummarizationConfig( + max_context_tokens=8000, + max_unsummarized_messages=20, + summary_config=LLMContextSummaryConfig( + target_context_tokens=6000, + min_messages_after_summary=4, + ), + ) + + Parameters: + max_context_tokens: Maximum allowed context size in tokens. + target_context_tokens: Maximum token size for the generated summary. + max_unsummarized_messages: Maximum new messages before triggering summarization. + min_messages_after_summary: Number of recent messages to preserve. 
+ summarization_prompt: Custom prompt for summary generation. """ max_context_tokens: int = 8000 @@ -80,9 +180,18 @@ class LLMContextSummarizationConfig: max_unsummarized_messages: int = 20 min_messages_after_summary: int = 4 summarization_prompt: Optional[str] = None + summary_message_template: str = "Conversation summary: {summary}" + llm: Optional["LLMService"] = None + summarization_timeout: float = DEFAULT_SUMMARIZATION_TIMEOUT def __post_init__(self): """Validate configuration parameters.""" + warnings.warn( + "LLMContextSummarizationConfig is deprecated. " + "Use LLMAutoContextSummarizationConfig with a nested LLMContextSummaryConfig instead.", + DeprecationWarning, + stacklevel=2, + ) if self.max_context_tokens <= 0: raise ValueError("max_context_tokens must be positive") if self.target_context_tokens <= 0: @@ -107,6 +216,25 @@ class LLMContextSummarizationConfig: """ return self.summarization_prompt or DEFAULT_SUMMARIZATION_PROMPT + def to_auto_config(self) -> LLMAutoContextSummarizationConfig: + """Convert to the new :class:`LLMAutoContextSummarizationConfig`. + + Returns: + An equivalent ``LLMAutoContextSummarizationConfig`` instance. + """ + return LLMAutoContextSummarizationConfig( + max_context_tokens=self.max_context_tokens, + max_unsummarized_messages=self.max_unsummarized_messages, + summary_config=LLMContextSummaryConfig( + target_context_tokens=self.target_context_tokens, + min_messages_after_summary=self.min_messages_after_summary, + summarization_prompt=self.summarization_prompt, + summary_message_template=self.summary_message_template, + llm=self.llm, + summarization_timeout=self.summarization_timeout, + ), + ) + @dataclass class LLMMessagesToSummarize: @@ -188,6 +316,11 @@ class LLMContextSummarizationUtil: total = 0 for message in context.messages: + # LLMSpecificMessage holds service-specific data (e.g. thinking blocks, + # thought signatures). Skipping them here for now. 
+ if isinstance(message, LLMSpecificMessage): + continue + # Role and structure overhead total += TOKEN_OVERHEAD_PER_MESSAGE @@ -248,6 +381,12 @@ class LLMContextSummarizationUtil: for i in range(start_idx, len(messages)): msg = messages[i] + # LLMSpecificMessage instances (e.g. thinking blocks) never carry tool_call or + # tool_call_id fields, so they cannot affect the pending-call tracking. Skipping + # them avoids an AttributeError. + if isinstance(msg, LLMSpecificMessage): + continue + role = msg.get("role") # Check for tool calls in assistant messages @@ -296,9 +435,17 @@ class LLMContextSummarizationUtil: if len(messages) <= min_messages_to_keep: return LLMMessagesToSummarize(messages=[], last_summarized_index=-1) - # Find first system message index + # Find first system message index. LLMSpecificMessage instances are excluded because + # they are not dict-like and never represent a system message; they hold + # service-specific metadata (e.g. thinking blocks) that is always paired with a + # standard message. first_system_index = next( - (i for i, msg in enumerate(messages) if msg.get("role") == "system"), -1 + ( + i + for i, msg in enumerate(messages) + if not isinstance(msg, LLMSpecificMessage) and msg.get("role") == "system" + ), + -1, ) # Messages to summarize are between first system and recent messages @@ -356,6 +503,14 @@ class LLMContextSummarizationUtil: transcript_parts = [] for msg in messages: + # LLMSpecificMessage holds service-specific internal data (e.g. Anthropic thinking + # blocks, Gemini thought signatures). This data is not meaningful as plain text for + # a summarization transcript, and the summarizer LLM would not know how to interpret + # it. The conversational content of those turns is already captured by the + # accompanying standard assistant message. 
+ if isinstance(msg, LLMSpecificMessage): + continue + role = msg.get("role", "unknown") content = msg.get("content", "") diff --git a/src/pipecat/utils/text/base_text_aggregator.py b/src/pipecat/utils/text/base_text_aggregator.py index 13691d9cd..2b050fcb7 100644 --- a/src/pipecat/utils/text/base_text_aggregator.py +++ b/src/pipecat/utils/text/base_text_aggregator.py @@ -21,6 +21,7 @@ class AggregationType(str, Enum): """Built-in aggregation strings.""" SENTENCE = "sentence" + TOKEN = "token" WORD = "word" def __str__(self): @@ -66,6 +67,25 @@ class BaseTextAggregator(ABC): logic, text manipulation behavior, and state management for interruptions. """ + def __init__(self, *, aggregation_type: AggregationType = AggregationType.SENTENCE): + """Initialize the base text aggregator. + + Args: + aggregation_type: The aggregation strategy to use. SENTENCE buffers + text until sentence boundaries are detected, TOKEN passes text + through immediately, and WORD buffers until word boundaries. + """ + self._aggregation_type = AggregationType(aggregation_type) + + @property + def aggregation_type(self) -> AggregationType: + """Get the aggregation type for this aggregator. + + Returns: + The aggregation type. + """ + return self._aggregation_type + @property @abstractmethod def text(self) -> Aggregation: diff --git a/src/pipecat/utils/text/pattern_pair_aggregator.py b/src/pipecat/utils/text/pattern_pair_aggregator.py index bfaf9291b..835bb8591 100644 --- a/src/pipecat/utils/text/pattern_pair_aggregator.py +++ b/src/pipecat/utils/text/pattern_pair_aggregator.py @@ -96,8 +96,11 @@ class PatternPairAggregator(SimpleTextAggregator): Creates an empty aggregator with no patterns or handlers registered. Text buffering and pattern detection will begin when text is aggregated. + + Args: + **kwargs: Additional arguments passed to SimpleTextAggregator (e.g. aggregation_type). 
""" - super().__init__() + super().__init__(**kwargs) self._patterns = {} self._handlers = {} self._last_processed_position = 0 # Track where we last checked for complete patterns @@ -146,7 +149,7 @@ class PatternPairAggregator(SimpleTextAggregator): Returns: Self for method chaining. """ - if type in [AggregationType.SENTENCE, AggregationType.WORD]: + if type in [AggregationType.SENTENCE, AggregationType.WORD, AggregationType.TOKEN]: raise ValueError( f"The aggregation type '{type}' is reserved for default behavior and can not be used for custom patterns." ) @@ -321,6 +324,9 @@ class PatternPairAggregator(SimpleTextAggregator): and uses the parent's lookahead logic for sentence detection when no patterns are active. + In TOKEN mode, pattern detection still works but non-pattern text is + yielded as TOKEN aggregations instead of waiting for sentence boundaries. + Args: text: Text to aggregate. @@ -370,18 +376,35 @@ class PatternPairAggregator(SimpleTextAggregator): # boundaries when a pattern begins (e.g., "Here is code ..." 
yields "Here is code") result = self._text[: pattern_start[0]] self._text = self._text[pattern_start[0] :] - yield PatternMatch( - content=result.strip(), type=AggregationType.SENTENCE, full_match=result + agg_type = ( + AggregationType.TOKEN + if self._aggregation_type == AggregationType.TOKEN + else AggregationType.SENTENCE ) + yield PatternMatch(content=result.strip(), type=agg_type, full_match=result) continue - # Use parent's lookahead logic for sentence detection - aggregation = await super()._check_sentence_with_lookahead(char) - if aggregation: - # Convert to PatternMatch for consistency with return type + if self._aggregation_type != AggregationType.TOKEN: + # Use parent's lookahead logic for sentence detection + aggregation = await super()._check_sentence_with_lookahead(char) + if aggregation: + # Convert to PatternMatch for consistency with return type + yield PatternMatch( + content=aggregation.text, + type=aggregation.type, + full_match=aggregation.text, + ) + + # In TOKEN mode, yield any accumulated text after processing all chars, + # but only if there's no incomplete pattern being buffered. + if self._aggregation_type == AggregationType.TOKEN and self._text: + if self._match_start_of_pattern(self._text) is None: yield PatternMatch( - content=aggregation.text, type=aggregation.type, full_match=aggregation.text + content=self._text, + type=AggregationType.TOKEN, + full_match=self._text, ) + self._text = "" async def handle_interruption(self): """Handle interruptions by clearing the buffer and pattern state. diff --git a/src/pipecat/utils/text/simple_text_aggregator.py b/src/pipecat/utils/text/simple_text_aggregator.py index b0cc698a9..b5b179fcf 100644 --- a/src/pipecat/utils/text/simple_text_aggregator.py +++ b/src/pipecat/utils/text/simple_text_aggregator.py @@ -25,11 +25,15 @@ class SimpleTextAggregator(BaseTextAggregator): most straightforward implementation of text aggregation for TTS processing. 
""" - def __init__(self): + def __init__(self, **kwargs): """Initialize the simple text aggregator. Creates an empty text buffer ready to begin accumulating text tokens. + + Args: + **kwargs: Additional arguments passed to BaseTextAggregator (e.g. aggregation_type). """ + super().__init__(**kwargs) self._text = "" self._needs_lookahead: bool = False @@ -43,19 +47,25 @@ class SimpleTextAggregator(BaseTextAggregator): return Aggregation(text=self._text.strip(" "), type=AggregationType.SENTENCE) async def aggregate(self, text: str) -> AsyncIterator[Aggregation]: - """Aggregate text and yield completed sentences. + """Aggregate text and yield completed aggregations. - Processes the input text character-by-character. When sentence-ending - punctuation is detected, it waits for non-whitespace lookahead before - calling NLTK. This prevents false positives like "$29." being detected - as a sentence when it's actually "$29.95". + In SENTENCE mode, processes the input text character-by-character. When + sentence-ending punctuation is detected, it waits for non-whitespace + lookahead before calling NLTK. + + In TOKEN mode, yields the text immediately without buffering. Args: text: Text to aggregate. Yields: - Complete sentences as Aggregation objects. + Aggregation objects (sentences in SENTENCE mode, tokens in TOKEN mode). """ + if self._aggregation_type == AggregationType.TOKEN: + if text: + yield Aggregation(text=text, type=AggregationType.TOKEN) + return + # Process text character by character for char in text: self._text += char @@ -114,11 +124,15 @@ class SimpleTextAggregator(BaseTextAggregator): """Flush any remaining text in the buffer. Returns any text remaining in the buffer. This is called at the end - of a stream to ensure all text is processed. + of a stream to ensure all text is processed. In TOKEN mode, returns + None since tokens are yielded immediately. Returns: - Any remaining text as a sentence, or None if buffer is empty. 
+ Any remaining text as a sentence, or None if buffer is empty or in TOKEN mode. """ + if self._aggregation_type == AggregationType.TOKEN: + return None + if self._text: # Return whatever we have in the buffer result = self._text diff --git a/src/pipecat/utils/text/skip_tags_aggregator.py b/src/pipecat/utils/text/skip_tags_aggregator.py index 4232efd7d..1b6a7f156 100644 --- a/src/pipecat/utils/text/skip_tags_aggregator.py +++ b/src/pipecat/utils/text/skip_tags_aggregator.py @@ -14,7 +14,7 @@ as a unit regardless of internal punctuation. from typing import AsyncIterator, Optional, Sequence from pipecat.utils.string import StartEndTags, parse_start_end_tags -from pipecat.utils.text.base_text_aggregator import Aggregation +from pipecat.utils.text.base_text_aggregator import Aggregation, AggregationType from pipecat.utils.text.simple_text_aggregator import SimpleTextAggregator @@ -31,14 +31,15 @@ class SkipTagsAggregator(SimpleTextAggregator): identified and that content within tags is never split at sentence boundaries. """ - def __init__(self, tags: Sequence[StartEndTags]): + def __init__(self, tags: Sequence[StartEndTags], **kwargs): """Initialize the skip tags aggregator. Args: tags: Sequence of StartEndTags objects defining the tag pairs that should prevent sentence boundary detection. + **kwargs: Additional arguments passed to SimpleTextAggregator (e.g. aggregation_type). """ - super().__init__() + super().__init__(**kwargs) self._tags = tags self._current_tag: Optional[StartEndTags] = None self._current_tag_index: int = 0 @@ -50,13 +51,33 @@ class SkipTagsAggregator(SimpleTextAggregator): uses the parent's lookahead logic for sentence detection when not inside tags. + In TOKEN mode, text is passed through immediately unless we're inside + a tag, in which case we buffer until the closing tag is found. + Args: text: Text to aggregate. Yields: Aggregation objects containing text up to a sentence boundary, - marked as SENTENCE type. 
+ marked as SENTENCE type (or TOKEN type in TOKEN mode). """ + if self._aggregation_type == AggregationType.TOKEN: + # In TOKEN mode, process chars for tag tracking but yield the + # full input as a single token when not inside a tag. + for char in text: + self._text += char + + # Update tag state + (self._current_tag, self._current_tag_index) = parse_start_end_tags( + self._text, self._tags, self._current_tag, self._current_tag_index + ) + + # After processing all chars: if not inside a tag, yield accumulated text + if not self._current_tag and self._text: + yield Aggregation(text=self._text, type=AggregationType.TOKEN) + self._text = "" + return + # Process text character by character for char in text: self._text += char diff --git a/src/pipecat/utils/tracing/service_attributes.py b/src/pipecat/utils/tracing/service_attributes.py index c8471a03b..97ac49d87 100644 --- a/src/pipecat/utils/tracing/service_attributes.py +++ b/src/pipecat/utils/tracing/service_attributes.py @@ -17,6 +17,8 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional if TYPE_CHECKING: from opentelemetry.trace import Span + from pipecat.services.settings import ServiceSettings + from pipecat.utils.tracing.setup import is_tracing_available if is_tracing_available(): @@ -68,7 +70,7 @@ def add_tts_span_attributes( model: str, voice_id: str, text: Optional[str] = None, - settings: Optional[Dict[str, Any]] = None, + settings: Optional["ServiceSettings"] = None, character_count: Optional[int] = None, operation_name: str = "tts", ttfb: Optional[float] = None, @@ -107,7 +109,7 @@ def add_tts_span_attributes( # Add settings if provided if settings: - for key, value in settings.items(): + for key, value in settings.given_fields().items(): if isinstance(value, (str, int, float, bool)): span.set_attribute(f"settings.{key}", value) @@ -126,7 +128,7 @@ def add_stt_span_attributes( is_final: Optional[bool] = None, language: Optional[str] = None, user_id: Optional[str] = None, - settings: 
Optional[Dict[str, Any]] = None, + settings: Optional["ServiceSettings"] = None, vad_enabled: bool = False, ttfb: Optional[float] = None, **kwargs, @@ -171,7 +173,7 @@ def add_stt_span_attributes( # Add settings if provided if settings: - for key, value in settings.items(): + for key, value in settings.given_fields().items(): if isinstance(value, (str, int, float, bool)): span.set_attribute(f"settings.{key}", value) @@ -282,7 +284,7 @@ def add_gemini_live_span_attributes( voice_id: Optional[str] = None, language: Optional[str] = None, modalities: Optional[str] = None, - settings: Optional[Dict[str, Any]] = None, + settings: Optional["ServiceSettings"] = None, tools: Optional[List[Dict]] = None, tools_serialized: Optional[str] = None, transcript: Optional[str] = None, @@ -359,7 +361,7 @@ def add_gemini_live_span_attributes( # Add settings if provided if settings: - for key, value in settings.items(): + for key, value in settings.given_fields().items(): if isinstance(value, (str, int, float, bool)): span.set_attribute(f"settings.{key}", value) elif key == "vad" and value: diff --git a/src/pipecat/utils/tracing/service_decorators.py b/src/pipecat/utils/tracing/service_decorators.py index 968fe8e8a..304ecb5e8 100644 --- a/src/pipecat/utils/tracing/service_decorators.py +++ b/src/pipecat/utils/tracing/service_decorators.py @@ -42,6 +42,23 @@ T = TypeVar("T") R = TypeVar("R") +def _get_model_name(service) -> str: + """Get the model name from a service instance. + + This is a bit of a mess — there were multiple places a model name could live. + Soon, self._settings should be the only source of truth about model name. + In fact...it might already be the case, but juuuuust to be safe, we'll + check all the places we used to store it. 
+ """ + return ( + getattr(getattr(service, "_settings", None), "model", None) + or getattr(service, "_full_model_name", None) + or getattr(service, "model_name", None) + or getattr(service, "_model_name", None) + or "unknown" + ) + + def _noop_decorator(func): """No-op fallback decorator when tracing is unavailable. @@ -202,13 +219,14 @@ def traced_tts(func: Optional[Callable] = None, *, name: Optional[str] = None) - tracer = trace.get_tracer("pipecat") with tracer.start_as_current_span(span_name, context=parent_context) as span: try: + settings = getattr(self, "_settings", None) add_tts_span_attributes( span=span, service_name=service_class_name, - model=getattr(self, "model_name") or "unknown", - voice_id=getattr(self, "_voice_id", "unknown"), + model=_get_model_name(self), + voice_id=getattr(settings, "voice", "unknown"), text=text, - settings=getattr(self, "_settings", {}), + settings=settings, character_count=len(text), operation_name="tts", cartesia_version=getattr(self, "_cartesia_version", None), @@ -320,12 +338,12 @@ def traced_stt(func: Optional[Callable] = None, *, name: Optional[str] = None) - ) # Use settings from the service if available - settings = getattr(self, "_settings", {}) + settings = getattr(self, "_settings", None) add_stt_span_attributes( span=current_span, service_name=service_class_name, - model=getattr(self, "model_name") or settings.get("model", "unknown"), + model=_get_model_name(self), transcript=transcript, is_final=is_final, language=str(language) if language else None, @@ -492,24 +510,16 @@ def traced_llm(func: Optional[Callable] = None, *, name: Optional[str] = None) - # Get settings from the service params = {} if hasattr(self, "_settings"): - for key, value in self._settings.items(): - if key == "extra": - continue - # Add value directly if it's a basic type + for key, value in self._settings.given_fields().items(): if isinstance(value, (int, float, bool, str)): params[key] = value - elif value is None or ( - hasattr(value, 
"__name__") and value.__name__ == "NOT_GIVEN" - ): + elif value is None: params[key] = "NOT_GIVEN" # Add all available attributes to the span attribute_kwargs = { "service_name": service_class_name, - "model": getattr(self, "_full_model_name", None) - or getattr(self, "model_name", None) - or params.get("model") - or "unknown", + "model": _get_model_name(self), "stream": True, # Most LLM services use streaming "parameters": params, } @@ -609,19 +619,15 @@ def traced_gemini_live(operation: str) -> Callable: ) as current_span: try: # Base service attributes - model_name = ( - getattr(self, "model_name", None) - or getattr(self, "_model_name", None) - or "unknown" - ) + model_name = _get_model_name(self) voice_id = getattr(self, "_voice_id", None) language_code = getattr(self, "_language_code", None) - settings = getattr(self, "_settings", {}) + settings = getattr(self, "_settings", None) # Get modalities if available modalities = None - if hasattr(self, "_settings") and "modalities" in self._settings: - modality_obj = self._settings["modalities"] + if settings and hasattr(settings, "modalities"): + modality_obj = settings.modalities if hasattr(modality_obj, "value"): modalities = modality_obj.value else: @@ -917,11 +923,7 @@ def traced_openai_realtime(operation: str) -> Callable: ) as current_span: try: # Base service attributes - model_name = ( - getattr(self, "model_name", None) - or getattr(self, "_model_name", None) - or "unknown" - ) + model_name = _get_model_name(self) # Operation-specific attribute collection operation_attrs = {} diff --git a/tests/test_context_aggregators.py b/tests/test_context_aggregators.py index 24dae0b4c..37d36bfef 100644 --- a/tests/test_context_aggregators.py +++ b/tests/test_context_aggregators.py @@ -21,7 +21,6 @@ from pipecat.frames.frames import ( FunctionCallResultProperties, InterimTranscriptionFrame, InterruptionFrame, - InterruptionTaskFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, LLMFullResponseEndFrame, @@ -567,7 
+566,7 @@ class BaseTestUserContextAggregator: SleepFrame(), UserStoppedSpeakingFrame(), ] - expected_up_frames = [InterruptionTaskFrame] + expected_up_frames = [InterruptionFrame] expected_down_frames = [ BotStartedSpeakingFrame, UserStartedSpeakingFrame, diff --git a/tests/test_context_aggregators_universal.py b/tests/test_context_aggregators_universal.py index 1bba463b0..b22abf6c6 100644 --- a/tests/test_context_aggregators_universal.py +++ b/tests/test_context_aggregators_universal.py @@ -12,6 +12,7 @@ from pipecat.frames.frames import ( FunctionCallFromLLM, FunctionCallResultFrame, FunctionCallsStartedFrame, + InterimTranscriptionFrame, InterruptionFrame, LLMContextAssistantTimestampFrame, LLMContextFrame, @@ -26,6 +27,7 @@ from pipecat.frames.frames import ( LLMThoughtTextFrame, StartFrame, TranscriptionFrame, + TranslationFrame, UserMuteStartedFrame, UserStartedSpeakingFrame, UserStoppedSpeakingFrame, @@ -48,6 +50,7 @@ from pipecat.turns.user_mute import ( MuteUntilFirstBotCompleteUserMuteStrategy, ) from pipecat.turns.user_stop import SpeechTimeoutUserTurnStopStrategy +from pipecat.turns.user_turn_completion_mixin import UserTurnCompletionConfig from pipecat.turns.user_turn_strategies import UserTurnStrategies USER_TURN_STOP_TIMEOUT = 0.2 @@ -153,6 +156,28 @@ class TestLLMUserAggregator(unittest.IsolatedAsyncioTestCase): ) assert context.messages[0]["content"] == "Hi there!" 
+ async def test_llm_messages_update_reinjects_turn_completion_instructions(self): + context = LLMContext() + params = LLMUserAggregatorParams(filter_incomplete_user_turns=True) + pipeline = Pipeline([LLMUserAggregator(context, params=params)]) + + new_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"}, + ] + frames_to_send = [LLMMessagesUpdateFrame(messages=new_messages)] + await run_test( + pipeline, + frames_to_send=frames_to_send, + ) + config = UserTurnCompletionConfig() + # The context should contain the new messages plus the re-injected instructions + assert len(context.messages) == 3 + assert context.messages[0]["content"] == "You are a helpful assistant." + assert context.messages[1]["content"] == "Hello!" + assert context.messages[2]["role"] == "system" + assert context.messages[2]["content"] == config.completion_instructions + async def test_default_user_turn_strategies(self): context = LLMContext() user_aggregator = LLMUserAggregator( @@ -428,6 +453,44 @@ class TestLLMUserAggregator(unittest.IsolatedAsyncioTestCase): ignore_start=False, ) + async def test_interim_transcription_not_pushed_downstream(self): + """InterimTranscriptionFrame should be consumed and not pushed downstream.""" + context = LLMContext() + pipeline = Pipeline([LLMUserAggregator(context)]) + + frames_to_send = [ + InterimTranscriptionFrame(text="Hel", user_id="", timestamp="now"), + InterimTranscriptionFrame(text="Hello", user_id="", timestamp="now"), + ] + # The interim transcription triggers a user turn start via the default + # TranscriptionUserTurnStartStrategy, so we expect turn-related frames + # but NOT the InterimTranscriptionFrame itself. 
+ expected_down_frames = [ + UserStartedSpeakingFrame, + InterruptionFrame, + ] + (down_frames, _) = await run_test( + pipeline, + frames_to_send=frames_to_send, + expected_down_frames=expected_down_frames, + ) + self.assertFalse(any(isinstance(f, InterimTranscriptionFrame) for f in down_frames)) + + async def test_translation_not_pushed_downstream(self): + """TranslationFrame should be consumed and not pushed downstream.""" + context = LLMContext() + pipeline = Pipeline([LLMUserAggregator(context)]) + + frames_to_send = [ + TranslationFrame(text="Hola!", user_id="", timestamp="now", language="es"), + ] + # No downstream frames expected — translations are consumed. + await run_test( + pipeline, + frames_to_send=frames_to_send, + expected_down_frames=[], + ) + class TestLLMAssistantAggregator(unittest.IsolatedAsyncioTestCase): async def test_empty(self): diff --git a/tests/test_context_summarization.py b/tests/test_context_summarization.py index 87aaa74d3..10223a606 100644 --- a/tests/test_context_summarization.py +++ b/tests/test_context_summarization.py @@ -6,15 +6,18 @@ """Tests for context summarization feature.""" +import asyncio import unittest -from unittest.mock import AsyncMock, MagicMock, patch +from unittest.mock import AsyncMock -from pipecat.frames.frames import LLMContextSummaryRequestFrame -from pipecat.processors.aggregators.llm_context import LLMContext +from pipecat.frames.frames import LLMContextSummaryRequestFrame, LLMContextSummaryResultFrame +from pipecat.processors.aggregators.llm_context import LLMContext, LLMSpecificMessage from pipecat.services.llm_service import LLMService from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, LLMContextSummarizationConfig, LLMContextSummarizationUtil, + LLMContextSummaryConfig, ) @@ -166,43 +169,109 @@ class TestContextSummarizationMixin(unittest.TestCase): self.assertIn("USER: First part Second part", transcript) -class 
TestLLMContextSummarizationConfig(unittest.TestCase): - """Tests for LLMContextSummarizationConfig.""" +class TestLLMContextSummaryConfig(unittest.TestCase): + """Tests for LLMContextSummaryConfig.""" def test_default_config(self): """Test default configuration values.""" - config = LLMContextSummarizationConfig() + config = LLMContextSummaryConfig() - self.assertEqual(config.max_context_tokens, 8000) - self.assertEqual(config.max_unsummarized_messages, 20) + self.assertEqual(config.target_context_tokens, 6000) self.assertEqual(config.min_messages_after_summary, 4) self.assertIsNone(config.summarization_prompt) def test_custom_config(self): """Test custom configuration.""" - config = LLMContextSummarizationConfig( - max_context_tokens=2500, + config = LLMContextSummaryConfig( target_context_tokens=2000, - max_unsummarized_messages=15, min_messages_after_summary=4, summarization_prompt="Custom prompt", ) - self.assertEqual(config.max_context_tokens, 2500) self.assertEqual(config.target_context_tokens, 2000) - self.assertEqual(config.max_unsummarized_messages, 15) self.assertEqual(config.min_messages_after_summary, 4) self.assertEqual(config.summary_prompt, "Custom prompt") def test_summary_prompt_property(self): """Test summary_prompt property uses default when None.""" - config = LLMContextSummarizationConfig() + config = LLMContextSummaryConfig() self.assertIn("summarizing a conversation", config.summary_prompt.lower()) - config_with_custom = LLMContextSummarizationConfig(summarization_prompt="Custom") + config_with_custom = LLMContextSummaryConfig(summarization_prompt="Custom") self.assertEqual(config_with_custom.summary_prompt, "Custom") +class TestLLMAutoContextSummarizationConfig(unittest.TestCase): + """Tests for LLMAutoContextSummarizationConfig.""" + + def test_default_config(self): + """Test default configuration values.""" + config = LLMAutoContextSummarizationConfig() + + self.assertEqual(config.max_context_tokens, 8000) + 
self.assertEqual(config.max_unsummarized_messages, 20) + self.assertEqual(config.summary_config.target_context_tokens, 6000) + self.assertEqual(config.summary_config.min_messages_after_summary, 4) + + def test_custom_config(self): + """Test custom configuration.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=2500, + max_unsummarized_messages=15, + summary_config=LLMContextSummaryConfig( + target_context_tokens=2000, + min_messages_after_summary=4, + summarization_prompt="Custom prompt", + ), + ) + + self.assertEqual(config.max_context_tokens, 2500) + self.assertEqual(config.max_unsummarized_messages, 15) + self.assertEqual(config.summary_config.target_context_tokens, 2000) + self.assertEqual(config.summary_config.min_messages_after_summary, 4) + self.assertEqual(config.summary_config.summary_prompt, "Custom prompt") + + def test_target_tokens_auto_adjusted(self): + """Test that target_context_tokens is auto-adjusted when it exceeds max.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=1000, + summary_config=LLMContextSummaryConfig(target_context_tokens=9000), + ) + self.assertLessEqual(config.summary_config.target_context_tokens, config.max_context_tokens) + + +class TestLLMContextSummarizationConfigDeprecated(unittest.TestCase): + """Tests for deprecated LLMContextSummarizationConfig.""" + + def test_emits_deprecation_warning(self): + """Test that instantiating the deprecated config emits a DeprecationWarning.""" + with self.assertWarns(DeprecationWarning): + LLMContextSummarizationConfig() + + def test_to_auto_config(self): + """Test conversion to the new LLMAutoContextSummarizationConfig.""" + import warnings + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + old_config = LLMContextSummarizationConfig( + max_context_tokens=2500, + target_context_tokens=2000, + max_unsummarized_messages=15, + min_messages_after_summary=4, + summarization_prompt="Custom", + ) + + new_config = 
old_config.to_auto_config() + + self.assertIsInstance(new_config, LLMAutoContextSummarizationConfig) + self.assertEqual(new_config.max_context_tokens, 2500) + self.assertEqual(new_config.max_unsummarized_messages, 15) + self.assertEqual(new_config.summary_config.target_context_tokens, 2000) + self.assertEqual(new_config.summary_config.min_messages_after_summary, 4) + self.assertEqual(new_config.summary_config.summarization_prompt, "Custom") + + class TestFunctionCallHandling(unittest.TestCase): """Tests for function call handling in summarization.""" @@ -601,6 +670,324 @@ class TestSummaryGenerationExceptions(unittest.IsolatedAsyncioTestCase): self.assertGreater(last_index, -1) self.assertEqual(last_index, 1) # Should be the index of the last summarized message + async def test_generate_summary_task_timeout(self): + """Test that _generate_summary_task handles timeout correctly.""" + llm_service = LLMService() + + # Mock _generate_summary to hang + async def slow_summary(frame): + await asyncio.sleep(10) + return ("summary", 1) + + llm_service._generate_summary = slow_summary + + broadcast_calls = [] + + async def mock_broadcast(frame_class, **kwargs): + broadcast_calls.append((frame_class, kwargs)) + + llm_service.broadcast_frame = mock_broadcast + llm_service.push_error = AsyncMock() + + context = LLMContext() + context.add_message({"role": "user", "content": "Message 1"}) + context.add_message({"role": "assistant", "content": "Response 1"}) + context.add_message({"role": "user", "content": "Message 2"}) + + frame = LLMContextSummaryRequestFrame( + request_id="timeout_test", + context=context, + min_messages_to_keep=1, + target_context_tokens=1000, + summarization_prompt="Summarize this", + summarization_timeout=0.1, # Very short timeout + ) + + await llm_service._generate_summary_task(frame) + + # Should have broadcast an error result + self.assertEqual(len(broadcast_calls), 1) + _, kwargs = broadcast_calls[0] + self.assertEqual(kwargs["request_id"], 
"timeout_test") + self.assertEqual(kwargs["summary"], "") + self.assertEqual(kwargs["last_summarized_index"], -1) + # error is None for timeout path (push_error is called instead) + self.assertIsNone(kwargs["error"]) + + # push_error should have been called with timeout message + llm_service.push_error.assert_called_once() + call_args = llm_service.push_error.call_args + error_msg = call_args.kwargs.get("error_msg") or call_args.args[0] + self.assertIn("timed out", error_msg) + + +class TestDedicatedLLMSummarization(unittest.IsolatedAsyncioTestCase): + """Tests for dedicated LLM summarization in LLMContextSummarizer.""" + + async def asyncSetUp(self): + from pipecat.utils.asyncio.task_manager import TaskManager, TaskManagerParams + + self.task_manager = TaskManager() + self.task_manager.setup(TaskManagerParams(loop=asyncio.get_running_loop())) + + def _create_context_and_config(self, dedicated_llm): + """Create a context with enough messages and a config with a dedicated LLM.""" + context = LLMContext() + for i in range(10): + context.add_message( + {"role": "user", "content": f"Test message {i} that adds tokens to context."} + ) + + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, # Very low to trigger easily + summary_config=LLMContextSummaryConfig( + llm=dedicated_llm, + summarization_timeout=5.0, + ), + ) + return context, config + + async def test_dedicated_llm_success(self): + """Test that dedicated LLM generates summary and applies result.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer + + dedicated_llm = LLMService() + dedicated_llm._generate_summary = AsyncMock(return_value=("Dedicated summary", 5)) + + context, config = self._create_context_and_config(dedicated_llm) + original_message_count = len(context.messages) + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) + + # Track whether on_request_summarization event fires (it should 
NOT) + event_fired = False + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal event_fired + event_fired = True + + # Trigger summarization via LLM response start + from pipecat.frames.frames import LLMFullResponseStartFrame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Wait for the background task to complete + await asyncio.sleep(0.1) + + # The event should NOT have fired (dedicated LLM handles it internally) + self.assertFalse(event_fired) + + # Verify the dedicated LLM was called + dedicated_llm._generate_summary.assert_called_once() + + # Verify summary was applied to context (message count should decrease) + self.assertLess(len(context.messages), original_message_count) + + # Verify summary message is present + summary_messages = [ + msg for msg in context.messages if "Conversation summary:" in msg.get("content", "") + ] + self.assertEqual(len(summary_messages), 1) + self.assertIn("Dedicated summary", summary_messages[0]["content"]) + + await summarizer.cleanup() + + async def test_dedicated_llm_timeout(self): + """Test that dedicated LLM timeout produces error and clears state.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer + + dedicated_llm = LLMService() + + async def slow_summary(frame): + await asyncio.sleep(10) + return ("summary", 1) + + dedicated_llm._generate_summary = slow_summary + + context, config = self._create_context_and_config(dedicated_llm) + config.summary_config.summarization_timeout = 0.1 # Very short timeout + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) + + original_message_count = len(context.messages) + + # Trigger summarization + from pipecat.frames.frames import LLMFullResponseStartFrame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Wait for the background task to complete (timeout + some buffer) + await 
asyncio.sleep(0.3) + + # Context should be unchanged (timeout = error = no summary applied) + self.assertEqual(len(context.messages), original_message_count) + + # Summarization state should be cleared so new requests can be made + self.assertFalse(summarizer._summarization_in_progress) + + await summarizer.cleanup() + + async def test_dedicated_llm_exception(self): + """Test that dedicated LLM exceptions produce error and clear state.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer + + dedicated_llm = LLMService() + dedicated_llm._generate_summary = AsyncMock( + side_effect=RuntimeError("LLM connection failed") + ) + + context, config = self._create_context_and_config(dedicated_llm) + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) + + original_message_count = len(context.messages) + + # Trigger summarization + from pipecat.frames.frames import LLMFullResponseStartFrame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Wait for the background task to complete + await asyncio.sleep(0.1) + + # Context should be unchanged (exception = error = no summary applied) + self.assertEqual(len(context.messages), original_message_count) + + # Summarization state should be cleared + self.assertFalse(summarizer._summarization_in_progress) + + await summarizer.cleanup() + + async def test_dedicated_llm_does_not_emit_event(self): + """Test that summarizer does NOT emit on_request_summarization when dedicated LLM is set.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer + + dedicated_llm = LLMService() + dedicated_llm._generate_summary = AsyncMock(return_value=("Summary", 1)) + + context, config = self._create_context_and_config(dedicated_llm) + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) + + event_fired = False + + 
@summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal event_fired + event_fired = True + + from pipecat.frames.frames import LLMFullResponseStartFrame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + await asyncio.sleep(0.1) + + self.assertFalse(event_fired) + + await summarizer.cleanup() + + async def test_no_dedicated_llm_emits_event(self): + """Test that summarizer emits on_request_summarization when no dedicated LLM.""" + from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer + + context = LLMContext() + for i in range(10): + context.add_message( + {"role": "user", "content": f"Test message {i} that adds tokens to context."} + ) + + config = LLMAutoContextSummarizationConfig(max_context_tokens=50) + summarizer = LLMContextSummarizer(context=context, config=config) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + from pipecat.frames.frames import LLMFullResponseStartFrame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + self.assertIsNotNone(request_frame) + self.assertIsInstance(request_frame, LLMContextSummaryRequestFrame) + + await summarizer.cleanup() + + +class TestLLMSpecificMessageHandling(unittest.TestCase): + """Tests that LLMSpecificMessage objects are correctly skipped in summarization.""" + + def test_estimate_context_tokens_skips_specific_messages(self): + """Test that estimate_context_tokens skips LLMSpecificMessage objects.""" + context = LLMContext() + context.add_message({"role": "user", "content": "Hello"}) + context.add_message(LLMSpecificMessage(llm="google", message={})) + context.add_message({"role": "assistant", "content": "Hi there"}) + + tokens_with_specific = LLMContextSummarizationUtil.estimate_context_tokens(context) 
+ + context_without = LLMContext() + context_without.add_message({"role": "user", "content": "Hello"}) + context_without.add_message({"role": "assistant", "content": "Hi there"}) + tokens_without = LLMContextSummarizationUtil.estimate_context_tokens(context_without) + + self.assertEqual(tokens_with_specific, tokens_without) + + def test_get_messages_to_summarize_with_specific_messages(self): + """Test that get_messages_to_summarize handles LLMSpecificMessage objects.""" + context = LLMContext() + context.add_message({"role": "system", "content": "System prompt"}) + context.add_message(LLMSpecificMessage(llm="google", message={})) + context.add_message({"role": "user", "content": "Message 1"}) + context.add_message({"role": "assistant", "content": "Response 1"}) + context.add_message(LLMSpecificMessage(llm="google", message={})) + context.add_message({"role": "user", "content": "Message 2"}) + context.add_message({"role": "assistant", "content": "Response 2"}) + + result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 2) + + self.assertEqual(len(result.messages), 4) + self.assertEqual(result.last_summarized_index, 4) + + def test_format_messages_skips_specific_messages(self): + """Test that format_messages_for_summary skips LLMSpecificMessage objects.""" + messages = [ + {"role": "user", "content": "Hello"}, + LLMSpecificMessage(llm="google", message={}), + {"role": "assistant", "content": "Hi there"}, + ] + + transcript = LLMContextSummarizationUtil.format_messages_for_summary(messages) + + self.assertIn("USER: Hello", transcript) + self.assertIn("ASSISTANT: Hi there", transcript) + + def test_function_call_tracking_skips_specific_messages(self): + """Test that _get_function_calls_in_progress_index skips LLMSpecificMessage.""" + messages = [ + {"role": "user", "content": "What time is it?"}, + LLMSpecificMessage(llm="google", message={}), + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + 
"function": {"name": "get_time", "arguments": "{}"}, + } + ], + }, + LLMSpecificMessage(llm="google", message={}), + {"role": "tool", "tool_call_id": "call_123", "content": '{"time": "10:30 AM"}'}, + ] + + result = LLMContextSummarizationUtil._get_function_calls_in_progress_index(messages, 0) + self.assertEqual(result, -1) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_frame_processor.py b/tests/test_frame_processor.py index 138c8e6d8..a875741e3 100644 --- a/tests/test_frame_processor.py +++ b/tests/test_frame_processor.py @@ -9,8 +9,6 @@ import unittest from dataclasses import dataclass, field from typing import List -from loguru import logger - from pipecat.frames.frames import ( DataFrame, EndFrame, @@ -85,50 +83,38 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): assert before_push_called assert after_push_called - async def test_interruption_and_wait(self): - class DelayFrameProcessor(FrameProcessor): - """This processors just gives time to the event loop to change - between tasks. 
Otherwise things happen to fast.""" - - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - await asyncio.sleep(0.1) - await self.push_frame(frame, direction) + async def test_broadcast_interruption(self): + """Test that broadcast_interruption() pushes InterruptionFrame both + directions and allows subsequent code to run.""" class InterruptFrameProcessor(FrameProcessor): async def process_frame(self, frame: Frame, direction: FrameDirection): await super().process_frame(frame, direction) if isinstance(frame, TextFrame): - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() await self.push_frame(OutputTransportMessageUrgentFrame(message=frame.text)) else: await self.push_frame(frame, direction) - pipeline = Pipeline([DelayFrameProcessor(), InterruptFrameProcessor()]) + pipeline = Pipeline([InterruptFrameProcessor()]) frames_to_send = [ - # Just a random interruption to make sure we don't clear anything - # before the actual `InterruptionTaskFrame` interruption. - InterruptionFrame(), - # This will generate an `InterruptionTaskFrame` and will wait for an - # `InterruptionFrame`. TextFrame(text="Hello from Pipecat!"), - # Just give time for everything to complete. 
SleepFrame(sleep=0.5), - EndFrame(), ] expected_down_frames = [ - InterruptionFrame, InterruptionFrame, OutputTransportMessageUrgentFrame, - EndFrame, + ] + expected_up_frames = [ + InterruptionFrame, ] await run_test( pipeline, frames_to_send=frames_to_send, expected_down_frames=expected_down_frames, - send_end_frame=False, + expected_up_frames=expected_up_frames, ) async def test_interruptible_frames(self): @@ -454,33 +440,20 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): stop_frames = [f for f in received_frames if isinstance(f, StopFrame)] self.assertEqual(len(stop_frames), 1, "StopFrame should survive interruption") - async def test_interruption_frame_complete_sets_event(self): - """Test that InterruptionFrame.complete() sets the event.""" - event = asyncio.Event() - frame = InterruptionFrame(event=event) - self.assertFalse(event.is_set()) - frame.complete() - self.assertTrue(event.is_set()) - - async def test_interruption_frame_complete_without_event(self): - """Test that InterruptionFrame.complete() is safe without an event.""" - frame = InterruptionFrame() - frame.complete() # Should not raise - - async def test_interruption_event_set_at_pipeline_sink(self): - """Test that the event from push_interruption_task_frame_and_wait() - is set when the InterruptionFrame reaches the pipeline sink.""" - event_was_set = False + async def test_broadcast_interruption_allows_subsequent_code(self): + """Test that broadcast_interruption() returns immediately, allowing the + caller to run code afterwards (e.g. 
push an urgent frame).""" + code_after_ran = False class InterruptOnTextProcessor(FrameProcessor): async def process_frame(self, frame: Frame, direction: FrameDirection): - nonlocal event_was_set + nonlocal code_after_ran await super().process_frame(frame, direction) if isinstance(frame, TextFrame): - await self.push_interruption_task_frame_and_wait() + await self.broadcast_interruption() - event_was_set = True + code_after_ran = True await self.push_frame(OutputTransportMessageUrgentFrame(message="done")) else: await self.push_frame(frame, direction) @@ -499,63 +472,7 @@ class TestFrameProcessor(unittest.IsolatedAsyncioTestCase): frames_to_send=frames_to_send, expected_down_frames=expected_down_frames, ) - self.assertTrue(event_was_set, "Event should be set after InterruptionFrame completes") - - async def test_interruption_completion_timeout_warning(self): - """Test that a warning is logged when an InterruptionFrame is blocked - and never reaches the pipeline sink.""" - warnings = [] - handler_id = logger.add( - lambda msg: warnings.append(str(msg)), level="WARNING", format="{message}" - ) - - try: - - class BlockInterruptionProcessor(FrameProcessor): - """Blocks InterruptionFrames, completing them after a delay.""" - - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - if isinstance(frame, InterruptionFrame): - # Complete after the timeout so the warning fires - # but the test doesn't hang. 
- async def delayed_complete(): - await asyncio.sleep(1.0) - frame.complete() - - asyncio.create_task(delayed_complete()) - return - await self.push_frame(frame, direction) - - class InterruptOnTextProcessor(FrameProcessor): - async def process_frame(self, frame: Frame, direction: FrameDirection): - await super().process_frame(frame, direction) - if isinstance(frame, TextFrame): - await self.push_interruption_task_frame_and_wait(timeout=0.5) - await self.push_frame(OutputTransportMessageUrgentFrame(message="done")) - else: - await self.push_frame(frame, direction) - - pipeline = Pipeline([BlockInterruptionProcessor(), InterruptOnTextProcessor()]) - - frames_to_send = [ - TextFrame(text="trigger"), - ] - expected_down_frames = [ - OutputTransportMessageUrgentFrame, - ] - await run_test( - pipeline, - frames_to_send=frames_to_send, - expected_down_frames=expected_down_frames, - ) - finally: - logger.remove(handler_id) - - self.assertTrue( - any("InterruptionFrame has not completed" in w for w in warnings), - "Expected a timeout warning about InterruptionFrame not completing", - ) + self.assertTrue(code_after_ran, "Code after broadcast_interruption() should execute") if __name__ == "__main__": diff --git a/tests/test_llm_context_summarizer.py b/tests/test_llm_context_summarizer.py index 7555a8762..7e8b326f9 100644 --- a/tests/test_llm_context_summarizer.py +++ b/tests/test_llm_context_summarizer.py @@ -12,11 +12,18 @@ from pipecat.frames.frames import ( LLMContextSummaryRequestFrame, LLMContextSummaryResultFrame, LLMFullResponseStartFrame, + LLMSummarizeContextFrame, ) from pipecat.processors.aggregators.llm_context import LLMContext -from pipecat.processors.aggregators.llm_context_summarizer import LLMContextSummarizer +from pipecat.processors.aggregators.llm_context_summarizer import ( + LLMContextSummarizer, + SummaryAppliedEvent, +) from pipecat.utils.asyncio.task_manager import TaskManager, TaskManagerParams -from pipecat.utils.context.llm_context_summarization 
import LLMContextSummarizationConfig +from pipecat.utils.context.llm_context_summarization import ( + LLMAutoContextSummarizationConfig, + LLMContextSummaryConfig, +) class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): @@ -32,7 +39,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summarization_triggered_by_token_limit(self): """Test that summarization is triggered when token limit is reached.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=100, # Very low to trigger easily max_unsummarized_messages=100, # High so it doesn't trigger by message count ) @@ -68,7 +75,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summarization_triggered_by_message_count(self): """Test that summarization is triggered when message count threshold is reached.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=100000, # Very high so it doesn't trigger by tokens max_unsummarized_messages=5, # Low to trigger easily ) @@ -98,7 +105,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summarization_not_triggered_below_thresholds(self): """Test that summarization is not triggered when below thresholds.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=10000, max_unsummarized_messages=20, ) @@ -127,7 +134,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summarization_in_progress_prevents_duplicate(self): """Test that a summarization in progress prevents triggering another.""" - config = LLMContextSummarizationConfig( + config = LLMAutoContextSummarizationConfig( max_context_tokens=50, # Very low max_unsummarized_messages=100, ) @@ -158,7 +165,10 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_summary_result_handling(self): 
"""Test that summary results are processed and applied correctly.""" - config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -205,7 +215,7 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_interruption_cancels_summarization(self): """Test that an interruption cancels pending summarization.""" - config = LLMContextSummarizationConfig(max_context_tokens=50) + config = LLMAutoContextSummarizationConfig(max_context_tokens=50) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -235,7 +245,10 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): async def test_stale_summary_result_ignored(self): """Test that stale summary results are ignored.""" - config = LLMContextSummarizationConfig(max_context_tokens=50, min_messages_after_summary=2) + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) summarizer = LLMContextSummarizer(context=self.context, config=config) await summarizer.setup(self.task_manager) @@ -291,6 +304,370 @@ class TestLLMContextSummarizer(unittest.IsolatedAsyncioTestCase): await summarizer.cleanup() + async def test_manual_summarization_via_frame(self): + """Test that LLMSummarizeContextFrame triggers summarization on demand.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=100000, # High — auto trigger would never fire + max_unsummarized_messages=100, + ) + + summarizer = LLMContextSummarizer( + context=self.context, + config=config, + auto_trigger=False, # Disable auto; only manual requests should work + ) + await 
summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + # Add messages + for i in range(5): + self.context.add_message({"role": "user", "content": f"Message {i}"}) + + # Auto-trigger should NOT fire even on LLMFullResponseStartFrame + await summarizer.process_frame(LLMFullResponseStartFrame()) + self.assertIsNone(request_frame) + + # Manual trigger via LLMSummarizeContextFrame should fire + await summarizer.process_frame(LLMSummarizeContextFrame()) + self.assertIsNotNone(request_frame) + self.assertIsInstance(request_frame, LLMContextSummaryRequestFrame) + + # The request must have a valid request_id and carry the current context + self.assertTrue(request_frame.request_id) + self.assertEqual(request_frame.context, self.context) + + await summarizer.cleanup() + + async def test_manual_summarization_with_config_override(self): + """Test that LLMSummarizeContextFrame can override default summary config.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=100000, + summary_config=LLMContextSummaryConfig( + target_context_tokens=6000, + min_messages_after_summary=4, + ), + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + for i in range(5): + self.context.add_message({"role": "user", "content": f"Message {i}"}) + + # Push a manual frame with custom config overrides + custom_config = LLMContextSummaryConfig( + target_context_tokens=500, + min_messages_after_summary=1, + ) + await summarizer.process_frame(LLMSummarizeContextFrame(config=custom_config)) + + self.assertIsNotNone(request_frame) + # The request should use the 
overridden values + self.assertEqual(request_frame.target_context_tokens, 500) + self.assertEqual(request_frame.min_messages_to_keep, 1) + + await summarizer.cleanup() + + async def test_manual_summarization_blocked_when_in_progress(self): + """Test that a second LLMSummarizeContextFrame is ignored while one is in progress.""" + config = LLMAutoContextSummarizationConfig(max_context_tokens=100000) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_count = 0 + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_count + request_count += 1 + + for i in range(5): + self.context.add_message({"role": "user", "content": f"Message {i}"}) + + # First manual request + await summarizer.process_frame(LLMSummarizeContextFrame()) + self.assertEqual(request_count, 1) + + # Second manual request while first is in progress — should be ignored + await summarizer.process_frame(LLMSummarizeContextFrame()) + self.assertEqual(request_count, 1) + + await summarizer.cleanup() + + async def test_summary_message_role_is_user(self): + """Test that the summary message uses the user role.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + # Add messages and trigger summarization + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + self.assertIsNotNone(request_frame) + + # Simulate receiving a summary result + summary_result = 
LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="This is a test summary.", + last_summarized_index=5, + ) + await summarizer.process_frame(summary_result) + + # Find the summary message and verify its role is "user" + summary_msg = next( + (msg for msg in self.context.messages if "summary" in msg.get("content", "").lower()), + None, + ) + self.assertIsNotNone(summary_msg) + self.assertEqual(summary_msg["role"], "user") + + await summarizer.cleanup() + + async def test_summary_message_default_template(self): + """Test that the default summary_message_template is used.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + summary_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="Key facts from conversation.", + last_summarized_index=5, + ) + await summarizer.process_frame(summary_result) + + # Default template wraps with "Conversation summary: {summary}" + summary_msg = next( + ( + msg + for msg in self.context.messages + if "Conversation summary:" in msg.get("content", "") + ), + None, + ) + self.assertIsNotNone(summary_msg) + self.assertEqual( + summary_msg["content"], "Conversation summary: Key facts from conversation." 
+ ) + + await summarizer.cleanup() + + async def test_summary_message_custom_template(self): + """Test that a custom summary_message_template is applied.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig( + min_messages_after_summary=2, + summary_message_template="\n{summary}\n", + ), + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + summary_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="Key facts from conversation.", + last_summarized_index=5, + ) + await summarizer.process_frame(summary_result) + + # Custom template wraps with XML tags + summary_msg = next( + (msg for msg in self.context.messages if "" in msg.get("content", "")), + None, + ) + self.assertIsNotNone(summary_msg) + self.assertEqual( + summary_msg["content"], + "\nKey facts from conversation.\n", + ) + + await summarizer.cleanup() + + async def test_on_summary_applied_event(self): + """Test that on_summary_applied event fires with correct data.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + # Add messages (1 system + 10 user = 11 total) + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + applied_event = None + + @summarizer.event_handler("on_request_summarization") + async def 
on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event): + nonlocal applied_event + applied_event = event + + original_count = len(self.context.messages) # 11 + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Summarize up to index 7 (system=0, user1..user7), keep last 3 (user8, user9, user10) + summary_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="Test summary.", + last_summarized_index=7, + ) + await summarizer.process_frame(summary_result) + + # Allow async event handler to complete + await asyncio.sleep(0.05) + + # Verify event was fired + self.assertIsNotNone(applied_event) + self.assertIsInstance(applied_event, SummaryAppliedEvent) + self.assertEqual(applied_event.original_message_count, original_count) + + # After summarization: system + summary + 3 recent = 5 + self.assertEqual(applied_event.new_message_count, 5) + + # Summarized messages: indices 1-7 = 7 messages (excluding system at index 0) + self.assertEqual(applied_event.summarized_message_count, 7) + + # Preserved: system (1) + recent messages after index 7 (3) = 4 + self.assertEqual(applied_event.preserved_message_count, 4) + + await summarizer.cleanup() + + async def test_on_summary_applied_not_fired_on_error(self): + """Test that on_summary_applied event is NOT fired when summarization fails.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(min_messages_after_summary=2), + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message."}) + + request_frame = None + applied_event = None + + @summarizer.event_handler("on_request_summarization") + async def 
on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + @summarizer.event_handler("on_summary_applied") + async def on_summary_applied(summarizer, event): + nonlocal applied_event + applied_event = event + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + # Send a result with an error + error_result = LLMContextSummaryResultFrame( + request_id=request_frame.request_id, + summary="", + last_summarized_index=-1, + error="Summarization timed out", + ) + await summarizer.process_frame(error_result) + + await asyncio.sleep(0.05) + + # Event should NOT have fired + self.assertIsNone(applied_event) + + await summarizer.cleanup() + + async def test_request_frame_includes_timeout(self): + """Test that the request frame includes the configured summarization_timeout.""" + config = LLMAutoContextSummarizationConfig( + max_context_tokens=50, + summary_config=LLMContextSummaryConfig(summarization_timeout=60.0), + ) + + summarizer = LLMContextSummarizer(context=self.context, config=config) + await summarizer.setup(self.task_manager) + + request_frame = None + + @summarizer.event_handler("on_request_summarization") + async def on_request_summarization(summarizer, frame): + nonlocal request_frame + request_frame = frame + + for i in range(10): + self.context.add_message({"role": "user", "content": "Test message to add tokens."}) + + await summarizer.process_frame(LLMFullResponseStartFrame()) + + self.assertIsNotNone(request_frame) + self.assertEqual(request_frame.summarization_timeout, 60.0) + + await summarizer.cleanup() + if __name__ == "__main__": unittest.main() diff --git a/tests/test_pattern_pair_aggregator.py b/tests/test_pattern_pair_aggregator.py index bcc8d18f7..6c9e23552 100644 --- a/tests/test_pattern_pair_aggregator.py +++ b/tests/test_pattern_pair_aggregator.py @@ -194,5 +194,66 @@ class TestPatternPairAggregator(unittest.IsolatedAsyncioTestCase): self.assertEqual(self.aggregator.text.text, "") +class 
TestPatternPairAggregatorTokenMode(unittest.IsolatedAsyncioTestCase): + def setUp(self): + from pipecat.utils.text.base_text_aggregator import AggregationType + + self.aggregator = PatternPairAggregator(aggregation_type=AggregationType.TOKEN) + self.handler = AsyncMock() + self.aggregator.add_pattern( + type="think", + start_pattern="", + end_pattern="", + action=MatchAction.REMOVE, + ) + self.aggregator.on_pattern_match("think", self.handler) + + async def test_token_no_patterns(self): + """Non-pattern text passes through as TOKEN, one per aggregate call.""" + results = [] + for token in ["Hello", " world", "."]: + async for r in self.aggregator.aggregate(token): + results.append(r) + + self.assertEqual(len(results), 3) + self.assertEqual(results[0].text, "Hello") + self.assertEqual(results[1].text, " world") + self.assertEqual(results[2].text, ".") + for r in results: + self.assertEqual(r.type, "token") + + async def test_token_pattern_detection(self): + """Pattern detection still works with word-by-word token delivery.""" + results = [] + for token in ["Hi ", "", "secret", "", " bye"]: + async for r in self.aggregator.aggregate(token): + results.append(r) + + # Handler called once when the pattern completes + self.handler.assert_called_once() + call_args = self.handler.call_args[0][0] + self.assertEqual(call_args.text, "secret") + + # "Hi " yields before pattern starts, pattern is removed, " bye" yields after + self.assertEqual(len(results), 2) + self.assertEqual(results[0].text, "Hi ") + self.assertEqual(results[0].type, "token") + self.assertEqual(results[1].text, " bye") + self.assertEqual(results[1].type, "token") + + async def test_token_incomplete_pattern_buffers(self): + """Incomplete pattern is buffered across calls, not leaked to output.""" + results = [] + for token in ["Hi ", "", "partial"]: + async for r in self.aggregator.aggregate(token): + results.append(r) + + # Only "Hi " should be yielded; "partial" stays buffered + 
self.assertEqual(len(results), 1) + self.assertEqual(results[0].text, "Hi ") + self.assertEqual(results[0].type, "token") + self.handler.assert_not_called() + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 71121a3fc..04601bf14 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -292,6 +292,63 @@ class TestPipelineTask(unittest.IsolatedAsyncioTestCase): assert upstream_received assert downstream_received + async def test_task_queue_frame_upstream(self): + upstream_received = False + + pipeline = Pipeline([IdentityFilter()]) + task = PipelineTask(pipeline, cancel_on_idle_timeout=False) + task.set_reached_upstream_filter((TextFrame,)) + + @task.event_handler("on_frame_reached_upstream") + async def on_frame_reached_upstream(task, frame): + nonlocal upstream_received + if isinstance(frame, TextFrame) and frame.text == "Hello Upstream!": + upstream_received = True + + @task.event_handler("on_pipeline_started") + async def on_pipeline_started(task, frame): + await task.queue_frame(TextFrame(text="Hello Upstream!"), FrameDirection.UPSTREAM) + + try: + await asyncio.wait_for( + task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), + timeout=1.0, + ) + except asyncio.TimeoutError: + pass + + assert upstream_received + + async def test_task_queue_frames_upstream(self): + upstream_texts = [] + + pipeline = Pipeline([IdentityFilter()]) + task = PipelineTask(pipeline, cancel_on_idle_timeout=False) + task.set_reached_upstream_filter((TextFrame,)) + + @task.event_handler("on_frame_reached_upstream") + async def on_frame_reached_upstream(task, frame): + if isinstance(frame, TextFrame): + upstream_texts.append(frame.text) + + @task.event_handler("on_pipeline_started") + async def on_pipeline_started(task, frame): + await task.queue_frames( + [TextFrame(text="First"), TextFrame(text="Second")], + FrameDirection.UPSTREAM, + ) + + try: + await asyncio.wait_for( + 
task.run(PipelineTaskParams(loop=asyncio.get_event_loop())), + timeout=1.0, + ) + except asyncio.TimeoutError: + pass + + assert "First" in upstream_texts + assert "Second" in upstream_texts + async def test_task_heartbeats(self): heartbeats_counter = 0 diff --git a/tests/test_piper_tts.py b/tests/test_piper_tts.py index 0ce14bd85..662b9a40c 100644 --- a/tests/test_piper_tts.py +++ b/tests/test_piper_tts.py @@ -125,7 +125,7 @@ async def test_run_piper_tts_error(aiohttp_client): ) frames_to_send = [ - TTSSpeakFrame(text="Error case."), + TTSSpeakFrame(text="Error case.", append_to_context=False), ] expected_down_frames = [AggregatedTextFrame, TTSStoppedFrame, TTSTextFrame] diff --git a/tests/test_settings.py b/tests/test_settings.py new file mode 100644 index 000000000..47cb6e4cf --- /dev/null +++ b/tests/test_settings.py @@ -0,0 +1,532 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +"""Tests for the typed settings infrastructure in pipecat.services.settings.""" + +import pytest +from deepgram import LiveOptions + +from pipecat.services.deepgram.stt import DeepgramSTTSettings +from pipecat.services.deepgram.stt_sagemaker import DeepgramSageMakerSTTSettings +from pipecat.services.settings import ( + NOT_GIVEN, + LLMSettings, + ServiceSettings, + STTSettings, + TTSSettings, + _NotGiven, + is_given, +) + +# --------------------------------------------------------------------------- +# NOT_GIVEN sentinel +# --------------------------------------------------------------------------- + + +class TestNotGiven: + def test_singleton(self): + """NOT_GIVEN is a singleton — every reference is the same object.""" + assert _NotGiven() is _NotGiven() + assert NOT_GIVEN is _NotGiven() + + def test_repr(self): + assert repr(NOT_GIVEN) == "NOT_GIVEN" + + def test_bool_is_false(self): + assert not NOT_GIVEN + assert bool(NOT_GIVEN) is False + + def test_is_given_with_not_given(self): + assert is_given(NOT_GIVEN) is False + + def 
test_is_given_with_none(self): + assert is_given(None) is True + + def test_is_given_with_values(self): + assert is_given(0) is True + assert is_given("") is True + assert is_given(False) is True + assert is_given(42) is True + assert is_given("hello") is True + + +# --------------------------------------------------------------------------- +# ServiceSettings base +# --------------------------------------------------------------------------- + + +class TestServiceSettings: + def test_default_fields_are_not_given(self): + s = ServiceSettings() + assert not is_given(s.model) + assert s.extra == {} + + def test_given_fields_empty_by_default(self): + s = ServiceSettings() + assert s.given_fields() == {} + + def test_given_fields_includes_set_values(self): + s = ServiceSettings(model="gpt-4o") + assert s.given_fields() == {"model": "gpt-4o"} + + def test_given_fields_includes_extra(self): + s = ServiceSettings(model="gpt-4o") + s.extra = {"custom_key": 42} + result = s.given_fields() + assert result == {"model": "gpt-4o", "custom_key": 42} + + def test_copy_is_deep(self): + s = ServiceSettings(model="gpt-4o") + s.extra = {"nested": {"a": 1}} + c = s.copy() + assert c.model == "gpt-4o" + assert c.extra == {"nested": {"a": 1}} + # Mutating the copy shouldn't affect the original + c.extra["nested"]["a"] = 999 + assert s.extra["nested"]["a"] == 1 + + +# --------------------------------------------------------------------------- +# apply_update +# --------------------------------------------------------------------------- + + +class TestApplyUpdate: + def test_apply_update_basic(self): + current = TTSSettings(voice="alice", language="en") + delta = TTSSettings(voice="bob") + changed = current.apply_update(delta) + assert changed.keys() == {"voice"} + assert changed["voice"] == "alice" # old value + assert current.voice == "bob" + assert current.language == "en" + + def test_apply_update_no_change(self): + current = TTSSettings(voice="alice", language="en") + delta = 
TTSSettings(voice="alice") + changed = current.apply_update(delta) + assert changed == {} + assert current.voice == "alice" + + def test_apply_update_not_given_skipped(self): + current = TTSSettings(voice="alice", language="en") + delta = TTSSettings() # all NOT_GIVEN + changed = current.apply_update(delta) + assert changed == {} + assert current.voice == "alice" + assert current.language == "en" + + def test_apply_update_multiple_fields(self): + current = LLMSettings(temperature=0.7, max_tokens=100) + delta = LLMSettings(temperature=0.9, max_tokens=200, top_p=0.95) + changed = current.apply_update(delta) + assert changed.keys() == {"temperature", "max_tokens", "top_p"} + assert changed["temperature"] == 0.7 + assert changed["max_tokens"] == 100 + assert current.temperature == 0.9 + assert current.max_tokens == 200 + assert current.top_p == 0.95 + + def test_apply_update_extra_merged(self): + current = TTSSettings(voice="alice") + current.extra = {"speed": 1.0, "stability": 0.5} + delta = TTSSettings() + delta.extra = {"speed": 1.2} + changed = current.apply_update(delta) + assert "speed" in changed + assert changed["speed"] == 1.0 # old value + assert current.extra == {"speed": 1.2, "stability": 0.5} + + def test_apply_update_extra_no_change(self): + current = TTSSettings(voice="alice") + current.extra = {"speed": 1.0} + delta = TTSSettings() + delta.extra = {"speed": 1.0} + changed = current.apply_update(delta) + assert changed == {} + + def test_apply_update_model_field(self): + current = ServiceSettings(model="old-model") + delta = ServiceSettings(model="new-model") + changed = current.apply_update(delta) + assert changed.keys() == {"model"} + assert changed["model"] == "old-model" + assert current.model == "new-model" + + def test_apply_update_none_is_a_valid_value(self): + """Setting a field to None should be treated as a change from NOT_GIVEN.""" + current = TTSSettings() + delta = TTSSettings(language=None) + changed = current.apply_update(delta) + assert 
"language" in changed + assert current.language is None + + def test_apply_update_none_to_value(self): + current = TTSSettings(language=None) + delta = TTSSettings(language="en") + changed = current.apply_update(delta) + assert "language" in changed + assert changed["language"] is None # old value was None + assert current.language == "en" + + +# --------------------------------------------------------------------------- +# from_mapping +# --------------------------------------------------------------------------- + + +class TestFromMapping: + def test_basic_mapping(self): + s = TTSSettings.from_mapping({"voice": "alice", "language": "en"}) + assert s.voice == "alice" + assert s.language == "en" + assert not is_given(s.model) + + def test_alias_resolution(self): + """'voice_id' is an alias for 'voice' in TTSSettings.""" + s = TTSSettings.from_mapping({"voice_id": "alice"}) + assert s.voice == "alice" + + def test_unknown_keys_go_to_extra(self): + s = TTSSettings.from_mapping({"voice": "alice", "speed": 1.2, "stability": 0.5}) + assert s.voice == "alice" + assert s.extra == {"speed": 1.2, "stability": 0.5} + + def test_model_field(self): + s = LLMSettings.from_mapping({"model": "gpt-4o", "temperature": 0.7}) + assert s.model == "gpt-4o" + assert s.temperature == 0.7 + + def test_empty_mapping(self): + s = ServiceSettings.from_mapping({}) + assert s.given_fields() == {} + + def test_all_unknown_keys(self): + s = ServiceSettings.from_mapping({"foo": 1, "bar": 2}) + assert not is_given(s.model) + assert s.extra == {"foo": 1, "bar": 2} + + def test_llm_settings_from_mapping(self): + s = LLMSettings.from_mapping({"temperature": 0.5, "max_tokens": 1000, "custom_param": True}) + assert s.temperature == 0.5 + assert s.max_tokens == 1000 + assert s.extra == {"custom_param": True} + + def test_stt_settings_from_mapping(self): + s = STTSettings.from_mapping({"language": "fr", "model": "whisper-large"}) + assert s.language == "fr" + assert s.model == "whisper-large" + + +# 
--------------------------------------------------------------------------- +# LLMSettings specifics +# --------------------------------------------------------------------------- + + +class TestLLMSettings: + def test_all_fields_not_given_by_default(self): + s = LLMSettings() + for name in ( + "model", + "temperature", + "max_tokens", + "top_p", + "top_k", + "frequency_penalty", + "presence_penalty", + "seed", + ): + assert not is_given(getattr(s, name)), f"{name} should be NOT_GIVEN" + + def test_given_fields(self): + s = LLMSettings(temperature=0.7, seed=42) + assert s.given_fields() == {"temperature": 0.7, "seed": 42} + + +# --------------------------------------------------------------------------- +# TTSSettings specifics +# --------------------------------------------------------------------------- + + +class TestTTSSettings: + def test_all_fields_not_given_by_default(self): + s = TTSSettings() + for name in ("model", "voice", "language"): + assert not is_given(getattr(s, name)), f"{name} should be NOT_GIVEN" + + def test_aliases_class_var(self): + assert TTSSettings._aliases == {"voice_id": "voice"} + + def test_given_fields(self): + s = TTSSettings(voice="alice") + assert s.given_fields() == {"voice": "alice"} + + +# --------------------------------------------------------------------------- +# STTSettings specifics +# --------------------------------------------------------------------------- + + +class TestSTTSettings: + def test_all_fields_not_given_by_default(self): + s = STTSettings() + for name in ("model", "language"): + assert not is_given(getattr(s, name)), f"{name} should be NOT_GIVEN" + + def test_given_fields(self): + s = STTSettings(language="en", model="whisper-large") + assert s.given_fields() == {"language": "en", "model": "whisper-large"} + + +# --------------------------------------------------------------------------- +# Integration: roundtrip from_mapping → apply_update +# 
--------------------------------------------------------------------------- + + +class TestRoundtrip: + def test_from_mapping_then_apply_update(self): + """Simulate the real flow: dict arrives via frame, gets converted, applied.""" + # Simulating current service state + current = TTSSettings(model="eleven_turbo_v2_5", voice="alice", language="en") + current.extra = {"stability": 0.5, "speed": 1.0} + + # Incoming dict-based update + raw = {"voice_id": "bob", "speed": 1.2} + delta = TTSSettings.from_mapping(raw) + + changed = current.apply_update(delta) + assert changed.keys() == {"voice", "speed"} + assert changed["voice"] == "alice" + assert changed["speed"] == 1.0 + assert current.voice == "bob" + assert current.language == "en" + assert current.extra["speed"] == 1.2 + assert current.extra["stability"] == 0.5 + + def test_from_mapping_preserves_model(self): + current = LLMSettings(model="gpt-4o", temperature=0.7) + delta = LLMSettings.from_mapping({"model": "gpt-4o-mini", "temperature": 0.9}) + changed = current.apply_update(delta) + assert changed.keys() == {"model", "temperature"} + assert changed["model"] == "gpt-4o" + assert current.model == "gpt-4o-mini" + assert current.temperature == 0.9 + + +# --------------------------------------------------------------------------- +# DeepgramSTTSettings: live_options delta merge +# --------------------------------------------------------------------------- + + +class TestDeepgramSTTSettingsApplyUpdate: + def _make_store(self, **lo_kwargs) -> DeepgramSTTSettings: + """Helper to build a store-mode DeepgramSTTSettings.""" + defaults = dict( + encoding="linear16", + channels=1, + interim_results=True, + smart_format=False, + punctuate=True, + profanity_filter=True, + vad_events=False, + ) + defaults.update(lo_kwargs) + s = DeepgramSTTSettings( + model="nova-3-general", + language="en", + live_options=LiveOptions(**defaults), + ) + return s + + def test_apply_update_merges_live_options_as_delta(self): + """Only the given 
fields in the delta LiveOptions are merged.""" + current = self._make_store() + assert current.live_options.punctuate is True + + delta = DeepgramSTTSettings(live_options=LiveOptions(punctuate=False)) + changed = current.apply_update(delta) + + assert current.live_options.punctuate is False + assert "punctuate" in changed + # Other fields are untouched + assert current.live_options.encoding == "linear16" + assert current.live_options.channels == 1 + + def test_apply_update_syncs_model_from_live_options_to_top_level(self): + """model inside live_options delta should sync to top-level model.""" + current = self._make_store() + assert current.model == "nova-3-general" + + delta = DeepgramSTTSettings(live_options=LiveOptions(model="nova-2")) + changed = current.apply_update(delta) + + assert current.model == "nova-2" + assert "model" in changed + + def test_apply_update_syncs_language_from_live_options_to_top_level(self): + """language inside live_options delta should sync to top-level language.""" + current = self._make_store() + assert current.language == "en" + + delta = DeepgramSTTSettings(live_options=LiveOptions(language="es")) + changed = current.apply_update(delta) + + assert current.language == "es" + assert "language" in changed + + def test_apply_update_syncs_top_level_model_into_live_options(self): + """Top-level model change should propagate into stored live_options.""" + current = self._make_store() + assert current.model == "nova-3-general" + + delta = DeepgramSTTSettings(model="nova-2") + changed = current.apply_update(delta) + + assert current.model == "nova-2" + assert current.live_options.model == "nova-2" + assert "model" in changed + + def test_apply_update_syncs_top_level_language_into_live_options(self): + """Top-level language change should propagate into stored live_options.""" + current = self._make_store() + + delta = DeepgramSTTSettings(language="fr") + changed = current.apply_update(delta) + + assert current.language == "fr" + assert 
current.live_options.language == "fr" + assert "language" in changed + + def test_apply_update_no_change(self): + """Delta with same values should report no changes.""" + current = self._make_store() + delta = DeepgramSTTSettings(live_options=LiveOptions(punctuate=True)) + changed = current.apply_update(delta) + assert changed == {} + + def test_apply_update_top_level_model_takes_precedence_over_live_options(self): + """When both top-level model and live_options.model are set, top-level wins.""" + current = self._make_store() + assert current.model == "nova-3-general" + + delta = DeepgramSTTSettings( + model="nova-2", + live_options=LiveOptions(model="nova-3"), + ) + changed = current.apply_update(delta) + + assert current.model == "nova-2" + assert current.live_options.model == "nova-2" + assert "model" in changed + + def test_apply_update_top_level_language_takes_precedence_over_live_options(self): + """When both top-level language and live_options.language are set, top-level wins.""" + current = self._make_store() + assert current.language == "en" + + delta = DeepgramSTTSettings( + language="fr", + live_options=LiveOptions(language="es"), + ) + changed = current.apply_update(delta) + + assert current.language == "fr" + assert current.live_options.language == "fr" + assert "language" in changed + + +class TestDeepgramSTTSettingsFromMapping: + def test_routes_live_options_kwargs(self): + """LiveOptions-valid keys should be collected into live_options.""" + delta = DeepgramSTTSettings.from_mapping({"punctuate": False, "filler_words": True}) + assert is_given(delta.live_options) + assert delta.live_options.punctuate is False + assert delta.live_options.filler_words is True + + def test_routes_model_and_language_to_top_level(self): + """model and language should be top-level fields, not in live_options.""" + delta = DeepgramSTTSettings.from_mapping({"model": "nova-2", "language": "es"}) + assert delta.model == "nova-2" + assert delta.language == "es" + assert not 
is_given(delta.live_options) + + def test_unknown_keys_go_to_extra(self): + """Keys that aren't LiveOptions params or STT fields go to extra.""" + delta = DeepgramSTTSettings.from_mapping({"unknown_param": 42}) + assert delta.extra == {"unknown_param": 42} + assert not is_given(delta.live_options) + + def test_mixed_keys(self): + """model + LiveOptions keys + unknown keys are routed correctly.""" + delta = DeepgramSTTSettings.from_mapping( + {"model": "nova-2", "punctuate": False, "unknown": "val"} + ) + assert delta.model == "nova-2" + assert delta.live_options.punctuate is False + assert delta.extra == {"unknown": "val"} + + def test_roundtrip_from_mapping_apply_update(self): + """Simulate dict-style update: from_mapping -> apply_update.""" + current = DeepgramSTTSettings( + model="nova-3-general", + language="en", + live_options=LiveOptions( + encoding="linear16", + channels=1, + interim_results=True, + punctuate=True, + profanity_filter=True, + vad_events=False, + ), + ) + + raw = {"punctuate": False, "filler_words": True} + delta = DeepgramSTTSettings.from_mapping(raw) + changed = current.apply_update(delta) + + assert current.live_options.punctuate is False + assert current.live_options.filler_words is True + # Unchanged fields stay put + assert current.live_options.encoding == "linear16" + assert current.model == "nova-3-general" + assert "punctuate" in changed + + def test_roundtrip_model_via_dict(self): + """Dict update with model should change top-level and NOT create live_options.""" + current = DeepgramSTTSettings( + model="nova-3-general", + language="en", + live_options=LiveOptions(encoding="linear16", channels=1), + ) + + raw = {"model": "nova-2"} + delta = DeepgramSTTSettings.from_mapping(raw) + changed = current.apply_update(delta) + + assert current.model == "nova-2" + assert current.live_options.model == "nova-2" + assert "model" in changed + + +# --------------------------------------------------------------------------- +# 
DeepgramSageMakerSTTSettings: smoke test that the shared base is inherited +# --------------------------------------------------------------------------- + + +class TestDeepgramSageMakerSTTSettings: + def test_inherits_live_options_behavior(self): + """Smoke test: SageMaker settings inherit the shared base correctly.""" + store = DeepgramSageMakerSTTSettings( + model="nova-3", + language="en", + live_options=LiveOptions(encoding="linear16", channels=1, punctuate=True), + ) + delta = DeepgramSageMakerSTTSettings(live_options=LiveOptions(punctuate=False)) + changed = store.apply_update(delta) + + assert store.live_options.punctuate is False + assert store.live_options.encoding == "linear16" + assert "punctuate" in changed diff --git a/tests/test_simple_text_aggregator.py b/tests/test_simple_text_aggregator.py index 4b3613e27..46c77df42 100644 --- a/tests/test_simple_text_aggregator.py +++ b/tests/test_simple_text_aggregator.py @@ -181,5 +181,39 @@ class TestSimpleTextAggregator(unittest.IsolatedAsyncioTestCase): assert result.text == "こんにちは。" +class TestSimpleTextAggregatorTokenMode(unittest.IsolatedAsyncioTestCase): + def setUp(self): + from pipecat.utils.text.base_text_aggregator import AggregationType + + self.aggregator = SimpleTextAggregator(aggregation_type=AggregationType.TOKEN) + + async def test_token_passthrough(self): + """TOKEN mode yields text immediately without buffering.""" + results = [agg async for agg in self.aggregator.aggregate("Hello")] + assert len(results) == 1 + assert results[0].text == "Hello" + assert results[0].type == "token" + + async def test_token_multiple_calls(self): + """Each aggregate call yields its text independently.""" + r1 = [agg async for agg in self.aggregator.aggregate("Hello ")] + r2 = [agg async for agg in self.aggregator.aggregate("world.")] + assert len(r1) == 1 + assert r1[0].text == "Hello " + assert len(r2) == 1 + assert r2[0].text == "world." 
+ + async def test_token_empty_text(self): + """Empty text yields nothing.""" + results = [agg async for agg in self.aggregator.aggregate("")] + assert len(results) == 0 + + async def test_token_flush_returns_none(self): + """Flush returns None in TOKEN mode since nothing is buffered.""" + await self.aggregator.aggregate("Hello").__anext__() + result = await self.aggregator.flush() + assert result is None + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_skip_tags_aggregator.py b/tests/test_skip_tags_aggregator.py index c7fea22c3..882b26e82 100644 --- a/tests/test_skip_tags_aggregator.py +++ b/tests/test_skip_tags_aggregator.py @@ -64,5 +64,60 @@ class TestSkipTagsAggregator(unittest.IsolatedAsyncioTestCase): self.assertEqual(self.aggregator.text.type, "sentence") +class TestSkipTagsAggregatorTokenMode(unittest.IsolatedAsyncioTestCase): + def setUp(self): + from pipecat.utils.text.base_text_aggregator import AggregationType + + self.aggregator = SkipTagsAggregator( + [("", "")], aggregation_type=AggregationType.TOKEN + ) + + async def test_token_no_tags(self): + """No tags: text passes through immediately as TOKEN.""" + results = [agg async for agg in self.aggregator.aggregate("Hello!")] + self.assertEqual(len(results), 1) + self.assertEqual(results[0].text, "Hello!") + self.assertEqual(results[0].type, "token") + + async def test_token_inside_tag_buffers(self): + """Inside a tag, text is buffered until the closing tag is found.""" + results = [agg async for agg in self.aggregator.aggregate("foo@bar")] + # Still inside tag, nothing yielded + self.assertEqual(len(results), 0) + + # Close the tag + results = [agg async for agg in self.aggregator.aggregate("")] + self.assertEqual(len(results), 1) + self.assertEqual(results[0].text, "foo@bar") + self.assertEqual(results[0].type, "token") + + async def test_token_flush_unclosed_tag(self): + """Flush with unclosed tag returns remaining text.""" + async for _ in self.aggregator.aggregate("unclosed"): 
+ pass + result = await self.aggregator.flush() + # TOKEN mode flush returns None (parent behavior) + self.assertIsNone(result) + + async def test_token_text_around_tags(self): + """Simulate word-by-word token delivery with tags.""" + results = [] + # Simulate LLM streaming tokens one at a time + for token in ["Hi ", "", "X", "", " bye"]: + async for agg in self.aggregator.aggregate(token): + results.append(agg) + + self.assertEqual(len(results), 3) + # Text before tag passes through immediately + self.assertEqual(results[0].text, "Hi ") + self.assertEqual(results[0].type, "token") + # Tagged content is buffered until the closing tag, then yielded whole + self.assertEqual(results[1].text, "X") + self.assertEqual(results[1].type, "token") + # Text after tag passes through immediately + self.assertEqual(results[2].text, " bye") + self.assertEqual(results[2].type, "token") + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_startup_timing_observer.py b/tests/test_startup_timing_observer.py new file mode 100644 index 000000000..6355c6081 --- /dev/null +++ b/tests/test_startup_timing_observer.py @@ -0,0 +1,337 @@ +import asyncio +import unittest + +from pipecat.frames.frames import ( + BotConnectedFrame, + ClientConnectedFrame, + Frame, + StartFrame, + TextFrame, +) +from pipecat.observers.startup_timing_observer import ( + StartupTimingObserver, + StartupTimingReport, + TransportTimingReport, +) +from pipecat.processors.frame_processor import FrameDirection, FrameProcessor +from pipecat.tests.utils import run_test + + +class SlowStartProcessor(FrameProcessor): + """A processor that sleeps during start to simulate slow initialization.""" + + def __init__(self, delay: float = 0.1, **kwargs): + super().__init__(**kwargs) + self._delay = delay + + async def process_frame(self, frame: Frame, direction: FrameDirection): + await super().process_frame(frame, direction) + if isinstance(frame, StartFrame): + await asyncio.sleep(self._delay) + await 
self.push_frame(frame, direction) + + +class FastProcessor(FrameProcessor): + """A processor with no start delay.""" + + async def process_frame(self, frame: Frame, direction: FrameDirection): + await super().process_frame(frame, direction) + await self.push_frame(frame, direction) + + +class TestStartupTimingObserver(unittest.IsolatedAsyncioTestCase): + """Tests for StartupTimingObserver.""" + + async def test_timing_reported(self): + """Test that startup timing is measured and reported.""" + observer = StartupTimingObserver() + processor = SlowStartProcessor(delay=0.1) + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + report = reports[0] + self.assertGreater(report.total_duration_secs, 0) + self.assertGreater(len(report.processor_timings), 0) + + # Find our slow processor in the timings. + slow_timings = [ + t for t in report.processor_timings if "SlowStartProcessor" in t.processor_name + ] + self.assertEqual(len(slow_timings), 1) + self.assertGreaterEqual(slow_timings[0].duration_secs, 0.05) + + async def test_processor_types_filter(self): + """Test that processor_types filter limits which processors appear.""" + observer = StartupTimingObserver(processor_types=(SlowStartProcessor,)) + processor = SlowStartProcessor(delay=0.05) + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + report = reports[0] + + # Only SlowStartProcessor should be in the timings. 
+ for t in report.processor_timings: + self.assertIn("SlowStartProcessor", t.processor_name) + + async def test_report_emits_once(self): + """Test that the report is emitted only once even with multiple frames.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [ + TextFrame(text="first"), + TextFrame(text="second"), + TextFrame(text="third"), + ] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame, TextFrame, TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + + async def test_event_handler_receives_report(self): + """Test that the event handler receives a proper StartupTimingReport.""" + observer = StartupTimingObserver() + processor = SlowStartProcessor(delay=0.05) + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + frames_to_send = [TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + report = reports[0] + self.assertIsInstance(report, StartupTimingReport) + self.assertIsInstance(report.total_duration_secs, float) + self.assertGreater(report.start_time, 0) + for timing in report.processor_timings: + self.assertIsInstance(timing.processor_name, str) + self.assertIsInstance(timing.duration_secs, float) + self.assertGreaterEqual(timing.start_offset_secs, 0) + + async def test_excludes_internal_processors(self): + """Test that internal pipeline processors are excluded by default.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + reports = [] + + @observer.event_handler("on_startup_timing_report") + async def on_report(obs, report): + reports.append(report) + + 
frames_to_send = [TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[TextFrame], + observers=[observer], + ) + + self.assertEqual(len(reports), 1) + report = reports[0] + + # No internal processors (PipelineSource, PipelineSink, Pipeline) in the report. + internal_names = ("Pipeline#", "PipelineTask#") + for t in report.processor_timings: + for prefix in internal_names: + self.assertNotIn( + prefix, + t.processor_name, + f"Internal processor {t.processor_name} should be excluded by default", + ) + + async def test_transport_timing_client_only(self): + """Test that ClientConnectedFrame emits on_transport_timing_report.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + transport_reports = [] + + @observer.event_handler("on_transport_timing_report") + async def on_transport(obs, report): + transport_reports.append(report) + + frames_to_send = [ClientConnectedFrame(), TextFrame(text="hello")] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[ClientConnectedFrame, TextFrame], + observers=[observer], + ) + + self.assertEqual(len(transport_reports), 1) + report = transport_reports[0] + self.assertIsInstance(report, TransportTimingReport) + self.assertGreater(report.start_time, 0) + self.assertGreater(report.client_connected_secs, 0) + self.assertIsNone(report.bot_connected_secs) + + async def test_transport_timing_only_first_client(self): + """Test that only the first ClientConnectedFrame triggers the event.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + transport_reports = [] + + @observer.event_handler("on_transport_timing_report") + async def on_transport(obs, report): + transport_reports.append(report) + + frames_to_send = [ + ClientConnectedFrame(), + ClientConnectedFrame(), + TextFrame(text="hello"), + ] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[ClientConnectedFrame, 
ClientConnectedFrame, TextFrame], + observers=[observer], + ) + + self.assertEqual(len(transport_reports), 1) + + async def test_transport_timing_without_start_frame(self): + """Test that ClientConnectedFrame before StartFrame does not crash.""" + observer = StartupTimingObserver() + + # Directly call on_push_frame with a ClientConnectedFrame before any + # StartFrame has been seen. This should be a no-op (no crash). + from pipecat.observers.base_observer import FramePushed + + processor = FastProcessor() + destination = FastProcessor() + data = FramePushed( + source=processor, + destination=destination, + frame=ClientConnectedFrame(), + direction=FrameDirection.DOWNSTREAM, + timestamp=1000, + ) + await observer.on_push_frame(data) + + # No event should have been emitted. + self.assertFalse(observer._transport_timing_reported) + + async def test_bot_and_client_connected(self): + """Test that BotConnectedFrame timing is included in the transport report.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + transport_reports = [] + + @observer.event_handler("on_transport_timing_report") + async def on_transport(obs, report): + transport_reports.append(report) + + frames_to_send = [ + BotConnectedFrame(), + ClientConnectedFrame(), + TextFrame(text="hello"), + ] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[BotConnectedFrame, ClientConnectedFrame, TextFrame], + observers=[observer], + ) + + self.assertEqual(len(transport_reports), 1) + report = transport_reports[0] + self.assertGreater(report.client_connected_secs, 0) + self.assertIsNotNone(report.bot_connected_secs) + self.assertGreater(report.bot_connected_secs, 0) + + # Client connected should be >= bot connected. 
+ self.assertGreaterEqual(report.client_connected_secs, report.bot_connected_secs) + + async def test_bot_connected_only_first(self): + """Test that only the first BotConnectedFrame is recorded.""" + observer = StartupTimingObserver() + processor = FastProcessor() + + transport_reports = [] + + @observer.event_handler("on_transport_timing_report") + async def on_transport(obs, report): + transport_reports.append(report) + + frames_to_send = [ + BotConnectedFrame(), + BotConnectedFrame(), + ClientConnectedFrame(), + TextFrame(text="hello"), + ] + + await run_test( + processor, + frames_to_send=frames_to_send, + expected_down_frames=[ + BotConnectedFrame, + BotConnectedFrame, + ClientConnectedFrame, + TextFrame, + ], + observers=[observer], + ) + + # Only one transport report, with bot timing from first frame. + self.assertEqual(len(transport_reports), 1) + self.assertIsNotNone(transport_reports[0].bot_connected_secs) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_stt_mute_filter.py b/tests/test_stt_mute_filter.py index adf4611df..8f55bdecb 100644 --- a/tests/test_stt_mute_filter.py +++ b/tests/test_stt_mute_filter.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: BSD 2-Clause License # -import asyncio import unittest from pipecat.frames.frames import ( @@ -329,17 +328,13 @@ class TestSTTMuteFilter(unittest.IsolatedAsyncioTestCase): expected_down_frames=expected_returned_frames, ) - async def test_interruption_frame_completed_when_muted(self): - """Test that InterruptionFrame.complete() is called when the frame is - suppressed due to muting, so push_interruption_task_frame_and_wait() - doesn't hang.""" + async def test_interruption_frame_suppressed_when_muted(self): + """Test that InterruptionFrame is suppressed when the filter is muted.""" filter = STTMuteFilter(config=STTMuteConfig(strategies={STTMuteStrategy.ALWAYS})) - event = asyncio.Event() - frames_to_send = [ BotStartedSpeakingFrame(), - InterruptionFrame(event=event), + 
InterruptionFrame(), BotStoppedSpeakingFrame(), ] @@ -354,8 +349,6 @@ class TestSTTMuteFilter(unittest.IsolatedAsyncioTestCase): expected_down_frames=expected_returned_frames, ) - self.assertTrue(event.is_set(), "InterruptionFrame.complete() should be called when muted") - if __name__ == "__main__": unittest.main() diff --git a/uv.lock b/uv.lock index 527ffb217..49cfa089b 100644 --- a/uv.lock +++ b/uv.lock @@ -15,7 +15,8 @@ version = "1.10.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "psutil" }, { name = "pyyaml" }, @@ -41,7 +42,8 @@ name = "aic-sdk" version = "2.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/68/c6/1f0b3d3d226c6d19ec654fdaea7859ee9931e0286735385b1f9ea4bcfba1/aic_sdk-2.0.1.tar.gz", hash = "sha256:2480d8398a26639ed7fb5175c37da82cf5e6b1138a1a301938cd8491fe461c20", size = 73091, upload-time = "2026-01-23T23:38:15.77Z" } wheels = [ @@ -663,7 +665,7 @@ wheels = [ [[package]] name = "camb-sdk" -version = "1.5.8" +version = "1.5.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -672,9 +674,9 @@ dependencies = [ { name = "websocket-client" }, { name = "websockets" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/6c/f9/4d3f62909f62f98556e09958f40934abf226289f55a43e149dfc426dc1cf/camb_sdk-1.5.8.tar.gz", hash = "sha256:4ace563accb6aab35d2a4dce53789c98d8809a8c48806a69d0873fc8b0361300", size = 83508, upload-time = "2026-01-27T14:55:49.16Z" } +sdist = { url = "https://files.pythonhosted.org/packages/86/29/17527519a72ed1592f28a4d380fd50ed72978ac38148efc0f9e796504496/camb_sdk-1.5.9.tar.gz", hash = "sha256:c8daaa8eea20c94523ffddd2aa630a902932f78ea8af37e140603e52ff0025ad", size = 83521, upload-time = "2026-02-27T22:57:18.283Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/2d/e7aeef5d5f48205020d153f4a6ffb39d8971fca78b2cc64fdf0a36ceeb12/camb_sdk-1.5.8-py3-none-any.whl", hash = "sha256:7e1a4764376791ab7cccc27014cdfb691b8c73eecdcaeb01457f506ffd3425be", size = 152371, upload-time = "2026-01-27T14:55:45.637Z" }, + { url = "https://files.pythonhosted.org/packages/fc/2a/b759c32c60c51f33ceb299b52f8f73348773cd75d3177a15eefc25b2dee9/camb_sdk-1.5.9-py3-none-any.whl", hash = "sha256:8c3fe9d05adee1d8de121eb6f1ee0a37e913f072d89c11ed3399746a9b69adbc", size = 152395, upload-time = "2026-02-27T22:57:14.137Z" }, ] [[package]] @@ -714,11 +716,11 @@ wheels = [ [[package]] name = "certifi" -version = "2026.1.4" +version = "2026.2.25" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, + { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, ] [[package]] @@ -942,7 +944,7 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/66/54/eb9bfc647b19f2009dd5c7f5ec51c4e6ca831725f1aea7a993034f483147/contourpy-1.3.2.tar.gz", hash = "sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54", size = 13466130, upload-time = "2025-04-15T17:47:53.79Z" } wheels = [ @@ -1015,7 +1017,7 @@ resolution-markers = [ "python_full_version == '3.11.*'", ] dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } wheels = [ @@ -1099,7 +1101,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "cattrs" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry 
= "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "protobuf" }, { name = "pyaml" }, @@ -1273,7 +1276,8 @@ name = "ctranslate2" version = "4.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pyyaml" }, { name = "setuptools" }, ] @@ -1329,10 +1333,10 @@ wheels = [ [[package]] name = "cuda-pathfinder" -version = "1.3.4" +version = "1.4.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/5e/db279a3bfbd18d59d0598922a3b3c1454908d0969e8372260afec9736376/cuda_pathfinder-1.3.4-py3-none-any.whl", hash = "sha256:fb983f6e0d43af27ef486e14d5989b5f904ef45cedf40538bfdcbffa6bb01fb2", size = 30878, upload-time = "2026-02-11T18:50:31.008Z" }, + { url = "https://files.pythonhosted.org/packages/ff/60/d8f1dbfb7f06b94c662e98c95189e6f39b817da638bc8fcea0d003f89e5d/cuda_pathfinder-1.4.0-py3-none-any.whl", hash = "sha256:437079ca59e7b61ae439ecc501d69ed87b3accc34d58153ef1e54815e2c2e118", size = 38406, upload-time = "2026-02-25T22:13:00.807Z" }, ] [[package]] @@ -1573,7 +1577,7 @@ all = [ [[package]] name = "fastapi-cli" -version = "0.0.23" +version = "0.0.24" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "rich-toolkit" }, @@ -1581,9 +1585,9 @@ dependencies = [ { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/71/9f/cbd463e57de4e977b8ea0403f95347f9150441568b1d3fe3e4949ef80ef3/fastapi_cli-0.0.23.tar.gz", hash = 
"sha256:210ac280ea41e73aac5a57688781256beb23c2cba3a41266896fa43e6445c8e7", size = 19763, upload-time = "2026-02-16T19:45:53.358Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6e/58/74797ae9e4610cfa0c6b34c8309096d3b20bb29be3b8b5fbf1004d10fa5f/fastapi_cli-0.0.24.tar.gz", hash = "sha256:1afc9c9e21d7ebc8a3ca5e31790cd8d837742be7e4f8b9236e99cb3451f0de00", size = 19043, upload-time = "2026-02-24T10:45:10.476Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/68/89/19dcfd5cd289b306abdcabac68b88a4f54b7710a2c33adc16a337ecdcdfa/fastapi_cli-0.0.23-py3-none-any.whl", hash = "sha256:7e9634fc212da0b6cfc75bd3ac366cc9dfdb43b5e9ec12e58bfd1acdd2697f25", size = 12305, upload-time = "2026-02-16T19:45:52.554Z" }, + { url = "https://files.pythonhosted.org/packages/c7/4b/68f9fe268e535d79c76910519530026a4f994ce07189ac0dded45c6af825/fastapi_cli-0.0.24-py3-none-any.whl", hash = "sha256:4a1f78ed798f106b4fee85ca93b85d8fe33c0a3570f775964d37edb80b8f0edc", size = 12304, upload-time = "2026-02-24T10:45:09.552Z" }, ] [package.optional-dependencies] @@ -1594,7 +1598,7 @@ standard = [ [[package]] name = "fastapi-cloud-cli" -version = "0.13.0" +version = "0.14.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastar" }, @@ -1606,9 +1610,9 @@ dependencies = [ { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/de/0b/f07f4976784978ef159fd2e8f5c16f1f9d610578fb1fd976ff1315c11ea6/fastapi_cloud_cli-0.13.0.tar.gz", hash = "sha256:4d8f42337e8021c648f6cb0672de7d5b31b0fc7387a83d7b12f974600ac3f2fd", size = 38436, upload-time = "2026-02-17T05:18:19.033Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/eb/e78ebd05a714c62a0578cdce4339cb6cd138421a7d865fbddedd7242420b/fastapi_cloud_cli-0.14.0.tar.gz", hash = "sha256:d3ecb8c942685a71df0af7bd59f463b5eff76f5818b48e5a03c6159726831e68", size = 39822, upload-time = "2026-02-25T14:19:53.535Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/b4/88/71a1e989d17b9edb483f32e28b7891ffdd3005271518c98ba6415987c430/fastapi_cloud_cli-0.13.0-py3-none-any.whl", hash = "sha256:874a9ed8dba34ec828f198c72de9f9a38de77ac1b15083d6bc3a4d772b0bc477", size = 27631, upload-time = "2026-02-17T05:18:18.094Z" }, + { url = "https://files.pythonhosted.org/packages/d9/18/7bf922ee0b6a737a9d88cf613182ecd6031f52298da893556f158eba763f/fastapi_cloud_cli-0.14.0-py3-none-any.whl", hash = "sha256:325fcb4b45e661184152da6db861d9fb718739fbcd561a4d334dbe78c026586f", size = 28350, upload-time = "2026-02-25T14:19:52.416Z" }, ] [[package]] @@ -2070,10 +2074,9 @@ wheels = [ [[package]] name = "google-genai" -version = "1.64.0" +version = "1.65.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "aiohttp" }, { name = "anyio" }, { name = "distro" }, { name = "google-auth", extra = ["requests"] }, @@ -2085,9 +2088,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bc/14/344b450d4387845fc5c8b7f168ffbe734b831b729ece3333fc0fe8556f04/google_genai-1.64.0.tar.gz", hash = "sha256:8db94ab031f745d08c45c69674d1892f7447c74ed21542abe599f7888e28b924", size = 496434, upload-time = "2026-02-19T02:06:13.95Z" } +sdist = { url = "https://files.pythonhosted.org/packages/79/f9/cc1191c2540d6a4e24609a586c4ed45d2db57cfef47931c139ee70e5874a/google_genai-1.65.0.tar.gz", hash = "sha256:d470eb600af802d58a79c7f13342d9ea0d05d965007cae8f76c7adff3d7a4750", size = 497206, upload-time = "2026-02-26T00:20:33.824Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/54/56/765eca90c781fedbe2a7e7dc873ef6045048e28ba5f2d4a5bcb13e13062b/google_genai-1.64.0-py3-none-any.whl", hash = "sha256:78a4d2deeb33b15ad78eaa419f6f431755e7f0e03771254f8000d70f717e940b", size = 728836, upload-time = "2026-02-19T02:06:11.655Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/3c/3fea4e7c91357c71782d7dcaad7a2577d636c90317e003386893c25bc62c/google_genai-1.65.0-py3-none-any.whl", hash = "sha256:68c025205856919bc03edb0155c11b4b833810b7ce17ad4b7a9eeba5158f6c44", size = 724429, upload-time = "2026-02-26T00:20:32.186Z" }, ] [[package]] @@ -2304,31 +2307,34 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.2.0" +version = "1.3.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/cb/9bb543bd987ffa1ee48202cc96a756951b734b79a542335c566148ade36c/hf_xet-1.3.2.tar.gz", hash = "sha256:e130ee08984783d12717444e538587fa2119385e5bd8fc2bb9f930419b73a7af", size = 643646, upload-time = "2026-02-27T17:26:08.051Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/a5/85ef910a0aa034a2abcfadc360ab5ac6f6bc4e9112349bd40ca97551cff0/hf_xet-1.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:ceeefcd1b7aed4956ae8499e2199607765fbd1c60510752003b6cc0b8413b649", size = 2861870, upload-time = "2025-10-24T19:04:11.422Z" }, - { url = "https://files.pythonhosted.org/packages/ea/40/e2e0a7eb9a51fe8828ba2d47fe22a7e74914ea8a0db68a18c3aa7449c767/hf_xet-1.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b70218dd548e9840224df5638fdc94bd033552963cfa97f9170829381179c813", size = 2717584, upload-time = "2025-10-24T19:04:09.586Z" }, - { url = "https://files.pythonhosted.org/packages/a5/7d/daf7f8bc4594fdd59a8a596f9e3886133fdc68e675292218a5e4c1b7e834/hf_xet-1.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d40b18769bb9a8bc82a9ede575ce1a44c75eb80e7375a01d76259089529b5dc", size = 3315004, upload-time = "2025-10-24T19:04:00.314Z" }, - { 
url = "https://files.pythonhosted.org/packages/b1/ba/45ea2f605fbf6d81c8b21e4d970b168b18a53515923010c312c06cd83164/hf_xet-1.2.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd3a6027d59cfb60177c12d6424e31f4b5ff13d8e3a1247b3a584bf8977e6df5", size = 3222636, upload-time = "2025-10-24T19:03:58.111Z" }, - { url = "https://files.pythonhosted.org/packages/4a/1d/04513e3cab8f29ab8c109d309ddd21a2705afab9d52f2ba1151e0c14f086/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6de1fc44f58f6dd937956c8d304d8c2dea264c80680bcfa61ca4a15e7b76780f", size = 3408448, upload-time = "2025-10-24T19:04:20.951Z" }, - { url = "https://files.pythonhosted.org/packages/f0/7c/60a2756d7feec7387db3a1176c632357632fbe7849fce576c5559d4520c7/hf_xet-1.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f182f264ed2acd566c514e45da9f2119110e48a87a327ca271027904c70c5832", size = 3503401, upload-time = "2025-10-24T19:04:22.549Z" }, - { url = "https://files.pythonhosted.org/packages/4e/64/48fffbd67fb418ab07451e4ce641a70de1c40c10a13e25325e24858ebe5a/hf_xet-1.2.0-cp313-cp313t-win_amd64.whl", hash = "sha256:293a7a3787e5c95d7be1857358a9130694a9c6021de3f27fa233f37267174382", size = 2900866, upload-time = "2025-10-24T19:04:33.461Z" }, - { url = "https://files.pythonhosted.org/packages/e2/51/f7e2caae42f80af886db414d4e9885fac959330509089f97cccb339c6b87/hf_xet-1.2.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:10bfab528b968c70e062607f663e21e34e2bba349e8038db546646875495179e", size = 2861861, upload-time = "2025-10-24T19:04:19.01Z" }, - { url = "https://files.pythonhosted.org/packages/6e/1d/a641a88b69994f9371bd347f1dd35e5d1e2e2460a2e350c8d5165fc62005/hf_xet-1.2.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a212e842647b02eb6a911187dc878e79c4aa0aa397e88dd3b26761676e8c1f8", size = 2717699, upload-time = "2025-10-24T19:04:17.306Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/e0/e5e9bba7d15f0318955f7ec3f4af13f92e773fbb368c0b8008a5acbcb12f/hf_xet-1.2.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30e06daccb3a7d4c065f34fc26c14c74f4653069bb2b194e7f18f17cbe9939c0", size = 3314885, upload-time = "2025-10-24T19:04:07.642Z" }, - { url = "https://files.pythonhosted.org/packages/21/90/b7fe5ff6f2b7b8cbdf1bd56145f863c90a5807d9758a549bf3d916aa4dec/hf_xet-1.2.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:29c8fc913a529ec0a91867ce3d119ac1aac966e098cf49501800c870328cc090", size = 3221550, upload-time = "2025-10-24T19:04:05.55Z" }, - { url = "https://files.pythonhosted.org/packages/6f/cb/73f276f0a7ce46cc6a6ec7d6c7d61cbfe5f2e107123d9bbd0193c355f106/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e159cbfcfbb29f920db2c09ed8b660eb894640d284f102ada929b6e3dc410a", size = 3408010, upload-time = "2025-10-24T19:04:28.598Z" }, - { url = "https://files.pythonhosted.org/packages/b8/1e/d642a12caa78171f4be64f7cd9c40e3ca5279d055d0873188a58c0f5fbb9/hf_xet-1.2.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9c91d5ae931510107f148874e9e2de8a16052b6f1b3ca3c1b12f15ccb491390f", size = 3503264, upload-time = "2025-10-24T19:04:30.397Z" }, - { url = "https://files.pythonhosted.org/packages/17/b5/33764714923fa1ff922770f7ed18c2daae034d21ae6e10dbf4347c854154/hf_xet-1.2.0-cp314-cp314t-win_amd64.whl", hash = "sha256:210d577732b519ac6ede149d2f2f34049d44e8622bf14eb3d63bbcd2d4b332dc", size = 2901071, upload-time = "2025-10-24T19:04:37.463Z" }, - { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" }, - { url = 
"https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" }, - { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" }, - { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" }, - { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" }, - { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" }, - { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/75/462285971954269432aad2e7938c5c7ff9ec7d60129cec542ab37121e3d6/hf_xet-1.3.2-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:335a8f36c55fd35a92d0062f4e9201b4015057e62747b7e7001ffb203c0ee1d2", size = 3761019, upload-time = "2026-02-27T17:25:49.441Z" }, + { url = "https://files.pythonhosted.org/packages/35/56/987b0537ddaf88e17192ea09afa8eca853e55f39a4721578be436f8409df/hf_xet-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c1ae4d3a716afc774e66922f3cac8206bfa707db13f6a7e62dfff74bfc95c9a8", size = 3521565, upload-time = "2026-02-27T17:25:47.469Z" }, + { url = "https://files.pythonhosted.org/packages/a8/5c/7e4a33a3d689f77761156cc34558047569e54af92e4d15a8f493229f6767/hf_xet-1.3.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d6dbdf231efac0b9b39adcf12a07f0c030498f9212a18e8c50224d0e84ab803d", size = 4176494, upload-time = "2026-02-27T17:25:40.247Z" }, + { url = "https://files.pythonhosted.org/packages/6b/b3/71e856bf9d9a69b3931837e8bf22e095775f268c8edcd4a9e8c355f92484/hf_xet-1.3.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c1980abfb68ecf6c1c7983379ed7b1e2b49a1aaf1a5aca9acc7d48e5e2e0a961", size = 3955601, upload-time = "2026-02-27T17:25:38.376Z" }, + { url = "https://files.pythonhosted.org/packages/63/d7/aecf97b3f0a981600a67ff4db15e2d433389d698a284bb0ea5d8fcdd6f7f/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1c88fbd90ad0d27c46b77a445f0a436ebaa94e14965c581123b68b1c52f5fd30", size = 4154770, upload-time = "2026-02-27T17:25:56.756Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e1/3af961f71a40e09bf5ee909842127b6b00f5ab4ee3817599dc0771b79893/hf_xet-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:35b855024ca37f2dd113ac1c08993e997fbe167b9d61f9ef66d3d4f84015e508", size = 4394161, upload-time = "2026-02-27T17:25:58.111Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/c3/859509bade9178e21b8b1db867b8e10e9f817ab9ac1de77cb9f461ced765/hf_xet-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:31612ba0629046e425ba50375685a2586e11fb9144270ebabd75878c3eaf6378", size = 3637377, upload-time = "2026-02-27T17:26:10.611Z" }, + { url = "https://files.pythonhosted.org/packages/05/7f/724cfbef4da92d577b71f68bf832961c8919f36c60d28d289a9fc9d024d4/hf_xet-1.3.2-cp313-cp313t-win_arm64.whl", hash = "sha256:433c77c9f4e132b562f37d66c9b22c05b5479f243a1f06a120c1c06ce8b1502a", size = 3497875, upload-time = "2026-02-27T17:26:09.034Z" }, + { url = "https://files.pythonhosted.org/packages/ba/75/9d54c1ae1d05fb704f977eca1671747babf1957f19f38ae75c5933bc2dc1/hf_xet-1.3.2-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:c34e2c7aefad15792d57067c1c89b2b02c1bbaeabd7f8456ae3d07b4bbaf4094", size = 3761076, upload-time = "2026-02-27T17:25:55.42Z" }, + { url = "https://files.pythonhosted.org/packages/f2/8a/08a24b6c6f52b5d26848c16e4b6d790bb810d1bf62c3505bed179f7032d3/hf_xet-1.3.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4bc995d6c41992831f762096020dc14a65fdf3963f86ffed580b596d04de32e3", size = 3521745, upload-time = "2026-02-27T17:25:54.217Z" }, + { url = "https://files.pythonhosted.org/packages/b5/db/a75cf400dd8a1a8acf226a12955ff6ee999f272dfc0505bafd8079a61267/hf_xet-1.3.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:959083c89dee30f7d6f890b36cdadda823386c4de63b1a30384a75bfd2ae995d", size = 4176301, upload-time = "2026-02-27T17:25:46.044Z" }, + { url = "https://files.pythonhosted.org/packages/01/40/6c4c798ffdd83e740dd3925c4e47793b07442a9efa3bc3866ba141a82365/hf_xet-1.3.2-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:cfa760888633b08c01b398d212ce7e8c0d7adac6c86e4b20dfb2397d8acd78ee", size = 3955437, upload-time = "2026-02-27T17:25:44.703Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/09/9a3aa7c5f07d3e5cc57bb750d12a124ffa72c273a87164bd848f9ac5cc14/hf_xet-1.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3155a02e083aa21fd733a7485c7c36025e49d5975c8d6bda0453d224dd0b0ac4", size = 4154535, upload-time = "2026-02-27T17:26:05.207Z" }, + { url = "https://files.pythonhosted.org/packages/ae/e0/831f7fa6d90cb47a230bc23284b502c700e1483bbe459437b3844cdc0776/hf_xet-1.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:91b1dc03c31cbf733d35dc03df7c5353686233d86af045e716f1e0ea4a2673cf", size = 4393891, upload-time = "2026-02-27T17:26:06.607Z" }, + { url = "https://files.pythonhosted.org/packages/ab/96/6ed472fdce7f8b70f5da6e3f05be76816a610063003bfd6d9cea0bbb58a3/hf_xet-1.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:211f30098512d95e85ad03ae63bd7dd2c4df476558a5095d09f9e38e78cbf674", size = 3637583, upload-time = "2026-02-27T17:26:17.349Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e8/a069edc4570b3f8e123c0b80fadc94530f3d7b01394e1fc1bb223339366c/hf_xet-1.3.2-cp314-cp314t-win_arm64.whl", hash = "sha256:4a6817c41de7c48ed9270da0b02849347e089c5ece9a0e72ae4f4b3a57617f82", size = 3497977, upload-time = "2026-02-27T17:26:14.966Z" }, + { url = "https://files.pythonhosted.org/packages/d8/28/dbb024e2e3907f6f3052847ca7d1a2f7a3972fafcd53ff79018977fcb3e4/hf_xet-1.3.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f93b7595f1d8fefddfede775c18b5c9256757824f7f6832930b49858483cd56f", size = 3763961, upload-time = "2026-02-27T17:25:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/e4/71/b99aed3823c9d1795e4865cf437d651097356a3f38c7d5877e4ac544b8e4/hf_xet-1.3.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:a85d3d43743174393afe27835bde0cd146e652b5fcfdbcd624602daef2ef3259", size = 3526171, upload-time = "2026-02-27T17:25:50.968Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/ca/907890ce6ef5598b5920514f255ed0a65f558f820515b18db75a51b2f878/hf_xet-1.3.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7c2a054a97c44e136b1f7f5a78f12b3efffdf2eed3abc6746fc5ea4b39511633", size = 4180750, upload-time = "2026-02-27T17:25:43.125Z" }, + { url = "https://files.pythonhosted.org/packages/8c/ad/bc7f41f87173d51d0bce497b171c4ee0cbde1eed2d7b4216db5d0ada9f50/hf_xet-1.3.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:06b724a361f670ae557836e57801b82c75b534812e351a87a2c739f77d1e0635", size = 3961035, upload-time = "2026-02-27T17:25:41.837Z" }, + { url = "https://files.pythonhosted.org/packages/73/38/600f4dda40c4a33133404d9fe644f1d35ff2d9babb4d0435c646c63dd107/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:305f5489d7241a47e0458ef49334be02411d1d0f480846363c1c8084ed9916f7", size = 4161378, upload-time = "2026-02-27T17:26:00.365Z" }, + { url = "https://files.pythonhosted.org/packages/00/b3/7bc1ff91d1ac18420b7ad1e169b618b27c00001b96310a89f8a9294fe509/hf_xet-1.3.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:06cdbde243c85f39a63b28e9034321399c507bcd5e7befdd17ed2ccc06dfe14e", size = 4398020, upload-time = "2026-02-27T17:26:03.977Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0b/99bfd948a3ed3620ab709276df3ad3710dcea61976918cce8706502927af/hf_xet-1.3.2-cp37-abi3-win_amd64.whl", hash = "sha256:9298b47cce6037b7045ae41482e703c471ce36b52e73e49f71226d2e8e5685a1", size = 3641624, upload-time = "2026-02-27T17:26:13.542Z" }, + { url = "https://files.pythonhosted.org/packages/cc/02/9a6e4ca1f3f73a164c0cd48e41b3cc56585dcc37e809250de443d673266f/hf_xet-1.3.2-cp37-abi3-win_arm64.whl", hash = "sha256:83d8ec273136171431833a6957e8f3af496bee227a0fe47c7b8b39c106d1749a", size = 3503976, upload-time = "2026-02-27T17:26:12.123Z" }, ] [[package]] @@ -2467,7 +2473,7 @@ wheels = [ [[package]] name = "hume" -version = "0.13.8" +version = "0.13.10" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -2479,9 +2485,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/5b/849ac072161e985ce5758f19f792043274b64a9f9dd73fdd14333b7446f4/hume-0.13.8.tar.gz", hash = "sha256:067691b0ce0353e4438d32d5fbfcbb6ed2099533bf5e06af99084c8c76fad24f", size = 142326, upload-time = "2026-02-10T16:05:22.234Z" } +sdist = { url = "https://files.pythonhosted.org/packages/78/d7/97845c3903ef5782b6f4581138f06a595513c2e129b2cbeacfc6e3645f61/hume-0.13.10.tar.gz", hash = "sha256:425596d17bd8b85bdf4f27bd0d3680c50ce50b4339f64adf39f69557907dc41c", size = 144063, upload-time = "2026-02-27T21:06:17.913Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/10/ec2c1e9a0401a39c3575ff8c5e42ad4b03687d5dbdefaa94ec5d52dbe088/hume-0.13.8-py3-none-any.whl", hash = "sha256:8295c095e4e04918512eec2df3adf4a0900b8d7ef06e3e8487c45ab520ed0ad5", size = 353023, upload-time = "2026-02-10T16:05:20.537Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ee/52598b811660f874f84b880b2b46481c78f8f7df2d9cff95b8130af95826/hume-0.13.10-py3-none-any.whl", hash = "sha256:a724b6cd9fc2278dff0b831276b1b2c82604edece3e036e0d46c312aea2d70b8", size = 355071, upload-time = "2026-02-27T21:06:14.847Z" }, ] [[package]] @@ -2522,93 +2528,93 @@ wheels = [ [[package]] name = "ijson" -version = "3.4.0.post0" +version = "3.5.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/30/7ab4b9e88e7946f6beef419f74edcc541df3ea562c7882257b4eaa82417d/ijson-3.4.0.post0.tar.gz", hash = "sha256:9aa02dc70bb245670a6ca7fba737b992aeeb4895360980622f7e568dbf23e41e", size = 67216, upload-time = "2025-10-10T05:29:25.62Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f4/57/60d1a6a512f2f0508d0bc8b4f1cc5616fd3196619b66bd6a01f9155a1292/ijson-3.5.0.tar.gz", hash = 
"sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31", size = 68658, upload-time = "2026-02-24T03:58:30.974Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b5/15/4f4921ed9ab94032fd0b03ecb211ff9dbd5cc9953463f5b5c4ddeab406fc/ijson-3.4.0.post0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8f904a405b58a04b6ef0425f1babbc5c65feb66b0a4cc7f214d4ad7de106f77d", size = 88244, upload-time = "2025-10-10T05:27:42.001Z" }, - { url = "https://files.pythonhosted.org/packages/af/d6/b85d4da1752362a789bc3e0fc4b55e812a374a50d2fe1c06cab2e2bcb170/ijson-3.4.0.post0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a07dcc1a8a1ddd76131a7c7528cbd12951c2e34eb3c3d63697b905069a2d65b1", size = 59880, upload-time = "2025-10-10T05:27:44.791Z" }, - { url = "https://files.pythonhosted.org/packages/c3/96/e1027e6d0efb5b9192bdc9f0af5633c20a56999cce4cf7ad35427f823138/ijson-3.4.0.post0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ab3be841b8c430c1883b8c0775eb551f21b5500c102c7ee828afa35ddd701bdd", size = 59939, upload-time = "2025-10-10T05:27:45.66Z" }, - { url = "https://files.pythonhosted.org/packages/e3/71/b9ca0a19afb2f36be35c6afa2c4d1c19950dc45f6a50b483b56082b3e165/ijson-3.4.0.post0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:43059ae0d657b11c5ddb11d149bc400c44f9e514fb8663057e9b2ea4d8d44c1f", size = 125894, upload-time = "2025-10-10T05:27:46.551Z" }, - { url = "https://files.pythonhosted.org/packages/02/1b/f7356de078d85564829c5e2a2a31473ee0ad1876258ceecf550b582e57b7/ijson-3.4.0.post0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d3e82963096579d1385c06b2559570d7191e225664b7fa049617da838e1a4a4", size = 132385, upload-time = "2025-10-10T05:27:48Z" }, - { url = 
"https://files.pythonhosted.org/packages/57/7b/08f86eed5df0849b673260dd2943b6a7367a55b5a4b6e73ddbfbdf4206f1/ijson-3.4.0.post0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:461ce4e87a21a261b60c0a68a2ad17c7dd214f0b90a0bec7e559a66b6ae3bd7e", size = 129567, upload-time = "2025-10-10T05:27:49.188Z" }, - { url = "https://files.pythonhosted.org/packages/96/e1/69672d95b1a16e7c6bf89cef6c892b228cc84b484945a731786a425700d2/ijson-3.4.0.post0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:890cf6610c9554efcb9765a93e368efeb5bb6135f59ce0828d92eaefff07fde5", size = 132821, upload-time = "2025-10-10T05:27:50.342Z" }, - { url = "https://files.pythonhosted.org/packages/0b/15/9ed4868e2e92db2454508f7ea1282bec0b039bd344ac0cbac4a2de16786d/ijson-3.4.0.post0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6793c29a5728e7751a7df01be58ba7da9b9690c12bf79d32094c70a908fa02b9", size = 127757, upload-time = "2025-10-10T05:27:51.203Z" }, - { url = "https://files.pythonhosted.org/packages/5b/aa/08a308d3aaa6e98511f3100f8a1e4e8ff8c853fa4ec3f18b71094ac36bbe/ijson-3.4.0.post0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a56b6674d7feec0401c91f86c376f4e3d8ff8129128a8ad21ca43ec0b1242f79", size = 130439, upload-time = "2025-10-10T05:27:52.123Z" }, - { url = "https://files.pythonhosted.org/packages/56/46/3da05a044f335b97635d59eede016ea158fbf1b59e584149177b6524e1e5/ijson-3.4.0.post0-cp310-cp310-win32.whl", hash = "sha256:01767fcbd75a5fa5a626069787b41f04681216b798510d5f63bcf66884386368", size = 52004, upload-time = "2025-10-10T05:27:53.441Z" }, - { url = "https://files.pythonhosted.org/packages/60/d7/a126d58f379df16fa9a0c2532ac00ae3debf1d28c090020775bc735032b8/ijson-3.4.0.post0-cp310-cp310-win_amd64.whl", hash = "sha256:09127c06e5dec753feb9e4b8c5f6a23603d1cd672d098159a17e53a73b898eec", size = 54407, upload-time = "2025-10-10T05:27:54.259Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/ac/3d57249d4acba66a33eaef794edb5b2a2222ca449ae08800f8abe9286645/ijson-3.4.0.post0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0b473112e72c0c506da425da3278367b6680f340ecc093084693a1e819d28435", size = 88278, upload-time = "2025-10-10T05:27:55.403Z" }, - { url = "https://files.pythonhosted.org/packages/12/fb/2d068d23d1a665f500282ceb6f2473952a95fc7107d739fd629b4ab41959/ijson-3.4.0.post0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:043f9b7cf9cc744263a78175e769947733710d2412d25180df44b1086b23ebd5", size = 59898, upload-time = "2025-10-10T05:27:56.361Z" }, - { url = "https://files.pythonhosted.org/packages/26/3d/8b14589dfb0e5dbb7bcf9063e53d3617c041cf315ff3dfa60945382237ce/ijson-3.4.0.post0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b55e49045f4c8031f3673f56662fd828dc9e8d65bd3b03a9420dda0d370e64ba", size = 59945, upload-time = "2025-10-10T05:27:57.581Z" }, - { url = "https://files.pythonhosted.org/packages/77/57/086a75094397d4b7584698a540a279689e12905271af78cdfc903bf9eaf8/ijson-3.4.0.post0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:11f13b73194ea2a5a8b4a2863f25b0b4624311f10db3a75747b510c4958179b0", size = 131318, upload-time = "2025-10-10T05:27:58.453Z" }, - { url = "https://files.pythonhosted.org/packages/df/35/7f61e9ce4a9ff1306ec581eb851f8a660439126d92ee595c6dc8084aac97/ijson-3.4.0.post0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:659acb2843433e080c271ecedf7d19c71adde1ee5274fc7faa2fec0a793f9f1c", size = 137990, upload-time = "2025-10-10T05:27:59.328Z" }, - { url = "https://files.pythonhosted.org/packages/59/bf/590bbc3c3566adce5e2f43ba5894520cbaf19a3e7f38c1250926ba67eee4/ijson-3.4.0.post0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:deda4cfcaafa72ca3fa845350045b1d0fef9364ec9f413241bb46988afbe6ee6", size = 134416, upload-time = 
"2025-10-10T05:28:00.317Z" }, - { url = "https://files.pythonhosted.org/packages/24/c1/fb719049851979df71f3e039d6f1a565d349c9cb1b29c0f8775d9db141b4/ijson-3.4.0.post0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47352563e8c594360bacee2e0753e97025f0861234722d02faace62b1b6d2b2a", size = 138034, upload-time = "2025-10-10T05:28:01.627Z" }, - { url = "https://files.pythonhosted.org/packages/10/ce/ccda891f572876aaf2c43f0b2079e31d5b476c3ae53196187eab1a788eff/ijson-3.4.0.post0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5a48b9486242d1295abe7fd0fbb6308867da5ca3f69b55c77922a93c2b6847aa", size = 132510, upload-time = "2025-10-10T05:28:03.141Z" }, - { url = "https://files.pythonhosted.org/packages/11/b5/ca8e64ab7cf5252f358e467be767630f085b5bbcd3c04333a3a5f36c3dd3/ijson-3.4.0.post0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9c0886234d1fae15cf4581a430bdba03d79251c1ab3b07e30aa31b13ef28d01c", size = 134907, upload-time = "2025-10-10T05:28:04.438Z" }, - { url = "https://files.pythonhosted.org/packages/93/14/63a4d5dc548690f29f0c2fc9cabd5ecbb37532547439c05f5b3b9ce73021/ijson-3.4.0.post0-cp311-cp311-win32.whl", hash = "sha256:fecae19b5187d92900c73debb3a979b0b3290a53f85df1f8f3c5ba7d1e9fb9cb", size = 52006, upload-time = "2025-10-10T05:28:05.424Z" }, - { url = "https://files.pythonhosted.org/packages/fa/bf/932740899e572a97f9be0c6cd64ebda557eae7701ac216fc284aba21786d/ijson-3.4.0.post0-cp311-cp311-win_amd64.whl", hash = "sha256:b39dbf87071f23a23c8077eea2ae7cfeeca9ff9ffec722dfc8b5f352e4dd729c", size = 54410, upload-time = "2025-10-10T05:28:06.264Z" }, - { url = "https://files.pythonhosted.org/packages/7d/fe/3b6af0025288e769dbfa30485dae1b3bd3f33f00390f3ee532cbb1c33e9b/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b607a500fca26101be47d2baf7cddb457b819ab60a75ce51ed1092a40da8b2f9", size = 87847, upload-time = "2025-10-10T05:28:07.229Z" }, - { url = 
"https://files.pythonhosted.org/packages/6e/a5/95ee2ca82f3b1a57892452f6e5087607d56c620beb8ce625475194568698/ijson-3.4.0.post0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4827d9874a6a81625412c59f7ca979a84d01f7f6bfb3c6d4dc4c46d0382b14e0", size = 59815, upload-time = "2025-10-10T05:28:08.448Z" }, - { url = "https://files.pythonhosted.org/packages/51/8d/5a704ab3c17c55c21c86423458db8610626ca99cc9086a74dfeb7ee9054c/ijson-3.4.0.post0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d4d4afec780881edb2a0d2dd40b1cdbe246e630022d5192f266172a0307986a7", size = 59648, upload-time = "2025-10-10T05:28:09.307Z" }, - { url = "https://files.pythonhosted.org/packages/25/56/ca5d6ca145d007f30b44e747f3c163bc08710ce004af0deaad4a2301339b/ijson-3.4.0.post0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:432fb60ffb952926f9438e0539011e2dfcd108f8426ee826ccc6173308c3ff2c", size = 138279, upload-time = "2025-10-10T05:28:10.489Z" }, - { url = "https://files.pythonhosted.org/packages/c3/d3/22e3cc806fcdda7ad4c8482ed74db7a017d4a1d49b4300c7bc07052fb561/ijson-3.4.0.post0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:54a0e3e05d9a0c95ecba73d9579f146cf6d5c5874116c849dba2d39a5f30380e", size = 149110, upload-time = "2025-10-10T05:28:12.263Z" }, - { url = "https://files.pythonhosted.org/packages/3e/04/efb30f413648b9267f5a33920ac124d7ebef3bc4063af8f6ffc8ca11ddcb/ijson-3.4.0.post0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05807edc0bcbd222dc6ea32a2b897f0c81dc7f12c8580148bc82f6d7f5e7ec7b", size = 149026, upload-time = "2025-10-10T05:28:13.557Z" }, - { url = "https://files.pythonhosted.org/packages/2d/cf/481165f7046ade32488719300a3994a437020bc41cfbb54334356348f513/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a5269af16f715855d9864937f9dd5c348ca1ac49cee6a2c7a1b7091c159e874f", size = 150012, upload-time = 
"2025-10-10T05:28:14.859Z" }, - { url = "https://files.pythonhosted.org/packages/0f/24/642e3289917ecf860386e26dfde775f9962d26ab7f6c2e364ed3ca3c25d8/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b200df83c901f5bfa416d069ac71077aa1608f854a4c50df1b84ced560e9c9ec", size = 142193, upload-time = "2025-10-10T05:28:16.131Z" }, - { url = "https://files.pythonhosted.org/packages/0f/f5/fd2f038abe95e553e1c3ee207cda19db9196eb416e63c7c89699a8cf0db7/ijson-3.4.0.post0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6458bd8e679cdff459a0a5e555b107c3bbacb1f382da3fe0f40e392871eb518d", size = 150904, upload-time = "2025-10-10T05:28:17.401Z" }, - { url = "https://files.pythonhosted.org/packages/49/35/24259d22519987928164e6cb8fe3486e1df0899b2999ada4b0498639b463/ijson-3.4.0.post0-cp312-cp312-win32.whl", hash = "sha256:55f7f656b5986326c978cbb3a9eea9e33f3ef6ecc4535b38f1d452c731da39ab", size = 52358, upload-time = "2025-10-10T05:28:18.315Z" }, - { url = "https://files.pythonhosted.org/packages/a1/2b/6f7ade27a8ff5758fc41006dadd2de01730def84fe3e60553b329c59e0d4/ijson-3.4.0.post0-cp312-cp312-win_amd64.whl", hash = "sha256:e15833dcf6f6d188fdc624a31cd0520c3ba21b6855dc304bc7c1a8aeca02d4ac", size = 54789, upload-time = "2025-10-10T05:28:19.552Z" }, - { url = "https://files.pythonhosted.org/packages/1b/20/aaec6977f9d538bbadd760c7fa0f6a0937742abdcc920ec6478a8576e55f/ijson-3.4.0.post0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:114ed248166ac06377e87a245a158d6b98019d2bdd3bb93995718e0bd996154f", size = 87863, upload-time = "2025-10-10T05:28:20.786Z" }, - { url = "https://files.pythonhosted.org/packages/5b/29/06bf56a866e2fe21453a1ad8f3a5d7bca3c723f73d96329656dfee969783/ijson-3.4.0.post0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ffb21203736b08fe27cb30df6a4f802fafb9ef7646c5ff7ef79569b63ea76c57", size = 59806, upload-time = "2025-10-10T05:28:21.596Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/ae/e1d0fda91ba7a444b75f0d60cb845fdb1f55d3111351529dcbf4b1c276fe/ijson-3.4.0.post0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:07f20ecd748602ac7f18c617637e53bd73ded7f3b22260bba3abe401a7fc284e", size = 59643, upload-time = "2025-10-10T05:28:22.45Z" }, - { url = "https://files.pythonhosted.org/packages/4d/24/5a24533be2726396cc1724dc237bada09b19715b5bfb0e7b9400db0901ad/ijson-3.4.0.post0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:27aa193d47ffc6bc4e45453896ad98fb089a367e8283b973f1fe5c0198b60b4e", size = 138082, upload-time = "2025-10-10T05:28:23.319Z" }, - { url = "https://files.pythonhosted.org/packages/05/60/026c3efcec23c329657e878cbc0a9a25b42e7eb3971e8c2377cb3284e2b7/ijson-3.4.0.post0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ccddb2894eb7af162ba43b9475ac5825d15d568832f82eb8783036e5d2aebd42", size = 149145, upload-time = "2025-10-10T05:28:24.279Z" }, - { url = "https://files.pythonhosted.org/packages/ed/c2/036499909b7a1bc0bcd85305e4348ad171aeb9df57581287533bdb3497e9/ijson-3.4.0.post0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:61ab0b8c5bf707201dc67e02c116f4b6545c4afd7feb2264b989d242d9c4348a", size = 149046, upload-time = "2025-10-10T05:28:25.186Z" }, - { url = "https://files.pythonhosted.org/packages/ba/75/e7736073ad96867c129f9e799e3e65086badd89dbf3911f76d9b3bf8a115/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:254cfb8c124af68327a0e7a49b50bbdacafd87c4690a3d62c96eb01020a685ef", size = 150356, upload-time = "2025-10-10T05:28:26.135Z" }, - { url = "https://files.pythonhosted.org/packages/9d/1b/1c1575d2cda136985561fcf774fe6c54412cd0fa08005342015af0403193/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:04ac9ca54db20f82aeda6379b5f4f6112fdb150d09ebce04affeab98a17b4ed3", size = 142322, upload-time = 
"2025-10-10T05:28:27.125Z" }, - { url = "https://files.pythonhosted.org/packages/28/4d/aba9871feb624df8494435d1a9ddc7b6a4f782c6044bfc0d770a4b59f145/ijson-3.4.0.post0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a603d7474bf35e7b3a8e49c8dabfc4751841931301adff3f3318171c4e407f32", size = 151386, upload-time = "2025-10-10T05:28:28.274Z" }, - { url = "https://files.pythonhosted.org/packages/3f/9a/791baa83895fb6e492bce2c7a0ea6427b6a41fe854349e62a37d0c9deaf0/ijson-3.4.0.post0-cp313-cp313-win32.whl", hash = "sha256:ec5bb1520cb212ebead7dba048bb9b70552c3440584f83b01b0abc96862e2a09", size = 52352, upload-time = "2025-10-10T05:28:29.191Z" }, - { url = "https://files.pythonhosted.org/packages/a9/0c/061f51493e1da21116d74ee8f6a6b9ae06ca5fa2eb53c3b38b64f9a9a5ae/ijson-3.4.0.post0-cp313-cp313-win_amd64.whl", hash = "sha256:3505dff18bdeb8b171eb28af6df34857e2be80dc01e2e3b624e77215ad58897f", size = 54783, upload-time = "2025-10-10T05:28:30.048Z" }, - { url = "https://files.pythonhosted.org/packages/c7/89/4344e176f2c5f5ef3251c9bfa4ddd5b4cf3f9601fd6ec3f677a3ba0b9c71/ijson-3.4.0.post0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:45a0b1c833ed2620eaf8da958f06ac8351c59e5e470e078400d23814670ed708", size = 92342, upload-time = "2025-10-10T05:28:31.389Z" }, - { url = "https://files.pythonhosted.org/packages/d4/b1/85012c586a6645f9fb8bfa3ef62ed2f303c8d73fc7c2f705111582925980/ijson-3.4.0.post0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7809ec8c8f40228edaaa089f33e811dff4c5b8509702652870d3f286c9682e27", size = 62028, upload-time = "2025-10-10T05:28:32.849Z" }, - { url = "https://files.pythonhosted.org/packages/65/ea/7b7e2815c101d78b33e74d64ddb70cccc377afccd5dda76e566ed3fcb56f/ijson-3.4.0.post0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cf4a34c2cfe852aee75c89c05b0a4531c49dc0be27eeed221afd6fbf9c3e149c", size = 61773, upload-time = "2025-10-10T05:28:34.016Z" }, - { url = 
"https://files.pythonhosted.org/packages/59/7d/2175e599cb77a64f528629bad3ce95dfdf2aa6171d313c1fc00bbfaf0d22/ijson-3.4.0.post0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a39d5d36067604b26b78de70b8951c90e9272450642661fe531a8f7a6936a7fa", size = 198562, upload-time = "2025-10-10T05:28:34.878Z" }, - { url = "https://files.pythonhosted.org/packages/13/97/82247c501c92405bb2fc44ab5efb497335bcb9cf0f5d3a0b04a800737bd8/ijson-3.4.0.post0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83fc738d81c9ea686b452996110b8a6678296c481e0546857db24785bff8da92", size = 216212, upload-time = "2025-10-10T05:28:36.208Z" }, - { url = "https://files.pythonhosted.org/packages/95/ca/b956f507bb02e05ce109fd11ab6a2c054f8b686cc5affe41afe50630984d/ijson-3.4.0.post0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b2a81aee91633868f5b40280e2523f7c5392e920a5082f47c5e991e516b483f6", size = 206618, upload-time = "2025-10-10T05:28:37.243Z" }, - { url = "https://files.pythonhosted.org/packages/3e/12/e827840ab81d86a9882e499097934df53294f05155f1acfcb9a211ac1142/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:56169e298c5a2e7196aaa55da78ddc2415876a74fe6304f81b1eb0d3273346f7", size = 210689, upload-time = "2025-10-10T05:28:38.252Z" }, - { url = "https://files.pythonhosted.org/packages/1b/3b/59238d9422c31a4aefa22ebeb8e599e706158a0ab03669ef623be77a499a/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:eeb9540f0b1a575cbb5968166706946458f98c16e7accc6f2fe71efa29864241", size = 199927, upload-time = "2025-10-10T05:28:39.233Z" }, - { url = "https://files.pythonhosted.org/packages/b6/0f/ec01c36c128c37edb8a5ae8f3de3256009f886338d459210dfe121ee4ba9/ijson-3.4.0.post0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ba3478ff0bb49d7ba88783f491a99b6e3fa929c930ab062d2bb7837e6a38fe88", size = 204455, upload-time = 
"2025-10-10T05:28:40.644Z" }, - { url = "https://files.pythonhosted.org/packages/c8/cf/5560e1db96c6d10a5313be76bf5a1754266cbfb5cc13ff64d107829e07b1/ijson-3.4.0.post0-cp313-cp313t-win32.whl", hash = "sha256:b005ce84e82f28b00bf777a464833465dfe3efa43a0a26c77b5ac40723e1a728", size = 54566, upload-time = "2025-10-10T05:28:41.663Z" }, - { url = "https://files.pythonhosted.org/packages/22/5a/cbb69144c3b25dd56f5421ff7dc0cf3051355579062024772518e4f4b3c5/ijson-3.4.0.post0-cp313-cp313t-win_amd64.whl", hash = "sha256:fe9c84c9b1c8798afa407be1cea1603401d99bfc7c34497e19f4f5e5ddc9b441", size = 57298, upload-time = "2025-10-10T05:28:42.881Z" }, - { url = "https://files.pythonhosted.org/packages/af/0b/a4ce8524fd850302bbf5d9f38d07c0fa981fdbe44951d2fcd036935b67dd/ijson-3.4.0.post0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da6a21b88cbf5ecbc53371283988d22c9643aa71ae2873bbeaefd2dea3b6160b", size = 88361, upload-time = "2025-10-10T05:28:43.73Z" }, - { url = "https://files.pythonhosted.org/packages/be/90/a5e5f33e46f28174a9c8142d12dcb3d26ce358d9a2230b9b15f5c987b3a5/ijson-3.4.0.post0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cf24a48a1c3ca9d44a04feb59ccefeb9aa52bb49b9cb70ad30518c25cce74bb7", size = 59960, upload-time = "2025-10-10T05:28:44.585Z" }, - { url = "https://files.pythonhosted.org/packages/83/e2/551dd7037dda759aa0ce53f0d3d7be03b03c6b05c0b0a5d5ab7a47e6b4b1/ijson-3.4.0.post0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d14427d366f95f21adcb97d0ed1f6d30f6fdc04d0aa1e4de839152c50c2b8d65", size = 59957, upload-time = "2025-10-10T05:28:45.748Z" }, - { url = "https://files.pythonhosted.org/packages/ac/b9/3006384f85cc26cf83dbbd542d362cc336f1e1ddd491e32147cfa46ea8ae/ijson-3.4.0.post0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:339d49f6c5d24051c85d9226be96d2d56e633cb8b7d09dd8099de8d8b51a97e2", size = 139967, upload-time = "2025-10-10T05:28:47.229Z" }, - { url = 
"https://files.pythonhosted.org/packages/77/3b/b5234add8115cbfe8635b6c152fb527327f45e4c0f0bf2e93844b36b5217/ijson-3.4.0.post0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7206afcb396aaef66c2b066997b4e9d9042c4b7d777f4d994e9cec6d322c2fe6", size = 149196, upload-time = "2025-10-10T05:28:48.226Z" }, - { url = "https://files.pythonhosted.org/packages/a2/d2/c4ae543e37d7a9fba09740c221976a63705dbad23a9cda9022fc9fa0f3de/ijson-3.4.0.post0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c8dd327da225887194fe8b93f2b3c9c256353e14a6b9eefc940ed17fde38f5b8", size = 148516, upload-time = "2025-10-10T05:28:49.237Z" }, - { url = "https://files.pythonhosted.org/packages/0d/a1/914b5fb1c26af2474cd04841626e0e95576499a4ca940661fb105ee12dd2/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4810546e66128af51fd4a0c9a640e84e8508e9c15c4f247d8a3e3253b20e1465", size = 149770, upload-time = "2025-10-10T05:28:50.501Z" }, - { url = "https://files.pythonhosted.org/packages/7a/c1/51c3584102d0d85d4aa10cc88dbbe431ecb9fe98160a9e2fad62a4456aed/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:103a0838061297d063bca81d724b0958b616f372bd893bbc278320152252c652", size = 143688, upload-time = "2025-10-10T05:28:51.823Z" }, - { url = "https://files.pythonhosted.org/packages/47/3d/a54f13d766332620bded8ee76bcdd274509ecc53cf99573450f95b3ad910/ijson-3.4.0.post0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:40007c977e230e04118b27322f25a72ae342a3d61464b2057fcd9b21eeb7427a", size = 150688, upload-time = "2025-10-10T05:28:52.757Z" }, - { url = "https://files.pythonhosted.org/packages/72/49/43d97cccf3266da7c044bd42e5083340ad1fd97fbb16d1bcd6791fd8918f/ijson-3.4.0.post0-cp314-cp314-win32.whl", hash = "sha256:f932969fc1fd4449ca141cf5f47ff357656a154a361f28d9ebca0badc5b02297", size = 52882, upload-time = "2025-10-10T05:28:53.708Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/f0/008f1ed4e0fc6f6dc7a5a82ecf08a59bb212514e158954374d440d700e6c/ijson-3.4.0.post0-cp314-cp314-win_amd64.whl", hash = "sha256:3ed19b1e4349240773a8ce4a4bfa450892d4a57949c02c515cd6be5a46b7696a", size = 55568, upload-time = "2025-10-10T05:28:54.79Z" }, - { url = "https://files.pythonhosted.org/packages/69/1c/8a199fded709e762aced89bb7086973c837e432dd714bbad78a6ac789c23/ijson-3.4.0.post0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:226447e40ca9340a39ed07d68ea02ee14b52cb4fe649425b256c1f0073531c83", size = 92345, upload-time = "2025-10-10T05:28:55.657Z" }, - { url = "https://files.pythonhosted.org/packages/be/60/04e97f6a403203bd2eb8849570bdce5719d696b5fb96aa2a62566fe7a1d9/ijson-3.4.0.post0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2c88f0669d45d4b1aa017c9b68d378e7cd15d188dfb6f0209adc78b7f45590a7", size = 62029, upload-time = "2025-10-10T05:28:56.561Z" }, - { url = "https://files.pythonhosted.org/packages/2a/97/e88295f9456ba939d90d4603af28fcabda3b443ef55e709e9381df3daa58/ijson-3.4.0.post0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:56b3089dc28c12492d92cc4896d2be585a89ecae34e25d08c1df88f21815cb50", size = 61776, upload-time = "2025-10-10T05:28:57.401Z" }, - { url = "https://files.pythonhosted.org/packages/1b/9f/0e9c236e720c2de887ab0d7cad8a15d2aa55fb449f792437fc99899957a9/ijson-3.4.0.post0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c117321cfa7b749cc1213f9b4c80dc958f0a206df98ec038ae4bcbbdb8463a15", size = 199808, upload-time = "2025-10-10T05:28:58.62Z" }, - { url = "https://files.pythonhosted.org/packages/0e/70/c21de30e7013e074924cd82057acfc5760e7b2cc41180f80770621b0ad36/ijson-3.4.0.post0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8311f48db6a33116db5c81682f08b6e2405501a4b4e460193ae69fec3cd1f87a", size = 217152, upload-time = "2025-10-10T05:28:59.656Z" }, - { url = 
"https://files.pythonhosted.org/packages/64/78/63a0bcc0707037df4e22bb836451279d850592258c859685a402c27f5d6d/ijson-3.4.0.post0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91c61a3e63e04da648737e6b4abd537df1b46fb8cdf3219b072e790bb3c1a46b", size = 207663, upload-time = "2025-10-10T05:29:00.73Z" }, - { url = "https://files.pythonhosted.org/packages/7d/85/834e9838d69893cb7567e1210be044444213c78f7414aaf1cd241df16078/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1709171023ce82651b2f132575c2e6282e47f64ad67bd3260da476418d0e7895", size = 211157, upload-time = "2025-10-10T05:29:01.87Z" }, - { url = "https://files.pythonhosted.org/packages/2e/9b/9fda503799ebc30397710552e5dedc1d98d9ea6a694e5717415892623a94/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:5f0a72b1e3c0f78551670c12b2fdc1bf05f2796254d9c2055ba319bec2216020", size = 200231, upload-time = "2025-10-10T05:29:02.883Z" }, - { url = "https://files.pythonhosted.org/packages/15/f3/6419d1d5795a16591233d3aa3747b084e82c0c1d7184bdad9be638174560/ijson-3.4.0.post0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b982a3597b0439ce9c8f4cfc929d86c6ed43907908be1e8463a34dc35fe5b258", size = 204825, upload-time = "2025-10-10T05:29:04.242Z" }, - { url = "https://files.pythonhosted.org/packages/1f/8d/a520e6902129c55fa94428ea0a22e8547540d5e7ca30f18b39594a5feea2/ijson-3.4.0.post0-cp314-cp314t-win32.whl", hash = "sha256:4e39bfdc36b0b460ef15a06550a6a385c64c81f7ac205ccff39bd45147918912", size = 55559, upload-time = "2025-10-10T05:29:05.681Z" }, - { url = "https://files.pythonhosted.org/packages/20/67/0ac6dd0045957ba1270b7b1860864f7d8cea4062e70b1083134c587e5768/ijson-3.4.0.post0-cp314-cp314t-win_amd64.whl", hash = "sha256:17e45262a5ddef39894013fb1548ee7094e444c8389eb1a97f86708b19bea03e", size = 58238, upload-time = "2025-10-10T05:29:06.656Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/66/27cfcea16e85b95e33814eae2052dab187206b8820cdd90aa39d32ffb441/ijson-3.4.0.post0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:add9242f886eae844a7410b84aee2bbb8bdc83c624f227cb1fdb2d0476a96cb1", size = 57029, upload-time = "2025-10-10T05:29:19.733Z" }, - { url = "https://files.pythonhosted.org/packages/b8/1b/df3f1561c6629241fb2f8bd7ea1da14e3c2dd16fe9d7cbc97120870ed09c/ijson-3.4.0.post0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:69718ed41710dfcaa7564b0af42abc05875d4f7aaa24627c808867ef32634bc7", size = 56523, upload-time = "2025-10-10T05:29:20.641Z" }, - { url = "https://files.pythonhosted.org/packages/39/0a/6c6a3221ddecf62b696fde0e864415237e05b9a36ab6685a606b8fb3b5a2/ijson-3.4.0.post0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:636b6eca96c6c43c04629c6b37fad0181662eaacf9877c71c698485637f752f9", size = 70546, upload-time = "2025-10-10T05:29:21.526Z" }, - { url = "https://files.pythonhosted.org/packages/42/cb/edf69755e86a3a9f8b418efd60239cb308af46c7c8e12f869423f51c9851/ijson-3.4.0.post0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb5e73028f6e63d27b3d286069fe350ed80a4ccc493b022b590fea4bb086710d", size = 70532, upload-time = "2025-10-10T05:29:22.718Z" }, - { url = "https://files.pythonhosted.org/packages/96/7e/c8730ea39b8712622cd5a1bdff676098208400e37bb92052ba52f93e2aa1/ijson-3.4.0.post0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:461acf4320219459dabe5ed90a45cb86c9ba8cc6d6db9dad0d9427d42f57794c", size = 67927, upload-time = "2025-10-10T05:29:23.596Z" }, - { url = "https://files.pythonhosted.org/packages/ec/f2/53b6e9bdd2a91202066764eaa74b572ba4dede0fe47a5a26f4de34b7541a/ijson-3.4.0.post0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:a0fedf09c0f6ffa2a99e7e7fd9c5f3caf74e655c1ee015a0797383e99382ebc3", size = 54657, upload-time 
= "2025-10-10T05:29:24.482Z" }, + { url = "https://files.pythonhosted.org/packages/6e/32/21c1b47a1afb7319944d0b9685c0997a9d574a77b030c82f6a1ac2cef4eb/ijson-3.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ea8dcac10d86adaeead454bc25c97b68d0bda573d5fd6f86f5e21cf8f7906f88", size = 88935, upload-time = "2026-02-24T03:56:40.591Z" }, + { url = "https://files.pythonhosted.org/packages/86/f7/6ac7ebbb3cd767c87cdcbb950a6754afd1c0977756347bfe03eb8e5b866d/ijson-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:92b0495bbb2150bbf14fc5d98fb6d76bcd1c526605a172709e602e6fedc96495", size = 60567, upload-time = "2026-02-24T03:56:41.919Z" }, + { url = "https://files.pythonhosted.org/packages/c4/98/1140de9ae872468a8bc2e87c171228e25e58b1eb696b7fb430f7590fea44/ijson-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7af0c4c8943be8b09a4e57bdc1da6001dae7b36526d4154fe5c8224738d0921f", size = 60620, upload-time = "2026-02-24T03:56:42.764Z" }, + { url = "https://files.pythonhosted.org/packages/60/e1/67dfe0774e4c7ca6ec8702e280e8764d356f3db54358999818cda6df7679/ijson-3.5.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:45887d5e84ff0d2b138c926cebd9071830733968afe8d9d12080b3c178c7f918", size = 126558, upload-time = "2026-02-24T03:56:43.922Z" }, + { url = "https://files.pythonhosted.org/packages/1f/ef/23d614fc773d428caeb6e197218b7e32adcc668ff5b98777039149571208/ijson-3.5.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9a70b575be8e57a28c80e90ed349ad3a851c3478524c70e36e07d6092ecd12c9", size = 133091, upload-time = "2026-02-24T03:56:45.291Z" }, + { url = "https://files.pythonhosted.org/packages/b8/80/99727603cd8a1d32edafa4392f4056b2420bf48c15afd34481c68a2d4435/ijson-3.5.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2adeecd45830bfd5580ca79a584154713aabef0b9607e16249133df5d2859813", size = 130249, upload-time = 
"2026-02-24T03:56:46.333Z" }, + { url = "https://files.pythonhosted.org/packages/0b/94/3a3d623ca80768e834be8a834ef05960e3b9e79af1a911704ff10c9e8792/ijson-3.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d873e72889e7fc5962ab58909f1adff338d7c2f49e450e5b5fe844eff8155a14", size = 133501, upload-time = "2026-02-24T03:56:47.54Z" }, + { url = "https://files.pythonhosted.org/packages/cf/f6/df2c14ad340834eccee379046f155e4b66a16ddafd445429dee7b3323614/ijson-3.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9a88c559456a79708592234d697645d92b599718f4cbbeaa6515f83ac63ca0ae", size = 128438, upload-time = "2026-02-24T03:56:48.455Z" }, + { url = "https://files.pythonhosted.org/packages/0c/7e/9ff5b8b5fee113f5607bc4149b707382a898eeb545153189b075e5ec8d59/ijson-3.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cf83f58ad50dc0d39a2105cb26d4f359b38f42cef68b913170d4d47d97d97ba5", size = 131116, upload-time = "2026-02-24T03:56:49.737Z" }, + { url = "https://files.pythonhosted.org/packages/64/20/954ce0d440d7cf72a3d8361b14406f9cdbf624b1625c10f8488857c769d6/ijson-3.5.0-cp310-cp310-win32.whl", hash = "sha256:aec4580a7712a19b1f95cd41bed260fc6a31266d37ef941827772a4c199e8143", size = 52724, upload-time = "2026-02-24T03:56:50.932Z" }, + { url = "https://files.pythonhosted.org/packages/24/33/ece87d60502c6115642cbabeb8c122fa982212b392bc4f4ff5aab8e02dac/ijson-3.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a9c4c70501e23e8eb1675330686d1598eebfa14b6f0dbc8f00c2e081cc628fa", size = 55125, upload-time = "2026-02-24T03:56:51.942Z" }, + { url = "https://files.pythonhosted.org/packages/65/da/644343198abca5e0f6e2486063f8d8f3c443ca0ef5e5c890e51ef6032e33/ijson-3.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5616311404b858d32740b7ad8b9a799c62165f5ecb85d0a8ed16c21665a90533", size = 88964, upload-time = "2026-02-24T03:56:53.099Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/63/8621190aa2baf96156dfd4c632b6aa9f1464411e50b98750c09acc0505ea/ijson-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9733f94029dd41702d573ef64752e2556e72aea14623d6dbb7a44ca1ccf30fd", size = 60582, upload-time = "2026-02-24T03:56:54.261Z" }, + { url = "https://files.pythonhosted.org/packages/20/31/6a3f041fdd17dacff33b7d7d3ba3df6dca48740108340c6042f974b2ad20/ijson-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db8398c6721b98412a4f618da8022550c8b9c5d9214040646071b5deb4d4a393", size = 60632, upload-time = "2026-02-24T03:56:55.159Z" }, + { url = "https://files.pythonhosted.org/packages/e4/68/474541998abbdecfd46a744536878335de89aceb9f085bff1aaf35575ceb/ijson-3.5.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c061314845c08163b1784b6076ea5f075372461a32e6916f4e5f211fd4130b64", size = 131988, upload-time = "2026-02-24T03:56:56.35Z" }, + { url = "https://files.pythonhosted.org/packages/cd/32/e05ff8b72a44fe9d192f41c5dcbc35cfa87efc280cdbfe539ffaf4a7535e/ijson-3.5.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1111a1c5ac79119c5d6e836f900c1a53844b50a18af38311baa6bb61e2645aca", size = 138669, upload-time = "2026-02-24T03:56:57.555Z" }, + { url = "https://files.pythonhosted.org/packages/49/b5/955a83b031102c7a602e2c06d03aff0a0e584212f09edb94ccc754d203ac/ijson-3.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e74aff8c681c24002b61b1822f9511d4c384f324f7dbc08c78538e01fdc9fcb", size = 135093, upload-time = "2026-02-24T03:56:59.267Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f2/30250cfcb4d2766669b31f6732689aab2bb91de426a15a3ebe482df7ee48/ijson-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:739a7229b1b0cc5f7e2785a6e7a5fc915e850d3fed9588d0e89a09f88a417253", size = 138715, upload-time = "2026-02-24T03:57:00.491Z" }, + { url = 
"https://files.pythonhosted.org/packages/a2/05/785a145d7e75e04e04480d59b6323cd4b1d9013a6cd8643fa635fbc93490/ijson-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ef88712160360cab3ca6471a4e5418243f8b267cf1fe1620879d1b5558babc71", size = 133194, upload-time = "2026-02-24T03:57:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/14/eb/80d6f8a748dead4034cea0939494a67d10ccf88d6413bf6e860393139676/ijson-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ca0d1b6b5f8166a6248f4309497585fb8553b04bc8179a0260fad636cfdb798", size = 135588, upload-time = "2026-02-24T03:57:03.131Z" }, + { url = "https://files.pythonhosted.org/packages/ee/a8/bbc21f9400ebdbca48fab272593e0d1f875691be1e927d264d90d48b8c47/ijson-3.5.0-cp311-cp311-win32.whl", hash = "sha256:966039cf9047c7967febf7b9a52ec6f38f5464a4c7fbb5565e0224b7376fefff", size = 52721, upload-time = "2026-02-24T03:57:04.365Z" }, + { url = "https://files.pythonhosted.org/packages/0d/2e/4e8c0208b8f920ee80c88c956f93e78318f2cfb646455353b182738b490c/ijson-3.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:6bad6a1634cb7c9f3f4c7e52325283b35b565f5b6cc27d42660c6912ce883422", size = 55121, upload-time = "2026-02-24T03:57:05.498Z" }, + { url = "https://files.pythonhosted.org/packages/aa/17/9c63c7688025f3a8c47ea717b8306649c8c7244e49e20a2be4e3515dc75c/ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755", size = 88536, upload-time = "2026-02-24T03:57:06.809Z" }, + { url = "https://files.pythonhosted.org/packages/6f/dd/e15c2400244c117b06585452ebc63ae254f5a6964f712306afd1422daae0/ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21", size = 60499, upload-time = "2026-02-24T03:57:09.155Z" }, + { url = "https://files.pythonhosted.org/packages/77/a9/bf4fe3538a0c965f16b406f180a06105b875da83f0743e36246be64ef550/ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227", size = 60330, upload-time = "2026-02-24T03:57:10.574Z" }, + { url = "https://files.pythonhosted.org/packages/31/76/6f91bdb019dd978fce1bc5ea1cd620cfc096d258126c91db2c03a20a7f34/ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed", size = 138977, upload-time = "2026-02-24T03:57:11.932Z" }, + { url = "https://files.pythonhosted.org/packages/11/be/bbc983059e48a54b0121ee60042979faed7674490bbe7b2c41560db3f436/ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d", size = 149785, upload-time = "2026-02-24T03:57:13.255Z" }, + { url = "https://files.pythonhosted.org/packages/6d/81/2fee58f9024a3449aee83edfa7167fb5ccd7e1af2557300e28531bb68e16/ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a", size = 149729, upload-time = "2026-02-24T03:57:14.191Z" }, + { url = "https://files.pythonhosted.org/packages/c7/56/f1706761fcc096c9d414b3dcd000b1e6e5c24364c21cfba429837f98ee8d/ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608", size = 150697, upload-time = "2026-02-24T03:57:15.855Z" }, + { url = "https://files.pythonhosted.org/packages/d9/6e/ee0d9c875a0193b632b3e9ccd1b22a50685fb510256ad57ba483b6529f77/ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc", size = 142873, upload-time = "2026-02-24T03:57:16.831Z" }, + { url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" }, + { url = "https://files.pythonhosted.org/packages/b2/71/a7254a065933c0e2ffd3586f46187d84830d3d7b6f41cfa5901820a4f87d/ijson-3.5.0-cp312-cp312-win32.whl", hash = "sha256:6673de9395fb9893c1c79a43becd8c8fbee0a250be6ea324bfd1487bb5e9ee4c", size = 53079, upload-time = "2026-02-24T03:57:18.703Z" }, + { url = "https://files.pythonhosted.org/packages/8f/7b/2edca79b359fc9f95d774616867a03ecccdf333797baf5b3eea79733918c/ijson-3.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:f4f7fabd653459dcb004175235f310435959b1bb5dfa8878578391c6cc9ad944", size = 55500, upload-time = "2026-02-24T03:57:20.428Z" }, + { url = "https://files.pythonhosted.org/packages/a2/71/d67e764a712c3590627480643a3b51efcc3afa4ef3cb54ee4c989073c97e/ijson-3.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e9cedc10e40dd6023c351ed8bfc7dcfce58204f15c321c3c1546b9c7b12562a4", size = 88544, upload-time = "2026-02-24T03:57:21.293Z" }, + { url = "https://files.pythonhosted.org/packages/1a/39/f1c299371686153fa3cf5c0736b96247a87a1bee1b7145e6d21f359c505a/ijson-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3647649f782ee06c97490b43680371186651f3f69bebe64c6083ee7615d185e5", size = 60495, upload-time = "2026-02-24T03:57:22.501Z" }, + { url = "https://files.pythonhosted.org/packages/16/94/b1438e204d75e01541bebe3e668fe3e68612d210e9931ae1611062dd0a56/ijson-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90e74be1dce05fce73451c62d1118671f78f47c9f6be3991c82b91063bf01fc9", size = 60325, upload-time = "2026-02-24T03:57:23.332Z" }, + { url = "https://files.pythonhosted.org/packages/30/e2/4aa9c116fa86cc8b0f574f3c3a47409edc1cd4face05d0e589a5a176b05d/ijson-3.5.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78e9ad73e7be2dd80627504bd5cbf512348c55ce2c06e362ed7683b5220e8568", size = 138774, upload-time = "2026-02-24T03:57:24.683Z" }, + { 
url = "https://files.pythonhosted.org/packages/d2/d2/738b88752a70c3be1505faa4dcd7110668c2712e582a6a36488ed1e295d4/ijson-3.5.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9577449313cc94be89a4fe4b3e716c65f09cc19636d5a6b2861c4e80dddebd58", size = 149820, upload-time = "2026-02-24T03:57:26.062Z" }, + { url = "https://files.pythonhosted.org/packages/ed/df/0b3ab9f393ca8f72ea03bc896ba9fdc987e90ae08cdb51c32a4ee0c14d5e/ijson-3.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e4c1178fb50aff5f5701a30a5152ead82a14e189ce0f6102fa1b5f10b2f54ff", size = 149747, upload-time = "2026-02-24T03:57:27.308Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a3/b0037119f75131b78cb00acc2657b1a9d0435475f1f2c5f8f5a170b66b9c/ijson-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0eb402ab026ffb37a918d75af2b7260fe6cfbce13232cc83728a714dd30bd81d", size = 151027, upload-time = "2026-02-24T03:57:28.522Z" }, + { url = "https://files.pythonhosted.org/packages/22/a0/cb344de1862bf09d8f769c9d25c944078c87dd59a1b496feec5ad96309a4/ijson-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b08ee08355f9f729612a8eb9bf69cc14f9310c3b2a487c6f1c3c65d85216ec4", size = 142996, upload-time = "2026-02-24T03:57:29.774Z" }, + { url = "https://files.pythonhosted.org/packages/ca/32/a8ffd67182e02ea61f70f62daf43ded4fa8a830a2520a851d2782460aba8/ijson-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bda62b6d48442903e7bf56152108afb7f0f1293c2b9bef2f2c369defea76ab18", size = 152068, upload-time = "2026-02-24T03:57:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/3c/d1/3578df8e75d446aab0ae92e27f641341f586b85e1988536adebc65300cb4/ijson-3.5.0-cp313-cp313-win32.whl", hash = "sha256:8d073d9b13574cfa11083cc7267c238b7a6ed563c2661e79192da4a25f09c82c", size = 53065, upload-time = "2026-02-24T03:57:31.93Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/a2/f7cdaf5896710da3e69e982e44f015a83d168aa0f3a89b6f074b5426779d/ijson-3.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:2419f9e32e0968a876b04d8f26aeac042abd16f582810b576936bbc4c6015069", size = 55499, upload-time = "2026-02-24T03:57:32.773Z" }, + { url = "https://files.pythonhosted.org/packages/42/65/13e2492d17e19a2084523e18716dc2809159f2287fd2700c735f311e76c4/ijson-3.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4d4b0cd676b8c842f7648c1a783448fac5cd3b98289abd83711b3e275e143524", size = 93019, upload-time = "2026-02-24T03:57:33.976Z" }, + { url = "https://files.pythonhosted.org/packages/33/92/483fc97ece0c3f1cecabf48f6a7a36e89d19369eec462faaeaa34c788992/ijson-3.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:252dec3680a48bb82d475e36b4ae1b3a9d7eb690b951bb98a76c5fe519e30188", size = 62714, upload-time = "2026-02-24T03:57:34.819Z" }, + { url = "https://files.pythonhosted.org/packages/4b/88/793fe020a0fe9d9eed4c285cf4a5cfdb0a935708b3bde0d72f35c794b513/ijson-3.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:aa1b5dca97d323931fde2501172337384c958914d81a9dac7f00f0d4bfc76bc7", size = 62460, upload-time = "2026-02-24T03:57:35.874Z" }, + { url = "https://files.pythonhosted.org/packages/51/69/f1a2690aa8d4df1f4e262b385e65a933ffdc250b091531bac9a449c19e16/ijson-3.5.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7a5ec7fd86d606094bba6f6f8f87494897102fa4584ef653f3005c51a784c320", size = 199273, upload-time = "2026-02-24T03:57:37.07Z" }, + { url = "https://files.pythonhosted.org/packages/ea/a2/f1346d5299e79b988ab472dc773d5381ec2d57c23cb2f1af3ede4a810e62/ijson-3.5.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009f41443e1521847701c6d87fa3923c0b1961be3c7e7de90947c8cb92ea7c44", size = 216884, upload-time = "2026-02-24T03:57:38.346Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/3c/8b637e869be87799e6c2c3c275a30a546f086b1aed77e2b7f11512168c5a/ijson-3.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4c3651d1f9fe2839a93fdf8fd1d5ca3a54975349894249f3b1b572bcc4bd577", size = 207306, upload-time = "2026-02-24T03:57:39.718Z" }, + { url = "https://files.pythonhosted.org/packages/7f/7c/18b1c1df6951ca056782d7580ec40cea4ff9a27a0947d92640d1cc8c4ae3/ijson-3.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:945b7abcfcfeae2cde17d8d900870f03536494245dda7ad4f8d056faa303256c", size = 211364, upload-time = "2026-02-24T03:57:40.953Z" }, + { url = "https://files.pythonhosted.org/packages/f3/55/e795812e82851574a9dba8a53fde045378f531ef14110c6fb55dbd23b443/ijson-3.5.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0574b0a841ff97495c13e9d7260fbf3d85358b061f540c52a123db9dbbaa2ed6", size = 200608, upload-time = "2026-02-24T03:57:42.272Z" }, + { url = "https://files.pythonhosted.org/packages/5c/cd/013c85b4749b57a4cb4c2670014d1b32b8db4ab1a7be92ea7aeb5d7fe7b5/ijson-3.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f969ffb2b89c5cdf686652d7fb66252bc72126fa54d416317411497276056a18", size = 205127, upload-time = "2026-02-24T03:57:43.286Z" }, + { url = "https://files.pythonhosted.org/packages/0e/7c/faf643733e3ab677f180018f6a855c4ef70b7c46540987424c563c959e42/ijson-3.5.0-cp313-cp313t-win32.whl", hash = "sha256:59d3f9f46deed1332ad669518b8099920512a78bda64c1f021fcd2aff2b36693", size = 55282, upload-time = "2026-02-24T03:57:44.353Z" }, + { url = "https://files.pythonhosted.org/packages/69/22/94ddb47c24b491377aca06cd8fc9202cad6ab50619842457d2beefde21ea/ijson-3.5.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c2839fa233746d8aad3b8cd2354e441613f5df66d721d59da4a09394bd1db2b", size = 58016, upload-time = "2026-02-24T03:57:45.237Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/93/0868efe753dc1df80cc405cf0c1f2527a6991643607c741bff8dcb899b3b/ijson-3.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25a5a6b2045c90bb83061df27cfa43572afa43ba9408611d7bfe237c20a731a9", size = 89094, upload-time = "2026-02-24T03:57:46.115Z" }, + { url = "https://files.pythonhosted.org/packages/24/94/fd5a832a0df52ef5e4e740f14ac8640725d61034a1b0c561e8b5fb424706/ijson-3.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8976c54c0b864bc82b951bae06567566ac77ef63b90a773a69cd73aab47f4f4f", size = 60715, upload-time = "2026-02-24T03:57:47.552Z" }, + { url = "https://files.pythonhosted.org/packages/70/79/1b9a90af5732491f9eec751ee211b86b11011e1158c555c06576d52c3919/ijson-3.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:859eb2038f7f1b0664df4241957694cc35e6295992d71c98659b22c69b3cbc10", size = 60638, upload-time = "2026-02-24T03:57:48.428Z" }, + { url = "https://files.pythonhosted.org/packages/23/6f/2c551ea980fe56f68710a8d5389cfbd015fc45aaafd17c3c52c346db6aa1/ijson-3.5.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c911aa02991c7c0d3639b6619b93a93210ff1e7f58bf7225d613abea10adc78e", size = 140667, upload-time = "2026-02-24T03:57:49.314Z" }, + { url = "https://files.pythonhosted.org/packages/25/0e/27b887879ba6a5bc29766e3c5af4942638c952220fd63e1e442674f7883a/ijson-3.5.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:903cbdc350173605220edc19796fbea9b2203c8b3951fb7335abfa8ed37afda8", size = 149850, upload-time = "2026-02-24T03:57:50.329Z" }, + { url = "https://files.pythonhosted.org/packages/da/1e/23e10e1bc04bf31193b21e2960dce14b17dbd5d0c62204e8401c59d62c08/ijson-3.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4549d96ded5b8efa71639b2160235415f6bdb8c83367615e2dbabcb72755c33", size = 149206, upload-time = "2026-02-24T03:57:51.261Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/90/e552f6495063b235cf7fa2c592f6597c057077195e517b842a0374fd470c/ijson-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6b2dcf6349e6042d83f3f8c39ce84823cf7577eba25bac5aae5e39bbbbbe9c1c", size = 150438, upload-time = "2026-02-24T03:57:52.198Z" }, + { url = "https://files.pythonhosted.org/packages/5c/18/45bf8f297c41b42a1c231d261141097babd953d2c28a07be57ae4c3a1a02/ijson-3.5.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e44af39e6f8a17e5627dcd89715d8279bf3474153ff99aae031a936e5c5572e5", size = 144369, upload-time = "2026-02-24T03:57:53.22Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3a/deb9772bb2c0cead7ad64f00c3598eec9072bdf511818e70e2c512eeabbe/ijson-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9260332304b7e7828db56d43f08fc970a3ab741bf84ff10189361ea1b60c395b", size = 151352, upload-time = "2026-02-24T03:57:54.375Z" }, + { url = "https://files.pythonhosted.org/packages/e4/51/67f4d80cd58ad7eab0cd1af5fe28b961886338956b2f88c0979e21914346/ijson-3.5.0-cp314-cp314-win32.whl", hash = "sha256:63bc8121bb422f6969ced270173a3fa692c29d4ae30c860a2309941abd81012a", size = 53610, upload-time = "2026-02-24T03:57:55.655Z" }, + { url = "https://files.pythonhosted.org/packages/70/d3/263672ea22983ba3940f1534316dbc9200952c1c2a2332d7a664e4eaa7ae/ijson-3.5.0-cp314-cp314-win_amd64.whl", hash = "sha256:01b6dad72b7b7df225ef970d334556dfad46c696a2c6767fb5d9ed8889728bca", size = 56301, upload-time = "2026-02-24T03:57:56.584Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d9/86f7fac35e0835faa188085ae0579e813493d5261ce056484015ad533445/ijson-3.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:2ea4b676ec98e374c1df400a47929859e4fa1239274339024df4716e802aa7e4", size = 93069, upload-time = "2026-02-24T03:57:57.849Z" }, + { url = "https://files.pythonhosted.org/packages/33/d2/e7366ed9c6e60228d35baf4404bac01a126e7775ea8ce57f560125ed190a/ijson-3.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = 
"sha256:014586eec043e23c80be9a923c56c3a0920a0f1f7d17478ce7bc20ba443968ef", size = 62767, upload-time = "2026-02-24T03:57:58.758Z" }, + { url = "https://files.pythonhosted.org/packages/35/8b/3e703e8cc4b3ada79f13b28070b51d9550c578f76d1968657905857b2ddd/ijson-3.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5b8b886b0248652d437f66e7c5ac318bbdcb2c7137a7e5327a68ca00b286f5f", size = 62467, upload-time = "2026-02-24T03:58:00.261Z" }, + { url = "https://files.pythonhosted.org/packages/21/42/0c91af32c1ee8a957fdac2e051b5780756d05fd34e4b60d94a08d51bac1d/ijson-3.5.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:498fd46ae2349297e43acf97cdc421e711dbd7198418677259393d2acdc62d78", size = 200447, upload-time = "2026-02-24T03:58:01.591Z" }, + { url = "https://files.pythonhosted.org/packages/f9/80/796ea0e391b7e2d45c5b1b451734bba03f81c2984cf955ea5eaa6c4920ad/ijson-3.5.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a51b4f9b81f12793731cf226266d1de2112c3c04ba4a04117ad4e466897e05", size = 217820, upload-time = "2026-02-24T03:58:02.598Z" }, + { url = "https://files.pythonhosted.org/packages/38/14/52b6613fdda4078c62eb5b4fe3efc724ddc55a4ad524c93de51830107aa3/ijson-3.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9636c710dc4ac4a281baa266a64f323b4cc165cec26836af702c44328b59a515", size = 208310, upload-time = "2026-02-24T03:58:04.759Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ad/8b3105a78774fd4a65e534a21d975ef3a77e189489fe3029ebcaeba5e243/ijson-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f7168a39e8211107666d71b25693fd1b2bac0b33735ef744114c403c6cac21e1", size = 211843, upload-time = "2026-02-24T03:58:05.836Z" }, + { url = "https://files.pythonhosted.org/packages/36/ab/a2739f6072d6e1160581bc3ed32da614c8cced023dcd519d9c5fa66e0425/ijson-3.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = 
"sha256:8696454245415bc617ab03b0dc3ae4c86987df5dc6a90bad378fe72c5409d89e", size = 200906, upload-time = "2026-02-24T03:58:07.788Z" }, + { url = "https://files.pythonhosted.org/packages/6d/5e/e06c2de3c3d4a9cfb655c1ad08a68fb72838d271072cdd3196576ac4431a/ijson-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c21bfb61f71f191565885bf1bc29e0a186292d866b4880637b833848360bdc1b", size = 205495, upload-time = "2026-02-24T03:58:09.163Z" }, + { url = "https://files.pythonhosted.org/packages/7c/11/778201eb2e202ddd76b36b0fb29bf3d8e3c167389d8aa883c62524e49f47/ijson-3.5.0-cp314-cp314t-win32.whl", hash = "sha256:a2619460d6795b70d0155e5bf016200ac8a63ab5397aa33588bb02b6c21759e6", size = 56280, upload-time = "2026-02-24T03:58:10.116Z" }, + { url = "https://files.pythonhosted.org/packages/23/28/96711503245339084c8086b892c47415895eba49782d6cc52d9f4ee50301/ijson-3.5.0-cp314-cp314t-win_amd64.whl", hash = "sha256:4f24b78d4ef028d17eb57ad1b16c0aed4a17bdd9badbf232dc5d9305b7e13854", size = 58965, upload-time = "2026-02-24T03:58:11.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/3b/d31ecfa63a218978617446159f3d77aab2417a5bd2885c425b176353ff78/ijson-3.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d64c624da0e9d692d6eb0ff63a79656b59d76bf80773a17c5b0f835e4e8ef627", size = 57715, upload-time = "2026-02-24T03:58:24.545Z" }, + { url = "https://files.pythonhosted.org/packages/30/51/b170e646d378e8cccf9637c05edb5419b00c2c4df64b0258c3af5355608e/ijson-3.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:876f7df73b7e0d6474f9caa729b9cdbfc8e76de9075a4887dfd689e29e85c4ca", size = 57205, upload-time = "2026-02-24T03:58:25.681Z" }, + { url = "https://files.pythonhosted.org/packages/ef/83/44dbd0231b0a8c6c14d27473d10c4e27dfbce7d5d9a833c79e3e6c33eb40/ijson-3.5.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e7dbff2c8d9027809b0cde663df44f3210da10ea377121d42896fb6ee405dd31", size = 71229, upload-time = 
"2026-02-24T03:58:27.103Z" }, + { url = "https://files.pythonhosted.org/packages/c8/98/cf84048b7c6cec888826e696a31f45bee7ebcac15e532b6be1fc4c2c9608/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4217a1edc278660679e1197c83a1a2a2d367792bfbb2a3279577f4b59b93730d", size = 71217, upload-time = "2026-02-24T03:58:28.021Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/e34c729a87ff67dc6540f6bcc896626158e691d433ab57db0086d73decd2/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04f0fc740311388ee745ba55a12292b722d6f52000b11acbb913982ba5fbdf87", size = 68618, upload-time = "2026-02-24T03:58:28.918Z" }, + { url = "https://files.pythonhosted.org/packages/c1/0f/e849d072f2e0afe49627de3995fc9dae54b4c804c70c0840f928d95c10e1/ijson-3.5.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:fdeee6957f92e0c114f65c55cf8fe7eabb80cfacab64eea6864060913173f66d", size = 55369, upload-time = "2026-02-24T03:58:29.839Z" }, ] [[package]] @@ -2957,7 +2963,8 @@ version = "0.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "espeakng-loader" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "onnxruntime" }, { name = "phonemizer-fork" }, ] @@ -2996,7 +3003,8 @@ dependencies = [ { name = "langchain" }, { name = "langchain-core" }, { name = "langsmith" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pydantic-settings" }, { name 
= "pyyaml" }, { name = "requests" }, @@ -3055,7 +3063,7 @@ wheels = [ [[package]] name = "langsmith" -version = "0.7.5" +version = "0.7.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, @@ -3068,9 +3076,9 @@ dependencies = [ { name = "xxhash" }, { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e9/55/a3641cae990c842d3f4c52e5308b391267c98ce531a7a586dfedf1a78c42/langsmith-0.7.5.tar.gz", hash = "sha256:e3bfc2d7ff0a6f9a719125e1e136b5f4fa11828a2be8979f47ee1a4c0510030e", size = 1038926, upload-time = "2026-02-19T20:47:51.144Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/01/c26b1d3a68764acd050cbb98f3ca922a25b3e4ece5768ee868f56206b4d4/langsmith-0.7.9.tar.gz", hash = "sha256:c6dfcc4cb8fea249714ac60a1963faa84cc59ded9cd1882794ffce8a8d1d1588", size = 1136295, upload-time = "2026-02-27T22:37:59.309Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/32/0e/65b3fab6db843150ed38f226b39213565c644f0aaa515e0168bb1eaee5ae/langsmith-0.7.5-py3-none-any.whl", hash = "sha256:c120c43c98af5f5af8877341f8256aba1a170a292645b31572f06b0cf703c683", size = 324337, upload-time = "2026-02-19T20:47:47.537Z" }, + { url = "https://files.pythonhosted.org/packages/b6/c9/2d5e5f654f97a4d38a0ff1b3004751c2cd81ceca05d603174e49f942b196/langsmith-0.7.9-py3-none-any.whl", hash = "sha256:e73478f4c4ae9b7407e0fcdced181f9f8b0e024c62a1552dbf0667ef6b19e82d", size = 344099, upload-time = "2026-02-27T22:37:57.497Z" }, ] [[package]] @@ -3088,7 +3096,8 @@ version = "1.0.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "protobuf" }, { name = "types-protobuf" }, ] @@ -3132,30 
+3141,30 @@ wheels = [ [[package]] name = "llvmlite" -version = "0.44.0" +version = "0.46.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/89/6a/95a3d3610d5c75293d5dbbb2a76480d5d4eeba641557b69fe90af6c5b84e/llvmlite-0.44.0.tar.gz", hash = "sha256:07667d66a5d150abed9157ab6c0b9393c9356f229784a4385c02f99e94fc94d4", size = 171880, upload-time = "2025-01-20T11:14:41.342Z" } +sdist = { url = "https://files.pythonhosted.org/packages/74/cd/08ae687ba099c7e3d21fe2ea536500563ef1943c5105bf6ab4ee3829f68e/llvmlite-0.46.0.tar.gz", hash = "sha256:227c9fd6d09dce2783c18b754b7cd9d9b3b3515210c46acc2d3c5badd9870ceb", size = 193456, upload-time = "2025-12-08T18:15:36.295Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/41/75/d4863ddfd8ab5f6e70f4504cf8cc37f4e986ec6910f4ef8502bb7d3c1c71/llvmlite-0.44.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9fbadbfba8422123bab5535b293da1cf72f9f478a65645ecd73e781f962ca614", size = 28132306, upload-time = "2025-01-20T11:12:18.634Z" }, - { url = "https://files.pythonhosted.org/packages/37/d9/6e8943e1515d2f1003e8278819ec03e4e653e2eeb71e4d00de6cfe59424e/llvmlite-0.44.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cccf8eb28f24840f2689fb1a45f9c0f7e582dd24e088dcf96e424834af11f791", size = 26201096, upload-time = "2025-01-20T11:12:24.544Z" }, - { url = "https://files.pythonhosted.org/packages/aa/46/8ffbc114def88cc698906bf5acab54ca9fdf9214fe04aed0e71731fb3688/llvmlite-0.44.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7202b678cdf904823c764ee0fe2dfe38a76981f4c1e51715b4cb5abb6cf1d9e8", size = 42361859, upload-time = "2025-01-20T11:12:31.839Z" }, - { url = "https://files.pythonhosted.org/packages/30/1c/9366b29ab050a726af13ebaae8d0dff00c3c58562261c79c635ad4f5eb71/llvmlite-0.44.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:40526fb5e313d7b96bda4cbb2c85cd5374e04d80732dd36a282d72a560bb6408", size = 41184199, 
upload-time = "2025-01-20T11:12:40.049Z" }, - { url = "https://files.pythonhosted.org/packages/69/07/35e7c594b021ecb1938540f5bce543ddd8713cff97f71d81f021221edc1b/llvmlite-0.44.0-cp310-cp310-win_amd64.whl", hash = "sha256:41e3839150db4330e1b2716c0be3b5c4672525b4c9005e17c7597f835f351ce2", size = 30332381, upload-time = "2025-01-20T11:12:47.054Z" }, - { url = "https://files.pythonhosted.org/packages/b5/e2/86b245397052386595ad726f9742e5223d7aea999b18c518a50e96c3aca4/llvmlite-0.44.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:eed7d5f29136bda63b6d7804c279e2b72e08c952b7c5df61f45db408e0ee52f3", size = 28132305, upload-time = "2025-01-20T11:12:53.936Z" }, - { url = "https://files.pythonhosted.org/packages/ff/ec/506902dc6870249fbe2466d9cf66d531265d0f3a1157213c8f986250c033/llvmlite-0.44.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ace564d9fa44bb91eb6e6d8e7754977783c68e90a471ea7ce913bff30bd62427", size = 26201090, upload-time = "2025-01-20T11:12:59.847Z" }, - { url = "https://files.pythonhosted.org/packages/99/fe/d030f1849ebb1f394bb3f7adad5e729b634fb100515594aca25c354ffc62/llvmlite-0.44.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5d22c3bfc842668168a786af4205ec8e3ad29fb1bc03fd11fd48460d0df64c1", size = 42361858, upload-time = "2025-01-20T11:13:07.623Z" }, - { url = "https://files.pythonhosted.org/packages/d7/7a/ce6174664b9077fc673d172e4c888cb0b128e707e306bc33fff8c2035f0d/llvmlite-0.44.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f01a394e9c9b7b1d4e63c327b096d10f6f0ed149ef53d38a09b3749dcf8c9610", size = 41184200, upload-time = "2025-01-20T11:13:20.058Z" }, - { url = "https://files.pythonhosted.org/packages/5f/c6/258801143975a6d09a373f2641237992496e15567b907a4d401839d671b8/llvmlite-0.44.0-cp311-cp311-win_amd64.whl", hash = "sha256:d8489634d43c20cd0ad71330dde1d5bc7b9966937a263ff1ec1cebb90dc50955", size = 30331193, upload-time = "2025-01-20T11:13:26.976Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/86/e3c3195b92e6e492458f16d233e58a1a812aa2bfbef9bdd0fbafcec85c60/llvmlite-0.44.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:1d671a56acf725bf1b531d5ef76b86660a5ab8ef19bb6a46064a705c6ca80aad", size = 28132297, upload-time = "2025-01-20T11:13:32.57Z" }, - { url = "https://files.pythonhosted.org/packages/d6/53/373b6b8be67b9221d12b24125fd0ec56b1078b660eeae266ec388a6ac9a0/llvmlite-0.44.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f79a728e0435493611c9f405168682bb75ffd1fbe6fc360733b850c80a026db", size = 26201105, upload-time = "2025-01-20T11:13:38.744Z" }, - { url = "https://files.pythonhosted.org/packages/cb/da/8341fd3056419441286c8e26bf436923021005ece0bff5f41906476ae514/llvmlite-0.44.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0143a5ef336da14deaa8ec26c5449ad5b6a2b564df82fcef4be040b9cacfea9", size = 42361901, upload-time = "2025-01-20T11:13:46.711Z" }, - { url = "https://files.pythonhosted.org/packages/53/ad/d79349dc07b8a395a99153d7ce8b01d6fcdc9f8231355a5df55ded649b61/llvmlite-0.44.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d752f89e31b66db6f8da06df8b39f9b91e78c5feea1bf9e8c1fba1d1c24c065d", size = 41184247, upload-time = "2025-01-20T11:13:56.159Z" }, - { url = "https://files.pythonhosted.org/packages/e2/3b/a9a17366af80127bd09decbe2a54d8974b6d8b274b39bf47fbaedeec6307/llvmlite-0.44.0-cp312-cp312-win_amd64.whl", hash = "sha256:eae7e2d4ca8f88f89d315b48c6b741dcb925d6a1042da694aa16ab3dd4cbd3a1", size = 30332380, upload-time = "2025-01-20T11:14:02.442Z" }, - { url = "https://files.pythonhosted.org/packages/89/24/4c0ca705a717514c2092b18476e7a12c74d34d875e05e4d742618ebbf449/llvmlite-0.44.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:319bddd44e5f71ae2689859b7203080716448a3cd1128fb144fe5c055219d516", size = 28132306, upload-time = "2025-01-20T11:14:09.035Z" }, - { url = 
"https://files.pythonhosted.org/packages/01/cf/1dd5a60ba6aee7122ab9243fd614abcf22f36b0437cbbe1ccf1e3391461c/llvmlite-0.44.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c58867118bad04a0bb22a2e0068c693719658105e40009ffe95c7000fcde88e", size = 26201090, upload-time = "2025-01-20T11:14:15.401Z" }, - { url = "https://files.pythonhosted.org/packages/d2/1b/656f5a357de7135a3777bd735cc7c9b8f23b4d37465505bd0eaf4be9befe/llvmlite-0.44.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46224058b13c96af1365290bdfebe9a6264ae62fb79b2b55693deed11657a8bf", size = 42361904, upload-time = "2025-01-20T11:14:22.949Z" }, - { url = "https://files.pythonhosted.org/packages/d8/e1/12c5f20cb9168fb3464a34310411d5ad86e4163c8ff2d14a2b57e5cc6bac/llvmlite-0.44.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0097052c32bf721a4efc03bd109d335dfa57d9bffb3d4c24cc680711b8b4fc", size = 41184245, upload-time = "2025-01-20T11:14:31.731Z" }, - { url = "https://files.pythonhosted.org/packages/d0/81/e66fc86539293282fd9cb7c9417438e897f369e79ffb62e1ae5e5154d4dd/llvmlite-0.44.0-cp313-cp313-win_amd64.whl", hash = "sha256:2fb7c4f2fb86cbae6dca3db9ab203eeea0e22d73b99bc2341cdf9de93612e930", size = 30331193, upload-time = "2025-01-20T11:14:38.578Z" }, + { url = "https://files.pythonhosted.org/packages/3d/a4/3959e1c61c5ca9db7921e5fd115b344c29b9d57a5dadd87bef97963ca1a5/llvmlite-0.46.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4323177e936d61ae0f73e653e2e614284d97d14d5dd12579adc92b6c2b0597b0", size = 37232766, upload-time = "2025-12-08T18:14:34.765Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a5/a4d916f1015106e1da876028606a8e87fd5d5c840f98c87bc2d5153b6a2f/llvmlite-0.46.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a2d461cb89537b7c20feb04c46c32e12d5ad4f0896c9dfc0f60336219ff248e", size = 56275176, upload-time = "2025-12-08T18:14:37.944Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/7f/a7f2028805dac8c1a6fae7bda4e739b7ebbcd45b29e15bf6d21556fcd3d5/llvmlite-0.46.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b1f6595a35b7b39c3518b85a28bf18f45e075264e4b2dce3f0c2a4f232b4a910", size = 55128629, upload-time = "2025-12-08T18:14:41.674Z" }, + { url = "https://files.pythonhosted.org/packages/b2/bc/4689e1ba0c073c196b594471eb21be0aa51d9e64b911728aa13cd85ef0ae/llvmlite-0.46.0-cp310-cp310-win_amd64.whl", hash = "sha256:e7a34d4aa6f9a97ee006b504be6d2b8cb7f755b80ab2f344dda1ef992f828559", size = 38138651, upload-time = "2025-12-08T18:14:45.845Z" }, + { url = "https://files.pythonhosted.org/packages/7a/a1/2ad4b2367915faeebe8447f0a057861f646dbf5fbbb3561db42c65659cf3/llvmlite-0.46.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82f3d39b16f19aa1a56d5fe625883a6ab600d5cc9ea8906cca70ce94cabba067", size = 37232766, upload-time = "2025-12-08T18:14:48.836Z" }, + { url = "https://files.pythonhosted.org/packages/12/b5/99cf8772fdd846c07da4fd70f07812a3c8fd17ea2409522c946bb0f2b277/llvmlite-0.46.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a3df43900119803bbc52720e758c76f316a9a0f34612a886862dfe0a5591a17e", size = 56275175, upload-time = "2025-12-08T18:14:51.604Z" }, + { url = "https://files.pythonhosted.org/packages/38/f2/ed806f9c003563732da156139c45d970ee435bd0bfa5ed8de87ba972b452/llvmlite-0.46.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de183fefc8022d21b0aa37fc3e90410bc3524aed8617f0ff76732fc6c3af5361", size = 55128630, upload-time = "2025-12-08T18:14:55.107Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/8f5a37a65fc9b7b17408508145edd5f86263ad69c19d3574e818f533a0eb/llvmlite-0.46.0-cp311-cp311-win_amd64.whl", hash = "sha256:e8b10bc585c58bdffec9e0c309bb7d51be1f2f15e169a4b4d42f2389e431eb93", size = 38138652, upload-time = "2025-12-08T18:14:58.171Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/f8/4db016a5e547d4e054ff2f3b99203d63a497465f81ab78ec8eb2ff7b2304/llvmlite-0.46.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b9588ad4c63b4f0175a3984b85494f0c927c6b001e3a246a3a7fb3920d9a137", size = 37232767, upload-time = "2025-12-08T18:15:00.737Z" }, + { url = "https://files.pythonhosted.org/packages/aa/85/4890a7c14b4fa54400945cb52ac3cd88545bbdb973c440f98ca41591cdc5/llvmlite-0.46.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3535bd2bb6a2d7ae4012681ac228e5132cdb75fefb1bcb24e33f2f3e0c865ed4", size = 56275176, upload-time = "2025-12-08T18:15:03.936Z" }, + { url = "https://files.pythonhosted.org/packages/6a/07/3d31d39c1a1a08cd5337e78299fca77e6aebc07c059fbd0033e3edfab45c/llvmlite-0.46.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cbfd366e60ff87ea6cc62f50bc4cd800ebb13ed4c149466f50cf2163a473d1e", size = 55128630, upload-time = "2025-12-08T18:15:07.196Z" }, + { url = "https://files.pythonhosted.org/packages/2a/6b/d139535d7590a1bba1ceb68751bef22fadaa5b815bbdf0e858e3875726b2/llvmlite-0.46.0-cp312-cp312-win_amd64.whl", hash = "sha256:398b39db462c39563a97b912d4f2866cd37cba60537975a09679b28fbbc0fb38", size = 38138940, upload-time = "2025-12-08T18:15:10.162Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ff/3eba7eb0aed4b6fca37125387cd417e8c458e750621fce56d2c541f67fa8/llvmlite-0.46.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:30b60892d034bc560e0ec6654737aaa74e5ca327bd8114d82136aa071d611172", size = 37232767, upload-time = "2025-12-08T18:15:13.22Z" }, + { url = "https://files.pythonhosted.org/packages/0e/54/737755c0a91558364b9200702c3c9c15d70ed63f9b98a2c32f1c2aa1f3ba/llvmlite-0.46.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6cc19b051753368a9c9f31dc041299059ee91aceec81bd57b0e385e5d5bf1a54", size = 56275176, upload-time = "2025-12-08T18:15:16.339Z" }, + { url = 
"https://files.pythonhosted.org/packages/e6/91/14f32e1d70905c1c0aa4e6609ab5d705c3183116ca02ac6df2091868413a/llvmlite-0.46.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bca185892908f9ede48c0acd547fe4dc1bafefb8a4967d47db6cf664f9332d12", size = 55128629, upload-time = "2025-12-08T18:15:19.493Z" }, + { url = "https://files.pythonhosted.org/packages/4a/a7/d526ae86708cea531935ae777b6dbcabe7db52718e6401e0fb9c5edea80e/llvmlite-0.46.0-cp313-cp313-win_amd64.whl", hash = "sha256:67438fd30e12349ebb054d86a5a1a57fd5e87d264d2451bcfafbbbaa25b82a35", size = 38138941, upload-time = "2025-12-08T18:15:22.536Z" }, + { url = "https://files.pythonhosted.org/packages/95/ae/af0ffb724814cc2ea64445acad05f71cff5f799bb7efb22e47ee99340dbc/llvmlite-0.46.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:d252edfb9f4ac1fcf20652258e3f102b26b03eef738dc8a6ffdab7d7d341d547", size = 37232768, upload-time = "2025-12-08T18:15:25.055Z" }, + { url = "https://files.pythonhosted.org/packages/c9/19/5018e5352019be753b7b07f7759cdabb69ca5779fea2494be8839270df4c/llvmlite-0.46.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:379fdd1c59badeff8982cb47e4694a6143bec3bb49aa10a466e095410522064d", size = 56275173, upload-time = "2025-12-08T18:15:28.109Z" }, + { url = "https://files.pythonhosted.org/packages/9f/c9/d57877759d707e84c082163c543853245f91b70c804115a5010532890f18/llvmlite-0.46.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2e8cbfff7f6db0fa2c771ad24154e2a7e457c2444d7673e6de06b8b698c3b269", size = 55128628, upload-time = "2025-12-08T18:15:31.098Z" }, + { url = "https://files.pythonhosted.org/packages/30/a8/e61a8c2b3cc7a597073d9cde1fcbb567e9d827f1db30c93cf80422eac70d/llvmlite-0.46.0-cp314-cp314-win_amd64.whl", hash = "sha256:7821eda3ec1f18050f981819756631d60b6d7ab1a6cf806d9efefbe3f4082d61", size = 39153056, upload-time = "2025-12-08T18:15:33.938Z" }, ] [[package]] @@ -3299,7 +3308,8 @@ dependencies = [ { name = 
"cycler" }, { name = "fonttools" }, { name = "kiwisolver" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "pillow" }, { name = "pyparsing" }, @@ -3423,47 +3433,47 @@ wheels = [ [[package]] name = "mlx" -version = "0.30.6" +version = "0.31.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mlx-metal", marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/2e/016527cf1012a68bb25f1ba3a73914f87807a7fee58d7a54fa69adcd2f55/mlx-0.30.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:6c4df52aebfac40563259c04fca4a0c4d05b2061e09cdaad24e4233baa560b4f", size = 573214, upload-time = "2026-02-06T03:45:00.344Z" }, - { url = "https://files.pythonhosted.org/packages/a4/8f/600c6bed6eb6574e4a9d15e7a20a2ec903c2c5b54e2fd782c592a00ff933/mlx-0.30.6-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:0df8715b5cb84b6b6314aa868302873a0a94e63e6d195bc9858b8c58c79aa5a4", size = 573213, upload-time = "2026-02-06T03:45:02.208Z" }, - { url = "https://files.pythonhosted.org/packages/11/f7/d15af26c639c3d6000b6478fc0d54a7a528d71e79255190a0abc42f31608/mlx-0.30.6-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:7b4742ec2b748d2406c884e364fcd6f89d7f2b3f834f7b65c4c07acfa139cae8", size = 573254, upload-time = "2026-02-06T03:45:03.575Z" }, - { url = "https://files.pythonhosted.org/packages/d4/c3/e4f1fda18068fe0d5213f67d94771f39e219a24072746a02ca70a3a6020f/mlx-0.30.6-cp310-cp310-manylinux_2_35_aarch64.whl", hash = "sha256:45c91ff34690b0d34063d1dc68a7a87f142ff9c5df6e5c611884a6bdcc9a53e1", size = 636558, upload-time = "2026-02-06T03:45:05.262Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/c7/201e9e3ab3304aca99f850a0c1bc5d52e52e48960b0d415a196cd288faef/mlx-0.30.6-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:b9b746fa0a44dfe1576925eb343ee9afa7023d3d805f84a3d90d0066096f31b8", size = 669479, upload-time = "2026-02-06T03:45:07.122Z" }, - { url = "https://files.pythonhosted.org/packages/93/81/21d745beeda53ee29e9c027d806f1e1cac983e8ddb3d6b18d44a1b30a11b/mlx-0.30.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:e721d29c4250ada3cba7a5ad43d358b42401600e792c378ed6b52c9d692aaba8", size = 573359, upload-time = "2026-02-06T03:45:08.41Z" }, - { url = "https://files.pythonhosted.org/packages/05/08/826286458df5ea91efc380d71fd8058ee7338207c6b547204f2758e168d8/mlx-0.30.6-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:23f55c1c160a38ab350f4f7ce3ab10c490df39800ad35c4821c3ef5fa89ec24e", size = 573359, upload-time = "2026-02-06T03:45:09.688Z" }, - { url = "https://files.pythonhosted.org/packages/56/aa/3fc9ac795934182e680a0cbeb99202838e4548139cfd580015dcfbfb7ee8/mlx-0.30.6-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:37c37571f8c1567c2b7e4871237b92a2b321fb8157d6426373be946c03e49ebd", size = 573406, upload-time = "2026-02-06T03:45:11.383Z" }, - { url = "https://files.pythonhosted.org/packages/af/d1/b8bcc332e3c268bf59632d7a8f1b5c8e6a4b154d651aa20b93e359e3c004/mlx-0.30.6-cp311-cp311-manylinux_2_35_aarch64.whl", hash = "sha256:253317a2bab3a1927d7cb89267690d82525acb5810f30d696ff9b705e7f8a78a", size = 636997, upload-time = "2026-02-06T03:45:12.619Z" }, - { url = "https://files.pythonhosted.org/packages/89/fa/bdc4b8aa6d078e724decb754b0f04ac1a25e46c190e52639906401c3b8b8/mlx-0.30.6-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:4e2058ac219d99d38baa90f810947c6bfa09a28511dfe660629012a7c470c35d", size = 669638, upload-time = "2026-02-06T03:45:14.103Z" }, - { url = 
"https://files.pythonhosted.org/packages/85/fe/85acff870a9949494fd505b22c34d63eb127442f5f8751a159d3a78f7ef6/mlx-0.30.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:47d20016cb5733d06c1d017412a31983dbe3237cf70942760430188922ffc1ba", size = 573484, upload-time = "2026-02-06T03:45:15.88Z" }, - { url = "https://files.pythonhosted.org/packages/e1/14/5546082ee37118b33afb6300d8e07d03efea2dbba838d514d9465f87489b/mlx-0.30.6-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:6b8c133df2d6a2ed173d2b7bb50d7032a13be84e1792b7d79171ad8f50a8c0ea", size = 573486, upload-time = "2026-02-06T03:45:17.506Z" }, - { url = "https://files.pythonhosted.org/packages/ef/b5/ae04666a7b8bda74e2c6903756710103e283ea6fa4edd2c92449ad4547d6/mlx-0.30.6-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:31eabb5d1da4ac7b16f2042fdb046b993cdf0f32bc3312e0af469232bb67720b", size = 573509, upload-time = "2026-02-06T03:45:18.68Z" }, - { url = "https://files.pythonhosted.org/packages/9f/8e/fdee70051e2c7f523f9b22575f05bdb1b47300aba1ecda15bda98a9b01c1/mlx-0.30.6-cp312-cp312-manylinux_2_35_aarch64.whl", hash = "sha256:070010932d424005e6c9c76b379ccdf4d96b385658fdb34dc780fa4eb24cb1a0", size = 622061, upload-time = "2026-02-06T03:45:19.984Z" }, - { url = "https://files.pythonhosted.org/packages/65/dd/fe29f1e19e5268a8f892c83be35f14e63f1aea3baf7e7e44e246d4fea184/mlx-0.30.6-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:9084c8f20544ec6a53aa3edcd2da85d205e07ff80bd47151633219bd5cfcd23c", size = 663715, upload-time = "2026-02-06T03:45:21.873Z" }, - { url = "https://files.pythonhosted.org/packages/ae/5b/e460e144a34d5529e010056cccf50b538d56ed001473bc6b246018fd58cb/mlx-0.30.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:ed86f8bffc174c2f259ca589ea25464c96cf69d1bb457074a2bf2ef53737e54f", size = 573515, upload-time = "2026-02-06T03:45:23.405Z" }, - { url = "https://files.pythonhosted.org/packages/60/25/69833fefb9a3fef30b56792b1bcd022496c4fea83e45411d289b77ef7546/mlx-0.30.6-cp313-cp313-macosx_15_0_arm64.whl", 
hash = "sha256:c52294958269e20f300639a17c1900ca8fc737d859ddda737f9811e94bd040e5", size = 573516, upload-time = "2026-02-06T03:45:24.618Z" }, - { url = "https://files.pythonhosted.org/packages/9c/6a/7e7fbeebc5cb51b6a5eba96b263a6298707bcbdc059f4b0b73e088bc3dea/mlx-0.30.6-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:b5b6636f7c49a4d86d8ec82643b972f45a144a7a9f3a967b27b2e6e22cf71e6a", size = 573592, upload-time = "2026-02-06T03:45:25.928Z" }, - { url = "https://files.pythonhosted.org/packages/93/06/280f6f2ba80520a7109730425eda0d966658793aa0d02d8be8d351f75253/mlx-0.30.6-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:67e6c9e30a9faeacc209917ef5523177cf9b086914b6b5d83ff886e4294b727d", size = 622011, upload-time = "2026-02-06T03:45:28.165Z" }, - { url = "https://files.pythonhosted.org/packages/fe/35/f872afbee9c079cc69924d9e9c46f5663adb7da58cba3511db082dd307c1/mlx-0.30.6-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:47db8b16fcb6f6c5a47c0bdb24ed377b41237017ac93aa6cb6aa206c9bdf82e4", size = 663650, upload-time = "2026-02-06T03:45:30.315Z" }, - { url = "https://files.pythonhosted.org/packages/60/23/361dc7a5797634e4d7e9bdd6564c6b28f9b1246672632def2f91bf066b18/mlx-0.30.6-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:78804a89dcff4a838f7c2da72392fe87a523e95122a3c840e53df019122aad45", size = 575028, upload-time = "2026-02-06T03:45:31.549Z" }, - { url = "https://files.pythonhosted.org/packages/a8/69/1854484d414171586814dfbe8def95f75c4ea2c7341ba13ba8ee675f7c62/mlx-0.30.6-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ec13584ab069665cc7ad34a05494d9291cd623aef6ae96be48875fc87cfc25d6", size = 575026, upload-time = "2026-02-06T03:45:33.072Z" }, - { url = "https://files.pythonhosted.org/packages/6b/b8/3adbc441924209a7e4c568308b2a0b54bd09aee6a68db5bae85304791e54/mlx-0.30.6-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:b2c5e8a090a753ef99a1380a4d059c983083f36198864f6df9faaf1223d083df", size = 575041, upload-time = "2026-02-06T03:45:34.814Z" }, - { url = 
"https://files.pythonhosted.org/packages/3f/54/9d9e06804fb2088202a2cdf60458e00b221f71420bea285720b60f9e82b5/mlx-0.30.6-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:9ceddede4af0de31d1f6b3099f70e5469d60cd7c546975dedbdbeab3519cab3f", size = 624002, upload-time = "2026-02-06T03:45:36Z" }, - { url = "https://files.pythonhosted.org/packages/42/92/3140a15a50cb1f9267a6552171e1dfa577861de53e093124bc43707f2a0e/mlx-0.30.6-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:4a6ffd2d16728cf95f63a1b555d7c2eaeea686a0e6b73228bd265411cb5d77a4", size = 663569, upload-time = "2026-02-06T03:45:37.242Z" }, + { url = "https://files.pythonhosted.org/packages/73/54/269d13847b04b07523d44cf903e1d3c6d48f56e6e89dda7e16418b411629/mlx-0.31.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:38680838e0dd9a621ed4adc5a9ed8b94aeb6a4798142fbe215b821b8c6b8fc36", size = 575395, upload-time = "2026-02-27T23:49:11.886Z" }, + { url = "https://files.pythonhosted.org/packages/3d/86/1fbe1f8f3a23c92c821c235ab7a28395c86c900b0a2b2425f3c8862bbeb6/mlx-0.31.0-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:7aded590bcf6839307c3acc899e196936991f97b499ddbdd0cd3b228bf10792f", size = 575394, upload-time = "2026-02-27T23:49:13.738Z" }, + { url = "https://files.pythonhosted.org/packages/20/01/02b79132e91182c779bb6c4f586c5fb86d49c32e8f07f307d2d4ca64cca6/mlx-0.31.0-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:6e3ae83607b798b44cb3e44437095cfd26886fecc15f90f29f9eafd206d4d170", size = 575411, upload-time = "2026-02-27T23:49:15.374Z" }, + { url = "https://files.pythonhosted.org/packages/13/86/c501ddb496a185b69f3181d77276907f43a847eaa4d9fff86bc0616d1dcc/mlx-0.31.0-cp310-cp310-manylinux_2_35_aarch64.whl", hash = "sha256:b25f785c94eb47d8104604a5de0e7d749b801e7a40073cbf457aa94c372e5593", size = 639542, upload-time = "2026-02-27T23:49:16.822Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/7c/508bfc140cf777dbe61fc2be0fbfca56e3f0ceed233cd7a8ef4add84262e/mlx-0.31.0-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:6a4342027e6608ce69807a8f079c750a7c6161f543ebb49e55654edd03c178d6", size = 672721, upload-time = "2026-02-27T23:49:17.978Z" }, + { url = "https://files.pythonhosted.org/packages/1e/d3/fcb8b9f645ae70b3295a353999c3c6c7a66fd43ed8aa716b13da12bf40d4/mlx-0.31.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:285313eaeba425e58cbb3238c2d1a3894e6252d58f243ce56681d5419a568d6c", size = 575602, upload-time = "2026-02-27T23:49:19.314Z" }, + { url = "https://files.pythonhosted.org/packages/bd/2a/d35072e8dc31d9550f8218cfc388c1cd12c7fd89e8246540a9c7b873d958/mlx-0.31.0-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:acf4f04ff33a80784a0f15c492166dc889e65659b41c410ca5a7c2d78bee2a3a", size = 575603, upload-time = "2026-02-27T23:49:20.651Z" }, + { url = "https://files.pythonhosted.org/packages/43/fa/eca64a514cd50a4a38cc9b8827db85d9e554c3fe407ede043d061055b1ab/mlx-0.31.0-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:f624571e23a86654496c42a507b4bb42ded0edb91f33161fabafdbf6b81ba024", size = 575637, upload-time = "2026-02-27T23:49:22.02Z" }, + { url = "https://files.pythonhosted.org/packages/72/cd/0ee01b646010c7a22872d2b849b766941f813c4fd777602306d01af3915f/mlx-0.31.0-cp311-cp311-manylinux_2_35_aarch64.whl", hash = "sha256:5b5306a0934b15c4e3a1088a10066bdde3966c21b95006c63ecc38ca8e3891e0", size = 639267, upload-time = "2026-02-27T23:49:23.265Z" }, + { url = "https://files.pythonhosted.org/packages/73/50/c72e2cabdeefc2bf51ae5c1111bdaa9055a0c2d18bc87314ef965ffff422/mlx-0.31.0-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:18078bc67dfb7ed602fca233d00ce93e23d590d9347da5009472455a92831066", size = 672858, upload-time = "2026-02-27T23:49:24.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/7d/87fb0daa006dbbbd8894c3d496c7d9dfc52e4ade260482276d3eca137a15/mlx-0.31.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:de6c0a3e8aa0e7d1365d46634fdbb3f835c164fbdb6ba8a239e039a4efa07fe2", size = 575834, upload-time = "2026-02-27T23:49:26.61Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e3/aa0fac5a9d52b1a4686c7097e56775c1a96dee3084f9c587b74e4c2cd284/mlx-0.31.0-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:d6af01b15177da995336a6fd9878e7c5994720a9f1614d8f4d1dbe9293167c30", size = 575836, upload-time = "2026-02-27T23:49:28.505Z" }, + { url = "https://files.pythonhosted.org/packages/8d/15/6aa3edaa34aeef370634756b7d131b8dc1cdb0002ddecdd3d876b5f9fa0c/mlx-0.31.0-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:1ad14ddc3a15818f5bba0de35e88559ed8dcb93ccff2ef879ff604d02d663b25", size = 575828, upload-time = "2026-02-27T23:49:29.684Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d3/53ac650a569f5f5111c0280611acf0dcbdfa5fd0da2d433bad0f5575de73/mlx-0.31.0-cp312-cp312-manylinux_2_35_aarch64.whl", hash = "sha256:a80754ecf64191f71da1946dc5de6cf903344cc90dd286c589792ee9d3fc62f9", size = 624405, upload-time = "2026-02-27T23:49:31.687Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fe/a0c0b73c04f7673a50c505e155dd0088cc7a116d7b8d4eb4d1d9fdcd2c8f/mlx-0.31.0-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:363282eb094785f6aba27810ff89331c0f7829c6961f571cd0feaad09d2c809f", size = 666952, upload-time = "2026-02-27T23:49:33.262Z" }, + { url = "https://files.pythonhosted.org/packages/4a/09/35d1192cf1f655438213d8baa2264a8bc2426b44d93802dabfc177fd8e81/mlx-0.31.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:4f33e9aafc6d3ad29e72743dfb786c4ce67397414f0a091469058626381fc1bc", size = 575815, upload-time = "2026-02-27T23:49:34.607Z" }, + { url = "https://files.pythonhosted.org/packages/59/9d/29e0cb154a31ed05c9d24c776513bf1ec506b8570e214b4563b55bb19ef6/mlx-0.31.0-cp313-cp313-macosx_15_0_arm64.whl", 
hash = "sha256:242806b8ad6a4d3ce86cdff513f86520552de7592786712770b2e1ebd178816a", size = 575821, upload-time = "2026-02-27T23:49:35.947Z" }, + { url = "https://files.pythonhosted.org/packages/5f/6c/437aefdca17216aab02d0fb7528cd63e2c3d8d9c1b079c07d579a770645f/mlx-0.31.0-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:7f0bdbac084017820ce513a12318771a06c7ec10fad159839e27c998bc5dad89", size = 575810, upload-time = "2026-02-27T23:49:37.165Z" }, + { url = "https://files.pythonhosted.org/packages/a6/d5/986777b53e2c3eff709ee5a275b41ed84a9c04f60071e97f9d3b60dec845/mlx-0.31.0-cp313-cp313-manylinux_2_35_aarch64.whl", hash = "sha256:8642dda2b23195d9921973749ae9bf764e2c7d70bfc0e60b23b6335e660cc610", size = 624713, upload-time = "2026-02-27T23:49:38.672Z" }, + { url = "https://files.pythonhosted.org/packages/2d/29/da0875739d08760461a5b21207c34d959bc7572b27e46ccc0f48badae078/mlx-0.31.0-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:c6daa671cfa3c194951d742aa09030c5008d9d9657034b2903389fa090b3ba92", size = 666888, upload-time = "2026-02-27T23:49:40.222Z" }, + { url = "https://files.pythonhosted.org/packages/66/60/0152a44ed737c3b16e9044909d01212b99e216c6ab4b2f76faa054ae8172/mlx-0.31.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:cce3e15cf11c608c9e721502fe56e54f9f48b897e9b80f1204a48643d68710c0", size = 577579, upload-time = "2026-02-27T23:49:41.723Z" }, + { url = "https://files.pythonhosted.org/packages/e3/6b/70f0a254d7ace58a030547a99219f1342c3cf383029e1af90eee3efaeb85/mlx-0.31.0-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:ba330fe40d73b202880bbb5cac62de0b639cf4c44a12853bcadb34a9e3ffe880", size = 577582, upload-time = "2026-02-27T23:49:42.998Z" }, + { url = "https://files.pythonhosted.org/packages/63/5a/81cf057dbc005a43d27b7dfaff88198c61bbfe76cb8da3499821083c3fca/mlx-0.31.0-cp314-cp314-macosx_26_0_arm64.whl", hash = "sha256:d2014d113070846c6cdee980653f561c92a4a663a449f64e70c15bbf74d637e1", size = 577535, upload-time = "2026-02-27T23:49:44.475Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/22/1b2bddb2774c7951aa620d286157439f288186215ff6ce18d9a9a45e608e/mlx-0.31.0-cp314-cp314-manylinux_2_35_aarch64.whl", hash = "sha256:994fab25ff521621e03001177a8f0f1a7bf8294ff340f89910ec074f9f681ed9", size = 627410, upload-time = "2026-02-27T23:49:45.654Z" }, + { url = "https://files.pythonhosted.org/packages/46/f4/e9256326912ac21a9853b3a9856da19292b908270ff96cb27abb8421c8c6/mlx-0.31.0-cp314-cp314-manylinux_2_35_x86_64.whl", hash = "sha256:c3bb9961f40d098659326b0edb96e2a16adecfaf3c1f2518cad5a0b7e55a3a5d", size = 667351, upload-time = "2026-02-27T23:49:46.868Z" }, ] [[package]] name = "mlx-metal" -version = "0.30.6" +version = "0.31.0" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/85/44406b521f920248fad621334d4dc15e77660a494edf890e7cbee33bf38d/mlx_metal-0.30.6-py3-none-macosx_14_0_arm64.whl", hash = "sha256:ea6d0c973def9a5b4f652cc77036237db3f88c9d0af63701d76b5fddde99b820", size = 38437818, upload-time = "2026-02-06T03:44:56.19Z" }, - { url = "https://files.pythonhosted.org/packages/d0/cb/10a516995f7d0c154b0d7e633c54b51e96977a86a355105b6474cfcbe0d0/mlx_metal-0.30.6-py3-none-macosx_15_0_arm64.whl", hash = "sha256:0f8cb94634d07e06a372d6ad9a090f38a18bab1ff19a140aede60eacf707bb94", size = 38433701, upload-time = "2026-02-06T03:44:59.678Z" }, - { url = "https://files.pythonhosted.org/packages/4c/7d/70cb272f7373c334709f210ed8420511fc9d64d05a7a646c0b3b94c29c04/mlx_metal-0.30.6-py3-none-macosx_26_0_arm64.whl", hash = "sha256:d761ae26304f2c4b454eeea7f612a56919d9e5e57dbb1dc0788f8e34aa6f41c2", size = 47718448, upload-time = "2026-02-06T03:45:03.133Z" }, + { url = "https://files.pythonhosted.org/packages/94/4f/0a0671dfa62b59bf429edab0e2c9c7f9bc77865aa4218cd46f2f41d7d11a/mlx_metal-0.31.0-py3-none-macosx_14_0_arm64.whl", hash = "sha256:1c572a6e3634a63060c103b0c38ac309e2d217be15519e3d8f0d6b452bb015f5", size = 38596752, upload-time = "2026-02-27T23:29:39.52Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/42/c6d7bfd097b777f932d6cf8c79e41b565070b63cc452a069b8804e505140/mlx_metal-0.31.0-py3-none-macosx_15_0_arm64.whl", hash = "sha256:554dc7cb29e0ea5fb6941df42f11a1de385b095848e6183c7a99d7c1f1a11f5d", size = 38595434, upload-time = "2026-02-27T23:29:43.285Z" }, + { url = "https://files.pythonhosted.org/packages/ed/8f/cdaffd759b4c71e74c294e773daacad8aafabac103b93e0aa56d4468d279/mlx_metal-0.31.0-py3-none-macosx_26_0_arm64.whl", hash = "sha256:7fd412f55ddf9f1d90c2cd86ce281d19e8eb93d093c6dbd784a49f8bd7d0a22c", size = 47879607, upload-time = "2026-02-27T23:29:46.571Z" }, ] [[package]] @@ -3475,9 +3485,10 @@ dependencies = [ { name = "mlx" }, { name = "more-itertools" }, { name = "numba" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tiktoken" }, { name = "torch" }, { name = "tqdm" }, @@ -3680,7 +3691,7 @@ wheels = [ [[package]] name = "nltk" -version = "3.9.2" +version = "3.9.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -3688,9 +3699,9 @@ dependencies = [ { name = "regex" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f9/76/3a5e4312c19a028770f86fd7c058cf9f4ec4321c6cf7526bab998a5b683c/nltk-3.9.2.tar.gz", hash = "sha256:0f409e9b069ca4177c1903c3e843eef90c7e92992fa4931ae607da6de49e1419", size = 2887629, 
upload-time = "2025-10-01T07:19:23.764Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e1/8f/915e1c12df07c70ed779d18ab83d065718a926e70d3ea33eb0cd66ffb7c0/nltk-3.9.3.tar.gz", hash = "sha256:cb5945d6424a98d694c2b9a0264519fab4363711065a46aa0ae7a2195b92e71f", size = 2923673, upload-time = "2026-02-24T12:05:53.833Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/60/90/81ac364ef94209c100e12579629dc92bf7a709a84af32f8c551b02c07e94/nltk-3.9.2-py3-none-any.whl", hash = "sha256:1e209d2b3009110635ed9709a67a1a3e33a10f799490fa71cf4bec218c11c88a", size = 1513404, upload-time = "2025-10-01T07:19:21.648Z" }, + { url = "https://files.pythonhosted.org/packages/c2/7e/9af5a710a1236e4772de8dfcc6af942a561327bb9f42b5b4a24d0cf100fd/nltk-3.9.3-py3-none-any.whl", hash = "sha256:60b3db6e9995b3dd976b1f0fa7dec22069b2677e759c28eb69b62ddd44870522", size = 1525385, upload-time = "2026-02-24T12:05:46.54Z" }, ] [[package]] @@ -3709,9 +3720,10 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "joblib" }, { name = "matplotlib" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tqdm" }, ] sdist = { url = "https://files.pythonhosted.org/packages/11/08/539e3cff148b7f9bde5b4b060451a7445d708fa3fe5d8a2bc0c552976e52/noisereduce-3.0.3.tar.gz", hash = 
"sha256:ff64a28fb92e3c81f153cf29550e5c2db56b2523afa8f56f5e03c177cc5e918f", size = 20968, upload-time = "2024-10-06T13:43:45.431Z" } @@ -3721,40 +3733,44 @@ wheels = [ [[package]] name = "numba" -version = "0.61.2" +version = "0.64.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "llvmlite" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/a0/e21f57604304aa03ebb8e098429222722ad99176a4f979d34af1d1ee80da/numba-0.61.2.tar.gz", hash = "sha256:8750ee147940a6637b80ecf7f95062185ad8726c8c28a2295b8ec1160a196f7d", size = 2820615, upload-time = "2025-04-09T02:58:07.659Z" } +sdist = { url = "https://files.pythonhosted.org/packages/23/c9/a0fb41787d01d621046138da30f6c2100d80857bf34b3390dd68040f27a3/numba-0.64.0.tar.gz", hash = "sha256:95e7300af648baa3308127b1955b52ce6d11889d16e8cfe637b4f85d2fca52b1", size = 2765679, upload-time = "2026-02-18T18:41:20.974Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/ca/f470be59552ccbf9531d2d383b67ae0b9b524d435fb4a0d229fef135116e/numba-0.61.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:cf9f9fc00d6eca0c23fc840817ce9f439b9f03c8f03d6246c0e7f0cb15b7162a", size = 2775663, upload-time = "2025-04-09T02:57:34.143Z" }, - { url = "https://files.pythonhosted.org/packages/f5/13/3bdf52609c80d460a3b4acfb9fdb3817e392875c0d6270cf3fd9546f138b/numba-0.61.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ea0247617edcb5dd61f6106a56255baab031acc4257bddaeddb3a1003b4ca3fd", size = 2778344, upload-time = "2025-04-09T02:57:36.609Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/7d/bfb2805bcfbd479f04f835241ecf28519f6e3609912e3a985aed45e21370/numba-0.61.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae8c7a522c26215d5f62ebec436e3d341f7f590079245a2f1008dfd498cc1642", size = 3824054, upload-time = "2025-04-09T02:57:38.162Z" }, - { url = "https://files.pythonhosted.org/packages/e3/27/797b2004745c92955470c73c82f0e300cf033c791f45bdecb4b33b12bdea/numba-0.61.2-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd1e74609855aa43661edffca37346e4e8462f6903889917e9f41db40907daa2", size = 3518531, upload-time = "2025-04-09T02:57:39.709Z" }, - { url = "https://files.pythonhosted.org/packages/b1/c6/c2fb11e50482cb310afae87a997707f6c7d8a48967b9696271347441f650/numba-0.61.2-cp310-cp310-win_amd64.whl", hash = "sha256:ae45830b129c6137294093b269ef0a22998ccc27bf7cf096ab8dcf7bca8946f9", size = 2831612, upload-time = "2025-04-09T02:57:41.559Z" }, - { url = "https://files.pythonhosted.org/packages/3f/97/c99d1056aed767503c228f7099dc11c402906b42a4757fec2819329abb98/numba-0.61.2-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:efd3db391df53aaa5cfbee189b6c910a5b471488749fd6606c3f33fc984c2ae2", size = 2775825, upload-time = "2025-04-09T02:57:43.442Z" }, - { url = "https://files.pythonhosted.org/packages/95/9e/63c549f37136e892f006260c3e2613d09d5120672378191f2dc387ba65a2/numba-0.61.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:49c980e4171948ffebf6b9a2520ea81feed113c1f4890747ba7f59e74be84b1b", size = 2778695, upload-time = "2025-04-09T02:57:44.968Z" }, - { url = "https://files.pythonhosted.org/packages/97/c8/8740616c8436c86c1b9a62e72cb891177d2c34c2d24ddcde4c390371bf4c/numba-0.61.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3945615cd73c2c7eba2a85ccc9c1730c21cd3958bfcf5a44302abae0fb07bb60", size = 3829227, upload-time = "2025-04-09T02:57:46.63Z" }, - { url = 
"https://files.pythonhosted.org/packages/fc/06/66e99ae06507c31d15ff3ecd1f108f2f59e18b6e08662cd5f8a5853fbd18/numba-0.61.2-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbfdf4eca202cebade0b7d43896978e146f39398909a42941c9303f82f403a18", size = 3523422, upload-time = "2025-04-09T02:57:48.222Z" }, - { url = "https://files.pythonhosted.org/packages/0f/a4/2b309a6a9f6d4d8cfba583401c7c2f9ff887adb5d54d8e2e130274c0973f/numba-0.61.2-cp311-cp311-win_amd64.whl", hash = "sha256:76bcec9f46259cedf888041b9886e257ae101c6268261b19fda8cfbc52bec9d1", size = 2831505, upload-time = "2025-04-09T02:57:50.108Z" }, - { url = "https://files.pythonhosted.org/packages/b4/a0/c6b7b9c615cfa3b98c4c63f4316e3f6b3bbe2387740277006551784218cd/numba-0.61.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:34fba9406078bac7ab052efbf0d13939426c753ad72946baaa5bf9ae0ebb8dd2", size = 2776626, upload-time = "2025-04-09T02:57:51.857Z" }, - { url = "https://files.pythonhosted.org/packages/92/4a/fe4e3c2ecad72d88f5f8cd04e7f7cff49e718398a2fac02d2947480a00ca/numba-0.61.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4ddce10009bc097b080fc96876d14c051cc0c7679e99de3e0af59014dab7dfe8", size = 2779287, upload-time = "2025-04-09T02:57:53.658Z" }, - { url = "https://files.pythonhosted.org/packages/9a/2d/e518df036feab381c23a624dac47f8445ac55686ec7f11083655eb707da3/numba-0.61.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b1bb509d01f23d70325d3a5a0e237cbc9544dd50e50588bc581ba860c213546", size = 3885928, upload-time = "2025-04-09T02:57:55.206Z" }, - { url = "https://files.pythonhosted.org/packages/10/0f/23cced68ead67b75d77cfcca3df4991d1855c897ee0ff3fe25a56ed82108/numba-0.61.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48a53a3de8f8793526cbe330f2a39fe9a6638efcbf11bd63f3d2f9757ae345cd", size = 3577115, upload-time = "2025-04-09T02:57:56.818Z" }, - { url = 
"https://files.pythonhosted.org/packages/68/1d/ddb3e704c5a8fb90142bf9dc195c27db02a08a99f037395503bfbc1d14b3/numba-0.61.2-cp312-cp312-win_amd64.whl", hash = "sha256:97cf4f12c728cf77c9c1d7c23707e4d8fb4632b46275f8f3397de33e5877af18", size = 2831929, upload-time = "2025-04-09T02:57:58.45Z" }, - { url = "https://files.pythonhosted.org/packages/0b/f3/0fe4c1b1f2569e8a18ad90c159298d862f96c3964392a20d74fc628aee44/numba-0.61.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:3a10a8fc9afac40b1eac55717cece1b8b1ac0b946f5065c89e00bde646b5b154", size = 2771785, upload-time = "2025-04-09T02:57:59.96Z" }, - { url = "https://files.pythonhosted.org/packages/e9/71/91b277d712e46bd5059f8a5866862ed1116091a7cb03bd2704ba8ebe015f/numba-0.61.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d3bcada3c9afba3bed413fba45845f2fb9cd0d2b27dd58a1be90257e293d140", size = 2773289, upload-time = "2025-04-09T02:58:01.435Z" }, - { url = "https://files.pythonhosted.org/packages/0d/e0/5ea04e7ad2c39288c0f0f9e8d47638ad70f28e275d092733b5817cf243c9/numba-0.61.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bdbca73ad81fa196bd53dc12e3aaf1564ae036e0c125f237c7644fe64a4928ab", size = 3893918, upload-time = "2025-04-09T02:58:02.933Z" }, - { url = "https://files.pythonhosted.org/packages/17/58/064f4dcb7d7e9412f16ecf80ed753f92297e39f399c905389688cf950b81/numba-0.61.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:5f154aaea625fb32cfbe3b80c5456d514d416fcdf79733dd69c0df3a11348e9e", size = 3584056, upload-time = "2025-04-09T02:58:04.538Z" }, - { url = "https://files.pythonhosted.org/packages/af/a4/6d3a0f2d3989e62a18749e1e9913d5fa4910bbb3e3311a035baea6caf26d/numba-0.61.2-cp313-cp313-win_amd64.whl", hash = "sha256:59321215e2e0ac5fa928a8020ab00b8e57cda8a97384963ac0dfa4d4e6aa54e7", size = 2831846, upload-time = "2025-04-09T02:58:06.125Z" }, + { url = 
"https://files.pythonhosted.org/packages/4c/5e/604fed821cd7e3426bb3bc99a7ed6ac0bcb489f4cd93052256437d082f95/numba-0.64.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc09b79440952e3098eeebea4bf6e8d2355fb7f12734fcd9fc5039f0dca90727", size = 2683250, upload-time = "2026-02-18T18:40:45.829Z" }, + { url = "https://files.pythonhosted.org/packages/4f/9f/9275a723d050b5f1a9b1c7fb7dbfce324fef301a8e50c5f88338569db06c/numba-0.64.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1afe3a80b8c2f376b211fb7a49e536ef9eafc92436afc95a2f41ea5392f8cc65", size = 3742168, upload-time = "2026-02-18T18:40:48.066Z" }, + { url = "https://files.pythonhosted.org/packages/e2/d1/97ca7dddaa36b16f4c46319bdb6b4913ba15d0245317d0d8ccde7b2d7d92/numba-0.64.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:23804194b93b8cd416c6444b5fbc4956082a45fed2d25436ef49c594666e7f7e", size = 3449103, upload-time = "2026-02-18T18:40:49.905Z" }, + { url = "https://files.pythonhosted.org/packages/52/0a/b9e137ad78415373e3353564500e8bf29dbce3c0d73633bb384d4e5d7537/numba-0.64.0-cp310-cp310-win_amd64.whl", hash = "sha256:e2a9fe998bb2cf848960b34db02c2c3b5e02cf82c07a26d9eef3494069740278", size = 2749950, upload-time = "2026-02-18T18:40:51.536Z" }, + { url = "https://files.pythonhosted.org/packages/89/a3/1a4286a1c16136c8896d8e2090d950e79b3ec626d3a8dc9620f6234d5a38/numba-0.64.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:766156ee4b8afeeb2b2e23c81307c5d19031f18d5ce76ae2c5fb1429e72fa92b", size = 2682938, upload-time = "2026-02-18T18:40:52.897Z" }, + { url = "https://files.pythonhosted.org/packages/19/16/aa6e3ba3cd45435c117d1101b278b646444ed05b7c712af631b91353f573/numba-0.64.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d17071b4ffc9d39b75d8e6c101a36f0c81b646123859898c9799cb31807c8f78", size = 3747376, upload-time = "2026-02-18T18:40:54.925Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/f1/dd2f25e18d75fdf897f730b78c5a7b00cc4450f2405564dbebfaf359f21f/numba-0.64.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4ead5630434133bac87fa67526eacb264535e4e9a2d5ec780e0b4fc381a7d275", size = 3453292, upload-time = "2026-02-18T18:40:56.818Z" }, + { url = "https://files.pythonhosted.org/packages/31/29/e09d5630578a50a2b3fa154990b6b839cf95327aa0709e2d50d0b6816cd1/numba-0.64.0-cp311-cp311-win_amd64.whl", hash = "sha256:f2b1fd93e7aaac07d6fbaed059c00679f591f2423885c206d8c1b55d65ca3f2d", size = 2749824, upload-time = "2026-02-18T18:40:58.392Z" }, + { url = "https://files.pythonhosted.org/packages/70/a6/9fc52cb4f0d5e6d8b5f4d81615bc01012e3cf24e1052a60f17a68deb8092/numba-0.64.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:69440a8e8bc1a81028446f06b363e28635aa67bd51b1e498023f03b812e0ce68", size = 2683418, upload-time = "2026-02-18T18:40:59.886Z" }, + { url = "https://files.pythonhosted.org/packages/9b/89/1a74ea99b180b7a5587b0301ed1b183a2937c4b4b67f7994689b5d36fc34/numba-0.64.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13721011f693ba558b8dd4e4db7f2640462bba1b855bdc804be45bbeb55031a", size = 3804087, upload-time = "2026-02-18T18:41:01.699Z" }, + { url = "https://files.pythonhosted.org/packages/91/e1/583c647404b15f807410510fec1eb9b80cb8474165940b7749f026f21cbc/numba-0.64.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e0b180b1133f2b5d8b3f09d96b6d7a9e51a7da5dda3c09e998b5bcfac85d222c", size = 3504309, upload-time = "2026-02-18T18:41:03.252Z" }, + { url = "https://files.pythonhosted.org/packages/85/23/0fce5789b8a5035e7ace21216a468143f3144e02013252116616c58339aa/numba-0.64.0-cp312-cp312-win_amd64.whl", hash = "sha256:e63dc94023b47894849b8b106db28ccb98b49d5498b98878fac1a38f83ac007a", size = 2752740, upload-time = "2026-02-18T18:41:05.097Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/80/2734de90f9300a6e2503b35ee50d9599926b90cbb7ac54f9e40074cd07f1/numba-0.64.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3bab2c872194dcd985f1153b70782ec0fbbe348fffef340264eacd3a76d59fd6", size = 2683392, upload-time = "2026-02-18T18:41:06.563Z" }, + { url = "https://files.pythonhosted.org/packages/42/e8/14b5853ebefd5b37723ef365c5318a30ce0702d39057eaa8d7d76392859d/numba-0.64.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:703a246c60832cad231d2e73c1182f25bf3cc8b699759ec8fe58a2dbc689a70c", size = 3812245, upload-time = "2026-02-18T18:41:07.963Z" }, + { url = "https://files.pythonhosted.org/packages/8a/a2/f60dc6c96d19b7185144265a5fbf01c14993d37ff4cd324b09d0212aa7ce/numba-0.64.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7e2e49a7900ee971d32af7609adc0cfe6aa7477c6f6cccdf6d8138538cf7756f", size = 3511328, upload-time = "2026-02-18T18:41:09.504Z" }, + { url = "https://files.pythonhosted.org/packages/9c/2a/fe7003ea7e7237ee7014f8eaeeb7b0d228a2db22572ca85bab2648cf52cb/numba-0.64.0-cp313-cp313-win_amd64.whl", hash = "sha256:396f43c3f77e78d7ec84cdfc6b04969c78f8f169351b3c4db814b97e7acf4245", size = 2752668, upload-time = "2026-02-18T18:41:11.455Z" }, + { url = "https://files.pythonhosted.org/packages/3d/8a/77d26afe0988c592dd97cb8d4e80bfb3dfc7dbdacfca7d74a7c5c81dd8c2/numba-0.64.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f565d55eaeff382cbc86c63c8c610347453af3d1e7afb2b6569aac1c9b5c93ce", size = 2683590, upload-time = "2026-02-18T18:41:12.897Z" }, + { url = "https://files.pythonhosted.org/packages/8e/4b/600b8b7cdbc7f9cebee9ea3d13bb70052a79baf28944024ffcb59f0712e3/numba-0.64.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9b55169b18892c783f85e9ad9e6f5297a6d12967e4414e6b71361086025ff0bb", size = 3781163, upload-time = "2026-02-18T18:41:15.377Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/73/53f2d32bfa45b7175e9944f6b816d8c32840178c3eee9325033db5bf838e/numba-0.64.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:196bcafa02c9dd1707e068434f6d5cedde0feb787e3432f7f1f0e993cc336c4c", size = 3481172, upload-time = "2026-02-18T18:41:17.281Z" }, + { url = "https://files.pythonhosted.org/packages/b5/00/aebd2f7f1e11e38814bb96e95a27580817a7b340608d3ac085fdbab83174/numba-0.64.0-cp314-cp314-win_amd64.whl", hash = "sha256:213e9acbe7f1c05090592e79020315c1749dd52517b90e94c517dca3f014d4a1", size = 2754700, upload-time = "2026-02-18T18:41:19.277Z" }, ] [[package]] name = "numpy" version = "2.2.6" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245, upload-time = "2025-05-17T21:27:58.555Z" }, @@ -3813,6 +3829,91 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" }, ] +[[package]] +name = "numpy" +version = "2.4.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version == '3.13.*'", + "python_full_version == '3.12.*'", + "python_full_version == 
'3.11.*'", +] +sdist = { url = "https://files.pythonhosted.org/packages/57/fd/0005efbd0af48e55eb3c7208af93f2862d4b1a56cd78e84309a2d959208d/numpy-2.4.2.tar.gz", hash = "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", size = 20723651, upload-time = "2026-01-31T23:13:10.135Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/44/71852273146957899753e69986246d6a176061ea183407e95418c2aa4d9a/numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825", size = 16955478, upload-time = "2026-01-31T23:10:25.623Z" }, + { url = "https://files.pythonhosted.org/packages/74/41/5d17d4058bd0cd96bcbd4d9ff0fb2e21f52702aab9a72e4a594efa18692f/numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1", size = 14965467, upload-time = "2026-01-31T23:10:28.186Z" }, + { url = "https://files.pythonhosted.org/packages/49/48/fb1ce8136c19452ed15f033f8aee91d5defe515094e330ce368a0647846f/numpy-2.4.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:6e9f61981ace1360e42737e2bae58b27bf28a1b27e781721047d84bd754d32e7", size = 5475172, upload-time = "2026-01-31T23:10:30.848Z" }, + { url = "https://files.pythonhosted.org/packages/40/a9/3feb49f17bbd1300dd2570432961f5c8a4ffeff1db6f02c7273bd020a4c9/numpy-2.4.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:cb7bbb88aa74908950d979eeaa24dbdf1a865e3c7e45ff0121d8f70387b55f73", size = 6805145, upload-time = "2026-01-31T23:10:32.352Z" }, + { url = "https://files.pythonhosted.org/packages/3f/39/fdf35cbd6d6e2fcad42fcf85ac04a85a0d0fbfbf34b30721c98d602fd70a/numpy-2.4.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f069069931240b3fc703f1e23df63443dbd6390614c8c44a87d96cd0ec81eb1", size = 15966084, upload-time = "2026-01-31T23:10:34.502Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/46/6fa4ea94f1ddf969b2ee941290cca6f1bfac92b53c76ae5f44afe17ceb69/numpy-2.4.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c02ef4401a506fb60b411467ad501e1429a3487abca4664871d9ae0b46c8ba32", size = 16899477, upload-time = "2026-01-31T23:10:37.075Z" }, + { url = "https://files.pythonhosted.org/packages/09/a1/2a424e162b1a14a5bd860a464ab4e07513916a64ab1683fae262f735ccd2/numpy-2.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2653de5c24910e49c2b106499803124dde62a5a1fe0eedeaecf4309a5f639390", size = 17323429, upload-time = "2026-01-31T23:10:39.704Z" }, + { url = "https://files.pythonhosted.org/packages/ce/a2/73014149ff250628df72c58204822ac01d768697913881aacf839ff78680/numpy-2.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1ae241bbfc6ae276f94a170b14785e561cb5e7f626b6688cf076af4110887413", size = 18635109, upload-time = "2026-01-31T23:10:41.924Z" }, + { url = "https://files.pythonhosted.org/packages/6c/0c/73e8be2f1accd56df74abc1c5e18527822067dced5ec0861b5bb882c2ce0/numpy-2.4.2-cp311-cp311-win32.whl", hash = "sha256:df1b10187212b198dd45fa943d8985a3c8cf854aed4923796e0e019e113a1bda", size = 6237915, upload-time = "2026-01-31T23:10:45.26Z" }, + { url = "https://files.pythonhosted.org/packages/76/ae/e0265e0163cf127c24c3969d29f1c4c64551a1e375d95a13d32eab25d364/numpy-2.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:b9c618d56a29c9cb1c4da979e9899be7578d2e0b3c24d52079c166324c9e8695", size = 12607972, upload-time = "2026-01-31T23:10:47.021Z" }, + { url = "https://files.pythonhosted.org/packages/29/a5/c43029af9b8014d6ea157f192652c50042e8911f4300f8f6ed3336bf437f/numpy-2.4.2-cp311-cp311-win_arm64.whl", hash = "sha256:47c5a6ed21d9452b10227e5e8a0e1c22979811cad7dcc19d8e3e2fb8fa03f1a3", size = 10485763, upload-time = "2026-01-31T23:10:50.087Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/6e/6f394c9c77668153e14d4da83bcc247beb5952f6ead7699a1a2992613bea/numpy-2.4.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:21982668592194c609de53ba4933a7471880ccbaadcc52352694a59ecc860b3a", size = 16667963, upload-time = "2026-01-31T23:10:52.147Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f8/55483431f2b2fd015ae6ed4fe62288823ce908437ed49db5a03d15151678/numpy-2.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40397bda92382fcec844066efb11f13e1c9a3e2a8e8f318fb72ed8b6db9f60f1", size = 14693571, upload-time = "2026-01-31T23:10:54.789Z" }, + { url = "https://files.pythonhosted.org/packages/2f/20/18026832b1845cdc82248208dd929ca14c9d8f2bac391f67440707fff27c/numpy-2.4.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:b3a24467af63c67829bfaa61eecf18d5432d4f11992688537be59ecd6ad32f5e", size = 5203469, upload-time = "2026-01-31T23:10:57.343Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/2eb97c8a77daaba34eaa3fa7241a14ac5f51c46a6bd5911361b644c4a1e2/numpy-2.4.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:805cc8de9fd6e7a22da5aed858e0ab16be5a4db6c873dde1d7451c541553aa27", size = 6550820, upload-time = "2026-01-31T23:10:59.429Z" }, + { url = "https://files.pythonhosted.org/packages/b1/91/b97fdfd12dc75b02c44e26c6638241cc004d4079a0321a69c62f51470c4c/numpy-2.4.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d82351358ffbcdcd7b686b90742a9b86632d6c1c051016484fa0b326a0a1548", size = 15663067, upload-time = "2026-01-31T23:11:01.291Z" }, + { url = "https://files.pythonhosted.org/packages/f5/c6/a18e59f3f0b8071cc85cbc8d80cd02d68aa9710170b2553a117203d46936/numpy-2.4.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e35d3e0144137d9fdae62912e869136164534d64a169f86438bc9561b6ad49f", size = 16619782, upload-time = "2026-01-31T23:11:03.669Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/83/9751502164601a79e18847309f5ceec0b1446d7b6aa12305759b72cf98b2/numpy-2.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adb6ed2ad29b9e15321d167d152ee909ec73395901b70936f029c3bc6d7f4460", size = 17013128, upload-time = "2026-01-31T23:11:05.913Z" }, + { url = "https://files.pythonhosted.org/packages/61/c4/c4066322256ec740acc1c8923a10047818691d2f8aec254798f3dd90f5f2/numpy-2.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8906e71fd8afcb76580404e2a950caef2685df3d2a57fe82a86ac8d33cc007ba", size = 18345324, upload-time = "2026-01-31T23:11:08.248Z" }, + { url = "https://files.pythonhosted.org/packages/ab/af/6157aa6da728fa4525a755bfad486ae7e3f76d4c1864138003eb84328497/numpy-2.4.2-cp312-cp312-win32.whl", hash = "sha256:ec055f6dae239a6299cace477b479cca2fc125c5675482daf1dd886933a1076f", size = 5960282, upload-time = "2026-01-31T23:11:10.497Z" }, + { url = "https://files.pythonhosted.org/packages/92/0f/7ceaaeaacb40567071e94dbf2c9480c0ae453d5bb4f52bea3892c39dc83c/numpy-2.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:209fae046e62d0ce6435fcfe3b1a10537e858249b3d9b05829e2a05218296a85", size = 12314210, upload-time = "2026-01-31T23:11:12.176Z" }, + { url = "https://files.pythonhosted.org/packages/2f/a3/56c5c604fae6dd40fa2ed3040d005fca97e91bd320d232ac9931d77ba13c/numpy-2.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:fbde1b0c6e81d56f5dccd95dd4a711d9b95df1ae4009a60887e56b27e8d903fa", size = 10220171, upload-time = "2026-01-31T23:11:14.684Z" }, + { url = "https://files.pythonhosted.org/packages/a1/22/815b9fe25d1d7ae7d492152adbc7226d3eff731dffc38fe970589fcaaa38/numpy-2.4.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25f2059807faea4b077a2b6837391b5d830864b3543627f381821c646f31a63c", size = 16663696, upload-time = "2026-01-31T23:11:17.516Z" }, + { url = "https://files.pythonhosted.org/packages/09/f0/817d03a03f93ba9c6c8993de509277d84e69f9453601915e4a69554102a1/numpy-2.4.2-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:bd3a7a9f5847d2fb8c2c6d1c862fa109c31a9abeca1a3c2bd5a64572955b2979", size = 14688322, upload-time = "2026-01-31T23:11:19.883Z" }, + { url = "https://files.pythonhosted.org/packages/da/b4/f805ab79293c728b9a99438775ce51885fd4f31b76178767cfc718701a39/numpy-2.4.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8e4549f8a3c6d13d55041925e912bfd834285ef1dd64d6bc7d542583355e2e98", size = 5198157, upload-time = "2026-01-31T23:11:22.375Z" }, + { url = "https://files.pythonhosted.org/packages/74/09/826e4289844eccdcd64aac27d13b0fd3f32039915dd5b9ba01baae1f436c/numpy-2.4.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:aea4f66ff44dfddf8c2cffd66ba6538c5ec67d389285292fe428cb2c738c8aef", size = 6546330, upload-time = "2026-01-31T23:11:23.958Z" }, + { url = "https://files.pythonhosted.org/packages/19/fb/cbfdbfa3057a10aea5422c558ac57538e6acc87ec1669e666d32ac198da7/numpy-2.4.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3cd545784805de05aafe1dde61752ea49a359ccba9760c1e5d1c88a93bbf2b7", size = 15660968, upload-time = "2026-01-31T23:11:25.713Z" }, + { url = "https://files.pythonhosted.org/packages/04/dc/46066ce18d01645541f0186877377b9371b8fa8017fa8262002b4ef22612/numpy-2.4.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0d9b7c93578baafcbc5f0b83eaf17b79d345c6f36917ba0c67f45226911d499", size = 16607311, upload-time = "2026-01-31T23:11:28.117Z" }, + { url = "https://files.pythonhosted.org/packages/14/d9/4b5adfc39a43fa6bf918c6d544bc60c05236cc2f6339847fc5b35e6cb5b0/numpy-2.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f74f0f7779cc7ae07d1810aab8ac6b1464c3eafb9e283a40da7309d5e6e48fbb", size = 17012850, upload-time = "2026-01-31T23:11:30.888Z" }, + { url = "https://files.pythonhosted.org/packages/b7/20/adb6e6adde6d0130046e6fdfb7675cc62bc2f6b7b02239a09eb58435753d/numpy-2.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:c7ac672d699bf36275c035e16b65539931347d68b70667d28984c9fb34e07fa7", size = 
18334210, upload-time = "2026-01-31T23:11:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/78/0e/0a73b3dff26803a8c02baa76398015ea2a5434d9b8265a7898a6028c1591/numpy-2.4.2-cp313-cp313-win32.whl", hash = "sha256:8e9afaeb0beff068b4d9cd20d322ba0ee1cecfb0b08db145e4ab4dd44a6b5110", size = 5958199, upload-time = "2026-01-31T23:11:35.385Z" }, + { url = "https://files.pythonhosted.org/packages/43/bc/6352f343522fcb2c04dbaf94cb30cca6fd32c1a750c06ad6231b4293708c/numpy-2.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:7df2de1e4fba69a51c06c28f5a3de36731eb9639feb8e1cf7e4a7b0daf4cf622", size = 12310848, upload-time = "2026-01-31T23:11:38.001Z" }, + { url = "https://files.pythonhosted.org/packages/6e/8d/6da186483e308da5da1cc6918ce913dcfe14ffde98e710bfeff2a6158d4e/numpy-2.4.2-cp313-cp313-win_arm64.whl", hash = "sha256:0fece1d1f0a89c16b03442eae5c56dc0be0c7883b5d388e0c03f53019a4bfd71", size = 10221082, upload-time = "2026-01-31T23:11:40.392Z" }, + { url = "https://files.pythonhosted.org/packages/25/a1/9510aa43555b44781968935c7548a8926274f815de42ad3997e9e83680dd/numpy-2.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5633c0da313330fd20c484c78cdd3f9b175b55e1a766c4a174230c6b70ad8262", size = 14815866, upload-time = "2026-01-31T23:11:42.495Z" }, + { url = "https://files.pythonhosted.org/packages/36/30/6bbb5e76631a5ae46e7923dd16ca9d3f1c93cfa8d4ed79a129814a9d8db3/numpy-2.4.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d9f64d786b3b1dd742c946c42d15b07497ed14af1a1f3ce840cce27daa0ce913", size = 5325631, upload-time = "2026-01-31T23:11:44.7Z" }, + { url = "https://files.pythonhosted.org/packages/46/00/3a490938800c1923b567b3a15cd17896e68052e2145d8662aaf3e1ffc58f/numpy-2.4.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:b21041e8cb6a1eb5312dd1d2f80a94d91efffb7a06b70597d44f1bd2dfc315ab", size = 6646254, upload-time = "2026-01-31T23:11:46.341Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/e9/fac0890149898a9b609caa5af7455a948b544746e4b8fe7c212c8edd71f8/numpy-2.4.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:00ab83c56211a1d7c07c25e3217ea6695e50a3e2f255053686b081dc0b091a82", size = 15720138, upload-time = "2026-01-31T23:11:48.082Z" }, + { url = "https://files.pythonhosted.org/packages/ea/5c/08887c54e68e1e28df53709f1893ce92932cc6f01f7c3d4dc952f61ffd4e/numpy-2.4.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2fb882da679409066b4603579619341c6d6898fc83a8995199d5249f986e8e8f", size = 16655398, upload-time = "2026-01-31T23:11:50.293Z" }, + { url = "https://files.pythonhosted.org/packages/4d/89/253db0fa0e66e9129c745e4ef25631dc37d5f1314dad2b53e907b8538e6d/numpy-2.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:66cb9422236317f9d44b67b4d18f44efe6e9c7f8794ac0462978513359461554", size = 17079064, upload-time = "2026-01-31T23:11:52.927Z" }, + { url = "https://files.pythonhosted.org/packages/2a/d5/cbade46ce97c59c6c3da525e8d95b7abe8a42974a1dc5c1d489c10433e88/numpy-2.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0f01dcf33e73d80bd8dc0f20a71303abbafa26a19e23f6b68d1aa9990af90257", size = 18379680, upload-time = "2026-01-31T23:11:55.22Z" }, + { url = "https://files.pythonhosted.org/packages/40/62/48f99ae172a4b63d981babe683685030e8a3df4f246c893ea5c6ef99f018/numpy-2.4.2-cp313-cp313t-win32.whl", hash = "sha256:52b913ec40ff7ae845687b0b34d8d93b60cb66dcee06996dd5c99f2fc9328657", size = 6082433, upload-time = "2026-01-31T23:11:58.096Z" }, + { url = "https://files.pythonhosted.org/packages/07/38/e054a61cfe48ad9f1ed0d188e78b7e26859d0b60ef21cd9de4897cdb5326/numpy-2.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:5eea80d908b2c1f91486eb95b3fb6fab187e569ec9752ab7d9333d2e66bf2d6b", size = 12451181, upload-time = "2026-01-31T23:11:59.782Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/a4/a05c3a6418575e185dd84d0b9680b6bb2e2dc3e4202f036b7b4e22d6e9dc/numpy-2.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:fd49860271d52127d61197bb50b64f58454e9f578cb4b2c001a6de8b1f50b0b1", size = 10290756, upload-time = "2026-01-31T23:12:02.438Z" }, + { url = "https://files.pythonhosted.org/packages/18/88/b7df6050bf18fdcfb7046286c6535cabbdd2064a3440fca3f069d319c16e/numpy-2.4.2-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:444be170853f1f9d528428eceb55f12918e4fda5d8805480f36a002f1415e09b", size = 16663092, upload-time = "2026-01-31T23:12:04.521Z" }, + { url = "https://files.pythonhosted.org/packages/25/7a/1fee4329abc705a469a4afe6e69b1ef7e915117747886327104a8493a955/numpy-2.4.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:d1240d50adff70c2a88217698ca844723068533f3f5c5fa6ee2e3220e3bdb000", size = 14698770, upload-time = "2026-01-31T23:12:06.96Z" }, + { url = "https://files.pythonhosted.org/packages/fb/0b/f9e49ba6c923678ad5bc38181c08ac5e53b7a5754dbca8e581aa1a56b1ff/numpy-2.4.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:7cdde6de52fb6664b00b056341265441192d1291c130e99183ec0d4b110ff8b1", size = 5208562, upload-time = "2026-01-31T23:12:09.632Z" }, + { url = "https://files.pythonhosted.org/packages/7d/12/d7de8f6f53f9bb76997e5e4c069eda2051e3fe134e9181671c4391677bb2/numpy-2.4.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:cda077c2e5b780200b6b3e09d0b42205a3d1c68f30c6dceb90401c13bff8fe74", size = 6543710, upload-time = "2026-01-31T23:12:11.969Z" }, + { url = "https://files.pythonhosted.org/packages/09/63/c66418c2e0268a31a4cf8a8b512685748200f8e8e8ec6c507ce14e773529/numpy-2.4.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d30291931c915b2ab5717c2974bb95ee891a1cf22ebc16a8006bd59cd210d40a", size = 15677205, upload-time = "2026-01-31T23:12:14.33Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/6c/7f237821c9642fb2a04d2f1e88b4295677144ca93285fd76eff3bcba858d/numpy-2.4.2-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bba37bc29d4d85761deed3954a1bc62be7cf462b9510b51d367b769a8c8df325", size = 16611738, upload-time = "2026-01-31T23:12:16.525Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a7/39c4cdda9f019b609b5c473899d87abff092fc908cfe4d1ecb2fcff453b0/numpy-2.4.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:b2f0073ed0868db1dcd86e052d37279eef185b9c8db5bf61f30f46adac63c909", size = 17028888, upload-time = "2026-01-31T23:12:19.306Z" }, + { url = "https://files.pythonhosted.org/packages/da/b3/e84bb64bdfea967cc10950d71090ec2d84b49bc691df0025dddb7c26e8e3/numpy-2.4.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7f54844851cdb630ceb623dcec4db3240d1ac13d4990532446761baede94996a", size = 18339556, upload-time = "2026-01-31T23:12:21.816Z" }, + { url = "https://files.pythonhosted.org/packages/88/f5/954a291bc1192a27081706862ac62bb5920fbecfbaa302f64682aa90beed/numpy-2.4.2-cp314-cp314-win32.whl", hash = "sha256:12e26134a0331d8dbd9351620f037ec470b7c75929cb8a1537f6bfe411152a1a", size = 6006899, upload-time = "2026-01-31T23:12:24.14Z" }, + { url = "https://files.pythonhosted.org/packages/05/cb/eff72a91b2efdd1bc98b3b8759f6a1654aa87612fc86e3d87d6fe4f948c4/numpy-2.4.2-cp314-cp314-win_amd64.whl", hash = "sha256:068cdb2d0d644cdb45670810894f6a0600797a69c05f1ac478e8d31670b8ee75", size = 12443072, upload-time = "2026-01-31T23:12:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/37/75/62726948db36a56428fce4ba80a115716dc4fad6a3a4352487f8bb950966/numpy-2.4.2-cp314-cp314-win_arm64.whl", hash = "sha256:6ed0be1ee58eef41231a5c943d7d1375f093142702d5723ca2eb07db9b934b05", size = 10494886, upload-time = "2026-01-31T23:12:28.488Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/2f/ee93744f1e0661dc267e4b21940870cabfae187c092e1433b77b09b50ac4/numpy-2.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:98f16a80e917003a12c0580f97b5f875853ebc33e2eaa4bccfc8201ac6869308", size = 14818567, upload-time = "2026-01-31T23:12:30.709Z" }, + { url = "https://files.pythonhosted.org/packages/a7/24/6535212add7d76ff938d8bdc654f53f88d35cddedf807a599e180dcb8e66/numpy-2.4.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:20abd069b9cda45874498b245c8015b18ace6de8546bf50dfa8cea1696ed06ef", size = 5328372, upload-time = "2026-01-31T23:12:32.962Z" }, + { url = "https://files.pythonhosted.org/packages/5e/9d/c48f0a035725f925634bf6b8994253b43f2047f6778a54147d7e213bc5a7/numpy-2.4.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:e98c97502435b53741540a5717a6749ac2ada901056c7db951d33e11c885cc7d", size = 6649306, upload-time = "2026-01-31T23:12:34.797Z" }, + { url = "https://files.pythonhosted.org/packages/81/05/7c73a9574cd4a53a25907bad38b59ac83919c0ddc8234ec157f344d57d9a/numpy-2.4.2-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:da6cad4e82cb893db4b69105c604d805e0c3ce11501a55b5e9f9083b47d2ffe8", size = 15722394, upload-time = "2026-01-31T23:12:36.565Z" }, + { url = "https://files.pythonhosted.org/packages/35/fa/4de10089f21fc7d18442c4a767ab156b25c2a6eaf187c0db6d9ecdaeb43f/numpy-2.4.2-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e4424677ce4b47fe73c8b5556d876571f7c6945d264201180db2dc34f676ab5", size = 16653343, upload-time = "2026-01-31T23:12:39.188Z" }, + { url = "https://files.pythonhosted.org/packages/b8/f9/d33e4ffc857f3763a57aa85650f2e82486832d7492280ac21ba9efda80da/numpy-2.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2b8f157c8a6f20eb657e240f8985cc135598b2b46985c5bccbde7616dc9c6b1e", size = 17078045, upload-time = "2026-01-31T23:12:42.041Z" }, + { url = 
"https://files.pythonhosted.org/packages/c8/b8/54bdb43b6225badbea6389fa038c4ef868c44f5890f95dd530a218706da3/numpy-2.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5daf6f3914a733336dab21a05cdec343144600e964d2fcdabaac0c0269874b2a", size = 18380024, upload-time = "2026-01-31T23:12:44.331Z" }, + { url = "https://files.pythonhosted.org/packages/a5/55/6e1a61ded7af8df04016d81b5b02daa59f2ea9252ee0397cb9f631efe9e5/numpy-2.4.2-cp314-cp314t-win32.whl", hash = "sha256:8c50dd1fc8826f5b26a5ee4d77ca55d88a895f4e4819c7ecc2a9f5905047a443", size = 6153937, upload-time = "2026-01-31T23:12:47.229Z" }, + { url = "https://files.pythonhosted.org/packages/45/aa/fa6118d1ed6d776b0983f3ceac9b1a5558e80df9365b1c3aa6d42bf9eee4/numpy-2.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:fcf92bee92742edd401ba41135185866f7026c502617f422eb432cfeca4fe236", size = 12631844, upload-time = "2026-01-31T23:12:48.997Z" }, + { url = "https://files.pythonhosted.org/packages/32/0a/2ec5deea6dcd158f254a7b372fb09cfba5719419c8d66343bab35237b3fb/numpy-2.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:1f92f53998a17265194018d1cc321b2e96e900ca52d54c7c77837b71b9465181", size = 10565379, upload-time = "2026-01-31T23:12:51.345Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f8/50e14d36d915ef64d8f8bc4a087fc8264d82c785eda6711f80ab7e620335/numpy-2.4.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:89f7268c009bc492f506abd6f5265defa7cb3f7487dc21d357c3d290add45082", size = 16833179, upload-time = "2026-01-31T23:12:53.5Z" }, + { url = "https://files.pythonhosted.org/packages/17/17/809b5cad63812058a8189e91a1e2d55a5a18fd04611dbad244e8aeae465c/numpy-2.4.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:e6dee3bb76aa4009d5a912180bf5b2de012532998d094acee25d9cb8dee3e44a", size = 14889755, upload-time = "2026-01-31T23:12:55.933Z" }, + { url = 
"https://files.pythonhosted.org/packages/3e/ea/181b9bcf7627fc8371720316c24db888dcb9829b1c0270abf3d288b2e29b/numpy-2.4.2-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:cd2bd2bbed13e213d6b55dc1d035a4f91748a7d3edc9480c13898b0353708920", size = 5399500, upload-time = "2026-01-31T23:12:58.671Z" }, + { url = "https://files.pythonhosted.org/packages/33/9f/413adf3fc955541ff5536b78fcf0754680b3c6d95103230252a2c9408d23/numpy-2.4.2-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:cf28c0c1d4c4bf00f509fa7eb02c58d7caf221b50b467bcb0d9bbf1584d5c821", size = 6714252, upload-time = "2026-01-31T23:13:00.518Z" }, + { url = "https://files.pythonhosted.org/packages/91/da/643aad274e29ccbdf42ecd94dafe524b81c87bcb56b83872d54827f10543/numpy-2.4.2-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e04ae107ac591763a47398bb45b568fc38f02dbc4aa44c063f67a131f99346cb", size = 15797142, upload-time = "2026-01-31T23:13:02.219Z" }, + { url = "https://files.pythonhosted.org/packages/66/27/965b8525e9cb5dc16481b30a1b3c21e50c7ebf6e9dbd48d0c4d0d5089c7e/numpy-2.4.2-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:602f65afdef699cda27ec0b9224ae5dc43e328f4c24c689deaf77133dbee74d0", size = 16727979, upload-time = "2026-01-31T23:13:04.62Z" }, + { url = "https://files.pythonhosted.org/packages/de/e5/b7d20451657664b07986c2f6e3be564433f5dcaf3482d68eaecd79afaf03/numpy-2.4.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:be71bf1edb48ebbbf7f6337b5bfd2f895d1902f6335a5830b20141fc126ffba0", size = 12502577, upload-time = "2026-01-31T23:13:07.08Z" }, +] + [[package]] name = "nvidia-cublas-cu12" version = "12.8.4.1" @@ -3968,7 +4069,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coloredlogs" }, { name = "flatbuffers" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", 
version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "protobuf" }, { name = "sympy" }, @@ -4022,7 +4124,8 @@ name = "opencv-python" version = "4.13.0.92" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/fc/6f/5a28fef4c4a382be06afe3938c64cc168223016fa520c5abaf37e8862aa5/opencv_python-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:caf60c071ec391ba51ed00a4a920f996d0b64e3e46068aac1f646b5de0326a19", size = 46247052, upload-time = "2026-02-05T07:01:25.046Z" }, @@ -4391,7 +4494,8 @@ dependencies = [ { name = "markdown" }, { name = "nltk" }, { name = "numba" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "onnxruntime" }, { name = "openai" }, { name = "pillow" }, @@ -4509,10 +4613,6 @@ local-smart-turn = [ { name = "torchaudio" }, { name = "transformers" }, ] -local-smart-turn-v3 = [ - { name = "onnxruntime" }, - { name = "transformers" }, -] mcp = [ { name = "mcp", extra = ["cli"] }, ] @@ -4548,9 +4648,6 @@ piper = [ { name = "piper-tts" }, { name = "requests" }, ] -playht = [ - { name = "websockets" }, -] resembleai = [ { name = "websockets" }, ] @@ -4643,7 +4740,7 @@ docs = [ { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "sphinx-autodoc-typehints", version = 
"3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "sphinx-autodoc-typehints", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.11.*'" }, - { name = "sphinx-autodoc-typehints", version = "3.6.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, + { name = "sphinx-autodoc-typehints", version = "3.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "sphinx-markdown-builder" }, { name = "sphinx-rtd-theme" }, { name = "toml" }, @@ -4690,12 +4787,12 @@ requires-dist = [ { name = "mcp", extras = ["cli"], marker = "extra == 'mcp'", specifier = ">=1.11.0,<2" }, { name = "mem0ai", marker = "extra == 'mem0'", specifier = "~=0.1.94" }, { name = "mlx-whisper", marker = "extra == 'mlx-whisper'", specifier = "~=0.4.2" }, - { name = "nltk", specifier = ">=3.9.1,<4" }, + { name = "nltk", specifier = ">=3.9.3,<4" }, { name = "noisereduce", marker = "extra == 'noisereduce'", specifier = "~=3.0.3" }, - { name = "numba", specifier = "==0.61.2" }, + { name = "numba", specifier = ">=0.61.2" }, { name = "numpy", specifier = ">=1.26.4,<3" }, { name = "nvidia-riva-client", marker = "extra == 'nvidia'", specifier = "~=2.21.1" }, - { name = "onnxruntime", marker = "extra == 'local-smart-turn-v3'", specifier = "~=1.23.2" }, + { name = "onnxruntime", specifier = "~=1.23.2" }, { name = "onnxruntime", marker = "extra == 'silero'", specifier = "~=1.23.2" }, { name = "openai", specifier = ">=1.74.0,<3" }, { name = "opencv-python", marker = "extra == 'webrtc'", specifier = ">=4.11.0.86,<5" }, @@ -4705,7 +4802,6 @@ requires-dist = [ { name = "opentelemetry-sdk", marker = "extra == 'tracing'", specifier = ">=1.33.0" }, { name = "ormsgpack", marker = "extra == 'fish'", specifier = "~=1.7.0" }, { name = "pillow", specifier = ">=11.1.0,<13" }, - { name = "pipecat-ai", extras = 
["local-smart-turn-v3"] }, { name = "pipecat-ai", extras = ["nvidia"], marker = "extra == 'riva'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'assemblyai'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'asyncai'" }, @@ -4721,7 +4817,6 @@ requires-dist = [ { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'lmnt'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'neuphonic'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'openai'" }, - { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'playht'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'resembleai'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'rime'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'sarvam'" }, @@ -4729,7 +4824,7 @@ requires-dist = [ { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'ultravox'" }, { name = "pipecat-ai", extras = ["websockets-base"], marker = "extra == 'websocket'" }, { name = "pipecat-ai-krisp", marker = "extra == 'krisp'", specifier = "~=0.4.0" }, - { name = "pipecat-ai-small-webrtc-prebuilt", marker = "extra == 'runner'", specifier = ">=2.2.0" }, + { name = "pipecat-ai-small-webrtc-prebuilt", marker = "extra == 'runner'", specifier = ">=2.3.0" }, { name = "piper-tts", marker = "extra == 'piper'", specifier = ">=1.3.0,<2" }, { name = "protobuf", specifier = "~=5.29.6" }, { name = "pvkoala", marker = "extra == 'koala'", specifier = "~=2.0.3" }, @@ -4755,14 +4850,14 @@ requires-dist = [ { name = "timm", marker = "extra == 'moondream'", specifier = "~=1.0.13" }, { name = "torch", marker = "extra == 'local-smart-turn'", specifier = ">=2.5.0,<3" }, { name = "torchaudio", marker = "extra == 'local-smart-turn'", specifier = ">=2.5.0,<3" }, + { name = "transformers" }, { name = "transformers", marker = "extra == 
'local-smart-turn'" }, - { name = "transformers", marker = "extra == 'local-smart-turn-v3'" }, { name = "transformers", marker = "extra == 'moondream'", specifier = ">=4.48.0" }, { name = "uvicorn", marker = "extra == 'runner'", specifier = ">=0.32.0,<1.0.0" }, { name = "wait-for2", marker = "python_full_version < '3.12'", specifier = ">=0.4.1" }, { name = "websockets", marker = "extra == 'websockets-base'", specifier = ">=13.1,<16.0" }, ] -provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "krisp", "langchain", "lemonslice", "livekit", "lmnt", "local", "local-smart-turn", "local-smart-turn-v3", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "playht", "qwen", "remote-smart-turn", "resembleai", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", "soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"] +provides-extras = ["aic", "anthropic", "assemblyai", "asyncai", "aws", "aws-nova-sonic", "azure", "cartesia", "camb", "cerebras", "daily", "deepgram", "deepseek", "elevenlabs", "fal", "fireworks", "fish", "gladia", "google", "gradium", "grok", "groq", "gstreamer", "heygen", "hume", "inworld", "koala", "kokoro", "krisp", "langchain", "livekit", "lmnt", "local", "local-smart-turn", "mcp", "mem0", "mistral", "mlx-whisper", "moondream", "neuphonic", "noisereduce", "nvidia", "openai", "rnnoise", "openpipe", "openrouter", "perplexity", "piper", "qwen", "remote-smart-turn", "resembleai", "rime", "riva", "runner", "sagemaker", "sambanova", "sarvam", "sentry", "silero", "simli", 
"soniox", "soundfile", "speechmatics", "strands", "tavus", "together", "tracing", "ultravox", "webrtc", "websocket", "websockets-base", "whisper"] [package.metadata.requires-dev] dev = [ @@ -4794,20 +4889,21 @@ name = "pipecat-ai-krisp" version = "0.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1d/37/0f1d11d1dc33234a36de01992a9e5adc3c5e1dce71cc87b2bf909fa2f698/pipecat_ai_krisp-0.4.0.tar.gz", hash = "sha256:4f0e05e218dcf15874957e9851299e219c713a0aa8353d2fd811f1b54001a602", size = 13338, upload-time = "2025-06-09T16:13:08.209Z" } [[package]] name = "pipecat-ai-small-webrtc-prebuilt" -version = "2.2.0" +version = "2.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastapi", extra = ["all"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9a/9f/b06cc0e2eaeda811959c216dade3ed38c30d20e6327a2b22f80125072c5a/pipecat_ai_small_webrtc_prebuilt-2.2.0.tar.gz", hash = "sha256:5d73fe619225b97e383863a901060d1c986f088f4de004477856b085aaba76c4", size = 466005, upload-time = "2026-02-13T19:28:54.626Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2e/5f/b0f73bbc6997c22655f0495ce21a4cb176e192df1b5407f66fad8101c697/pipecat_ai_small_webrtc_prebuilt-2.3.0.tar.gz", hash = "sha256:10dc31db9978d68001ae941066fe460c533412a8984df71e5416d4ebeb9c0371", size = 469001, upload-time = "2026-02-25T17:18:43.316Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/26/71/20a015cea25dc57129ed6426fdf37a09aefe37f4dd60e3a42ba2d9e3bd1b/pipecat_ai_small_webrtc_prebuilt-2.2.0-py3-none-any.whl", hash = 
"sha256:e7917d23f51e5418667541a3e241b2de28a43eea35a5a9486721be3da04e719d", size = 466257, upload-time = "2026-02-13T19:28:53.188Z" }, + { url = "https://files.pythonhosted.org/packages/5a/bc/6193b639a53f4bac1c0fe29b1f8e0d49085c60e457b02a01e725eb7c093f/pipecat_ai_small_webrtc_prebuilt-2.3.0-py3-none-any.whl", hash = "sha256:b3ddaff8bbd56746fe3c58a2d721d3ccc94d17a33c16d78dcbce73d7526c1a05", size = 468881, upload-time = "2026-02-25T17:18:41.869Z" }, ] [[package]] @@ -4858,7 +4954,7 @@ wheels = [ [[package]] name = "posthog" -version = "7.9.3" +version = "7.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "backoff" }, @@ -4868,9 +4964,9 @@ dependencies = [ { name = "six" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7e/06/bcffcd262c861695fbaa74490b872e37d6fc41d3dcc1a43207d20525522f/posthog-7.9.3.tar.gz", hash = "sha256:55f7580265d290936ac4c112a4e2031a41743be4f90d4183ac9f85b721ff13ae", size = 172336, upload-time = "2026-02-18T22:20:24.085Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/50/5c0d9232118fdc1434c1b7bbc1a14de5b310498ede09a7e2123ae1f5f8bd/posthog-7.9.4.tar.gz", hash = "sha256:50acc94ef6267d7030575d2ff54e89e748fac2e98525ac672aeb0423160f77cf", size = 172973, upload-time = "2026-02-25T15:28:47.065Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/7e/0e06a96823fa7c11ce73920e6ff77e82445db62ac4eae0b6f211edb4c4c2/posthog-7.9.3-py3-none-any.whl", hash = "sha256:2ddcacdef6c4afb124ebfcf27d7be58388943a7e24f8d4a51a52732c9b90bad6", size = 197819, upload-time = "2026-02-18T22:20:22.015Z" }, + { url = "https://files.pythonhosted.org/packages/df/6f/794a4e94e3640282e75013ce18e65f0a01afc8d71f733664b4a272f98bce/posthog-7.9.4-py3-none-any.whl", hash = "sha256:414125ddd7a48b9c67feb24d723df1f666af41ad10f8a9a8bbaf5e3b536a2e26", size = 198651, upload-time = "2026-02-25T15:28:45.398Z" }, ] [[package]] @@ -5395,9 +5491,10 @@ version = "0.1.1" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "future" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/75/b5/39d59c44ecd828fabfdbd796b50a561e6543ca90ef440ab307374f107856/pyloudnorm-0.1.1.tar.gz", hash = "sha256:63cd4e197dea4e7795160ea08ed02d318091bce883e436a6dbc5963326b71e1e", size = 8588, upload-time = "2023-01-05T16:11:28.601Z" } wheels = [ @@ -5465,7 +5562,8 @@ dependencies = [ { name = "audiolab" }, { name = "click" }, { name = "matplotlib" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "tqdm" }, ] wheels = [ @@ -5533,6 +5631,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "python-discovery" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = 
"platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/82/bb/93a3e83bdf9322c7e21cafd092e56a4a17c4d8ef4277b6eb01af1a540a6f/python_discovery-1.1.0.tar.gz", hash = "sha256:447941ba1aed8cc2ab7ee3cb91be5fc137c5bdbb05b7e6ea62fbdcb66e50b268", size = 55674, upload-time = "2026-02-26T09:42:49.668Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/54/82a6e2ef37f0f23dccac604b9585bdcbd0698604feb64807dcb72853693e/python_discovery-1.1.0-py3-none-any.whl", hash = "sha256:a162893b8809727f54594a99ad2179d2ede4bf953e12d4c7abc3cc9cdbd1437b", size = 30687, upload-time = "2026-02-26T09:42:48.548Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.1" @@ -5683,20 +5794,21 @@ wheels = [ [[package]] name = "qdrant-client" -version = "1.16.1" +version = "1.17.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "grpcio" }, { name = "httpx", extra = ["http2"] }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "portalocker" }, { name = "protobuf" }, { name = "pydantic" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d9/68/fec3816a223c0b73b0e0036460be45c61ce2770ffb9197ac371e4f615ddc/qdrant_client-1.16.1.tar.gz", hash = "sha256:676c7c10fd4d4cb2981b8fcb32fd764f5f661b04b7334d024034d07212f971fd", size = 332130, upload-time = "2025-11-25T04:31:54.212Z" } +sdist = { url = "https://files.pythonhosted.org/packages/20/fb/c9c4cecf6e7fdff2dbaeee0de40e93fe495379eb5fe2775b184ea45315da/qdrant_client-1.17.0.tar.gz", hash = "sha256:47eb033edb9be33a4babb4d87b0d8d5eaf03d52112dca0218db7f2030bf41ba9", size = 344839, upload-time = "2026-02-19T16:03:17.069Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/60/e2/60a20d04b0595c641516463168909c5bbcc192d3d6eacb637c1677109c6a/qdrant_client-1.16.1-py3-none-any.whl", hash = "sha256:1eefe89f66e8a468ba0de1680e28b441e69825cfb62e8fb2e457c15e24ce5e3b", size = 378481, upload-time = "2025-11-25T04:31:52.629Z" }, + { url = "https://files.pythonhosted.org/packages/c1/15/dfadbc9d8c9872e8ac45fa96f5099bb2855f23426bfea1bbcdc85e64ef6e/qdrant_client-1.17.0-py3-none-any.whl", hash = "sha256:f5b452c68c42b3580d3d266446fb00d3c6e3aae89c916e16585b3c704e108438", size = 390381, upload-time = "2026-02-19T16:03:15.486Z" }, ] [[package]] @@ -5728,123 +5840,123 @@ wheels = [ [[package]] name = "regex" -version = "2026.2.19" +version = "2026.2.28" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ff/c0/d8079d4f6342e4cec5c3e7d7415b5cd3e633d5f4124f7a4626908dbe84c7/regex-2026.2.19.tar.gz", hash = "sha256:6fb8cb09b10e38f3ae17cc6dc04a1df77762bd0351b6ba9041438e7cc85ec310", size = 414973, upload-time = "2026-02-19T19:03:47.899Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/71/41455aa99a5a5ac1eaf311f5d8efd9ce6433c03ac1e0962de163350d0d97/regex-2026.2.28.tar.gz", hash = "sha256:a729e47d418ea11d03469f321aaf67cdee8954cde3ff2cf8403ab87951ad10f2", size = 415184, upload-time = "2026-02-28T02:19:42.792Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/de/f10b4506acfd684de4e42b0aa56ccea1a778a18864da8f6d319a40591062/regex-2026.2.19-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f5a37a17d110f9d5357a43aa7e3507cb077bf3143d1c549a45c4649e90e40a70", size = 488369, upload-time = "2026-02-19T18:59:45.01Z" }, - { url = "https://files.pythonhosted.org/packages/8b/2f/b4eaef1f0b4d0bf2a73eaf07c08f6c13422918a4180c9211ce0521746d0c/regex-2026.2.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:676c4e6847a83a1d5732b4ed553881ad36f0a8133627bb695a89ecf3571499d3", size = 290743, upload-time = "2026-02-19T18:59:48.527Z" }, - { url = 
"https://files.pythonhosted.org/packages/76/7c/805413bd0a88d04688c0725c222cfb811bd54a2f571004c24199a1ae55d6/regex-2026.2.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82336faeecac33297cd42857c3b36f12b91810e3fdd276befdd128f73a2b43fa", size = 288652, upload-time = "2026-02-19T18:59:50.2Z" }, - { url = "https://files.pythonhosted.org/packages/08/ff/2c4cd530a878b1975398e76faef4285f11e7c9ccf1aaedfd528bfcc1f580/regex-2026.2.19-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:52136f5b71f095cb74b736cc3a1b578030dada2e361ef2f07ca582240b703946", size = 781759, upload-time = "2026-02-19T18:59:51.836Z" }, - { url = "https://files.pythonhosted.org/packages/37/45/9608ab1b41f6740ff4076eabadde8e8b3f3400942b348ac41e8599ccc131/regex-2026.2.19-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4192464fe3e6cb0ef6751f7d3b16f886d8270d359ed1590dd555539d364f0ff7", size = 850947, upload-time = "2026-02-19T18:59:53.739Z" }, - { url = "https://files.pythonhosted.org/packages/90/3a/66471b6c4f7cac17e14bf5300e46661bba2b17ffb0871bd2759e837a6f82/regex-2026.2.19-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e561dd47a85d2660d3d3af4e6cb2da825cf20f121e577147963f875b83d32786", size = 898794, upload-time = "2026-02-19T18:59:55.993Z" }, - { url = "https://files.pythonhosted.org/packages/c2/d2/38c53929a5931f7398e5e49f5a5a3079cb2aba30119b4350608364cfad8c/regex-2026.2.19-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00ec994d7824bf01cd6c7d14c7a6a04d9aeaf7c42a2bc22d2359d715634d539b", size = 791922, upload-time = "2026-02-19T18:59:58.216Z" }, - { url = "https://files.pythonhosted.org/packages/8b/bd/b046e065630fa25059d9c195b7b5308ea94da45eee65d40879772500f74c/regex-2026.2.19-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:2cb00aabd96b345d56a8c2bc328c8d6c4d29935061e05078bf1f02302e12abf5", size = 783345, upload-time = "2026-02-19T18:59:59.948Z" }, - { url = "https://files.pythonhosted.org/packages/d4/8f/045c643d2fa255a985e8f87d848e4be230b711a8935e4bdc58e60b8f7b84/regex-2026.2.19-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f374366ed35673ea81b86a8859c457d4fae6ba092b71024857e9e237410c7404", size = 768055, upload-time = "2026-02-19T19:00:01.65Z" }, - { url = "https://files.pythonhosted.org/packages/72/9f/ab7ae9f5447559562f1a788bbc85c0e526528c5e6c20542d18e4afc86aad/regex-2026.2.19-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f9417fd853fcd00b7d55167e692966dd12d95ba1a88bf08a62002ccd85030790", size = 774955, upload-time = "2026-02-19T19:00:03.368Z" }, - { url = "https://files.pythonhosted.org/packages/37/5c/f16fc23c56f60b6f4ff194604a6e53bb8aec7b6e8e4a23a482dee8d77235/regex-2026.2.19-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:12e86a01594031abf892686fcb309b041bf3de3d13d99eb7e2b02a8f3c687df1", size = 846010, upload-time = "2026-02-19T19:00:05.079Z" }, - { url = "https://files.pythonhosted.org/packages/51/c8/6be4c854135d7c9f35d4deeafdaf124b039ecb4ffcaeb7ed0495ad2c97ca/regex-2026.2.19-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:79014115e6fdf18fd9b32e291d58181bf42d4298642beaa13fd73e69810e4cb6", size = 755938, upload-time = "2026-02-19T19:00:07.148Z" }, - { url = "https://files.pythonhosted.org/packages/d6/8d/f683d49b9663a5324b95a328e69d397f6dade7cb84154eec116bf79fe150/regex-2026.2.19-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:31aefac2506967b7dd69af2c58eca3cc8b086d4110b66d6ac6e9026f0ee5b697", size = 835773, upload-time = "2026-02-19T19:00:08.939Z" }, - { url = "https://files.pythonhosted.org/packages/16/cd/619224b90da09f167fe4497c350a0d0b30edc539ee9244bf93e604c073c3/regex-2026.2.19-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:49cef7bb2a491f91a8869c7cdd90babf0a417047ab0bf923cd038ed2eab2ccb8", size = 780075, 
upload-time = "2026-02-19T19:00:10.838Z" }, - { url = "https://files.pythonhosted.org/packages/5b/88/19cfb0c262d6f9d722edef29157125418bf90eb3508186bf79335afeedae/regex-2026.2.19-cp310-cp310-win32.whl", hash = "sha256:3a039474986e7a314ace6efb9ce52f5da2bdb80ac4955358723d350ec85c32ad", size = 266004, upload-time = "2026-02-19T19:00:12.371Z" }, - { url = "https://files.pythonhosted.org/packages/82/af/5b487e0287ef72545d7ae92edecdacbe3d44e531cac24fda7de5598ba8dd/regex-2026.2.19-cp310-cp310-win_amd64.whl", hash = "sha256:5b81ff4f9cad99f90c807a00c5882fbcda86d8b3edd94e709fb531fc52cb3d25", size = 277895, upload-time = "2026-02-19T19:00:13.75Z" }, - { url = "https://files.pythonhosted.org/packages/4c/19/b6715a187ffca4d2979af92a46ce922445ba41f910bf187ccd666a2d52ef/regex-2026.2.19-cp310-cp310-win_arm64.whl", hash = "sha256:a032bc01a4bc73fc3cadba793fce28eb420da39338f47910c59ffcc11a5ba5ef", size = 270465, upload-time = "2026-02-19T19:00:15.127Z" }, - { url = "https://files.pythonhosted.org/packages/6f/93/43f405a98f54cc59c786efb4fc0b644615ed2392fc89d57d30da11f35b5b/regex-2026.2.19-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:93b16a18cadb938f0f2306267161d57eb33081a861cee9ffcd71e60941eb5dfc", size = 488365, upload-time = "2026-02-19T19:00:17.857Z" }, - { url = "https://files.pythonhosted.org/packages/66/46/da0efce22cd8f5ae28eeb25ac69703f49edcad3331ac22440776f4ea0867/regex-2026.2.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:78af1e499cab704131f6f4e2f155b7f54ce396ca2acb6ef21a49507e4752e0be", size = 290737, upload-time = "2026-02-19T19:00:19.869Z" }, - { url = "https://files.pythonhosted.org/packages/fb/19/f735078448132c1c974974d30d5306337bc297fe6b6f126164bff72c1019/regex-2026.2.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eb20c11aa4c3793c9ad04c19a972078cdadb261b8429380364be28e867a843f2", size = 288654, upload-time = "2026-02-19T19:00:21.307Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/3e/6d7c24a2f423c03ad03e3fbddefa431057186ac1c4cb4fa98b03c7f39808/regex-2026.2.19-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:db5fd91eec71e7b08de10011a2223d0faa20448d4e1380b9daa179fa7bf58906", size = 793785, upload-time = "2026-02-19T19:00:22.926Z" }, - { url = "https://files.pythonhosted.org/packages/67/32/fdb8107504b3122a79bde6705ac1f9d495ed1fe35b87d7cfc1864471999a/regex-2026.2.19-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fdbade8acba71bb45057c2b72f477f0b527c4895f9c83e6cfc30d4a006c21726", size = 860731, upload-time = "2026-02-19T19:00:25.196Z" }, - { url = "https://files.pythonhosted.org/packages/9a/fd/cc8c6f05868defd840be6e75919b1c3f462357969ac2c2a0958363b4dc23/regex-2026.2.19-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:31a5f561eb111d6aae14202e7043fb0b406d3c8dddbbb9e60851725c9b38ab1d", size = 907350, upload-time = "2026-02-19T19:00:27.093Z" }, - { url = "https://files.pythonhosted.org/packages/b5/1b/4590db9caa8db3d5a3fe31197c4e42c15aab3643b549ef6a454525fa3a61/regex-2026.2.19-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4584a3ee5f257b71e4b693cc9be3a5104249399f4116fe518c3f79b0c6fc7083", size = 800628, upload-time = "2026-02-19T19:00:29.392Z" }, - { url = "https://files.pythonhosted.org/packages/76/05/513eaa5b96fa579fd0b813e19ec047baaaf573d7374ff010fa139b384bf7/regex-2026.2.19-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:196553ba2a2f47904e5dc272d948a746352e2644005627467e055be19d73b39e", size = 773711, upload-time = "2026-02-19T19:00:30.996Z" }, - { url = "https://files.pythonhosted.org/packages/95/65/5aed06d8c54563d37fea496cf888be504879a3981a7c8e12c24b2c92c209/regex-2026.2.19-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:0c10869d18abb759a3317c757746cc913d6324ce128b8bcec99350df10419f18", size = 783186, upload-time = "2026-02-19T19:00:34.598Z" }, - { url = "https://files.pythonhosted.org/packages/2c/57/79a633ad90f2371b4ef9cd72ba3a69a1a67d0cfaab4fe6fa8586d46044ef/regex-2026.2.19-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e689fed279cbe797a6b570bd18ff535b284d057202692c73420cb93cca41aa32", size = 854854, upload-time = "2026-02-19T19:00:37.306Z" }, - { url = "https://files.pythonhosted.org/packages/eb/2d/0f113d477d9e91ec4545ec36c82e58be25038d06788229c91ad52da2b7f5/regex-2026.2.19-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0782bd983f19ac7594039c9277cd6f75c89598c1d72f417e4d30d874105eb0c7", size = 762279, upload-time = "2026-02-19T19:00:39.793Z" }, - { url = "https://files.pythonhosted.org/packages/39/cb/237e9fa4f61469fd4f037164dbe8e675a376c88cf73aaaa0aedfd305601c/regex-2026.2.19-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:dbb240c81cfed5d4a67cb86d7676d9f7ec9c3f186310bec37d8a1415210e111e", size = 846172, upload-time = "2026-02-19T19:00:42.134Z" }, - { url = "https://files.pythonhosted.org/packages/ac/7c/104779c5915cc4eb557a33590f8a3f68089269c64287dd769afd76c7ce61/regex-2026.2.19-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80d31c3f1fe7e4c6cd1831cd4478a0609903044dfcdc4660abfe6fb307add7f0", size = 789078, upload-time = "2026-02-19T19:00:43.908Z" }, - { url = "https://files.pythonhosted.org/packages/a8/4a/eae4e88b1317fb2ff57794915e0099198f51e760f6280b320adfa0ad396d/regex-2026.2.19-cp311-cp311-win32.whl", hash = "sha256:66e6a43225ff1064f8926adbafe0922b370d381c3330edaf9891cade52daa790", size = 266013, upload-time = "2026-02-19T19:00:47.274Z" }, - { url = "https://files.pythonhosted.org/packages/f9/29/ba89eb8fae79705e07ad1bd69e568f776159d2a8093c9dbc5303ee618298/regex-2026.2.19-cp311-cp311-win_amd64.whl", hash = "sha256:59a7a5216485a1896c5800e9feb8ff9213e11967b482633b6195d7da11450013", size = 277906, upload-time = "2026-02-19T19:00:49.011Z" }, - { 
url = "https://files.pythonhosted.org/packages/e3/1a/042d8f04b28e318df92df69d8becb0f42221eb3dd4fe5e976522f4337c76/regex-2026.2.19-cp311-cp311-win_arm64.whl", hash = "sha256:ec661807ffc14c8d14bb0b8c1bb3d5906e476bc96f98b565b709d03962ee4dd4", size = 270463, upload-time = "2026-02-19T19:00:50.988Z" }, - { url = "https://files.pythonhosted.org/packages/b3/73/13b39c7c9356f333e564ab4790b6cb0df125b8e64e8d6474e73da49b1955/regex-2026.2.19-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c1665138776e4ac1aa75146669236f7a8a696433ec4e525abf092ca9189247cc", size = 489541, upload-time = "2026-02-19T19:00:52.728Z" }, - { url = "https://files.pythonhosted.org/packages/15/77/fcc7bd9a67000d07fbcc11ed226077287a40d5c84544e62171d29d3ef59c/regex-2026.2.19-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d792b84709021945597e05656aac059526df4e0c9ef60a0eaebb306f8fafcaa8", size = 291414, upload-time = "2026-02-19T19:00:54.51Z" }, - { url = "https://files.pythonhosted.org/packages/f9/87/3997fc72dc59233426ef2e18dfdd105bb123812fff740ee9cc348f1a3243/regex-2026.2.19-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db970bcce4d63b37b3f9eb8c893f0db980bbf1d404a1d8d2b17aa8189de92c53", size = 289140, upload-time = "2026-02-19T19:00:56.841Z" }, - { url = "https://files.pythonhosted.org/packages/f3/d0/b7dd3883ed1cff8ee0c0c9462d828aaf12be63bf5dc55453cbf423523b13/regex-2026.2.19-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:03d706fbe7dfec503c8c3cb76f9352b3e3b53b623672aa49f18a251a6c71b8e6", size = 798767, upload-time = "2026-02-19T19:00:59.014Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7e/8e2d09103832891b2b735a2515abf377db21144c6dd5ede1fb03c619bf09/regex-2026.2.19-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8dbff048c042beef60aa1848961384572c5afb9e8b290b0f1203a5c42cf5af65", size = 864436, upload-time = "2026-02-19T19:01:00.772Z" }, - { url = 
"https://files.pythonhosted.org/packages/8a/2e/afea8d23a6db1f67f45e3a0da3057104ce32e154f57dd0c8997274d45fcd/regex-2026.2.19-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccaaf9b907ea6b4223d5cbf5fa5dff5f33dc66f4907a25b967b8a81339a6e332", size = 912391, upload-time = "2026-02-19T19:01:02.865Z" }, - { url = "https://files.pythonhosted.org/packages/59/3c/ea5a4687adaba5e125b9bd6190153d0037325a0ba3757cc1537cc2c8dd90/regex-2026.2.19-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:75472631eee7898e16a8a20998d15106cb31cfde21cdf96ab40b432a7082af06", size = 803702, upload-time = "2026-02-19T19:01:05.298Z" }, - { url = "https://files.pythonhosted.org/packages/dc/c5/624a0705e8473a26488ec1a3a4e0b8763ecfc682a185c302dfec71daea35/regex-2026.2.19-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d89f85a5ccc0cec125c24be75610d433d65295827ebaf0d884cbe56df82d4774", size = 775980, upload-time = "2026-02-19T19:01:07.047Z" }, - { url = "https://files.pythonhosted.org/packages/4d/4b/ed776642533232b5599b7c1f9d817fe11faf597e8a92b7a44b841daaae76/regex-2026.2.19-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0d9f81806abdca3234c3dd582b8a97492e93de3602c8772013cb4affa12d1668", size = 788122, upload-time = "2026-02-19T19:01:08.744Z" }, - { url = "https://files.pythonhosted.org/packages/8c/58/e93e093921d13b9784b4f69896b6e2a9e09580a265c59d9eb95e87d288f2/regex-2026.2.19-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9dadc10d1c2bbb1326e572a226d2ec56474ab8aab26fdb8cf19419b372c349a9", size = 858910, upload-time = "2026-02-19T19:01:10.488Z" }, - { url = "https://files.pythonhosted.org/packages/85/77/ff1d25a0c56cd546e0455cbc93235beb33474899690e6a361fa6b52d265b/regex-2026.2.19-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:6bc25d7e15f80c9dc7853cbb490b91c1ec7310808b09d56bd278fe03d776f4f6", size = 764153, upload-time = "2026-02-19T19:01:12.156Z" }, - { url = 
"https://files.pythonhosted.org/packages/cd/ef/8ec58df26d52d04443b1dc56f9be4b409f43ed5ae6c0248a287f52311fc4/regex-2026.2.19-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:965d59792f5037d9138da6fed50ba943162160443b43d4895b182551805aff9c", size = 850348, upload-time = "2026-02-19T19:01:14.147Z" }, - { url = "https://files.pythonhosted.org/packages/f5/b3/c42fd5ed91639ce5a4225b9df909180fc95586db071f2bf7c68d2ccbfbe6/regex-2026.2.19-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:38d88c6ed4a09ed61403dbdf515d969ccba34669af3961ceb7311ecd0cef504a", size = 789977, upload-time = "2026-02-19T19:01:15.838Z" }, - { url = "https://files.pythonhosted.org/packages/b6/22/bc3b58ebddbfd6ca5633e71fd41829ee931963aad1ebeec55aad0c23044e/regex-2026.2.19-cp312-cp312-win32.whl", hash = "sha256:5df947cabab4b643d4791af5e28aecf6bf62e6160e525651a12eba3d03755e6b", size = 266381, upload-time = "2026-02-19T19:01:17.952Z" }, - { url = "https://files.pythonhosted.org/packages/fc/4a/6ff550b63e67603ee60e69dc6bd2d5694e85046a558f663b2434bdaeb285/regex-2026.2.19-cp312-cp312-win_amd64.whl", hash = "sha256:4146dc576ea99634ae9c15587d0c43273b4023a10702998edf0fa68ccb60237a", size = 277274, upload-time = "2026-02-19T19:01:19.826Z" }, - { url = "https://files.pythonhosted.org/packages/cc/29/9ec48b679b1e87e7bc8517dff45351eab38f74fbbda1fbcf0e9e6d4e8174/regex-2026.2.19-cp312-cp312-win_arm64.whl", hash = "sha256:cdc0a80f679353bd68450d2a42996090c30b2e15ca90ded6156c31f1a3b63f3b", size = 270509, upload-time = "2026-02-19T19:01:22.075Z" }, - { url = "https://files.pythonhosted.org/packages/d2/2d/a849835e76ac88fcf9e8784e642d3ea635d183c4112150ca91499d6703af/regex-2026.2.19-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8df08decd339e8b3f6a2eb5c05c687fe9d963ae91f352bc57beb05f5b2ac6879", size = 489329, upload-time = "2026-02-19T19:01:23.841Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/aa/78ff4666d3855490bae87845a5983485e765e1f970da20adffa2937b241d/regex-2026.2.19-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3aa0944f1dc6e92f91f3b306ba7f851e1009398c84bfd370633182ee4fc26a64", size = 291308, upload-time = "2026-02-19T19:01:25.605Z" }, - { url = "https://files.pythonhosted.org/packages/cd/58/714384efcc07ae6beba528a541f6e99188c5cc1bc0295337f4e8a868296d/regex-2026.2.19-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c13228fbecb03eadbfd8f521732c5fda09ef761af02e920a3148e18ad0e09968", size = 289033, upload-time = "2026-02-19T19:01:27.243Z" }, - { url = "https://files.pythonhosted.org/packages/75/ec/6438a9344d2869cf5265236a06af1ca6d885e5848b6561e10629bc8e5a11/regex-2026.2.19-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d0e72703c60d68b18b27cde7cdb65ed2570ae29fb37231aa3076bfb6b1d1c13", size = 798798, upload-time = "2026-02-19T19:01:28.877Z" }, - { url = "https://files.pythonhosted.org/packages/c2/be/b1ce2d395e3fd2ce5f2fde2522f76cade4297cfe84cd61990ff48308749c/regex-2026.2.19-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:46e69a4bf552e30e74a8aa73f473c87efcb7f6e8c8ece60d9fd7bf13d5c86f02", size = 864444, upload-time = "2026-02-19T19:01:30.933Z" }, - { url = "https://files.pythonhosted.org/packages/d5/97/a3406460c504f7136f140d9461960c25f058b0240e4424d6fb73c7a067ab/regex-2026.2.19-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8edda06079bd770f7f0cf7f3bba1a0b447b96b4a543c91fe0c142d034c166161", size = 912633, upload-time = "2026-02-19T19:01:32.744Z" }, - { url = "https://files.pythonhosted.org/packages/8b/d9/e5dbef95008d84e9af1dc0faabbc34a7fbc8daa05bc5807c5cf86c2bec49/regex-2026.2.19-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9cbc69eae834afbf634f7c902fc72ff3e993f1c699156dd1af1adab5d06b7fe7", size = 
803718, upload-time = "2026-02-19T19:01:34.61Z" }, - { url = "https://files.pythonhosted.org/packages/2f/e5/61d80132690a1ef8dc48e0f44248036877aebf94235d43f63a20d1598888/regex-2026.2.19-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bcf57d30659996ee5c7937999874504c11b5a068edc9515e6a59221cc2744dd1", size = 775975, upload-time = "2026-02-19T19:01:36.525Z" }, - { url = "https://files.pythonhosted.org/packages/05/32/ae828b3b312c972cf228b634447de27237d593d61505e6ad84723f8eabba/regex-2026.2.19-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8e6e77cd92216eb489e21e5652a11b186afe9bdefca8a2db739fd6b205a9e0a4", size = 788129, upload-time = "2026-02-19T19:01:38.498Z" }, - { url = "https://files.pythonhosted.org/packages/cb/25/d74f34676f22bec401eddf0e5e457296941e10cbb2a49a571ca7a2c16e5a/regex-2026.2.19-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b9ab8dec42afefa6314ea9b31b188259ffdd93f433d77cad454cd0b8d235ce1c", size = 858818, upload-time = "2026-02-19T19:01:40.409Z" }, - { url = "https://files.pythonhosted.org/packages/1e/eb/0bc2b01a6b0b264e1406e5ef11cae3f634c3bd1a6e61206fd3227ce8e89c/regex-2026.2.19-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:294c0fb2e87c6bcc5f577c8f609210f5700b993151913352ed6c6af42f30f95f", size = 764186, upload-time = "2026-02-19T19:01:43.009Z" }, - { url = "https://files.pythonhosted.org/packages/eb/37/5fe5a630d0d99ecf0c3570f8905dafbc160443a2d80181607770086c9812/regex-2026.2.19-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c0924c64b082d4512b923ac016d6e1dcf647a3560b8a4c7e55cbbd13656cb4ed", size = 850363, upload-time = "2026-02-19T19:01:45.015Z" }, - { url = "https://files.pythonhosted.org/packages/c3/45/ef68d805294b01ec030cfd388724ba76a5a21a67f32af05b17924520cb0b/regex-2026.2.19-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:790dbf87b0361606cb0d79b393c3e8f4436a14ee56568a7463014565d97da02a", size = 790026, upload-time = "2026-02-19T19:01:47.51Z" }, - { url = 
"https://files.pythonhosted.org/packages/d6/3a/40d3b66923dfc5aeba182f194f0ca35d09afe8c031a193e6ae46971a0a0e/regex-2026.2.19-cp313-cp313-win32.whl", hash = "sha256:43cdde87006271be6963896ed816733b10967baaf0e271d529c82e93da66675b", size = 266372, upload-time = "2026-02-19T19:01:49.469Z" }, - { url = "https://files.pythonhosted.org/packages/3d/f2/39082e8739bfd553497689e74f9d5e5bb531d6f8936d0b94f43e18f219c0/regex-2026.2.19-cp313-cp313-win_amd64.whl", hash = "sha256:127ea69273485348a126ebbf3d6052604d3c7da284f797bba781f364c0947d47", size = 277253, upload-time = "2026-02-19T19:01:51.208Z" }, - { url = "https://files.pythonhosted.org/packages/c2/c2/852b9600d53fb47e47080c203e2cdc0ac7e84e37032a57e0eaa37446033a/regex-2026.2.19-cp313-cp313-win_arm64.whl", hash = "sha256:5e56c669535ac59cbf96ca1ece0ef26cb66809990cda4fa45e1e32c3b146599e", size = 270505, upload-time = "2026-02-19T19:01:52.865Z" }, - { url = "https://files.pythonhosted.org/packages/a9/a2/e0b4575b93bc84db3b1fab24183e008691cd2db5c0ef14ed52681fbd94dd/regex-2026.2.19-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:93d881cab5afdc41a005dba1524a40947d6f7a525057aa64aaf16065cf62faa9", size = 492202, upload-time = "2026-02-19T19:01:54.816Z" }, - { url = "https://files.pythonhosted.org/packages/24/b5/b84fec8cbb5f92a7eed2b6b5353a6a9eed9670fee31817c2da9eb85dc797/regex-2026.2.19-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:80caaa1ddcc942ec7be18427354f9d58a79cee82dea2a6b3d4fd83302e1240d7", size = 292884, upload-time = "2026-02-19T19:01:58.254Z" }, - { url = "https://files.pythonhosted.org/packages/70/0c/fe89966dfae43da46f475362401f03e4d7dc3a3c955b54f632abc52669e0/regex-2026.2.19-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d793c5b4d2b4c668524cd1651404cfc798d40694c759aec997e196fe9729ec60", size = 291236, upload-time = "2026-02-19T19:01:59.966Z" }, - { url = 
"https://files.pythonhosted.org/packages/f2/f7/bda2695134f3e63eb5cccbbf608c2a12aab93d261ff4e2fe49b47fabc948/regex-2026.2.19-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5100acb20648d9efd3f4e7e91f51187f95f22a741dcd719548a6cf4e1b34b3f", size = 807660, upload-time = "2026-02-19T19:02:01.632Z" }, - { url = "https://files.pythonhosted.org/packages/11/56/6e3a4bf5e60d17326b7003d91bbde8938e439256dec211d835597a44972d/regex-2026.2.19-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5e3a31e94d10e52a896adaa3adf3621bd526ad2b45b8c2d23d1bbe74c7423007", size = 873585, upload-time = "2026-02-19T19:02:03.522Z" }, - { url = "https://files.pythonhosted.org/packages/35/5e/c90c6aa4d1317cc11839359479cfdd2662608f339e84e81ba751c8a4e461/regex-2026.2.19-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8497421099b981f67c99eba4154cf0dfd8e47159431427a11cfb6487f7791d9e", size = 915243, upload-time = "2026-02-19T19:02:05.608Z" }, - { url = "https://files.pythonhosted.org/packages/90/7c/981ea0694116793001496aaf9524e5c99e122ec3952d9e7f1878af3a6bf1/regex-2026.2.19-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e7a08622f7d51d7a068f7e4052a38739c412a3e74f55817073d2e2418149619", size = 812922, upload-time = "2026-02-19T19:02:08.115Z" }, - { url = "https://files.pythonhosted.org/packages/2d/be/9eda82afa425370ffdb3fa9f3ea42450b9ae4da3ff0a4ec20466f69e371b/regex-2026.2.19-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8abe671cf0f15c26b1ad389bf4043b068ce7d3b1c5d9313e12895f57d6738555", size = 781318, upload-time = "2026-02-19T19:02:10.072Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d5/50f0bbe56a8199f60a7b6c714e06e54b76b33d31806a69d0703b23ce2a9e/regex-2026.2.19-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:5a8f28dd32a4ce9c41758d43b5b9115c1c497b4b1f50c457602c1d571fa98ce1", size = 795649, upload-time = "2026-02-19T19:02:11.96Z" }, - { url = "https://files.pythonhosted.org/packages/c5/09/d039f081e44a8b0134d0bb2dd805b0ddf390b69d0b58297ae098847c572f/regex-2026.2.19-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:654dc41a5ba9b8cc8432b3f1aa8906d8b45f3e9502442a07c2f27f6c63f85db5", size = 868844, upload-time = "2026-02-19T19:02:14.043Z" }, - { url = "https://files.pythonhosted.org/packages/ef/53/e2903b79a19ec8557fe7cd21cd093956ff2dbc2e0e33969e3adbe5b184dd/regex-2026.2.19-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:4a02faea614e7fdd6ba8b3bec6c8e79529d356b100381cec76e638f45d12ca04", size = 770113, upload-time = "2026-02-19T19:02:16.161Z" }, - { url = "https://files.pythonhosted.org/packages/8f/e2/784667767b55714ebb4e59bf106362327476b882c0b2f93c25e84cc99b1a/regex-2026.2.19-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d96162140bb819814428800934c7b71b7bffe81fb6da2d6abc1dcca31741eca3", size = 854922, upload-time = "2026-02-19T19:02:18.155Z" }, - { url = "https://files.pythonhosted.org/packages/59/78/9ef4356bd4aed752775bd18071034979b85f035fec51f3a4f9dea497a254/regex-2026.2.19-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c227f2922153ee42bbeb355fd6d009f8c81d9d7bdd666e2276ce41f53ed9a743", size = 799636, upload-time = "2026-02-19T19:02:20.04Z" }, - { url = "https://files.pythonhosted.org/packages/cf/54/fcfc9287f20c5c9bd8db755aafe3e8cf4d99a6a3f1c7162ee182e0ca9374/regex-2026.2.19-cp313-cp313t-win32.whl", hash = "sha256:a178df8ec03011153fbcd2c70cb961bc98cbbd9694b28f706c318bee8927c3db", size = 268968, upload-time = "2026-02-19T19:02:22.816Z" }, - { url = "https://files.pythonhosted.org/packages/1e/a0/ff24c6cb1273e42472706d277147fc38e1f9074a280fb6034b0fc9b69415/regex-2026.2.19-cp313-cp313t-win_amd64.whl", hash = "sha256:2c1693ca6f444d554aa246b592355b5cec030ace5a2729eae1b04ab6e853e768", size = 280390, upload-time = "2026-02-19T19:02:25.231Z" }, - 
{ url = "https://files.pythonhosted.org/packages/1a/b6/a3f6ad89d780ffdeebb4d5e2e3e30bd2ef1f70f6a94d1760e03dd1e12c60/regex-2026.2.19-cp313-cp313t-win_arm64.whl", hash = "sha256:c0761d7ae8d65773e01515ebb0b304df1bf37a0a79546caad9cbe79a42c12af7", size = 271643, upload-time = "2026-02-19T19:02:27.175Z" }, - { url = "https://files.pythonhosted.org/packages/2d/e2/7ad4e76a6dddefc0d64dbe12a4d3ca3947a19ddc501f864a5df2a8222ddd/regex-2026.2.19-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:03d191a9bcf94d31af56d2575210cb0d0c6a054dbcad2ea9e00aa4c42903b919", size = 489306, upload-time = "2026-02-19T19:02:29.058Z" }, - { url = "https://files.pythonhosted.org/packages/14/95/ee1736135733afbcf1846c58671046f99c4d5170102a150ebb3dd8d701d9/regex-2026.2.19-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:516ee067c6c721d0d0bfb80a2004edbd060fffd07e456d4e1669e38fe82f922e", size = 291218, upload-time = "2026-02-19T19:02:31.083Z" }, - { url = "https://files.pythonhosted.org/packages/ef/08/180d1826c3d7065200a5168c6b993a44947395c7bb6e04b2c2a219c34225/regex-2026.2.19-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:997862c619994c4a356cb7c3592502cbd50c2ab98da5f61c5c871f10f22de7e5", size = 289097, upload-time = "2026-02-19T19:02:33.485Z" }, - { url = "https://files.pythonhosted.org/packages/28/93/0651924c390c5740f5f896723f8ddd946a6c63083a7d8647231c343912ff/regex-2026.2.19-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02b9e1b8a7ebe2807cd7bbdf662510c8e43053a23262b9f46ad4fc2dfc9d204e", size = 799147, upload-time = "2026-02-19T19:02:35.669Z" }, - { url = "https://files.pythonhosted.org/packages/a7/00/2078bd8bcd37d58a756989adbfd9f1d0151b7ca4085a9c2a07e917fbac61/regex-2026.2.19-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6c8fb3b19652e425ff24169dad3ee07f99afa7996caa9dfbb3a9106cd726f49a", size = 865239, upload-time = "2026-02-19T19:02:38.012Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/13/75195161ec16936b35a365fa8c1dd2ab29fd910dd2587765062b174d8cfc/regex-2026.2.19-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50f1ee9488dd7a9fda850ec7c68cad7a32fa49fd19733f5403a3f92b451dcf73", size = 911904, upload-time = "2026-02-19T19:02:40.737Z" }, - { url = "https://files.pythonhosted.org/packages/96/72/ac42f6012179343d1c4bd0ffee8c948d841cb32ea188d37e96d80527fcc9/regex-2026.2.19-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ab780092b1424d13200aa5a62996e95f65ee3db8509be366437439cdc0af1a9f", size = 803518, upload-time = "2026-02-19T19:02:42.923Z" }, - { url = "https://files.pythonhosted.org/packages/bc/d1/75a08e2269b007b9783f0f86aa64488e023141219cb5f14dc1e69cda56c6/regex-2026.2.19-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:17648e1a88e72d88641b12635e70e6c71c5136ba14edba29bf8fc6834005a265", size = 775866, upload-time = "2026-02-19T19:02:45.189Z" }, - { url = "https://files.pythonhosted.org/packages/92/41/70e7d05faf6994c2ca7a9fcaa536da8f8e4031d45b0ec04b57040ede201f/regex-2026.2.19-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f914ae8c804c8a8a562fe216100bc156bfb51338c1f8d55fe32cf407774359a", size = 788224, upload-time = "2026-02-19T19:02:47.804Z" }, - { url = "https://files.pythonhosted.org/packages/c8/83/34a2dd601f9deb13c20545c674a55f4a05c90869ab73d985b74d639bac43/regex-2026.2.19-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c7e121a918bbee3f12ac300ce0a0d2f2c979cf208fb071ed8df5a6323281915c", size = 859682, upload-time = "2026-02-19T19:02:50.583Z" }, - { url = "https://files.pythonhosted.org/packages/8e/30/136db9a09a7f222d6e48b806f3730e7af6499a8cad9c72ac0d49d52c746e/regex-2026.2.19-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2fedd459c791da24914ecc474feecd94cf7845efb262ac3134fe27cbd7eda799", size = 764223, upload-time = "2026-02-19T19:02:52.777Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/ea/bb947743c78a16df481fa0635c50aa1a439bb80b0e6dc24cd4e49c716679/regex-2026.2.19-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ea8dfc99689240e61fb21b5fc2828f68b90abf7777d057b62d3166b7c1543c4c", size = 850101, upload-time = "2026-02-19T19:02:55.87Z" }, - { url = "https://files.pythonhosted.org/packages/25/27/e3bfe6e97a99f7393665926be02fef772da7f8aa59e50bc3134e4262a032/regex-2026.2.19-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fff45852160960f29e184ec8a5be5ab4063cfd0b168d439d1fc4ac3744bf29e", size = 789904, upload-time = "2026-02-19T19:02:58.523Z" }, - { url = "https://files.pythonhosted.org/packages/84/7b/7e2be6f00cea59d08761b027ad237002e90cac74b1607200ebaa2ba3d586/regex-2026.2.19-cp314-cp314-win32.whl", hash = "sha256:5390b130cce14a7d1db226a3896273b7b35be10af35e69f1cca843b6e5d2bb2d", size = 271784, upload-time = "2026-02-19T19:03:00.418Z" }, - { url = "https://files.pythonhosted.org/packages/f7/f6/639911530335773e7ec60bcaa519557b719586024c1d7eaad1daf87b646b/regex-2026.2.19-cp314-cp314-win_amd64.whl", hash = "sha256:e581f75d5c0b15669139ca1c2d3e23a65bb90e3c06ba9d9ea194c377c726a904", size = 280506, upload-time = "2026-02-19T19:03:02.302Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ec/2582b56b4e036d46bb9b5d74a18548439ffa16c11cf59076419174d80f48/regex-2026.2.19-cp314-cp314-win_arm64.whl", hash = "sha256:7187fdee1be0896c1499a991e9bf7c78e4b56b7863e7405d7bb687888ac10c4b", size = 273557, upload-time = "2026-02-19T19:03:04.836Z" }, - { url = "https://files.pythonhosted.org/packages/49/0b/f901cfeb4efd83e4f5c3e9f91a6de77e8e5ceb18555698aca3a27e215ed3/regex-2026.2.19-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:5ec1d7c080832fdd4e150c6f5621fe674c70c63b3ae5a4454cebd7796263b175", size = 492196, upload-time = "2026-02-19T19:03:08.188Z" }, - { url = 
"https://files.pythonhosted.org/packages/94/0a/349b959e3da874e15eda853755567b4cde7e5309dbb1e07bfe910cfde452/regex-2026.2.19-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8457c1bc10ee9b29cdfd897ccda41dce6bde0e9abd514bcfef7bcd05e254d411", size = 292878, upload-time = "2026-02-19T19:03:10.272Z" }, - { url = "https://files.pythonhosted.org/packages/98/b0/9d81b3c2c5ddff428f8c506713737278979a2c476f6e3675a9c51da0c389/regex-2026.2.19-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cce8027010d1ffa3eb89a0b19621cdc78ae548ea2b49fea1f7bfb3ea77064c2b", size = 291235, upload-time = "2026-02-19T19:03:12.5Z" }, - { url = "https://files.pythonhosted.org/packages/04/e7/be7818df8691dbe9508c381ea2cc4c1153e4fdb1c4b06388abeaa93bd712/regex-2026.2.19-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:11c138febb40546ff9e026dbbc41dc9fb8b29e61013fa5848ccfe045f5b23b83", size = 807893, upload-time = "2026-02-19T19:03:15.064Z" }, - { url = "https://files.pythonhosted.org/packages/0c/b6/b898a8b983190cfa0276031c17beb73cfd1db07c03c8c37f606d80b655e2/regex-2026.2.19-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:74ff212aa61532246bb3036b3dfea62233414b0154b8bc3676975da78383cac3", size = 873696, upload-time = "2026-02-19T19:03:17.848Z" }, - { url = "https://files.pythonhosted.org/packages/1a/98/126ba671d54f19080ec87cad228fb4f3cc387fff8c4a01cb4e93f4ff9d94/regex-2026.2.19-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d00c95a2b6bfeb3ea1cb68d1751b1dfce2b05adc2a72c488d77a780db06ab867", size = 915493, upload-time = "2026-02-19T19:03:20.343Z" }, - { url = "https://files.pythonhosted.org/packages/b2/10/550c84a1a1a7371867fe8be2bea7df55e797cbca4709974811410e195c5d/regex-2026.2.19-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:311fcccb76af31be4c588d5a17f8f1a059ae8f4b097192896ebffc95612f223a", size 
= 813094, upload-time = "2026-02-19T19:03:23.287Z" }, - { url = "https://files.pythonhosted.org/packages/29/fb/ba221d2fc76a27b6b7d7a60f73a7a6a7bac21c6ba95616a08be2bcb434b0/regex-2026.2.19-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:77cfd6b5e7c4e8bf7a39d243ea05882acf5e3c7002b0ef4756de6606893b0ecd", size = 781583, upload-time = "2026-02-19T19:03:26.872Z" }, - { url = "https://files.pythonhosted.org/packages/26/f1/af79231301297c9e962679efc04a31361b58dc62dec1fc0cb4b8dd95956a/regex-2026.2.19-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6380f29ff212ec922b6efb56100c089251940e0526a0d05aa7c2d9b571ddf2fe", size = 795875, upload-time = "2026-02-19T19:03:29.223Z" }, - { url = "https://files.pythonhosted.org/packages/a0/90/1e1d76cb0a2d0a4f38a039993e1c5cd971ae50435d751c5bae4f10e1c302/regex-2026.2.19-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:655f553a1fa3ab8a7fd570eca793408b8d26a80bfd89ed24d116baaf13a38969", size = 868916, upload-time = "2026-02-19T19:03:31.415Z" }, - { url = "https://files.pythonhosted.org/packages/9a/67/a1c01da76dbcfed690855a284c665cc0a370e7d02d1bd635cf9ff7dd74b8/regex-2026.2.19-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:015088b8558502f1f0bccd58754835aa154a7a5b0bd9d4c9b7b96ff4ae9ba876", size = 770386, upload-time = "2026-02-19T19:03:33.972Z" }, - { url = "https://files.pythonhosted.org/packages/49/6f/94842bf294f432ff3836bfd91032e2ecabea6d284227f12d1f935318c9c4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:9e6693b8567a59459b5dda19104c4a4dbbd4a1c78833eacc758796f2cfef1854", size = 855007, upload-time = "2026-02-19T19:03:36.238Z" }, - { url = "https://files.pythonhosted.org/packages/ff/93/393cd203ca0d1d368f05ce12d2c7e91a324bc93c240db2e6d5ada05835f4/regex-2026.2.19-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4071209fd4376ab5ceec72ad3507e9d3517c59e38a889079b98916477a871868", size = 799863, upload-time = "2026-02-19T19:03:38.497Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/d9/35afda99bd92bf1a5831e55a4936d37ea4bed6e34c176a3c2238317faf4f/regex-2026.2.19-cp314-cp314t-win32.whl", hash = "sha256:2905ff4a97fad42f2d0834d8b1ea3c2f856ec209837e458d71a061a7d05f9f01", size = 274742, upload-time = "2026-02-19T19:03:40.804Z" }, - { url = "https://files.pythonhosted.org/packages/ae/42/7edc3344dcc87b698e9755f7f685d463852d481302539dae07135202d3ca/regex-2026.2.19-cp314-cp314t-win_amd64.whl", hash = "sha256:64128549b600987e0f335c2365879895f860a9161f283b14207c800a6ed623d3", size = 284443, upload-time = "2026-02-19T19:03:42.954Z" }, - { url = "https://files.pythonhosted.org/packages/3a/45/affdf2d851b42adf3d13fc5b3b059372e9bd299371fd84cf5723c45871fa/regex-2026.2.19-cp314-cp314t-win_arm64.whl", hash = "sha256:a09ae430e94c049dc6957f6baa35ee3418a3a77f3c12b6e02883bd80a2b679b0", size = 274932, upload-time = "2026-02-19T19:03:45.488Z" }, + { url = "https://files.pythonhosted.org/packages/70/b8/845a927e078f5e5cc55d29f57becbfde0003d52806544531ab3f2da4503c/regex-2026.2.28-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fc48c500838be6882b32748f60a15229d2dea96e59ef341eaa96ec83538f498d", size = 488461, upload-time = "2026-02-28T02:15:48.405Z" }, + { url = "https://files.pythonhosted.org/packages/32/f9/8a0034716684e38a729210ded6222249f29978b24b684f448162ef21f204/regex-2026.2.28-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2afa673660928d0b63d84353c6c08a8a476ddfc4a47e11742949d182e6863ce8", size = 290774, upload-time = "2026-02-28T02:15:51.738Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ba/b27feefffbb199528dd32667cd172ed484d9c197618c575f01217fbe6103/regex-2026.2.28-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7ab218076eb0944549e7fe74cf0e2b83a82edb27e81cc87411f76240865e04d5", size = 288737, upload-time = "2026-02-28T02:15:53.534Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/c5/65379448ca3cbfe774fcc33774dc8295b1ee97dc3237ae3d3c7b27423c9d/regex-2026.2.28-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94d63db12e45a9b9f064bfe4800cefefc7e5f182052e4c1b774d46a40ab1d9bb", size = 782675, upload-time = "2026-02-28T02:15:55.488Z" }, + { url = "https://files.pythonhosted.org/packages/aa/30/6fa55bef48090f900fbd4649333791fc3e6467380b9e775e741beeb3231f/regex-2026.2.28-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:195237dc327858a7721bf8b0bbbef797554bc13563c3591e91cd0767bacbe359", size = 850514, upload-time = "2026-02-28T02:15:57.509Z" }, + { url = "https://files.pythonhosted.org/packages/a9/28/9ca180fb3787a54150209754ac06a42409913571fa94994f340b3bba4e1e/regex-2026.2.28-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b387a0d092dac157fb026d737dde35ff3e49ef27f285343e7c6401851239df27", size = 896612, upload-time = "2026-02-28T02:15:59.682Z" }, + { url = "https://files.pythonhosted.org/packages/46/b5/f30d7d3936d6deecc3ea7bea4f7d3c5ee5124e7c8de372226e436b330a55/regex-2026.2.28-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3935174fa4d9f70525a4367aaff3cb8bc0548129d114260c29d9dfa4a5b41692", size = 791691, upload-time = "2026-02-28T02:16:01.752Z" }, + { url = "https://files.pythonhosted.org/packages/f5/34/96631bcf446a56ba0b2a7f684358a76855dfe315b7c2f89b35388494ede0/regex-2026.2.28-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b2b23587b26496ff5fd40df4278becdf386813ec00dc3533fa43a4cf0e2ad3c", size = 783111, upload-time = "2026-02-28T02:16:03.651Z" }, + { url = "https://files.pythonhosted.org/packages/39/54/f95cb7a85fe284d41cd2f3625e0f2ae30172b55dfd2af1d9b4eaef6259d7/regex-2026.2.28-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:3b24bd7e9d85dc7c6a8bd2aa14ecd234274a0248335a02adeb25448aecdd420d", size = 767512, upload-time = "2026-02-28T02:16:05.616Z" }, + { url = "https://files.pythonhosted.org/packages/3d/af/a650f64a79c02a97f73f64d4e7fc4cc1984e64affab14075e7c1f9a2db34/regex-2026.2.28-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bd477d5f79920338107f04aa645f094032d9e3030cc55be581df3d1ef61aa318", size = 773920, upload-time = "2026-02-28T02:16:08.325Z" }, + { url = "https://files.pythonhosted.org/packages/72/f8/3f9c2c2af37aedb3f5a1e7227f81bea065028785260d9cacc488e43e6997/regex-2026.2.28-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:b49eb78048c6354f49e91e4b77da21257fecb92256b6d599ae44403cab30b05b", size = 846681, upload-time = "2026-02-28T02:16:10.381Z" }, + { url = "https://files.pythonhosted.org/packages/54/12/8db04a334571359f4d127d8f89550917ec6561a2fddfd69cd91402b47482/regex-2026.2.28-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:a25c7701e4f7a70021db9aaf4a4a0a67033c6318752146e03d1b94d32006217e", size = 755565, upload-time = "2026-02-28T02:16:11.972Z" }, + { url = "https://files.pythonhosted.org/packages/da/bc/91c22f384d79324121b134c267a86ca90d11f8016aafb1dc5bee05890ee3/regex-2026.2.28-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:9dd450db6458387167e033cfa80887a34c99c81d26da1bf8b0b41bf8c9cac88e", size = 835789, upload-time = "2026-02-28T02:16:14.036Z" }, + { url = "https://files.pythonhosted.org/packages/46/a7/4cc94fd3af01dcfdf5a9ed75c8e15fd80fcd62cc46da7592b1749e9c35db/regex-2026.2.28-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2954379dd20752e82d22accf3ff465311cbb2bac6c1f92c4afd400e1757f7451", size = 780094, upload-time = "2026-02-28T02:16:15.468Z" }, + { url = "https://files.pythonhosted.org/packages/3c/21/e5a38f420af3c77cab4a65f0c3a55ec02ac9babf04479cfd282d356988a6/regex-2026.2.28-cp310-cp310-win32.whl", hash = "sha256:1f8b17be5c27a684ea6759983c13506bd77bfc7c0347dff41b18ce5ddd2ee09a", size = 266025, upload-time = 
"2026-02-28T02:16:16.828Z" }, + { url = "https://files.pythonhosted.org/packages/4d/0a/205c4c1466a36e04d90afcd01d8908bac327673050c7fe316b2416d99d3d/regex-2026.2.28-cp310-cp310-win_amd64.whl", hash = "sha256:dd8847c4978bc3c7e6c826fb745f5570e518b8459ac2892151ce6627c7bc00d5", size = 277965, upload-time = "2026-02-28T02:16:18.752Z" }, + { url = "https://files.pythonhosted.org/packages/c3/4d/29b58172f954b6ec2c5ed28529a65e9026ab96b4b7016bcd3858f1c31d3c/regex-2026.2.28-cp310-cp310-win_arm64.whl", hash = "sha256:73cdcdbba8028167ea81490c7f45280113e41db2c7afb65a276f4711fa3bcbff", size = 270336, upload-time = "2026-02-28T02:16:20.735Z" }, + { url = "https://files.pythonhosted.org/packages/04/db/8cbfd0ba3f302f2d09dd0019a9fcab74b63fee77a76c937d0e33161fb8c1/regex-2026.2.28-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e621fb7c8dc147419b28e1702f58a0177ff8308a76fa295c71f3e7827849f5d9", size = 488462, upload-time = "2026-02-28T02:16:22.616Z" }, + { url = "https://files.pythonhosted.org/packages/5d/10/ccc22c52802223f2368731964ddd117799e1390ffc39dbb31634a83022ee/regex-2026.2.28-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0d5bef2031cbf38757a0b0bc4298bb4824b6332d28edc16b39247228fbdbad97", size = 290774, upload-time = "2026-02-28T02:16:23.993Z" }, + { url = "https://files.pythonhosted.org/packages/62/b9/6796b3bf3101e64117201aaa3a5a030ec677ecf34b3cd6141b5d5c6c67d5/regex-2026.2.28-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bcb399ed84eabf4282587ba151f2732ad8168e66f1d3f85b1d038868fe547703", size = 288724, upload-time = "2026-02-28T02:16:25.403Z" }, + { url = "https://files.pythonhosted.org/packages/9c/02/291c0ae3f3a10cea941d0f5366da1843d8d1fa8a25b0671e20a0e454bb38/regex-2026.2.28-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c1b34dfa72f826f535b20712afa9bb3ba580020e834f3c69866c5bddbf10098", size = 791924, upload-time = "2026-02-28T02:16:26.863Z" }, + { url = 
"https://files.pythonhosted.org/packages/0f/57/f0235cc520d9672742196c5c15098f8f703f2758d48d5a7465a56333e496/regex-2026.2.28-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:851fa70df44325e1e4cdb79c5e676e91a78147b1b543db2aec8734d2add30ec2", size = 860095, upload-time = "2026-02-28T02:16:28.772Z" }, + { url = "https://files.pythonhosted.org/packages/b3/7c/393c94cbedda79a0f5f2435ebd01644aba0b338d327eb24b4aa5b8d6c07f/regex-2026.2.28-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:516604edd17b1c2c3e579cf4e9b25a53bf8fa6e7cedddf1127804d3e0140ca64", size = 906583, upload-time = "2026-02-28T02:16:30.977Z" }, + { url = "https://files.pythonhosted.org/packages/2c/73/a72820f47ca5abf2b5d911d0407ba5178fc52cf9780191ed3a54f5f419a2/regex-2026.2.28-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e7ce83654d1ab701cb619285a18a8e5a889c1216d746ddc710c914ca5fd71022", size = 800234, upload-time = "2026-02-28T02:16:32.55Z" }, + { url = "https://files.pythonhosted.org/packages/34/b3/6e6a4b7b31fa998c4cf159a12cbeaf356386fbd1a8be743b1e80a3da51e4/regex-2026.2.28-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2791948f7c70bb9335a9102df45e93d428f4b8128020d85920223925d73b9e1", size = 772803, upload-time = "2026-02-28T02:16:34.029Z" }, + { url = "https://files.pythonhosted.org/packages/10/e7/5da0280c765d5a92af5e1cd324b3fe8464303189cbaa449de9a71910e273/regex-2026.2.28-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:03a83cc26aa2acda6b8b9dfe748cf9e84cbd390c424a1de34fdcef58961a297a", size = 781117, upload-time = "2026-02-28T02:16:36.253Z" }, + { url = "https://files.pythonhosted.org/packages/76/39/0b8d7efb256ae34e1b8157acc1afd8758048a1cf0196e1aec2e71fd99f4b/regex-2026.2.28-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ec6f5674c5dc836994f50f1186dd1fafde4be0666aae201ae2fcc3d29d8adf27", size = 854224, upload-time 
= "2026-02-28T02:16:38.119Z" }, + { url = "https://files.pythonhosted.org/packages/21/ff/a96d483ebe8fe6d1c67907729202313895d8de8495569ec319c6f29d0438/regex-2026.2.28-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:50c2fc924749543e0eacc93ada6aeeb3ea5f6715825624baa0dccaec771668ae", size = 761898, upload-time = "2026-02-28T02:16:40.333Z" }, + { url = "https://files.pythonhosted.org/packages/89/bd/d4f2e75cb4a54b484e796017e37c0d09d8a0a837de43d17e238adf163f4e/regex-2026.2.28-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:ba55c50f408fb5c346a3a02d2ce0ebc839784e24f7c9684fde328ff063c3cdea", size = 844832, upload-time = "2026-02-28T02:16:41.875Z" }, + { url = "https://files.pythonhosted.org/packages/8a/a7/428a135cf5e15e4e11d1e696eb2bf968362f8ea8a5f237122e96bc2ae950/regex-2026.2.28-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:edb1b1b3a5576c56f08ac46f108c40333f222ebfd5cf63afdfa3aab0791ebe5b", size = 788347, upload-time = "2026-02-28T02:16:43.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/59/68691428851cf9c9c3707217ab1d9b47cfeec9d153a49919e6c368b9e926/regex-2026.2.28-cp311-cp311-win32.whl", hash = "sha256:948c12ef30ecedb128903c2c2678b339746eb7c689c5c21957c4a23950c96d15", size = 266033, upload-time = "2026-02-28T02:16:45.094Z" }, + { url = "https://files.pythonhosted.org/packages/42/8b/1483de1c57024e89296cbcceb9cccb3f625d416ddb46e570be185c9b05a9/regex-2026.2.28-cp311-cp311-win_amd64.whl", hash = "sha256:fd63453f10d29097cc3dc62d070746523973fb5aa1c66d25f8558bebd47fed61", size = 277978, upload-time = "2026-02-28T02:16:46.75Z" }, + { url = "https://files.pythonhosted.org/packages/a4/36/abec45dc6e7252e3dbc797120496e43bb5730a7abf0d9cb69340696a2f2d/regex-2026.2.28-cp311-cp311-win_arm64.whl", hash = "sha256:00f2b8d9615aa165fdff0a13f1a92049bfad555ee91e20d246a51aa0b556c60a", size = 270340, upload-time = "2026-02-28T02:16:48.626Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/42/9061b03cf0fc4b5fa2c3984cbbaed54324377e440a5c5a29d29a72518d62/regex-2026.2.28-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fcf26c3c6d0da98fada8ae4ef0aa1c3405a431c0a77eb17306d38a89b02adcd7", size = 489574, upload-time = "2026-02-28T02:16:50.455Z" }, + { url = "https://files.pythonhosted.org/packages/77/83/0c8a5623a233015595e3da499c5a1c13720ac63c107897a6037bb97af248/regex-2026.2.28-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02473c954af35dd2defeb07e44182f5705b30ea3f351a7cbffa9177beb14da5d", size = 291426, upload-time = "2026-02-28T02:16:52.52Z" }, + { url = "https://files.pythonhosted.org/packages/9e/06/3ef1ac6910dc3295ebd71b1f9bfa737e82cfead211a18b319d45f85ddd09/regex-2026.2.28-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9b65d33a17101569f86d9c5966a8b1d7fbf8afdda5a8aa219301b0a80f58cf7d", size = 289200, upload-time = "2026-02-28T02:16:54.08Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c9/8cc8d850b35ab5650ff6756a1cb85286e2000b66c97520b29c1587455344/regex-2026.2.28-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e71dcecaa113eebcc96622c17692672c2d104b1d71ddf7adeda90da7ddeb26fc", size = 796765, upload-time = "2026-02-28T02:16:55.905Z" }, + { url = "https://files.pythonhosted.org/packages/e9/5d/57702597627fc23278ebf36fbb497ac91c0ce7fec89ac6c81e420ca3e38c/regex-2026.2.28-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:481df4623fa4969c8b11f3433ed7d5e3dc9cec0f008356c3212b3933fb77e3d8", size = 863093, upload-time = "2026-02-28T02:16:58.094Z" }, + { url = "https://files.pythonhosted.org/packages/02/6d/f3ecad537ca2811b4d26b54ca848cf70e04fcfc138667c146a9f3157779c/regex-2026.2.28-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64e7c6ad614573e0640f271e811a408d79a9e1fe62a46adb602f598df42a818d", size = 909455, upload-time = 
"2026-02-28T02:17:00.918Z" }, + { url = "https://files.pythonhosted.org/packages/9e/40/bb226f203caa22c1043c1ca79b36340156eca0f6a6742b46c3bb222a3a57/regex-2026.2.28-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b08a06976ff4fb0d83077022fde3eca06c55432bb997d8c0495b9a4e9872f4", size = 802037, upload-time = "2026-02-28T02:17:02.842Z" }, + { url = "https://files.pythonhosted.org/packages/44/7c/c6d91d8911ac6803b45ca968e8e500c46934e58c0903cbc6d760ee817a0a/regex-2026.2.28-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:864cdd1a2ef5716b0ab468af40139e62ede1b3a53386b375ec0786bb6783fc05", size = 775113, upload-time = "2026-02-28T02:17:04.506Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8d/4a9368d168d47abd4158580b8c848709667b1cd293ff0c0c277279543bd0/regex-2026.2.28-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:511f7419f7afab475fd4d639d4aedfc54205bcb0800066753ef68a59f0f330b5", size = 784194, upload-time = "2026-02-28T02:17:06.888Z" }, + { url = "https://files.pythonhosted.org/packages/cc/bf/2c72ab5d8b7be462cb1651b5cc333da1d0068740342f350fcca3bca31947/regex-2026.2.28-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b42f7466e32bf15a961cf09f35fa6323cc72e64d3d2c990b10de1274a5da0a59", size = 856846, upload-time = "2026-02-28T02:17:09.11Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f4/6b65c979bb6d09f51bb2d2a7bc85de73c01ec73335d7ddd202dcb8cd1c8f/regex-2026.2.28-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8710d61737b0c0ce6836b1da7109f20d495e49b3809f30e27e9560be67a257bf", size = 763516, upload-time = "2026-02-28T02:17:11.004Z" }, + { url = "https://files.pythonhosted.org/packages/8e/32/29ea5e27400ee86d2cc2b4e80aa059df04eaf78b4f0c18576ae077aeff68/regex-2026.2.28-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4390c365fd2d45278f45afd4673cb90f7285f5701607e3ad4274df08e36140ae", size = 849278, upload-time = "2026-02-28T02:17:12.693Z" }, + { url = 
"https://files.pythonhosted.org/packages/1d/91/3233d03b5f865111cd517e1c95ee8b43e8b428d61fa73764a80c9bb6f537/regex-2026.2.28-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:cb3b1db8ff6c7b8bf838ab05583ea15230cb2f678e569ab0e3a24d1e8320940b", size = 790068, upload-time = "2026-02-28T02:17:14.9Z" }, + { url = "https://files.pythonhosted.org/packages/76/92/abc706c1fb03b4580a09645b206a3fc032f5a9f457bc1a8038ac555658ab/regex-2026.2.28-cp312-cp312-win32.whl", hash = "sha256:f8ed9a5d4612df9d4de15878f0bc6aa7a268afbe5af21a3fdd97fa19516e978c", size = 266416, upload-time = "2026-02-28T02:17:17.15Z" }, + { url = "https://files.pythonhosted.org/packages/fa/06/2a6f7dff190e5fa9df9fb4acf2fdf17a1aa0f7f54596cba8de608db56b3a/regex-2026.2.28-cp312-cp312-win_amd64.whl", hash = "sha256:01d65fd24206c8e1e97e2e31b286c59009636c022eb5d003f52760b0f42155d4", size = 277297, upload-time = "2026-02-28T02:17:18.723Z" }, + { url = "https://files.pythonhosted.org/packages/b7/f0/58a2484851fadf284458fdbd728f580d55c1abac059ae9f048c63b92f427/regex-2026.2.28-cp312-cp312-win_arm64.whl", hash = "sha256:c0b5ccbb8ffb433939d248707d4a8b31993cb76ab1a0187ca886bf50e96df952", size = 270408, upload-time = "2026-02-28T02:17:20.328Z" }, + { url = "https://files.pythonhosted.org/packages/87/f6/dc9ef48c61b79c8201585bf37fa70cd781977da86e466cd94e8e95d2443b/regex-2026.2.28-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6d63a07e5ec8ce7184452cb00c41c37b49e67dc4f73b2955b5b8e782ea970784", size = 489311, upload-time = "2026-02-28T02:17:22.591Z" }, + { url = "https://files.pythonhosted.org/packages/95/c8/c20390f2232d3f7956f420f4ef1852608ad57aa26c3dd78516cb9f3dc913/regex-2026.2.28-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e59bc8f30414d283ae8ee1617b13d8112e7135cb92830f0ec3688cb29152585a", size = 291285, upload-time = "2026-02-28T02:17:24.355Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/a6/ba1068a631ebd71a230e7d8013fcd284b7c89c35f46f34a7da02082141b1/regex-2026.2.28-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:de0cf053139f96219ccfabb4a8dd2d217c8c82cb206c91d9f109f3f552d6b43d", size = 289051, upload-time = "2026-02-28T02:17:26.722Z" }, + { url = "https://files.pythonhosted.org/packages/1d/1b/7cc3b7af4c244c204b7a80924bd3d85aecd9ba5bc82b485c5806ee8cda9e/regex-2026.2.28-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb4db2f17e6484904f986c5a657cec85574c76b5c5e61c7aae9ffa1bc6224f95", size = 796842, upload-time = "2026-02-28T02:17:29.064Z" }, + { url = "https://files.pythonhosted.org/packages/24/87/26bd03efc60e0d772ac1e7b60a2e6325af98d974e2358f659c507d3c76db/regex-2026.2.28-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:52b017b35ac2214d0db5f4f90e303634dc44e4aba4bd6235a27f97ecbe5b0472", size = 863083, upload-time = "2026-02-28T02:17:31.363Z" }, + { url = "https://files.pythonhosted.org/packages/ae/54/aeaf4afb1aa0a65e40de52a61dc2ac5b00a83c6cb081c8a1d0dda74f3010/regex-2026.2.28-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:69fc560ccbf08a09dc9b52ab69cacfae51e0ed80dc5693078bdc97db2f91ae96", size = 909412, upload-time = "2026-02-28T02:17:33.248Z" }, + { url = "https://files.pythonhosted.org/packages/12/2f/049901def913954e640d199bbc6a7ca2902b6aeda0e5da9d17f114100ec2/regex-2026.2.28-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e61eea47230eba62a31f3e8a0e3164d0f37ef9f40529fb2c79361bc6b53d2a92", size = 802101, upload-time = "2026-02-28T02:17:35.053Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/512fb9ff7f5b15ea204bb1967ebb649059446decacccb201381f9fa6aad4/regex-2026.2.28-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:4f5c0b182ad4269e7381b7c27fdb0408399881f7a92a4624fd5487f2971dfc11", size = 775260, upload-time = "2026-02-28T02:17:37.692Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/9a92935878aba19bd72706b9db5646a6f993d99b3f6ed42c02ec8beb1d61/regex-2026.2.28-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:96f6269a2882fbb0ee76967116b83679dc628e68eaea44e90884b8d53d833881", size = 784311, upload-time = "2026-02-28T02:17:39.855Z" }, + { url = "https://files.pythonhosted.org/packages/09/d3/fc51a8a738a49a6b6499626580554c9466d3ea561f2b72cfdc72e4149773/regex-2026.2.28-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b5acd4b6a95f37c3c3828e5d053a7d4edaedb85de551db0153754924cb7c83e3", size = 856876, upload-time = "2026-02-28T02:17:42.317Z" }, + { url = "https://files.pythonhosted.org/packages/08/b7/2e641f3d084b120ca4c52e8c762a78da0b32bf03ef546330db3e2635dc5f/regex-2026.2.28-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:2234059cfe33d9813a3677ef7667999caea9eeaa83fef98eb6ce15c6cf9e0215", size = 763632, upload-time = "2026-02-28T02:17:45.073Z" }, + { url = "https://files.pythonhosted.org/packages/fe/6d/0009021d97e79ee99f3d8641f0a8d001eed23479ade4c3125a5480bf3e2d/regex-2026.2.28-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c15af43c72a7fb0c97cbc66fa36a43546eddc5c06a662b64a0cbf30d6ac40944", size = 849320, upload-time = "2026-02-28T02:17:47.192Z" }, + { url = "https://files.pythonhosted.org/packages/05/7a/51cfbad5758f8edae430cb21961a9c8d04bce1dae4d2d18d4186eec7cfa1/regex-2026.2.28-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9185cc63359862a6e80fe97f696e04b0ad9a11c4ac0a4a927f979f611bfe3768", size = 790152, upload-time = "2026-02-28T02:17:49.067Z" }, + { url = "https://files.pythonhosted.org/packages/90/3d/a83e2b6b3daa142acb8c41d51de3876186307d5cb7490087031747662500/regex-2026.2.28-cp313-cp313-win32.whl", hash = "sha256:fb66e5245db9652abd7196ace599b04d9c0e4aa7c8f0e2803938377835780081", size = 266398, upload-time = 
"2026-02-28T02:17:50.744Z" }, + { url = "https://files.pythonhosted.org/packages/85/4f/16e9ebb1fe5425e11b9596c8d57bf8877dcb32391da0bfd33742e3290637/regex-2026.2.28-cp313-cp313-win_amd64.whl", hash = "sha256:71a911098be38c859ceb3f9a9ce43f4ed9f4c6720ad8684a066ea246b76ad9ff", size = 277282, upload-time = "2026-02-28T02:17:53.074Z" }, + { url = "https://files.pythonhosted.org/packages/07/b4/92851335332810c5a89723bf7a7e35c7209f90b7d4160024501717b28cc9/regex-2026.2.28-cp313-cp313-win_arm64.whl", hash = "sha256:39bb5727650b9a0275c6a6690f9bb3fe693a7e6cc5c3155b1240aedf8926423e", size = 270382, upload-time = "2026-02-28T02:17:54.888Z" }, + { url = "https://files.pythonhosted.org/packages/24/07/6c7e4cec1e585959e96cbc24299d97e4437a81173217af54f1804994e911/regex-2026.2.28-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:97054c55db06ab020342cc0d35d6f62a465fa7662871190175f1ad6c655c028f", size = 492541, upload-time = "2026-02-28T02:17:56.813Z" }, + { url = "https://files.pythonhosted.org/packages/7c/13/55eb22ada7f43d4f4bb3815b6132183ebc331c81bd496e2d1f3b8d862e0d/regex-2026.2.28-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0d25a10811de831c2baa6aef3c0be91622f44dd8d31dd12e69f6398efb15e48b", size = 292984, upload-time = "2026-02-28T02:17:58.538Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/c301f8cb29ce9644a5ef85104c59244e6e7e90994a0f458da4d39baa8e17/regex-2026.2.28-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d6cfe798d8da41bb1862ed6e0cba14003d387c3c0c4a5d45591076ae9f0ce2f8", size = 291509, upload-time = "2026-02-28T02:18:00.208Z" }, + { url = "https://files.pythonhosted.org/packages/b5/43/aabe384ec1994b91796e903582427bc2ffaed9c4103819ed3c16d8e749f3/regex-2026.2.28-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fd0ce43e71d825b7c0661f9c54d4d74bd97c56c3fd102a8985bcfea48236bacb", size = 809429, upload-time = "2026-02-28T02:18:02.328Z" }, + { url = 
"https://files.pythonhosted.org/packages/04/b8/8d2d987a816720c4f3109cee7c06a4b24ad0e02d4fc74919ab619e543737/regex-2026.2.28-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:00945d007fd74a9084d2ab79b695b595c6b7ba3698972fadd43e23230c6979c1", size = 869422, upload-time = "2026-02-28T02:18:04.23Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ad/2c004509e763c0c3719f97c03eca26473bffb3868d54c5f280b8cd4f9e3d/regex-2026.2.28-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:bec23c11cbbf09a4df32fe50d57cbdd777bc442269b6e39a1775654f1c95dee2", size = 915175, upload-time = "2026-02-28T02:18:06.791Z" }, + { url = "https://files.pythonhosted.org/packages/55/c2/fd429066da487ef555a9da73bf214894aec77fc8c66a261ee355a69871a8/regex-2026.2.28-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5cdcc17d935c8f9d3f4db5c2ebe2640c332e3822ad5d23c2f8e0228e6947943a", size = 812044, upload-time = "2026-02-28T02:18:08.736Z" }, + { url = "https://files.pythonhosted.org/packages/5b/ca/feedb7055c62a3f7f659971bf45f0e0a87544b6b0cf462884761453f97c5/regex-2026.2.28-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a448af01e3d8031c89c5d902040b124a5e921a25c4e5e07a861ca591ce429341", size = 782056, upload-time = "2026-02-28T02:18:10.777Z" }, + { url = "https://files.pythonhosted.org/packages/95/30/1aa959ed0d25c1dd7dd5047ea8ba482ceaef38ce363c401fd32a6b923e60/regex-2026.2.28-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:10d28e19bd4888e4abf43bd3925f3c134c52fdf7259219003588a42e24c2aa25", size = 798743, upload-time = "2026-02-28T02:18:13.025Z" }, + { url = "https://files.pythonhosted.org/packages/3b/1f/dadb9cf359004784051c897dcf4d5d79895f73a1bbb7b827abaa4814ae80/regex-2026.2.28-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:99985a2c277dcb9ccb63f937451af5d65177af1efdeb8173ac55b61095a0a05c", size = 864633, 
upload-time = "2026-02-28T02:18:16.84Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f1/b9a25eb24e1cf79890f09e6ec971ee5b511519f1851de3453bc04f6c902b/regex-2026.2.28-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:e1e7b24cb3ae9953a560c563045d1ba56ee4749fbd05cf21ba571069bd7be81b", size = 770862, upload-time = "2026-02-28T02:18:18.892Z" }, + { url = "https://files.pythonhosted.org/packages/02/9a/c5cb10b7aa6f182f9247a30cc9527e326601f46f4df864ac6db588d11fcd/regex-2026.2.28-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d8511a01d0e4ee1992eb3ba19e09bc1866fe03f05129c3aec3fdc4cbc77aad3f", size = 854788, upload-time = "2026-02-28T02:18:21.475Z" }, + { url = "https://files.pythonhosted.org/packages/0a/50/414ba0731c4bd40b011fa4703b2cc86879ec060c64f2a906e65a56452589/regex-2026.2.28-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:aaffaecffcd2479ce87aa1e74076c221700b7c804e48e98e62500ee748f0f550", size = 800184, upload-time = "2026-02-28T02:18:23.492Z" }, + { url = "https://files.pythonhosted.org/packages/69/50/0c7290987f97e7e6830b0d853f69dc4dc5852c934aae63e7fdcd76b4c383/regex-2026.2.28-cp313-cp313t-win32.whl", hash = "sha256:ef77bdde9c9eba3f7fa5b58084b29bbcc74bcf55fdbeaa67c102a35b5bd7e7cc", size = 269137, upload-time = "2026-02-28T02:18:25.375Z" }, + { url = "https://files.pythonhosted.org/packages/68/80/ef26ff90e74ceb4051ad6efcbbb8a4be965184a57e879ebcbdef327d18fa/regex-2026.2.28-cp313-cp313t-win_amd64.whl", hash = "sha256:98adf340100cbe6fbaf8e6dc75e28f2c191b1be50ffefe292fb0e6f6eefdb0d8", size = 280682, upload-time = "2026-02-28T02:18:27.205Z" }, + { url = "https://files.pythonhosted.org/packages/69/8b/fbad9c52e83ffe8f97e3ed1aa0516e6dff6bb633a41da9e64645bc7efdc5/regex-2026.2.28-cp313-cp313t-win_arm64.whl", hash = "sha256:2fb950ac1d88e6b6a9414381f403797b236f9fa17e1eee07683af72b1634207b", size = 271735, upload-time = "2026-02-28T02:18:29.015Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/03/691015f7a7cb1ed6dacb2ea5de5682e4858e05a4c5506b2839cd533bbcd6/regex-2026.2.28-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:78454178c7df31372ea737996fb7f36b3c2c92cccc641d251e072478afb4babc", size = 489497, upload-time = "2026-02-28T02:18:30.889Z" }, + { url = "https://files.pythonhosted.org/packages/c6/ba/8db8fd19afcbfa0e1036eaa70c05f20ca8405817d4ad7a38a6b4c2f031ac/regex-2026.2.28-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:5d10303dd18cedfd4d095543998404df656088240bcfd3cd20a8f95b861f74bd", size = 291295, upload-time = "2026-02-28T02:18:33.426Z" }, + { url = "https://files.pythonhosted.org/packages/5a/79/9aa0caf089e8defef9b857b52fc53801f62ff868e19e5c83d4a96612eba1/regex-2026.2.28-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:19a9c9e0a8f24f39d575a6a854d516b48ffe4cbdcb9de55cb0570a032556ecff", size = 289275, upload-time = "2026-02-28T02:18:35.247Z" }, + { url = "https://files.pythonhosted.org/packages/eb/26/ee53117066a30ef9c883bf1127eece08308ccf8ccd45c45a966e7a665385/regex-2026.2.28-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09500be324f49b470d907b3ef8af9afe857f5cca486f853853f7945ddbf75911", size = 797176, upload-time = "2026-02-28T02:18:37.15Z" }, + { url = "https://files.pythonhosted.org/packages/05/1b/67fb0495a97259925f343ae78b5d24d4a6624356ae138b57f18bd43006e4/regex-2026.2.28-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fb1c4ff62277d87a7335f2c1ea4e0387b8f2b3ad88a64efd9943906aafad4f33", size = 863813, upload-time = "2026-02-28T02:18:39.478Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/93ac9bbafc53618091c685c7ed40239a90bf9f2a82c983f0baa97cb7ae07/regex-2026.2.28-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b8b3f1be1738feadc69f62daa250c933e85c6f34fa378f54a7ff43807c1b9117", size = 908678, upload-time = 
"2026-02-28T02:18:41.619Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7a/a8f5e0561702b25239846a16349feece59712ae20598ebb205580332a471/regex-2026.2.28-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc8ed8c3f41c27acb83f7b6a9eb727a73fc6663441890c5cb3426a5f6a91ce7d", size = 801528, upload-time = "2026-02-28T02:18:43.624Z" }, + { url = "https://files.pythonhosted.org/packages/96/5d/ed6d4cbde80309854b1b9f42d9062fee38ade15f7eb4909f6ef2440403b5/regex-2026.2.28-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fa539be029844c0ce1114762d2952ab6cfdd7c7c9bd72e0db26b94c3c36dcc5a", size = 775373, upload-time = "2026-02-28T02:18:46.102Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e9/6e53c34e8068b9deec3e87210086ecb5b9efebdefca6b0d3fa43d66dcecb/regex-2026.2.28-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7900157786428a79615a8264dac1f12c9b02957c473c8110c6b1f972dcecaddf", size = 784859, upload-time = "2026-02-28T02:18:48.269Z" }, + { url = "https://files.pythonhosted.org/packages/48/3c/736e1c7ca7f0dcd2ae33819888fdc69058a349b7e5e84bc3e2f296bbf794/regex-2026.2.28-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:0b1d2b07614d95fa2bf8a63fd1e98bd8fa2b4848dc91b1efbc8ba219fdd73952", size = 857813, upload-time = "2026-02-28T02:18:50.576Z" }, + { url = "https://files.pythonhosted.org/packages/6e/7c/48c4659ad9da61f58e79dbe8c05223e0006696b603c16eb6b5cbfbb52c27/regex-2026.2.28-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:b389c61aa28a79c2e0527ac36da579869c2e235a5b208a12c5b5318cda2501d8", size = 763705, upload-time = "2026-02-28T02:18:52.59Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a1/bc1c261789283128165f71b71b4b221dd1b79c77023752a6074c102f18d8/regex-2026.2.28-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f467cb602f03fbd1ab1908f68b53c649ce393fde056628dc8c7e634dab6bfc07", size = 848734, upload-time = "2026-02-28T02:18:54.595Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/d8/979407faf1397036e25a5ae778157366a911c0f382c62501009f4957cf86/regex-2026.2.28-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e8c8cb2deba42f5ec1ede46374e990f8adc5e6456a57ac1a261b19be6f28e4e6", size = 789871, upload-time = "2026-02-28T02:18:57.34Z" }, + { url = "https://files.pythonhosted.org/packages/03/23/da716821277115fcb1f4e3de1e5dc5023a1e6533598c486abf5448612579/regex-2026.2.28-cp314-cp314-win32.whl", hash = "sha256:9036b400b20e4858d56d117108d7813ed07bb7803e3eed766675862131135ca6", size = 271825, upload-time = "2026-02-28T02:18:59.202Z" }, + { url = "https://files.pythonhosted.org/packages/91/ff/90696f535d978d5f16a52a419be2770a8d8a0e7e0cfecdbfc31313df7fab/regex-2026.2.28-cp314-cp314-win_amd64.whl", hash = "sha256:1d367257cd86c1cbb97ea94e77b373a0bbc2224976e247f173d19e8f18b4afa7", size = 280548, upload-time = "2026-02-28T02:19:01.049Z" }, + { url = "https://files.pythonhosted.org/packages/69/f9/5e1b5652fc0af3fcdf7677e7df3ad2a0d47d669b34ac29a63bb177bb731b/regex-2026.2.28-cp314-cp314-win_arm64.whl", hash = "sha256:5e68192bb3a1d6fb2836da24aa494e413ea65853a21505e142e5b1064a595f3d", size = 273444, upload-time = "2026-02-28T02:19:03.255Z" }, + { url = "https://files.pythonhosted.org/packages/d3/eb/8389f9e940ac89bcf58d185e230a677b4fd07c5f9b917603ad5c0f8fa8fe/regex-2026.2.28-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a5dac14d0872eeb35260a8e30bac07ddf22adc1e3a0635b52b02e180d17c9c7e", size = 492546, upload-time = "2026-02-28T02:19:05.378Z" }, + { url = "https://files.pythonhosted.org/packages/7b/c7/09441d27ce2a6fa6a61ea3150ea4639c1dcda9b31b2ea07b80d6937b24dd/regex-2026.2.28-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:ec0c608b7a7465ffadb344ed7c987ff2f11ee03f6a130b569aa74d8a70e8333c", size = 292986, upload-time = "2026-02-28T02:19:07.24Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/69/4144b60ed7760a6bd235e4087041f487aa4aa62b45618ce018b0c14833ea/regex-2026.2.28-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c7815afb0ca45456613fdaf60ea9c993715511c8d53a83bc468305cbc0ee23c7", size = 291518, upload-time = "2026-02-28T02:19:09.698Z" }, + { url = "https://files.pythonhosted.org/packages/2d/be/77e5426cf5948c82f98c53582009ca9e94938c71f73a8918474f2e2990bb/regex-2026.2.28-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b059e71ec363968671693a78c5053bd9cb2fe410f9b8e4657e88377ebd603a2e", size = 809464, upload-time = "2026-02-28T02:19:12.494Z" }, + { url = "https://files.pythonhosted.org/packages/45/99/2c8c5ac90dc7d05c6e7d8e72c6a3599dc08cd577ac476898e91ca787d7f1/regex-2026.2.28-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8cf76f1a29f0e99dcfd7aef1551a9827588aae5a737fe31442021165f1920dc", size = 869553, upload-time = "2026-02-28T02:19:15.151Z" }, + { url = "https://files.pythonhosted.org/packages/53/34/daa66a342f0271e7737003abf6c3097aa0498d58c668dbd88362ef94eb5d/regex-2026.2.28-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:180e08a435a0319e6a4821c3468da18dc7001987e1c17ae1335488dfe7518dd8", size = 915289, upload-time = "2026-02-28T02:19:17.331Z" }, + { url = "https://files.pythonhosted.org/packages/c5/c7/e22c2aaf0a12e7e22ab19b004bb78d32ca1ecc7ef245949935463c5567de/regex-2026.2.28-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e496956106fd59ba6322a8ea17141a27c5040e5ee8f9433ae92d4e5204462a0", size = 812156, upload-time = "2026-02-28T02:19:20.011Z" }, + { url = "https://files.pythonhosted.org/packages/7f/bb/2dc18c1efd9051cf389cd0d7a3a4d90f6804b9fff3a51b5dc3c85b935f71/regex-2026.2.28-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:bba2b18d70eeb7b79950f12f633beeecd923f7c9ad6f6bae28e59b4cb3ab046b", size = 782215, upload-time = "2026-02-28T02:19:22.047Z" }, + { url = "https://files.pythonhosted.org/packages/17/1e/9e4ec9b9013931faa32226ec4aa3c71fe664a6d8a2b91ac56442128b332f/regex-2026.2.28-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6db7bfae0f8a2793ff1f7021468ea55e2699d0790eb58ee6ab36ae43aa00bc5b", size = 798925, upload-time = "2026-02-28T02:19:24.173Z" }, + { url = "https://files.pythonhosted.org/packages/71/57/a505927e449a9ccb41e2cc8d735e2abe3444b0213d1cf9cb364a8c1f2524/regex-2026.2.28-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:d0b02e8b7e5874b48ae0f077ecca61c1a6a9f9895e9c6dfb191b55b242862033", size = 864701, upload-time = "2026-02-28T02:19:26.376Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ad/c62cb60cdd93e13eac5b3d9d6bd5d284225ed0e3329426f94d2552dd7cca/regex-2026.2.28-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:25b6eb660c5cf4b8c3407a1ed462abba26a926cc9965e164268a3267bcc06a43", size = 770899, upload-time = "2026-02-28T02:19:29.38Z" }, + { url = "https://files.pythonhosted.org/packages/3c/5a/874f861f5c3d5ab99633e8030dee1bc113db8e0be299d1f4b07f5b5ec349/regex-2026.2.28-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:5a932ea8ad5d0430351ff9c76c8db34db0d9f53c1d78f06022a21f4e290c5c18", size = 854727, upload-time = "2026-02-28T02:19:31.494Z" }, + { url = "https://files.pythonhosted.org/packages/6b/ca/d2c03b0efde47e13db895b975b2be6a73ed90b8ba963677927283d43bf74/regex-2026.2.28-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:1c2c95e1a2b0f89d01e821ff4de1be4b5d73d1f4b0bf679fa27c1ad8d2327f1a", size = 800366, upload-time = "2026-02-28T02:19:34.248Z" }, + { url = "https://files.pythonhosted.org/packages/14/bd/ee13b20b763b8989f7c75d592bfd5de37dc1181814a2a2747fedcf97e3ba/regex-2026.2.28-cp314-cp314t-win32.whl", hash = "sha256:bbb882061f742eb5d46f2f1bd5304055be0a66b783576de3d7eef1bed4778a6e", size = 274936, upload-time = 
"2026-02-28T02:19:36.313Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e7/d8020e39414c93af7f0d8688eabcecece44abfd5ce314b21dfda0eebd3d8/regex-2026.2.28-cp314-cp314t-win_amd64.whl", hash = "sha256:6591f281cb44dc13de9585b552cec6fc6cf47fb2fe7a48892295ee9bc4a612f9", size = 284779, upload-time = "2026-02-28T02:19:38.625Z" }, + { url = "https://files.pythonhosted.org/packages/13/c0/ad225f4a405827486f1955283407cf758b6d2fb966712644c5f5aef33d1b/regex-2026.2.28-cp314-cp314t-win_arm64.whl", hash = "sha256:dee50f1be42222f89767b64b283283ef963189da0dda4a515aa54a5563c62dec", size = 275010, upload-time = "2026-02-28T02:19:40.65Z" }, ] [[package]] @@ -5880,7 +5992,8 @@ version = "0.4.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numba" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/29/f1/34be702a69a5d272e844c98cee82351f880985cfbca0cc86378011078497/resampy-0.4.3.tar.gz", hash = "sha256:a0d1c28398f0e55994b739650afef4e3974115edbe96cd4bb81968425e916e47", size = 3080604, upload-time = "2024-03-05T20:36:08.119Z" } wheels = [ @@ -5911,16 +6024,16 @@ wheels = [ [[package]] name = "rich-toolkit" -version = "0.19.4" +version = "0.19.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "rich" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/c9/4bbf4bfee195ed1b7d7a6733cc523ca61dbfb4a3e3c12ea090aaffd97597/rich_toolkit-0.19.4.tar.gz", hash = "sha256:52e23d56f9dc30d1343eb3b3f6f18764c313fbfea24e52e6a1d6069bec9c18eb", size = 193951, upload-time = "2026-02-12T10:08:15.814Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/42/ba/dae9e3096651042754da419a4042bc1c75e07d615f9b15066d738838e4df/rich_toolkit-0.19.7.tar.gz", hash = "sha256:133c0915872da91d4c25d85342d5ec1dfacc69b63448af1a08a0d4b4f23ef46e", size = 195877, upload-time = "2026-02-24T16:06:20.555Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/28/31/97d39719def09c134385bfcfbedfed255168b571e7beb3ad7765aae660ca/rich_toolkit-0.19.4-py3-none-any.whl", hash = "sha256:34ac344de8862801644be8b703e26becf44b047e687f208d7829e8f7cfc311d6", size = 32757, upload-time = "2026-02-12T10:08:15.037Z" }, + { url = "https://files.pythonhosted.org/packages/fb/3c/c923619f6d2f5fafcc96fec0aaf9550a46cd5b6481f06e0c6b66a2a4fed0/rich_toolkit-0.19.7-py3-none-any.whl", hash = "sha256:0288e9203728c47c5a4eb60fd2f0692d9df7455a65901ab6f898437a2ba5989d", size = 32963, upload-time = "2026-02-24T16:06:22.066Z" }, ] [[package]] @@ -6189,27 +6302,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.2" +version = "0.15.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/06/04/eab13a954e763b0606f460443fcbf6bb5a0faf06890ea3754ff16523dce5/ruff-0.15.2.tar.gz", hash = "sha256:14b965afee0969e68bb871eba625343b8673375f457af4abe98553e8bbb98342", size = 4558148, upload-time = "2026-02-19T22:32:20.271Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/31/d6e536cdebb6568ae75a7f00e4b4819ae0ad2640c3604c305a0428680b0c/ruff-0.15.4.tar.gz", hash = "sha256:3412195319e42d634470cc97aa9803d07e9d5c9223b99bcb1518f0c725f26ae1", size = 4569550, upload-time = "2026-02-26T20:04:14.959Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/70/3a4dc6d09b13cb3e695f28307e5d889b2e1a66b7af9c5e257e796695b0e6/ruff-0.15.2-py3-none-linux_armv6l.whl", hash = "sha256:120691a6fdae2f16d65435648160f5b81a9625288f75544dc40637436b5d3c0d", size = 10430565, upload-time = "2026-02-19T22:32:41.824Z" }, - { url = 
"https://files.pythonhosted.org/packages/71/0b/bb8457b56185ece1305c666dc895832946d24055be90692381c31d57466d/ruff-0.15.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a89056d831256099658b6bba4037ac6dd06f49d194199215befe2bb10457ea5e", size = 10820354, upload-time = "2026-02-19T22:32:07.366Z" }, - { url = "https://files.pythonhosted.org/packages/2d/c1/e0532d7f9c9e0b14c46f61b14afd563298b8b83f337b6789ddd987e46121/ruff-0.15.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e36dee3a64be0ebd23c86ffa3aa3fd3ac9a712ff295e192243f814a830b6bd87", size = 10170767, upload-time = "2026-02-19T22:32:13.188Z" }, - { url = "https://files.pythonhosted.org/packages/47/e8/da1aa341d3af017a21c7a62fb5ec31d4e7ad0a93ab80e3a508316efbcb23/ruff-0.15.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9fb47b6d9764677f8c0a193c0943ce9a05d6763523f132325af8a858eadc2b9", size = 10529591, upload-time = "2026-02-19T22:32:02.547Z" }, - { url = "https://files.pythonhosted.org/packages/93/74/184fbf38e9f3510231fbc5e437e808f0b48c42d1df9434b208821efcd8d6/ruff-0.15.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f376990f9d0d6442ea9014b19621d8f2aaf2b8e39fdbfc79220b7f0c596c9b80", size = 10260771, upload-time = "2026-02-19T22:32:36.938Z" }, - { url = "https://files.pythonhosted.org/packages/05/ac/605c20b8e059a0bc4b42360414baa4892ff278cec1c91fff4be0dceedefd/ruff-0.15.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2dcc987551952d73cbf5c88d9fdee815618d497e4df86cd4c4824cc59d5dd75f", size = 11045791, upload-time = "2026-02-19T22:32:31.642Z" }, - { url = "https://files.pythonhosted.org/packages/fd/52/db6e419908f45a894924d410ac77d64bdd98ff86901d833364251bd08e22/ruff-0.15.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42a47fd785cbe8c01b9ff45031af875d101b040ad8f4de7bbb716487c74c9a77", size = 11879271, upload-time = "2026-02-19T22:32:29.305Z" }, - { url = 
"https://files.pythonhosted.org/packages/3e/d8/7992b18f2008bdc9231d0f10b16df7dda964dbf639e2b8b4c1b4e91b83af/ruff-0.15.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cbe9f49354866e575b4c6943856989f966421870e85cd2ac94dccb0a9dcb2fea", size = 11303707, upload-time = "2026-02-19T22:32:22.492Z" }, - { url = "https://files.pythonhosted.org/packages/d7/02/849b46184bcfdd4b64cde61752cc9a146c54759ed036edd11857e9b8443b/ruff-0.15.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7a672c82b5f9887576087d97be5ce439f04bbaf548ee987b92d3a7dede41d3a", size = 11149151, upload-time = "2026-02-19T22:32:44.234Z" }, - { url = "https://files.pythonhosted.org/packages/70/04/f5284e388bab60d1d3b99614a5a9aeb03e0f333847e2429bebd2aaa1feec/ruff-0.15.2-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:72ecc64f46f7019e2bcc3cdc05d4a7da958b629a5ab7033195e11a438403d956", size = 11091132, upload-time = "2026-02-19T22:32:24.691Z" }, - { url = "https://files.pythonhosted.org/packages/fa/ae/88d844a21110e14d92cf73d57363fab59b727ebeabe78009b9ccb23500af/ruff-0.15.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:8dcf243b15b561c655c1ef2f2b0050e5d50db37fe90115507f6ff37d865dc8b4", size = 10504717, upload-time = "2026-02-19T22:32:26.75Z" }, - { url = "https://files.pythonhosted.org/packages/64/27/867076a6ada7f2b9c8292884ab44d08fd2ba71bd2b5364d4136f3cd537e1/ruff-0.15.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:dab6941c862c05739774677c6273166d2510d254dac0695c0e3f5efa1b5585de", size = 10263122, upload-time = "2026-02-19T22:32:10.036Z" }, - { url = "https://files.pythonhosted.org/packages/e7/ef/faf9321d550f8ebf0c6373696e70d1758e20ccdc3951ad7af00c0956be7c/ruff-0.15.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b9164f57fc36058e9a6806eb92af185b0697c9fe4c7c52caa431c6554521e5c", size = 10735295, upload-time = "2026-02-19T22:32:39.227Z" }, - { url = 
"https://files.pythonhosted.org/packages/2f/55/e8089fec62e050ba84d71b70e7834b97709ca9b7aba10c1a0b196e493f97/ruff-0.15.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:80d24fcae24d42659db7e335b9e1531697a7102c19185b8dc4a028b952865fd8", size = 11241641, upload-time = "2026-02-19T22:32:34.617Z" }, - { url = "https://files.pythonhosted.org/packages/23/01/1c30526460f4d23222d0fabd5888868262fd0e2b71a00570ca26483cd993/ruff-0.15.2-py3-none-win32.whl", hash = "sha256:fd5ff9e5f519a7e1bd99cbe8daa324010a74f5e2ebc97c6242c08f26f3714f6f", size = 10507885, upload-time = "2026-02-19T22:32:15.635Z" }, - { url = "https://files.pythonhosted.org/packages/5c/10/3d18e3bbdf8fc50bbb4ac3cc45970aa5a9753c5cb51bf9ed9a3cd8b79fa3/ruff-0.15.2-py3-none-win_amd64.whl", hash = "sha256:d20014e3dfa400f3ff84830dfb5755ece2de45ab62ecea4af6b7262d0fb4f7c5", size = 11623725, upload-time = "2026-02-19T22:32:04.947Z" }, - { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" }, + { url = "https://files.pythonhosted.org/packages/f2/82/c11a03cfec3a4d26a0ea1e571f0f44be5993b923f905eeddfc397c13d360/ruff-0.15.4-py3-none-linux_armv6l.whl", hash = "sha256:a1810931c41606c686bae8b5b9a8072adac2f611bb433c0ba476acba17a332e0", size = 10453333, upload-time = "2026-02-26T20:04:20.093Z" }, + { url = "https://files.pythonhosted.org/packages/ce/5d/6a1f271f6e31dffb31855996493641edc3eef8077b883eaf007a2f1c2976/ruff-0.15.4-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5a1632c66672b8b4d3e1d1782859e98d6e0b4e70829530666644286600a33992", size = 10853356, upload-time = "2026-02-26T20:04:05.808Z" }, + { url = "https://files.pythonhosted.org/packages/b1/d8/0fab9f8842b83b1a9c2bf81b85063f65e93fb512e60effa95b0be49bfc54/ruff-0.15.4-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:a4386ba2cd6c0f4ff75252845906acc7c7c8e1ac567b7bc3d373686ac8c222ba", size = 10187434, upload-time = "2026-02-26T20:03:54.656Z" }, + { url = "https://files.pythonhosted.org/packages/85/cc/cc220fd9394eff5db8d94dec199eec56dd6c9f3651d8869d024867a91030/ruff-0.15.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2496488bdfd3732747558b6f95ae427ff066d1fcd054daf75f5a50674411e75", size = 10535456, upload-time = "2026-02-26T20:03:52.738Z" }, + { url = "https://files.pythonhosted.org/packages/fa/0f/bced38fa5cf24373ec767713c8e4cadc90247f3863605fb030e597878661/ruff-0.15.4-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3f1c4893841ff2d54cbda1b2860fa3260173df5ddd7b95d370186f8a5e66a4ac", size = 10287772, upload-time = "2026-02-26T20:04:08.138Z" }, + { url = "https://files.pythonhosted.org/packages/2b/90/58a1802d84fed15f8f281925b21ab3cecd813bde52a8ca033a4de8ab0e7a/ruff-0.15.4-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:820b8766bd65503b6c30aaa6331e8ef3a6e564f7999c844e9a547c40179e440a", size = 11049051, upload-time = "2026-02-26T20:04:03.53Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ac/b7ad36703c35f3866584564dc15f12f91cb1a26a897dc2fd13d7cb3ae1af/ruff-0.15.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9fb74bab47139c1751f900f857fa503987253c3ef89129b24ed375e72873e85", size = 11890494, upload-time = "2026-02-26T20:04:10.497Z" }, + { url = "https://files.pythonhosted.org/packages/93/3d/3eb2f47a39a8b0da99faf9c54d3eb24720add1e886a5309d4d1be73a6380/ruff-0.15.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f80c98765949c518142b3a50a5db89343aa90f2c2bf7799de9986498ae6176db", size = 11326221, upload-time = "2026-02-26T20:04:12.84Z" }, + { url = "https://files.pythonhosted.org/packages/ff/90/bf134f4c1e5243e62690e09d63c55df948a74084c8ac3e48a88468314da6/ruff-0.15.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:451a2e224151729b3b6c9ffb36aed9091b2996fe4bdbd11f47e27d8f2e8888ec", size = 11168459, upload-time = "2026-02-26T20:04:00.969Z" }, + { url = "https://files.pythonhosted.org/packages/b5/e5/a64d27688789b06b5d55162aafc32059bb8c989c61a5139a36e1368285eb/ruff-0.15.4-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a8f157f2e583c513c4f5f896163a93198297371f34c04220daf40d133fdd4f7f", size = 11104366, upload-time = "2026-02-26T20:03:48.099Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f6/32d1dcb66a2559763fc3027bdd65836cad9eb09d90f2ed6a63d8e9252b02/ruff-0.15.4-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:917cc68503357021f541e69b35361c99387cdbbf99bd0ea4aa6f28ca99ff5338", size = 10510887, upload-time = "2026-02-26T20:03:45.771Z" }, + { url = "https://files.pythonhosted.org/packages/ff/92/22d1ced50971c5b6433aed166fcef8c9343f567a94cf2b9d9089f6aa80fe/ruff-0.15.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e9737c8161da79fd7cfec19f1e35620375bd8b2a50c3e77fa3d2c16f574105cc", size = 10285939, upload-time = "2026-02-26T20:04:22.42Z" }, + { url = "https://files.pythonhosted.org/packages/e6/f4/7c20aec3143837641a02509a4668fb146a642fd1211846634edc17eb5563/ruff-0.15.4-py3-none-musllinux_1_2_i686.whl", hash = "sha256:291258c917539e18f6ba40482fe31d6f5ac023994ee11d7bdafd716f2aab8a68", size = 10765471, upload-time = "2026-02-26T20:03:58.924Z" }, + { url = "https://files.pythonhosted.org/packages/d0/09/6d2f7586f09a16120aebdff8f64d962d7c4348313c77ebb29c566cefc357/ruff-0.15.4-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:3f83c45911da6f2cd5936c436cf86b9f09f09165f033a99dcf7477e34041cbc3", size = 11263382, upload-time = "2026-02-26T20:04:24.424Z" }, + { url = "https://files.pythonhosted.org/packages/1b/fa/2ef715a1cd329ef47c1a050e10dee91a9054b7ce2fcfdd6a06d139afb7ec/ruff-0.15.4-py3-none-win32.whl", hash = "sha256:65594a2d557d4ee9f02834fcdf0a28daa8b3b9f6cb2cb93846025a36db47ef22", size = 10506664, upload-time = "2026-02-26T20:03:50.56Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/a8/c688ef7e29983976820d18710f955751d9f4d4eb69df658af3d006e2ba3e/ruff-0.15.4-py3-none-win_amd64.whl", hash = "sha256:04196ad44f0df220c2ece5b0e959c2f37c777375ec744397d21d15b50a75264f", size = 11651048, upload-time = "2026-02-26T20:04:17.191Z" }, + { url = "https://files.pythonhosted.org/packages/3e/0a/9e1be9035b37448ce2e68c978f0591da94389ade5a5abafa4cf99985d1b2/ruff-0.15.4-py3-none-win_arm64.whl", hash = "sha256:60d5177e8cfc70e51b9c5fad936c634872a74209f934c1e79107d11787ad5453", size = 10966776, upload-time = "2026-02-26T20:03:56.908Z" }, ] [[package]] @@ -6274,7 +6387,7 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } wheels = [ @@ -6327,7 +6440,7 @@ wheels = [ [[package]] name = "scipy" -version = "1.17.0" +version = "1.17.1" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14'", @@ -6336,70 +6449,70 @@ resolution-markers = [ "python_full_version == '3.11.*'", ] dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/56/3e/9cca699f3486ce6bc12ff46dc2031f1ec8eb9ccc9a320fdaf925f1417426/scipy-1.17.0.tar.gz", hash = "sha256:2591060c8e648d8b96439e111ac41fd8342fdeff1876be2e19dea3fe8930454e", size = 30396830, upload-time = "2026-01-10T21:34:23.009Z" } +sdist = { 
url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/4b/c89c131aa87cad2b77a54eb0fb94d633a842420fa7e919dc2f922037c3d8/scipy-1.17.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:2abd71643797bd8a106dff97894ff7869eeeb0af0f7a5ce02e4227c6a2e9d6fd", size = 31381316, upload-time = "2026-01-10T21:24:33.42Z" }, - { url = "https://files.pythonhosted.org/packages/5e/5f/a6b38f79a07d74989224d5f11b55267714707582908a5f1ae854cf9a9b84/scipy-1.17.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:ef28d815f4d2686503e5f4f00edc387ae58dfd7a2f42e348bb53359538f01558", size = 27966760, upload-time = "2026-01-10T21:24:38.911Z" }, - { url = "https://files.pythonhosted.org/packages/c1/20/095ad24e031ee8ed3c5975954d816b8e7e2abd731e04f8be573de8740885/scipy-1.17.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:272a9f16d6bb4667e8b50d25d71eddcc2158a214df1b566319298de0939d2ab7", size = 20138701, upload-time = "2026-01-10T21:24:43.249Z" }, - { url = "https://files.pythonhosted.org/packages/89/11/4aad2b3858d0337756f3323f8960755704e530b27eb2a94386c970c32cbe/scipy-1.17.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:7204fddcbec2fe6598f1c5fdf027e9f259106d05202a959a9f1aecf036adc9f6", size = 22480574, upload-time = "2026-01-10T21:24:47.266Z" }, - { url = "https://files.pythonhosted.org/packages/85/bd/f5af70c28c6da2227e510875cadf64879855193a687fb19951f0f44cfd6b/scipy-1.17.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fc02c37a5639ee67d8fb646ffded6d793c06c5622d36b35cfa8fe5ececb8f042", size = 32862414, upload-time = "2026-01-10T21:24:52.566Z" }, - { url = 
"https://files.pythonhosted.org/packages/ef/df/df1457c4df3826e908879fe3d76bc5b6e60aae45f4ee42539512438cfd5d/scipy-1.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dac97a27520d66c12a34fd90a4fe65f43766c18c0d6e1c0a80f114d2260080e4", size = 35112380, upload-time = "2026-01-10T21:24:58.433Z" }, - { url = "https://files.pythonhosted.org/packages/5f/bb/88e2c16bd1dd4de19d80d7c5e238387182993c2fb13b4b8111e3927ad422/scipy-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ebb7446a39b3ae0fe8f416a9a3fdc6fba3f11c634f680f16a239c5187bc487c0", size = 34922676, upload-time = "2026-01-10T21:25:04.287Z" }, - { url = "https://files.pythonhosted.org/packages/02/ba/5120242cc735f71fc002cff0303d536af4405eb265f7c60742851e7ccfe9/scipy-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:474da16199f6af66601a01546144922ce402cb17362e07d82f5a6cf8f963e449", size = 37507599, upload-time = "2026-01-10T21:25:09.851Z" }, - { url = "https://files.pythonhosted.org/packages/52/c8/08629657ac6c0da198487ce8cd3de78e02cfde42b7f34117d56a3fe249dc/scipy-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:255c0da161bd7b32a6c898e7891509e8a9289f0b1c6c7d96142ee0d2b114c2ea", size = 36380284, upload-time = "2026-01-10T21:25:15.632Z" }, - { url = "https://files.pythonhosted.org/packages/6c/4a/465f96d42c6f33ad324a40049dfd63269891db9324aa66c4a1c108c6f994/scipy-1.17.0-cp311-cp311-win_arm64.whl", hash = "sha256:85b0ac3ad17fa3be50abd7e69d583d98792d7edc08367e01445a1e2076005379", size = 24370427, upload-time = "2026-01-10T21:25:20.514Z" }, - { url = "https://files.pythonhosted.org/packages/0b/11/7241a63e73ba5a516f1930ac8d5b44cbbfabd35ac73a2d08ca206df007c4/scipy-1.17.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:0d5018a57c24cb1dd828bcf51d7b10e65986d549f52ef5adb6b4d1ded3e32a57", size = 31364580, upload-time = "2026-01-10T21:25:25.717Z" }, - { url = 
"https://files.pythonhosted.org/packages/ed/1d/5057f812d4f6adc91a20a2d6f2ebcdb517fdbc87ae3acc5633c9b97c8ba5/scipy-1.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:88c22af9e5d5a4f9e027e26772cc7b5922fab8bcc839edb3ae33de404feebd9e", size = 27969012, upload-time = "2026-01-10T21:25:30.921Z" }, - { url = "https://files.pythonhosted.org/packages/e3/21/f6ec556c1e3b6ec4e088da667d9987bb77cc3ab3026511f427dc8451187d/scipy-1.17.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:f3cd947f20fe17013d401b64e857c6b2da83cae567adbb75b9dcba865abc66d8", size = 20140691, upload-time = "2026-01-10T21:25:34.802Z" }, - { url = "https://files.pythonhosted.org/packages/7a/fe/5e5ad04784964ba964a96f16c8d4676aa1b51357199014dce58ab7ec5670/scipy-1.17.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e8c0b331c2c1f531eb51f1b4fc9ba709521a712cce58f1aa627bc007421a5306", size = 22463015, upload-time = "2026-01-10T21:25:39.277Z" }, - { url = "https://files.pythonhosted.org/packages/4a/69/7c347e857224fcaf32a34a05183b9d8a7aca25f8f2d10b8a698b8388561a/scipy-1.17.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5194c445d0a1c7a6c1a4a4681b6b7c71baad98ff66d96b949097e7513c9d6742", size = 32724197, upload-time = "2026-01-10T21:25:44.084Z" }, - { url = "https://files.pythonhosted.org/packages/d1/fe/66d73b76d378ba8cc2fe605920c0c75092e3a65ae746e1e767d9d020a75a/scipy-1.17.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9eeb9b5f5997f75507814ed9d298ab23f62cf79f5a3ef90031b1ee2506abdb5b", size = 35009148, upload-time = "2026-01-10T21:25:50.591Z" }, - { url = "https://files.pythonhosted.org/packages/af/07/07dec27d9dc41c18d8c43c69e9e413431d20c53a0339c388bcf72f353c4b/scipy-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:40052543f7bbe921df4408f46003d6f01c6af109b9e2c8a66dd1cf6cf57f7d5d", size = 34798766, upload-time = "2026-01-10T21:25:59.41Z" }, - { url = 
"https://files.pythonhosted.org/packages/81/61/0470810c8a093cdacd4ba7504b8a218fd49ca070d79eca23a615f5d9a0b0/scipy-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0cf46c8013fec9d3694dc572f0b54100c28405d55d3e2cb15e2895b25057996e", size = 37405953, upload-time = "2026-01-10T21:26:07.75Z" }, - { url = "https://files.pythonhosted.org/packages/92/ce/672ed546f96d5d41ae78c4b9b02006cedd0b3d6f2bf5bb76ea455c320c28/scipy-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:0937a0b0d8d593a198cededd4c439a0ea216a3f36653901ea1f3e4be949056f8", size = 36328121, upload-time = "2026-01-10T21:26:16.509Z" }, - { url = "https://files.pythonhosted.org/packages/9d/21/38165845392cae67b61843a52c6455d47d0cc2a40dd495c89f4362944654/scipy-1.17.0-cp312-cp312-win_arm64.whl", hash = "sha256:f603d8a5518c7426414d1d8f82e253e454471de682ce5e39c29adb0df1efb86b", size = 24314368, upload-time = "2026-01-10T21:26:23.087Z" }, - { url = "https://files.pythonhosted.org/packages/0c/51/3468fdfd49387ddefee1636f5cf6d03ce603b75205bf439bbf0e62069bfd/scipy-1.17.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:65ec32f3d32dfc48c72df4291345dae4f048749bc8d5203ee0a3f347f96c5ce6", size = 31344101, upload-time = "2026-01-10T21:26:30.25Z" }, - { url = "https://files.pythonhosted.org/packages/b2/9a/9406aec58268d437636069419e6977af953d1e246df941d42d3720b7277b/scipy-1.17.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:1f9586a58039d7229ce77b52f8472c972448cded5736eaf102d5658bbac4c269", size = 27950385, upload-time = "2026-01-10T21:26:36.801Z" }, - { url = "https://files.pythonhosted.org/packages/4f/98/e7342709e17afdfd1b26b56ae499ef4939b45a23a00e471dfb5375eea205/scipy-1.17.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9fad7d3578c877d606b1150135c2639e9de9cecd3705caa37b66862977cc3e72", size = 20122115, upload-time = "2026-01-10T21:26:42.107Z" }, - { url = 
"https://files.pythonhosted.org/packages/fd/0e/9eeeb5357a64fd157cbe0302c213517c541cc16b8486d82de251f3c68ede/scipy-1.17.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:423ca1f6584fc03936972b5f7c06961670dbba9f234e71676a7c7ccf938a0d61", size = 22442402, upload-time = "2026-01-10T21:26:48.029Z" }, - { url = "https://files.pythonhosted.org/packages/c9/10/be13397a0e434f98e0c79552b2b584ae5bb1c8b2be95db421533bbca5369/scipy-1.17.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fe508b5690e9eaaa9467fc047f833af58f1152ae51a0d0aed67aa5801f4dd7d6", size = 32696338, upload-time = "2026-01-10T21:26:55.521Z" }, - { url = "https://files.pythonhosted.org/packages/63/1e/12fbf2a3bb240161651c94bb5cdd0eae5d4e8cc6eaeceb74ab07b12a753d/scipy-1.17.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6680f2dfd4f6182e7d6db161344537da644d1cf85cf293f015c60a17ecf08752", size = 34977201, upload-time = "2026-01-10T21:27:03.501Z" }, - { url = "https://files.pythonhosted.org/packages/19/5b/1a63923e23ccd20bd32156d7dd708af5bbde410daa993aa2500c847ab2d2/scipy-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eec3842ec9ac9de5917899b277428886042a93db0b227ebbe3a333b64ec7643d", size = 34777384, upload-time = "2026-01-10T21:27:11.423Z" }, - { url = "https://files.pythonhosted.org/packages/39/22/b5da95d74edcf81e540e467202a988c50fef41bd2011f46e05f72ba07df6/scipy-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7425fcafbc09a03731e1bc05581f5fad988e48c6a861f441b7ab729a49a55ea", size = 37379586, upload-time = "2026-01-10T21:27:20.171Z" }, - { url = "https://files.pythonhosted.org/packages/b9/b6/8ac583d6da79e7b9e520579f03007cb006f063642afd6b2eeb16b890bf93/scipy-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:87b411e42b425b84777718cc41516b8a7e0795abfa8e8e1d573bf0ef014f0812", size = 36287211, upload-time = "2026-01-10T21:28:43.122Z" }, - { url = 
"https://files.pythonhosted.org/packages/55/fb/7db19e0b3e52f882b420417644ec81dd57eeef1bd1705b6f689d8ff93541/scipy-1.17.0-cp313-cp313-win_arm64.whl", hash = "sha256:357ca001c6e37601066092e7c89cca2f1ce74e2a520ca78d063a6d2201101df2", size = 24312646, upload-time = "2026-01-10T21:28:49.893Z" }, - { url = "https://files.pythonhosted.org/packages/20/b6/7feaa252c21cc7aff335c6c55e1b90ab3e3306da3f048109b8b639b94648/scipy-1.17.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:ec0827aa4d36cb79ff1b81de898e948a51ac0b9b1c43e4a372c0508c38c0f9a3", size = 31693194, upload-time = "2026-01-10T21:27:27.454Z" }, - { url = "https://files.pythonhosted.org/packages/76/bb/bbb392005abce039fb7e672cb78ac7d158700e826b0515cab6b5b60c26fb/scipy-1.17.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:819fc26862b4b3c73a60d486dbb919202f3d6d98c87cf20c223511429f2d1a97", size = 28365415, upload-time = "2026-01-10T21:27:34.26Z" }, - { url = "https://files.pythonhosted.org/packages/37/da/9d33196ecc99fba16a409c691ed464a3a283ac454a34a13a3a57c0d66f3a/scipy-1.17.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:363ad4ae2853d88ebcde3ae6ec46ccca903ea9835ee8ba543f12f575e7b07e4e", size = 20537232, upload-time = "2026-01-10T21:27:40.306Z" }, - { url = "https://files.pythonhosted.org/packages/56/9d/f4b184f6ddb28e9a5caea36a6f98e8ecd2a524f9127354087ce780885d83/scipy-1.17.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:979c3a0ff8e5ba254d45d59ebd38cde48fce4f10b5125c680c7a4bfe177aab07", size = 22791051, upload-time = "2026-01-10T21:27:46.539Z" }, - { url = "https://files.pythonhosted.org/packages/9b/9d/025cccdd738a72140efc582b1641d0dd4caf2e86c3fb127568dc80444e6e/scipy-1.17.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:130d12926ae34399d157de777472bf82e9061c60cc081372b3118edacafe1d00", size = 32815098, upload-time = "2026-01-10T21:27:54.389Z" }, - { url = 
"https://files.pythonhosted.org/packages/48/5f/09b879619f8bca15ce392bfc1894bd9c54377e01d1b3f2f3b595a1b4d945/scipy-1.17.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6e886000eb4919eae3a44f035e63f0fd8b651234117e8f6f29bad1cd26e7bc45", size = 35031342, upload-time = "2026-01-10T21:28:03.012Z" }, - { url = "https://files.pythonhosted.org/packages/f2/9a/f0f0a9f0aa079d2f106555b984ff0fbb11a837df280f04f71f056ea9c6e4/scipy-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:13c4096ac6bc31d706018f06a49abe0485f96499deb82066b94d19b02f664209", size = 34893199, upload-time = "2026-01-10T21:28:10.832Z" }, - { url = "https://files.pythonhosted.org/packages/90/b8/4f0f5cf0c5ea4d7548424e6533e6b17d164f34a6e2fb2e43ffebb6697b06/scipy-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cacbaddd91fcffde703934897c5cd2c7cb0371fac195d383f4e1f1c5d3f3bd04", size = 37438061, upload-time = "2026-01-10T21:28:19.684Z" }, - { url = "https://files.pythonhosted.org/packages/f9/cc/2bd59140ed3b2fa2882fb15da0a9cb1b5a6443d67cfd0d98d4cec83a57ec/scipy-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:edce1a1cf66298cccdc48a1bdf8fb10a3bf58e8b58d6c3883dd1530e103f87c0", size = 36328593, upload-time = "2026-01-10T21:28:28.007Z" }, - { url = "https://files.pythonhosted.org/packages/13/1b/c87cc44a0d2c7aaf0f003aef2904c3d097b422a96c7e7c07f5efd9073c1b/scipy-1.17.0-cp313-cp313t-win_arm64.whl", hash = "sha256:30509da9dbec1c2ed8f168b8d8aa853bc6723fede1dbc23c7d43a56f5ab72a67", size = 24625083, upload-time = "2026-01-10T21:28:35.188Z" }, - { url = "https://files.pythonhosted.org/packages/1a/2d/51006cd369b8e7879e1c630999a19d1fbf6f8b5ed3e33374f29dc87e53b3/scipy-1.17.0-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:c17514d11b78be8f7e6331b983a65a7f5ca1fd037b95e27b280921fe5606286a", size = 31346803, upload-time = "2026-01-10T21:28:57.24Z" }, - { url = 
"https://files.pythonhosted.org/packages/d6/2e/2349458c3ce445f53a6c93d4386b1c4c5c0c540917304c01222ff95ff317/scipy-1.17.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:4e00562e519c09da34c31685f6acc3aa384d4d50604db0f245c14e1b4488bfa2", size = 27967182, upload-time = "2026-01-10T21:29:04.107Z" }, - { url = "https://files.pythonhosted.org/packages/5e/7c/df525fbfa77b878d1cfe625249529514dc02f4fd5f45f0f6295676a76528/scipy-1.17.0-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:f7df7941d71314e60a481e02d5ebcb3f0185b8d799c70d03d8258f6c80f3d467", size = 20139125, upload-time = "2026-01-10T21:29:10.179Z" }, - { url = "https://files.pythonhosted.org/packages/33/11/fcf9d43a7ed1234d31765ec643b0515a85a30b58eddccc5d5a4d12b5f194/scipy-1.17.0-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:aabf057c632798832f071a8dde013c2e26284043934f53b00489f1773b33527e", size = 22443554, upload-time = "2026-01-10T21:29:15.888Z" }, - { url = "https://files.pythonhosted.org/packages/80/5c/ea5d239cda2dd3d31399424967a24d556cf409fbea7b5b21412b0fd0a44f/scipy-1.17.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38c3337e00be6fd8a95b4ed66b5d988bac4ec888fd922c2ea9fe5fb1603dd67", size = 32757834, upload-time = "2026-01-10T21:29:23.406Z" }, - { url = "https://files.pythonhosted.org/packages/b8/7e/8c917cc573310e5dc91cbeead76f1b600d3fb17cf0969db02c9cf92e3cfa/scipy-1.17.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00fb5f8ec8398ad90215008d8b6009c9db9fa924fd4c7d6be307c6f945f9cd73", size = 34995775, upload-time = "2026-01-10T21:29:31.915Z" }, - { url = "https://files.pythonhosted.org/packages/c5/43/176c0c3c07b3f7df324e7cdd933d3e2c4898ca202b090bd5ba122f9fe270/scipy-1.17.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f2a4942b0f5f7c23c7cd641a0ca1955e2ae83dedcff537e3a0259096635e186b", size = 34841240, upload-time = "2026-01-10T21:29:39.995Z" }, - { url = 
"https://files.pythonhosted.org/packages/44/8c/d1f5f4b491160592e7f084d997de53a8e896a3ac01cd07e59f43ca222744/scipy-1.17.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:dbf133ced83889583156566d2bdf7a07ff89228fe0c0cb727f777de92092ec6b", size = 37394463, upload-time = "2026-01-10T21:29:48.723Z" }, - { url = "https://files.pythonhosted.org/packages/9f/ec/42a6657f8d2d087e750e9a5dde0b481fd135657f09eaf1cf5688bb23c338/scipy-1.17.0-cp314-cp314-win_amd64.whl", hash = "sha256:3625c631a7acd7cfd929e4e31d2582cf00f42fcf06011f59281271746d77e061", size = 37053015, upload-time = "2026-01-10T21:30:51.418Z" }, - { url = "https://files.pythonhosted.org/packages/27/58/6b89a6afd132787d89a362d443a7bddd511b8f41336a1ae47f9e4f000dc4/scipy-1.17.0-cp314-cp314-win_arm64.whl", hash = "sha256:9244608d27eafe02b20558523ba57f15c689357c85bdcfe920b1828750aa26eb", size = 24951312, upload-time = "2026-01-10T21:30:56.771Z" }, - { url = "https://files.pythonhosted.org/packages/e9/01/f58916b9d9ae0112b86d7c3b10b9e685625ce6e8248df139d0fcb17f7397/scipy-1.17.0-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:2b531f57e09c946f56ad0b4a3b2abee778789097871fc541e267d2eca081cff1", size = 31706502, upload-time = "2026-01-10T21:29:56.326Z" }, - { url = "https://files.pythonhosted.org/packages/59/8e/2912a87f94a7d1f8b38aabc0faf74b82d3b6c9e22be991c49979f0eceed8/scipy-1.17.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:13e861634a2c480bd237deb69333ac79ea1941b94568d4b0efa5db5e263d4fd1", size = 28380854, upload-time = "2026-01-10T21:30:01.554Z" }, - { url = "https://files.pythonhosted.org/packages/bd/1c/874137a52dddab7d5d595c1887089a2125d27d0601fce8c0026a24a92a0b/scipy-1.17.0-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:eb2651271135154aa24f6481cbae5cc8af1f0dd46e6533fb7b56aa9727b6a232", size = 20552752, upload-time = "2026-01-10T21:30:05.93Z" }, - { url = 
"https://files.pythonhosted.org/packages/3f/f0/7518d171cb735f6400f4576cf70f756d5b419a07fe1867da34e2c2c9c11b/scipy-1.17.0-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:c5e8647f60679790c2f5c76be17e2e9247dc6b98ad0d3b065861e082c56e078d", size = 22803972, upload-time = "2026-01-10T21:30:10.651Z" }, - { url = "https://files.pythonhosted.org/packages/7c/74/3498563a2c619e8a3ebb4d75457486c249b19b5b04a30600dfd9af06bea5/scipy-1.17.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5fb10d17e649e1446410895639f3385fd2bf4c3c7dfc9bea937bddcbc3d7b9ba", size = 32829770, upload-time = "2026-01-10T21:30:16.359Z" }, - { url = "https://files.pythonhosted.org/packages/48/d1/7b50cedd8c6c9d6f706b4b36fa8544d829c712a75e370f763b318e9638c1/scipy-1.17.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8547e7c57f932e7354a2319fab613981cde910631979f74c9b542bb167a8b9db", size = 35051093, upload-time = "2026-01-10T21:30:22.987Z" }, - { url = "https://files.pythonhosted.org/packages/e2/82/a2d684dfddb87ba1b3ea325df7c3293496ee9accb3a19abe9429bce94755/scipy-1.17.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33af70d040e8af9d5e7a38b5ed3b772adddd281e3062ff23fec49e49681c38cf", size = 34909905, upload-time = "2026-01-10T21:30:28.704Z" }, - { url = "https://files.pythonhosted.org/packages/ef/5e/e565bd73991d42023eb82bb99e51c5b3d9e2c588ca9d4b3e2cc1d3ca62a6/scipy-1.17.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb55bb97d00f8b7ab95cb64f873eb0bf54d9446264d9f3609130381233483f", size = 37457743, upload-time = "2026-01-10T21:30:34.819Z" }, - { url = "https://files.pythonhosted.org/packages/58/a8/a66a75c3d8f1fb2b83f66007d6455a06a6f6cf5618c3dc35bc9b69dd096e/scipy-1.17.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1ff269abf702f6c7e67a4b7aad981d42871a11b9dd83c58d2d2ea624efbd1088", size = 37098574, upload-time = "2026-01-10T21:30:40.782Z" }, - { url = 
"https://files.pythonhosted.org/packages/56/a5/df8f46ef7da168f1bc52cd86e09a9de5c6f19cc1da04454d51b7d4f43408/scipy-1.17.0-cp314-cp314t-win_arm64.whl", hash = "sha256:031121914e295d9791319a1875444d55079885bbae5bdc9c5e0f2ee5f09d34ff", size = 25246266, upload-time = "2026-01-10T21:30:45.923Z" }, + { url = "https://files.pythonhosted.org/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675, upload-time = "2026-02-23T00:16:00.13Z" }, + { url = "https://files.pythonhosted.org/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057, upload-time = "2026-02-23T00:16:09.456Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032, upload-time = "2026-02-23T00:16:17.358Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533, upload-time = "2026-02-23T00:16:25.791Z" }, + { url = "https://files.pythonhosted.org/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057, upload-time = "2026-02-23T00:16:36.931Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300, upload-time = "2026-02-23T00:16:49.108Z" }, + { url = "https://files.pythonhosted.org/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333, upload-time = "2026-02-23T00:17:01.293Z" }, + { url = "https://files.pythonhosted.org/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314, upload-time = "2026-02-23T00:17:12.576Z" }, + { url = "https://files.pythonhosted.org/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512, upload-time = "2026-02-23T00:17:23.424Z" }, + { url = "https://files.pythonhosted.org/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248, upload-time = "2026-02-23T00:17:34.561Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, + { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, + { url = "https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, + { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, + { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, + { url = "https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, + { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, + { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 36510943, upload-time = "2026-02-23T00:20:50.89Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, + { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, + { url = "https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, + { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, + { url = "https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, + { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, + { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, + { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, + { url = "https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, + { url = "https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, + { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, + { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, + { url = "https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, + { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, ] [[package]] @@ -6462,18 +6575,19 @@ wheels = [ [[package]] name = "simli-ai" -version = "2.0.1" +version = "2.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiortc" }, { name = "av" }, { name = "httpx" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/76/b5/6021990871daf9f5b6eb744aff68c83f2c7b257cfd2ee5b9883d0acd9cf4/simli_ai-2.0.1.tar.gz", hash = "sha256:1f63eb76900d4dac0c18406a854219e54ebab51acb0c01e245c7a0738dc72413", size = 16104, upload-time = "2026-02-17T12:46:09.743Z" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/8c/fe0697cd371a0f203b915f59e376e1807e4ad79bd53e20ceea57a161f242/simli_ai-2.0.2.tar.gz", hash = "sha256:53b99901fe4c5eeb7637492f70dde34c131ee9e5589bf8781a75494c0469ca03", size = 16422, upload-time = "2026-02-25T11:13:16.854Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/c1/e7aed0f59d04628c0ac738e2bf8cb6bf020870d1909f3ec9fcf265136663/simli_ai-2.0.1-py3-none-any.whl", hash = "sha256:0a48e38fe289568e56236266843484a1f0e28aca694dd8e2b96610fe40d6c687", size = 19456, upload-time = "2026-02-17T12:46:08.727Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f0/fb6737a87069ed2830d421c7e45cc5c117c8bc7d2183bb37466c0bf6f6ab/simli_ai-2.0.2-py3-none-any.whl", hash = "sha256:023cb8ef37c74f7463810af4595c2e0c2850647e33f9ff9b2ef09d088c0d2403", size = 19914, upload-time = 
"2026-02-25T11:13:15.257Z" }, ] [[package]] @@ -6487,14 +6601,14 @@ wheels = [ [[package]] name = "smart-open" -version = "7.5.0" +version = "7.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/67/9a/0a7acb748b86e2922982366d780ca4b16c33f7246fa5860d26005c97e4f3/smart_open-7.5.0.tar.gz", hash = "sha256:f394b143851d8091011832ac8113ea4aba6b92e6c35f6e677ddaaccb169d7cb9", size = 53920, upload-time = "2025-11-08T21:38:40.698Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/be/a66598b305763861a9ab15ff0f2fbc44e47b1ce7a776797337a4eef37c66/smart_open-7.5.1.tar.gz", hash = "sha256:3f08e16827c4733699e6b2cc40328a3568f900cb12ad9a3ad233ba6c872d9fe7", size = 54034, upload-time = "2026-02-23T11:01:28.979Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/95/bc978be7ea0babf2fb48a414b6afaad414c6a9e8b1eafc5b8a53c030381a/smart_open-7.5.0-py3-none-any.whl", hash = "sha256:87e695c5148bbb988f15cec00971602765874163be85acb1c9fb8abc012e6599", size = 63940, upload-time = "2025-11-08T21:38:39.024Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ea/dcdecd68acebb49d3fd560473a43499b1635076f7f1ae8641c060fe7ce74/smart_open-7.5.1-py3-none-any.whl", hash = "sha256:3e07cbbd9c8a908bcb8e25d48becf1a5cbb4886fa975e9f34c672ed171df2318", size = 64108, upload-time = "2026-02-23T11:01:27.429Z" }, ] [[package]] @@ -6594,7 +6708,8 @@ version = "0.13.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", 
hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } wheels = [ @@ -6612,7 +6727,8 @@ name = "soxr" version = "0.5.0.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/02/c0/4429bf9b3be10e749149e286aa5c53775399ec62891c6b970456c6dca325/soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73", size = 170853, upload-time = "2024-08-31T03:43:33.058Z" } wheels = [ @@ -6650,7 +6766,8 @@ name = "speechmatics-voice" version = "0.2.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pydantic" }, { name = "speechmatics-rt" }, ] @@ -6793,7 +6910,7 @@ wheels = [ [[package]] name = "sphinx-autodoc-typehints" -version = "3.6.3" +version = "3.8.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ "python_full_version >= '3.14'", @@ -6803,9 +6920,9 @@ resolution-markers = [ dependencies = [ { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/5f/ebcaed1a67e623e4a7622808a8be6b0fd8344313e185f62e85a26b0ce26a/sphinx_autodoc_typehints-3.6.3.tar.gz", hash = 
"sha256:6c387b47d9ad5e75b157810af5bad46901f0a22708ed5e4adf466885a9c60910", size = 38288, upload-time = "2026-02-18T04:22:08.384Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/89/72f96fe27aa1cfdc882aa6e1309a86b94e4653c1e8acf9b143d34e89c619/sphinx_autodoc_typehints-3.8.0.tar.gz", hash = "sha256:155a30407e88ed3287eeeb1e9156b0ed0ad08c998b0391c652b540563132fd70", size = 59672, upload-time = "2026-02-25T15:00:35.909Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/bd/2b853836d152e40a27655828fdc02c5128f294ac452ad9a13424bb7f92fa/sphinx_autodoc_typehints-3.6.3-py3-none-any.whl", hash = "sha256:46ebc68fa85b320d55887a8d836a01e12e3b7744da973e70af8cedc74072aad5", size = 20882, upload-time = "2026-02-18T04:22:07.238Z" }, + { url = "https://files.pythonhosted.org/packages/8c/0e/36820830c766647d688dfc2b3fda76d76c1cf007eea58fffc1990195aca4/sphinx_autodoc_typehints-3.8.0-py3-none-any.whl", hash = "sha256:f348971f3d88eaee053668b61512e921086b8f0600f1e0887a39bc9476aca51c", size = 32616, upload-time = "2026-02-25T15:00:34.749Z" }, ] [[package]] @@ -6912,71 +7029,75 @@ wheels = [ [[package]] name = "sqlalchemy" -version = "2.0.46" +version = "2.0.47" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/06/aa/9ce0f3e7a9829ead5c8ce549392f33a12c4555a6c0609bb27d882e9c7ddf/sqlalchemy-2.0.46.tar.gz", hash = "sha256:cf36851ee7219c170bb0793dbc3da3e80c582e04a5437bc601bfe8c85c9216d7", size = 9865393, upload-time = "2026-01-21T18:03:45.119Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/4b/1e00561093fe2cd8eef09d406da003c8a118ff02d6548498c1ae677d68d9/sqlalchemy-2.0.47.tar.gz", hash = 
"sha256:e3e7feb57b267fe897e492b9721ae46d5c7de6f9e8dee58aacf105dc4e154f3d", size = 9886323, upload-time = "2026-02-24T16:34:27.947Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/26/66ba59328dc25e523bfcb0f8db48bdebe2035e0159d600e1f01c0fc93967/sqlalchemy-2.0.46-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:895296687ad06dc9b11a024cf68e8d9d3943aa0b4964278d2553b86f1b267735", size = 2155051, upload-time = "2026-01-21T18:27:28.965Z" }, - { url = "https://files.pythonhosted.org/packages/21/cd/9336732941df972fbbfa394db9caa8bb0cf9fe03656ec728d12e9cbd6edc/sqlalchemy-2.0.46-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab65cb2885a9f80f979b85aa4e9c9165a31381ca322cbde7c638fe6eefd1ec39", size = 3234666, upload-time = "2026-01-21T18:32:28.72Z" }, - { url = "https://files.pythonhosted.org/packages/38/62/865ae8b739930ec433cd4123760bee7f8dafdc10abefd725a025604fb0de/sqlalchemy-2.0.46-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52fe29b3817bd191cc20bad564237c808967972c97fa683c04b28ec8979ae36f", size = 3232917, upload-time = "2026-01-21T18:44:54.064Z" }, - { url = "https://files.pythonhosted.org/packages/24/38/805904b911857f2b5e00fdea44e9570df62110f834378706939825579296/sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:09168817d6c19954d3b7655da6ba87fcb3a62bb575fb396a81a8b6a9fadfe8b5", size = 3185790, upload-time = "2026-01-21T18:32:30.581Z" }, - { url = "https://files.pythonhosted.org/packages/69/4f/3260bb53aabd2d274856337456ea52f6a7eccf6cce208e558f870cec766b/sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:be6c0466b4c25b44c5d82b0426b5501de3c424d7a3220e86cd32f319ba56798e", size = 3207206, upload-time = "2026-01-21T18:44:55.93Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b3/67c432d7f9d88bb1a61909b67e29f6354d59186c168fb5d381cf438d3b73/sqlalchemy-2.0.46-cp310-cp310-win32.whl", hash = 
"sha256:1bc3f601f0a818d27bfe139f6766487d9c88502062a2cd3a7ee6c342e81d5047", size = 2115296, upload-time = "2026-01-21T18:33:12.498Z" }, - { url = "https://files.pythonhosted.org/packages/4a/8c/25fb284f570f9d48e6c240f0269a50cec9cf009a7e08be4c0aaaf0654972/sqlalchemy-2.0.46-cp310-cp310-win_amd64.whl", hash = "sha256:e0c05aff5c6b1bb5fb46a87e0f9d2f733f83ef6cbbbcd5c642b6c01678268061", size = 2138540, upload-time = "2026-01-21T18:33:14.22Z" }, - { url = "https://files.pythonhosted.org/packages/69/ac/b42ad16800d0885105b59380ad69aad0cce5a65276e269ce2729a2343b6a/sqlalchemy-2.0.46-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:261c4b1f101b4a411154f1da2b76497d73abbfc42740029205d4d01fa1052684", size = 2154851, upload-time = "2026-01-21T18:27:30.54Z" }, - { url = "https://files.pythonhosted.org/packages/a0/60/d8710068cb79f64d002ebed62a7263c00c8fd95f4ebd4b5be8f7ca93f2bc/sqlalchemy-2.0.46-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:181903fe8c1b9082995325f1b2e84ac078b1189e2819380c2303a5f90e114a62", size = 3311241, upload-time = "2026-01-21T18:32:33.45Z" }, - { url = "https://files.pythonhosted.org/packages/2b/0f/20c71487c7219ab3aa7421c7c62d93824c97c1460f2e8bb72404b0192d13/sqlalchemy-2.0.46-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:590be24e20e2424a4c3c1b0835e9405fa3d0af5823a1a9fc02e5dff56471515f", size = 3310741, upload-time = "2026-01-21T18:44:57.887Z" }, - { url = "https://files.pythonhosted.org/packages/65/80/d26d00b3b249ae000eee4db206fcfc564bf6ca5030e4747adf451f4b5108/sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7568fe771f974abadce52669ef3a03150ff03186d8eb82613bc8adc435a03f01", size = 3263116, upload-time = "2026-01-21T18:32:35.044Z" }, - { url = "https://files.pythonhosted.org/packages/da/ee/74dda7506640923821340541e8e45bd3edd8df78664f1f2e0aae8077192b/sqlalchemy-2.0.46-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:ebf7e1e78af38047e08836d33502c7a278915698b7c2145d045f780201679999", size = 3285327, upload-time = "2026-01-21T18:44:59.254Z" }, - { url = "https://files.pythonhosted.org/packages/9f/25/6dcf8abafff1389a21c7185364de145107b7394ecdcb05233815b236330d/sqlalchemy-2.0.46-cp311-cp311-win32.whl", hash = "sha256:9d80ea2ac519c364a7286e8d765d6cd08648f5b21ca855a8017d9871f075542d", size = 2114564, upload-time = "2026-01-21T18:33:15.85Z" }, - { url = "https://files.pythonhosted.org/packages/93/5f/e081490f8523adc0088f777e4ebad3cac21e498ec8a3d4067074e21447a1/sqlalchemy-2.0.46-cp311-cp311-win_amd64.whl", hash = "sha256:585af6afe518732d9ccd3aea33af2edaae4a7aa881af5d8f6f4fe3a368699597", size = 2139233, upload-time = "2026-01-21T18:33:17.528Z" }, - { url = "https://files.pythonhosted.org/packages/b6/35/d16bfa235c8b7caba3730bba43e20b1e376d2224f407c178fbf59559f23e/sqlalchemy-2.0.46-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3a9a72b0da8387f15d5810f1facca8f879de9b85af8c645138cba61ea147968c", size = 2153405, upload-time = "2026-01-21T19:05:54.143Z" }, - { url = "https://files.pythonhosted.org/packages/06/6c/3192e24486749862f495ddc6584ed730c0c994a67550ec395d872a2ad650/sqlalchemy-2.0.46-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2347c3f0efc4de367ba00218e0ae5c4ba2306e47216ef80d6e31761ac97cb0b9", size = 3334702, upload-time = "2026-01-21T18:46:45.384Z" }, - { url = "https://files.pythonhosted.org/packages/ea/a2/b9f33c8d68a3747d972a0bb758c6b63691f8fb8a49014bc3379ba15d4274/sqlalchemy-2.0.46-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9094c8b3197db12aa6f05c51c05daaad0a92b8c9af5388569847b03b1007fb1b", size = 3347664, upload-time = "2026-01-21T18:40:09.979Z" }, - { url = "https://files.pythonhosted.org/packages/aa/d2/3e59e2a91eaec9db7e8dc6b37b91489b5caeb054f670f32c95bcba98940f/sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:37fee2164cf21417478b6a906adc1a91d69ae9aba8f9533e67ce882f4bb1de53", size = 3277372, upload-time = "2026-01-21T18:46:47.168Z" }, - { url = "https://files.pythonhosted.org/packages/dd/dd/67bc2e368b524e2192c3927b423798deda72c003e73a1e94c21e74b20a85/sqlalchemy-2.0.46-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b1e14b2f6965a685c7128bd315e27387205429c2e339eeec55cb75ca4ab0ea2e", size = 3312425, upload-time = "2026-01-21T18:40:11.548Z" }, - { url = "https://files.pythonhosted.org/packages/43/82/0ecd68e172bfe62247e96cb47867c2d68752566811a4e8c9d8f6e7c38a65/sqlalchemy-2.0.46-cp312-cp312-win32.whl", hash = "sha256:412f26bb4ba942d52016edc8d12fb15d91d3cd46b0047ba46e424213ad407bcb", size = 2113155, upload-time = "2026-01-21T18:42:49.748Z" }, - { url = "https://files.pythonhosted.org/packages/bc/2a/2821a45742073fc0331dc132552b30de68ba9563230853437cac54b2b53e/sqlalchemy-2.0.46-cp312-cp312-win_amd64.whl", hash = "sha256:ea3cd46b6713a10216323cda3333514944e510aa691c945334713fca6b5279ff", size = 2140078, upload-time = "2026-01-21T18:42:51.197Z" }, - { url = "https://files.pythonhosted.org/packages/b3/4b/fa7838fe20bb752810feed60e45625a9a8b0102c0c09971e2d1d95362992/sqlalchemy-2.0.46-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:93a12da97cca70cea10d4b4fc602589c4511f96c1f8f6c11817620c021d21d00", size = 2150268, upload-time = "2026-01-21T19:05:56.621Z" }, - { url = "https://files.pythonhosted.org/packages/46/c1/b34dccd712e8ea846edf396e00973dda82d598cb93762e55e43e6835eba9/sqlalchemy-2.0.46-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:af865c18752d416798dae13f83f38927c52f085c52e2f32b8ab0fef46fdd02c2", size = 3276511, upload-time = "2026-01-21T18:46:49.022Z" }, - { url = "https://files.pythonhosted.org/packages/96/48/a04d9c94753e5d5d096c628c82a98c4793b9c08ca0e7155c3eb7d7db9f24/sqlalchemy-2.0.46-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:8d679b5f318423eacb61f933a9a0f75535bfca7056daeadbf6bd5bcee6183aee", size = 3292881, upload-time = "2026-01-21T18:40:13.089Z" }, - { url = "https://files.pythonhosted.org/packages/be/f4/06eda6e91476f90a7d8058f74311cb65a2fb68d988171aced81707189131/sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64901e08c33462acc9ec3bad27fc7a5c2b6491665f2aa57564e57a4f5d7c52ad", size = 3224559, upload-time = "2026-01-21T18:46:50.974Z" }, - { url = "https://files.pythonhosted.org/packages/ab/a2/d2af04095412ca6345ac22b33b89fe8d6f32a481e613ffcb2377d931d8d0/sqlalchemy-2.0.46-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8ac45e8f4eaac0f9f8043ea0e224158855c6a4329fd4ee37c45c61e3beb518e", size = 3262728, upload-time = "2026-01-21T18:40:14.883Z" }, - { url = "https://files.pythonhosted.org/packages/31/48/1980c7caa5978a3b8225b4d230e69a2a6538a3562b8b31cea679b6933c83/sqlalchemy-2.0.46-cp313-cp313-win32.whl", hash = "sha256:8d3b44b3d0ab2f1319d71d9863d76eeb46766f8cf9e921ac293511804d39813f", size = 2111295, upload-time = "2026-01-21T18:42:52.366Z" }, - { url = "https://files.pythonhosted.org/packages/2d/54/f8d65bbde3d877617c4720f3c9f60e99bb7266df0d5d78b6e25e7c149f35/sqlalchemy-2.0.46-cp313-cp313-win_amd64.whl", hash = "sha256:77f8071d8fbcbb2dd11b7fd40dedd04e8ebe2eb80497916efedba844298065ef", size = 2137076, upload-time = "2026-01-21T18:42:53.924Z" }, - { url = "https://files.pythonhosted.org/packages/56/ba/9be4f97c7eb2b9d5544f2624adfc2853e796ed51d2bb8aec90bc94b7137e/sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a1e8cc6cc01da346dc92d9509a63033b9b1bda4fed7a7a7807ed385c7dccdc10", size = 3556533, upload-time = "2026-01-21T18:33:06.636Z" }, - { url = "https://files.pythonhosted.org/packages/20/a6/b1fc6634564dbb4415b7ed6419cdfeaadefd2c39cdab1e3aa07a5f2474c2/sqlalchemy-2.0.46-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:96c7cca1a4babaaf3bfff3e4e606e38578856917e52f0384635a95b226c87764", size = 3523208, upload-time = "2026-01-21T18:45:08.436Z" }, - { url = "https://files.pythonhosted.org/packages/a1/d8/41e0bdfc0f930ff236f86fccd12962d8fa03713f17ed57332d38af6a3782/sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b2a9f9aee38039cf4755891a1e50e1effcc42ea6ba053743f452c372c3152b1b", size = 3464292, upload-time = "2026-01-21T18:33:08.208Z" }, - { url = "https://files.pythonhosted.org/packages/f0/8b/9dcbec62d95bea85f5ecad9b8d65b78cc30fb0ffceeb3597961f3712549b/sqlalchemy-2.0.46-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:db23b1bf8cfe1f7fda19018e7207b20cdb5168f83c437ff7e95d19e39289c447", size = 3473497, upload-time = "2026-01-21T18:45:10.552Z" }, - { url = "https://files.pythonhosted.org/packages/e9/f8/5ecdfc73383ec496de038ed1614de9e740a82db9ad67e6e4514ebc0708a3/sqlalchemy-2.0.46-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:56bdd261bfd0895452006d5316cbf35739c53b9bb71a170a331fa0ea560b2ada", size = 2152079, upload-time = "2026-01-21T19:05:58.477Z" }, - { url = "https://files.pythonhosted.org/packages/e5/bf/eba3036be7663ce4d9c050bc3d63794dc29fbe01691f2bf5ccb64e048d20/sqlalchemy-2.0.46-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33e462154edb9493f6c3ad2125931e273bbd0be8ae53f3ecd1c161ea9a1dd366", size = 3272216, upload-time = "2026-01-21T18:46:52.634Z" }, - { url = "https://files.pythonhosted.org/packages/05/45/1256fb597bb83b58a01ddb600c59fe6fdf0e5afe333f0456ed75c0f8d7bd/sqlalchemy-2.0.46-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9bcdce05f056622a632f1d44bb47dbdb677f58cad393612280406ce37530eb6d", size = 3277208, upload-time = "2026-01-21T18:40:16.38Z" }, - { url = "https://files.pythonhosted.org/packages/d9/a0/2053b39e4e63b5d7ceb3372cface0859a067c1ddbd575ea7e9985716f771/sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:8e84b09a9b0f19accedcbeff5c2caf36e0dd537341a33aad8d680336152dc34e", size = 3221994, upload-time = "2026-01-21T18:46:54.622Z" }, - { url = "https://files.pythonhosted.org/packages/1e/87/97713497d9502553c68f105a1cb62786ba1ee91dea3852ae4067ed956a50/sqlalchemy-2.0.46-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4f52f7291a92381e9b4de9050b0a65ce5d6a763333406861e33906b8aa4906bf", size = 3243990, upload-time = "2026-01-21T18:40:18.253Z" }, - { url = "https://files.pythonhosted.org/packages/a8/87/5d1b23548f420ff823c236f8bea36b1a997250fd2f892e44a3838ca424f4/sqlalchemy-2.0.46-cp314-cp314-win32.whl", hash = "sha256:70ed2830b169a9960193f4d4322d22be5c0925357d82cbf485b3369893350908", size = 2114215, upload-time = "2026-01-21T18:42:55.232Z" }, - { url = "https://files.pythonhosted.org/packages/3a/20/555f39cbcf0c10cf452988b6a93c2a12495035f68b3dbd1a408531049d31/sqlalchemy-2.0.46-cp314-cp314-win_amd64.whl", hash = "sha256:3c32e993bc57be6d177f7d5d31edb93f30726d798ad86ff9066d75d9bf2e0b6b", size = 2139867, upload-time = "2026-01-21T18:42:56.474Z" }, - { url = "https://files.pythonhosted.org/packages/3e/f0/f96c8057c982d9d8a7a68f45d69c674bc6f78cad401099692fe16521640a/sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4dafb537740eef640c4d6a7c254611dca2df87eaf6d14d6a5fca9d1f4c3fc0fa", size = 3561202, upload-time = "2026-01-21T18:33:10.337Z" }, - { url = "https://files.pythonhosted.org/packages/d7/53/3b37dda0a5b137f21ef608d8dfc77b08477bab0fe2ac9d3e0a66eaeab6fc/sqlalchemy-2.0.46-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42a1643dc5427b69aca967dae540a90b0fbf57eaf248f13a90ea5930e0966863", size = 3526296, upload-time = "2026-01-21T18:45:12.657Z" }, - { url = "https://files.pythonhosted.org/packages/33/75/f28622ba6dde79cd545055ea7bd4062dc934e0621f7b3be2891f8563f8de/sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:ff33c6e6ad006bbc0f34f5faf941cfc62c45841c64c0a058ac38c799f15b5ede", size = 3470008, upload-time = "2026-01-21T18:33:11.725Z" }, - { url = "https://files.pythonhosted.org/packages/a9/42/4afecbbc38d5e99b18acef446453c76eec6fbd03db0a457a12a056836e22/sqlalchemy-2.0.46-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:82ec52100ec1e6ec671563bbd02d7c7c8d0b9e71a0723c72f22ecf52d1755330", size = 3476137, upload-time = "2026-01-21T18:45:15.001Z" }, - { url = "https://files.pythonhosted.org/packages/fc/a1/9c4efa03300926601c19c18582531b45aededfb961ab3c3585f1e24f120b/sqlalchemy-2.0.46-py3-none-any.whl", hash = "sha256:f9c11766e7e7c0a2767dda5acb006a118640c9fc0a4104214b96269bfb78399e", size = 1937882, upload-time = "2026-01-21T18:22:10.456Z" }, + { url = "https://files.pythonhosted.org/packages/ec/75/17db77c57129c223c7d98518ad1e1faa24ee350c22a44b55390d8463c28c/sqlalchemy-2.0.47-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33a917ede39406ddb93c3e642b5bc480be7c5fd0f3d0d6ae1036d466fb963f1a", size = 2157331, upload-time = "2026-02-24T16:43:52.693Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d6/3658f7e5c376de774c009f2bb9c0ddf88a35b89c5bfb15ee7174a17b1a5f/sqlalchemy-2.0.47-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:561d027c829b01e040bdade6b6f5b429249d056ef95d7bdcb9211539ecc82803", size = 3236939, upload-time = "2026-02-24T17:28:57.419Z" }, + { url = "https://files.pythonhosted.org/packages/4e/38/f4b94f85d1c26cb9ee0e57449754de816c326f9586b9a8c5247eb49146de/sqlalchemy-2.0.47-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fa5072a37e68c565363c009b7afa5b199b488c87940ec02719860093a08f34ca", size = 3235190, upload-time = "2026-02-24T17:27:07.884Z" }, + { url = "https://files.pythonhosted.org/packages/94/f2/36714f1de01e135a2bf142b662e416e5338ab63c47878e31051338c66e2d/sqlalchemy-2.0.47-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:1e7ed17dd4312a298b6024bfd1baf51654bc49e3f03c798005babf0c7922d6a7", size = 3188064, upload-time = "2026-02-24T17:28:58.908Z" }, + { url = "https://files.pythonhosted.org/packages/ab/94/fcd978e7625cd1c97d9f1d7363e18e37d24314e572acd7c091e3a4210106/sqlalchemy-2.0.47-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6992e353fcb0593eb42d95ad84b3e58fe40b5e37fd332b9ccba28f4b2f36d1fc", size = 3209480, upload-time = "2026-02-24T17:27:09.823Z" }, + { url = "https://files.pythonhosted.org/packages/23/29/c633202b9900ab65f0162f59df737b57f30010f44d892b186810c9ed58b7/sqlalchemy-2.0.47-cp310-cp310-win32.whl", hash = "sha256:05a6d58ed99ebd01303c92d29a0c9cbf70f637b3ddd155f5172c5a7239940998", size = 2117652, upload-time = "2026-02-24T17:14:34.635Z" }, + { url = "https://files.pythonhosted.org/packages/00/39/54acf13913932b8508058d47a169e6fcde9adaa4cbfa16cbf30da1f6a482/sqlalchemy-2.0.47-cp310-cp310-win_amd64.whl", hash = "sha256:4a7aa4a584cc97e268c11e700dea0b763874eaebb435e75e7d0ffee5d90f5030", size = 2140883, upload-time = "2026-02-24T17:14:35.875Z" }, + { url = "https://files.pythonhosted.org/packages/94/13/886338d3e8ab5ddcfe84d54302c749b1793e16c4bba63d7004e3f7baa8ec/sqlalchemy-2.0.47-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a1dbf0913879c443617d6b64403cf2801c941651db8c60e96d204ed9388d6b0", size = 2157124, upload-time = "2026-02-24T16:43:54.706Z" }, + { url = "https://files.pythonhosted.org/packages/b6/bb/a897f6a66c9986aa9f27f5cf8550637d8a5ea368fd7fb42f6dac3105b4dc/sqlalchemy-2.0.47-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:775effbb97ea3b00c4dd3aeaf3ba8acba6e3e2b4b41d17d67a27e696843dbc95", size = 3313513, upload-time = "2026-02-24T17:29:00.527Z" }, + { url = "https://files.pythonhosted.org/packages/59/fb/69bfae022b681507565ab0d34f0c80aa1e9f954a5a7cbfb0ed054966ac8d/sqlalchemy-2.0.47-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:56cc834a3ffac34270cc2a41875e0f40e97aa651f4f3ca1cfbbf421c044cb62b", size = 3313014, upload-time = "2026-02-24T17:27:11.679Z" }, + { url = "https://files.pythonhosted.org/packages/04/f3/0eba329f7c182d53205a228c4fd24651b95489b431ea2bd830887b4c13c4/sqlalchemy-2.0.47-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:49b5e0c7244262f39e767c018e4fdb5e5dbc23cd54c5ddac8eea8f0ba32ef890", size = 3265389, upload-time = "2026-02-24T17:29:02.497Z" }, + { url = "https://files.pythonhosted.org/packages/5c/06/654edc084b3b46ac79e04200d7c46467ae80c759c4ee41c897f9272b036f/sqlalchemy-2.0.47-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:15cd822a3f1f6f77b5b841a30c1a07a07f7dee3385f17e638e1722de9ab683be", size = 3287604, upload-time = "2026-02-24T17:27:13.295Z" }, + { url = "https://files.pythonhosted.org/packages/78/33/c18c8f63b61981219d3aa12321bb7ccee605034d195e868ed94f9727b27c/sqlalchemy-2.0.47-cp311-cp311-win32.whl", hash = "sha256:9847a19548cd283a65e1ce0afd54016598d55ff72682d6fd3e493af6fc044064", size = 2116916, upload-time = "2026-02-24T17:14:37.392Z" }, + { url = "https://files.pythonhosted.org/packages/f5/c6/a59e3f9796fff844e16afbd821db9abfd6e12698db9441a231a96193a100/sqlalchemy-2.0.47-cp311-cp311-win_amd64.whl", hash = "sha256:722abf1c82aeca46a1a0803711244a48a298279eeaec9e02f7bfee9e064182e5", size = 2141587, upload-time = "2026-02-24T17:14:39.746Z" }, + { url = "https://files.pythonhosted.org/packages/80/88/74eb470223ff88ea6572a132c0b8de8c1d8ed7b843d3b44a8a3c77f31d39/sqlalchemy-2.0.47-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4fa91b19d6b9821c04cc8f7aa2476429cc8887b9687c762815aa629f5c0edec1", size = 2155687, upload-time = "2026-02-24T17:05:46.451Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ba/1447d3d558971b036cb93b557595cb5dcdfe728f1c7ac4dec16505ef5756/sqlalchemy-2.0.47-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c5bbbd14eff577c8c79cbfe39a0771eecd20f430f3678533476f0087138f356", 
size = 3336978, upload-time = "2026-02-24T17:18:04.597Z" }, + { url = "https://files.pythonhosted.org/packages/8a/07/b47472d2ffd0776826f17ccf0b4d01b224c99fbd1904aeb103dffbb4b1cc/sqlalchemy-2.0.47-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a6c555da8d4280a3c4c78c5b7a3f990cee2b2884e5f934f87a226191682ff7", size = 3349939, upload-time = "2026-02-24T17:27:18.937Z" }, + { url = "https://files.pythonhosted.org/packages/bb/c6/95fa32b79b57769da3e16f054cf658d90940317b5ca0ec20eac84aa19c4f/sqlalchemy-2.0.47-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ed48a1701d24dff3bb49a5bce94d6bc84cbe33d98af2aa2d3cdcce3dea1709ec", size = 3279648, upload-time = "2026-02-24T17:18:07.038Z" }, + { url = "https://files.pythonhosted.org/packages/bb/c8/3d07e7c73928dc59a0bed40961ca4e313e797bce650b088e8d5fdd3ad939/sqlalchemy-2.0.47-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4f3178c920ad98158f0b6309382194df04b14808fa6052ae07099fdde29d5602", size = 3314695, upload-time = "2026-02-24T17:27:20.93Z" }, + { url = "https://files.pythonhosted.org/packages/6b/d2/ed32b1611c1e19fdb028eee1adc5a9aa138c2952d09ae11f1670170f80ae/sqlalchemy-2.0.47-cp312-cp312-win32.whl", hash = "sha256:b9c11ac9934dd59ece9619fe42780a08abe2faab7b0543bb00d5eabea4f421b9", size = 2115502, upload-time = "2026-02-24T17:22:52.546Z" }, + { url = "https://files.pythonhosted.org/packages/fd/52/9de590356a4dd8e9ef5a881dbba64b2bbc4cbc71bf02bc68e775fb9b1899/sqlalchemy-2.0.47-cp312-cp312-win_amd64.whl", hash = "sha256:db43b72cf8274a99e089755c9c1e0b947159b71adbc2c83c3de2e38d5d607acb", size = 2142435, upload-time = "2026-02-24T17:22:54.268Z" }, + { url = "https://files.pythonhosted.org/packages/4a/e5/0af64ce7d8f60ec5328c10084e2f449e7912a9b8bdbefdcfb44454a25f49/sqlalchemy-2.0.47-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:456a135b790da5d3c6b53d0ef71ac7b7d280b7f41eb0c438986352bf03ca7143", size = 2152551, upload-time = "2026-02-24T17:05:47.675Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/79/746b8d15f6940e2ac469ce22d7aa5b1124b1ab820bad9b046eb3000c88a6/sqlalchemy-2.0.47-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09a2f7698e44b3135433387da5d8846cf7cc7c10e5425af7c05fee609df978b6", size = 3278782, upload-time = "2026-02-24T17:18:10.012Z" }, + { url = "https://files.pythonhosted.org/packages/91/b1/bd793ddb34345d1ed43b13ab2d88c95d7d4eb2e28f5b5a99128b9cc2bca2/sqlalchemy-2.0.47-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0bbc72e6a177c78d724f9106aaddc0d26a2ada89c6332b5935414eccf04cbd5", size = 3295155, upload-time = "2026-02-24T17:27:22.827Z" }, + { url = "https://files.pythonhosted.org/packages/97/84/7213def33f94e5ca6f5718d259bc9f29de0363134648425aa218d4356b23/sqlalchemy-2.0.47-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:75460456b043b78b6006e41bdf5b86747ee42eafaf7fffa3b24a6e9a456a2092", size = 3226834, upload-time = "2026-02-24T17:18:11.465Z" }, + { url = "https://files.pythonhosted.org/packages/ef/06/456810204f4dc29b5f025b1b0a03b4bd6b600ebf3c1040aebd90a257fa33/sqlalchemy-2.0.47-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5d9adaa616c3bc7d80f9ded57cd84b51d6617cad6a5456621d858c9f23aaee01", size = 3265001, upload-time = "2026-02-24T17:27:24.813Z" }, + { url = "https://files.pythonhosted.org/packages/fb/20/df3920a4b2217dbd7390a5bd277c1902e0393f42baaf49f49b3c935e7328/sqlalchemy-2.0.47-cp313-cp313-win32.whl", hash = "sha256:76e09f974382a496a5ed985db9343628b1cb1ac911f27342e4cc46a8bac10476", size = 2113647, upload-time = "2026-02-24T17:22:55.747Z" }, + { url = "https://files.pythonhosted.org/packages/46/06/7873ddf69918efbfabd7211829f4bd8019739d0a719253112d305d3ba51d/sqlalchemy-2.0.47-cp313-cp313-win_amd64.whl", hash = "sha256:0664089b0bf6724a0bfb49a0cf4d4da24868a0a5c8e937cd7db356d5dcdf2c66", size = 2139425, upload-time = "2026-02-24T17:22:57.033Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/fa/61ad9731370c90ac7ea5bf8f5eaa12c48bb4beec41c0fa0360becf4ac10d/sqlalchemy-2.0.47-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed0c967c701ae13da98eb220f9ddab3044ab63504c1ba24ad6a59b26826ad003", size = 3558809, upload-time = "2026-02-24T17:12:15.232Z" }, + { url = "https://files.pythonhosted.org/packages/33/d5/221fac96f0529391fe374875633804c866f2b21a9c6d3a6ca57d9c12cfd7/sqlalchemy-2.0.47-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d3537943a61fd25b241e976426a0c6814434b93cf9b09d39e8e78f3c9eb9a487", size = 3525480, upload-time = "2026-02-24T17:27:59.602Z" }, + { url = "https://files.pythonhosted.org/packages/ec/55/8247d53998c3673e4a8d1958eba75c6f5cc3b39082029d400bb1f2a911ae/sqlalchemy-2.0.47-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:57f7e336a64a0dba686c66392d46b9bc7af2c57d55ce6dc1697b4ef32b043ceb", size = 3466569, upload-time = "2026-02-24T17:12:16.94Z" }, + { url = "https://files.pythonhosted.org/packages/6b/b5/c1f0eea1bac6790845f71420a7fe2f2a0566203aa57543117d4af3b77d1c/sqlalchemy-2.0.47-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dff735a621858680217cb5142b779bad40ef7322ddbb7c12062190db6879772e", size = 3475770, upload-time = "2026-02-24T17:28:02.034Z" }, + { url = "https://files.pythonhosted.org/packages/c5/ed/2f43f92474ea0c43c204657dc47d9d002cd738b96ca2af8e6d29a9b5e42d/sqlalchemy-2.0.47-cp313-cp313t-win32.whl", hash = "sha256:3893dc096bb3cca9608ea3487372ffcea3ae9b162f40e4d3c51dd49db1d1b2dc", size = 2141300, upload-time = "2026-02-24T17:14:37.024Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a9/8b73f9f1695b6e92f7aaf1711135a1e3bbeb78bca9eded35cb79180d3c6d/sqlalchemy-2.0.47-cp313-cp313t-win_amd64.whl", hash = "sha256:b5103427466f4b3e61f04833ae01f9a914b1280a2a8bcde3a9d7ab11f3755b42", size = 2173053, upload-time = "2026-02-24T17:14:38.688Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/30/98243209aae58ed80e090ea988d5182244ca7ab3ff59e6d850c3dfc7651e/sqlalchemy-2.0.47-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b03010a5a5dfe71676bc83f2473ebe082478e32d77e6f082c8fe15a31c3b42a6", size = 2154355, upload-time = "2026-02-24T17:05:48.959Z" }, + { url = "https://files.pythonhosted.org/packages/ab/62/12ca6ea92055fe486d6558a2a4efe93e194ff597463849c01f88e5adb99d/sqlalchemy-2.0.47-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f8e3371aa9024520883a415a09cc20c33cfd3eeccf9e0f4f4c367f940b9cbd44", size = 3274486, upload-time = "2026-02-24T17:18:13.659Z" }, + { url = "https://files.pythonhosted.org/packages/97/88/7dfbdeaa8d42b1584e65d6cc713e9d33b6fa563e0d546d5cb87e545bb0e5/sqlalchemy-2.0.47-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9449f747e50d518c6e1b40cc379e48bfc796453c47b15e627ea901c201e48a6", size = 3279481, upload-time = "2026-02-24T17:27:26.491Z" }, + { url = "https://files.pythonhosted.org/packages/d0/b7/75e1c1970616a9dd64a8a6fd788248da2ddaf81c95f4875f2a1e8aee4128/sqlalchemy-2.0.47-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:21410f60d5cac1d6bfe360e05bd91b179be4fa0aa6eea6be46054971d277608f", size = 3224269, upload-time = "2026-02-24T17:18:15.078Z" }, + { url = "https://files.pythonhosted.org/packages/31/ac/eec1a13b891df9a8bc203334caf6e6aac60b02f61b018ef3b4124b8c4120/sqlalchemy-2.0.47-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:819841dd5bb4324c284c09e2874cf96fe6338bfb57a64548d9b81a4e39c9871f", size = 3246262, upload-time = "2026-02-24T17:27:27.986Z" }, + { url = "https://files.pythonhosted.org/packages/c9/b0/661b0245b06421058610da39f8ceb34abcc90b49f90f256380968d761dbe/sqlalchemy-2.0.47-cp314-cp314-win32.whl", hash = "sha256:e255ee44821a7ef45649c43064cf94e74f81f61b4df70547304b97a351e9b7db", size = 2116528, upload-time = "2026-02-24T17:22:59.363Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/ef/1035a90d899e61810791c052004958be622a2cf3eb3df71c3fe20778c5d0/sqlalchemy-2.0.47-cp314-cp314-win_amd64.whl", hash = "sha256:209467ff73ea1518fe1a5aaed9ba75bb9e33b2666e2553af9ccd13387bf192cb", size = 2142181, upload-time = "2026-02-24T17:23:01.001Z" }, + { url = "https://files.pythonhosted.org/packages/76/bb/17a1dd09cbba91258218ceb582225f14b5364d2683f9f5a274f72f2d764f/sqlalchemy-2.0.47-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e78fd9186946afaa287f8a1fe147ead06e5d566b08c0afcb601226e9c7322a64", size = 3563477, upload-time = "2026-02-24T17:12:18.46Z" }, + { url = "https://files.pythonhosted.org/packages/66/8f/1a03d24c40cc321ef2f2231f05420d140bb06a84f7047eaa7eaa21d230ba/sqlalchemy-2.0.47-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5740e2f31b5987ed9619d6912ae5b750c03637f2078850da3002934c9532f172", size = 3528568, upload-time = "2026-02-24T17:28:03.732Z" }, + { url = "https://files.pythonhosted.org/packages/fd/53/d56a213055d6b038a5384f0db5ece7343334aca230ff3f0fa1561106f22c/sqlalchemy-2.0.47-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:fb9ac00d03de93acb210e8ec7243fefe3e012515bf5fd2f0898c8dff38bc77a4", size = 3472284, upload-time = "2026-02-24T17:12:20.319Z" }, + { url = "https://files.pythonhosted.org/packages/ff/19/c235d81b9cfdd6130bf63143b7bade0dc4afa46c4b634d5d6b2a96bea233/sqlalchemy-2.0.47-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c72a0b9eb2672d70d112cb149fbaf172d466bc691014c496aaac594f1988e706", size = 3478410, upload-time = "2026-02-24T17:28:05.892Z" }, + { url = "https://files.pythonhosted.org/packages/0e/db/cafdeca5ecdaa3bb0811ba5449501da677ce0d83be8d05c5822da72d2e86/sqlalchemy-2.0.47-cp314-cp314t-win32.whl", hash = "sha256:c200db1128d72a71dc3c31c24b42eb9fd85b2b3e5a3c9ba1e751c11ac31250ff", size = 2147164, upload-time = "2026-02-24T17:14:40.783Z" }, + { url = 
"https://files.pythonhosted.org/packages/fc/5e/ff41a010e9e0f76418b02ad352060a4341bb15f0af66cedc924ab376c7c6/sqlalchemy-2.0.47-cp314-cp314t-win_amd64.whl", hash = "sha256:669837759b84e575407355dcff912835892058aea9b80bd1cb76d6a151cf37f7", size = 2182154, upload-time = "2026-02-24T17:14:43.205Z" }, + { url = "https://files.pythonhosted.org/packages/15/9f/7c378406b592fcf1fc157248607b495a40e3202ba4a6f1372a2ba6447717/sqlalchemy-2.0.47-py3-none-any.whl", hash = "sha256:e2647043599297a1ef10e720cf310846b7f31b6c841fee093d2b09d81215eb93", size = 1940159, upload-time = "2026-02-24T17:15:07.158Z" }, ] [[package]] name = "sse-starlette" -version = "3.2.0" +version = "3.3.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "starlette" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/8d/00d280c03ffd39aaee0e86ec81e2d3b9253036a0f93f51d10503adef0e65/sse_starlette-3.2.0.tar.gz", hash = "sha256:8127594edfb51abe44eac9c49e59b0b01f1039d0c7461c6fd91d4e03b70da422", size = 27253, upload-time = "2026-01-17T13:11:05.62Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/9f/c3695c2d2d4ef70072c3a06992850498b01c6bc9be531950813716b426fa/sse_starlette-3.3.2.tar.gz", hash = "sha256:678fca55a1945c734d8472a6cad186a55ab02840b4f6786f5ee8770970579dcd", size = 32326, upload-time = "2026-02-28T11:24:34.36Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/96/7f/832f015020844a8b8f7a9cbc103dd76ba8e3875004c41e08440ea3a2b41a/sse_starlette-3.2.0-py3-none-any.whl", hash = "sha256:5876954bd51920fc2cd51baee47a080eb88a37b5b784e615abb0b283f801cdbf", size = 12763, upload-time = "2026-01-17T13:11:03.775Z" }, + { url = "https://files.pythonhosted.org/packages/61/28/8cb142d3fe80c4a2d8af54ca0b003f47ce0ba920974e7990fa6e016402d1/sse_starlette-3.3.2-py3-none-any.whl", hash = "sha256:5c3ea3dad425c601236726af2f27689b74494643f57017cafcb6f8c9acfbb862", size = 14270, upload-time = "2026-02-28T11:24:32.984Z" }, ] [[package]] @@ -6994,7 
+7115,7 @@ wheels = [ [[package]] name = "strands-agents" -version = "1.27.0" +version = "1.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "boto3" }, @@ -7009,9 +7130,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "watchdog" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b9/54/bf0910a1c40feacaedcf5d30840be990eabd09eff5375fa40525ba530c8d/strands_agents-1.27.0.tar.gz", hash = "sha256:84d0b670e534d7c281104a22035c10de8d43e9ad8ee589bde16f54a8387b2c56", size = 712878, upload-time = "2026-02-19T17:18:23.327Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/27/9c1c114a83844f9e27fe0312bfdad27c753b922f123512669997f8af47e3/strands_agents-1.28.0.tar.gz", hash = "sha256:0372d8f75d694f3230b0035867455ef31c74f6d9c708985e41f646a1a0b29f7e", size = 717116, upload-time = "2026-02-25T19:36:46.959Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/ca/d5c269f83929bdc753dce3c6091a1671e50268769b0ace009264424bf165/strands_agents-1.27.0-py3-none-any.whl", hash = "sha256:d9012515a7b4f324a600cacc539e837a51b3f7fe21da7efe1764186ade3be498", size = 351988, upload-time = "2026-02-19T17:18:19Z" }, + { url = "https://files.pythonhosted.org/packages/b3/98/f4f87500251f1cab2bd9a0d271852d6d8796635ea8d287b5c51a12316d58/strands_agents-1.28.0-py3-none-any.whl", hash = "sha256:e4c238811949b4f8d31ea9df03a74a57afa5f1728a23bf1ddbf8703f34addc6b", size = 355636, upload-time = "2026-02-25T19:36:45.075Z" }, ] [[package]] @@ -7116,7 +7237,7 @@ wheels = [ [[package]] name = "timm" -version = "1.0.24" +version = "1.0.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -7125,9 +7246,9 @@ dependencies = [ { name = "torch" }, { name = "torchvision" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f4/9d/0ea45640be447445c8664ce2b10c74f763b0b0b9ed11620d41a4d4baa10c/timm-1.0.24.tar.gz", hash = 
"sha256:c7b909f43fe2ef8fe62c505e270cd4f1af230dfbc37f2ee93e3608492b9d9a40", size = 2412239, upload-time = "2026-01-07T00:26:17.541Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/2c/593109822fe735e637382aca6640c1102c19797f7791f1fd1dab2d6c3cb1/timm-1.0.25.tar.gz", hash = "sha256:47f59fc2754725735cc81bb83bcbfce5bec4ebd5d4bb9e69da57daa92fcfa768", size = 2414743, upload-time = "2026-02-23T16:49:00.137Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/dd/c1f5b0890f7b5db661bde0864b41cb0275be76851047e5f7e085fe0b455a/timm-1.0.24-py3-none-any.whl", hash = "sha256:8301ac783410c6ad72c73c49326af6d71a9e4d1558238552796e825c2464913f", size = 2560563, upload-time = "2026-01-07T00:26:13.956Z" }, + { url = "https://files.pythonhosted.org/packages/ef/50/de09f69a74278a16f08f1d562047a2d6713783765ee3c6971881a2b21a3f/timm-1.0.25-py3-none-any.whl", hash = "sha256:bef7f61dd717cb2dbbb7e326f143e13d660a47ecbd84116e6fe33732bed5c484", size = 2565837, upload-time = "2026-02-23T16:48:58.324Z" }, ] [[package]] @@ -7332,7 +7453,8 @@ name = "torchvision" version = "0.25.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "pillow" }, { name = "torch" }, ] @@ -7400,7 +7522,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "huggingface-hub" }, - { name = "numpy" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, @@ -7430,7 
+7553,7 @@ wheels = [ [[package]] name = "typer" -version = "0.24.0" +version = "0.24.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -7438,18 +7561,18 @@ dependencies = [ { name = "rich" }, { name = "shellingham" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5a/b6/3e681d3b6bb22647509bdbfdd18055d5adc0dce5c5585359fa46ff805fdc/typer-0.24.0.tar.gz", hash = "sha256:f9373dc4eff901350694f519f783c29b6d7a110fc0dcc11b1d7e353b85ca6504", size = 118380, upload-time = "2026-02-16T22:08:48.496Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/d0/4da85c2a45054bb661993c93524138ace4956cb075a7ae0c9d1deadc331b/typer-0.24.0-py3-none-any.whl", hash = "sha256:5fc435a9c8356f6160ed6e85a6301fdd6e3d8b2851da502050d1f92c5e9eddc8", size = 56441, upload-time = "2026-02-16T22:08:47.535Z" }, + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, ] [[package]] name = "types-protobuf" -version = "6.32.1.20251210" +version = "6.32.1.20260221" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c2/59/c743a842911887cd96d56aa8936522b0cd5f7a7f228c96e81b59fced45be/types_protobuf-6.32.1.20251210.tar.gz", hash = "sha256:c698bb3f020274b1a2798ae09dc773728ce3f75209a35187bd11916ebfde6763", size = 63900, upload-time = "2025-12-10T03:14:25.451Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/5f/e2/9aa4a3b2469508bd7b4e2ae11cbedaf419222a09a1b94daffcd5efca4023/types_protobuf-6.32.1.20260221.tar.gz", hash = "sha256:6d5fb060a616bfb076cbb61b4b3c3969f5fc8bec5810f9a2f7e648ee5cbcbf6e", size = 64408, upload-time = "2026-02-21T03:55:13.916Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/43/58e75bac4219cbafee83179505ff44cae3153ec279be0e30583a73b8f108/types_protobuf-6.32.1.20251210-py3-none-any.whl", hash = "sha256:2641f78f3696822a048cfb8d0ff42ccd85c25f12f871fbebe86da63793692140", size = 77921, upload-time = "2025-12-10T03:14:24.477Z" }, + { url = "https://files.pythonhosted.org/packages/2e/e8/1fd38926f9cf031188fbc5a96694203ea6f24b0e34bd64a225ec6f6291ba/types_protobuf-6.32.1.20260221-py3-none-any.whl", hash = "sha256:da7cdd947975964a93c30bfbcc2c6841ee646b318d3816b033adc2c4eb6448e4", size = 77956, upload-time = "2026-02-21T03:55:12.894Z" }, ] [[package]] @@ -7684,17 +7807,18 @@ wheels = [ [[package]] name = "virtualenv" -version = "20.38.0" +version = "21.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, { name = "filelock" }, { name = "platformdirs" }, + { name = "python-discovery" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d2/03/a94d404ca09a89a7301a7008467aed525d4cdeb9186d262154dd23208709/virtualenv-20.38.0.tar.gz", hash = "sha256:94f39b1abaea5185bf7ea5a46702b56f1d0c9aa2f41a6c2b8b0af4ddc74c10a7", size = 5864558, upload-time = "2026-02-19T07:48:02.385Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/c9/18d4b36606d6091844daa3bd93cf7dc78e6f5da21d9f21d06c221104b684/virtualenv-21.1.0.tar.gz", hash = "sha256:1990a0188c8f16b6b9cf65c9183049007375b26aad415514d377ccacf1e4fb44", size = 5840471, upload-time = "2026-02-27T08:49:29.702Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/42/d7/394801755d4c8684b655d35c665aea7836ec68320304f62ab3c94395b442/virtualenv-20.38.0-py3-none-any.whl", hash = "sha256:d6e78e5889de3a4742df2d3d44e779366325a90cf356f15621fddace82431794", size = 5837778, upload-time = "2026-02-19T07:47:59.778Z" }, + { url = "https://files.pythonhosted.org/packages/78/55/896b06bf93a49bec0f4ae2a6f1ed12bd05c8860744ac3a70eda041064e4d/virtualenv-21.1.0-py3-none-any.whl", hash = "sha256:164f5e14c5587d170cf98e60378eb91ea35bf037be313811905d3a24ea33cc07", size = 5825072, upload-time = "2026-02-27T08:49:27.516Z" }, ] [[package]]