From 491d298c10307fd724ca1a072bbda57a453df278 Mon Sep 17 00:00:00 2001 From: aconchillo <951761+aconchillo@users.noreply.github.com> Date: Wed, 17 Dec 2025 19:07:43 +0000 Subject: [PATCH] Update changelog for version 0.0.98 --- CHANGELOG.md | 234 +++++++++++++++++++++++++++++++++++ changelog/3085.added.md | 2 - changelog/3175.added.md | 29 ----- changelog/3189.added.md | 6 - changelog/3189.changed.md | 3 - changelog/3206.changed.md | 3 - changelog/3207.added.md | 1 - changelog/3208.added.md | 1 - changelog/3210.added.md | 2 - changelog/3210.changed.md | 10 -- changelog/3212.added.md | 6 - changelog/3212.changed.md | 1 - changelog/3212.fixed.md | 2 - changelog/3214.changed.md | 1 - changelog/3219.deprecated.md | 1 - changelog/3224.fixed.2.md | 3 - changelog/3224.fixed.md | 3 - changelog/3226.fixed.md | 2 - changelog/3227.added.md | 4 - changelog/3227.removed.md | 1 - changelog/3228.changed.md | 4 - changelog/3230.fixed.md | 1 - changelog/3231.changed.md | 3 - changelog/3234.fixed.md | 1 - changelog/3235.added.md | 6 - changelog/3236.added.md | 1 - changelog/3236.changed.md | 6 - changelog/3236.fixed.md | 1 - changelog/3239.added.md | 1 - changelog/3239.changed.md | 8 -- changelog/3240.changed.md | 2 - changelog/3240.fixed.md | 2 - changelog/3245.added.md | 1 - changelog/3247.fixed.md | 1 - changelog/3248.added.md | 1 - changelog/3248.changed.md | 3 - changelog/3252.added.md | 2 - changelog/3252.changed.md | 2 - 38 files changed, 234 insertions(+), 127 deletions(-) delete mode 100644 changelog/3085.added.md delete mode 100644 changelog/3175.added.md delete mode 100644 changelog/3189.added.md delete mode 100644 changelog/3189.changed.md delete mode 100644 changelog/3206.changed.md delete mode 100644 changelog/3207.added.md delete mode 100644 changelog/3208.added.md delete mode 100644 changelog/3210.added.md delete mode 100644 changelog/3210.changed.md delete mode 100644 changelog/3212.added.md delete mode 100644 changelog/3212.changed.md delete mode 100644 
changelog/3212.fixed.md delete mode 100644 changelog/3214.changed.md delete mode 100644 changelog/3219.deprecated.md delete mode 100644 changelog/3224.fixed.2.md delete mode 100644 changelog/3224.fixed.md delete mode 100644 changelog/3226.fixed.md delete mode 100644 changelog/3227.added.md delete mode 100644 changelog/3227.removed.md delete mode 100644 changelog/3228.changed.md delete mode 100644 changelog/3230.fixed.md delete mode 100644 changelog/3231.changed.md delete mode 100644 changelog/3234.fixed.md delete mode 100644 changelog/3235.added.md delete mode 100644 changelog/3236.added.md delete mode 100644 changelog/3236.changed.md delete mode 100644 changelog/3236.fixed.md delete mode 100644 changelog/3239.added.md delete mode 100644 changelog/3239.changed.md delete mode 100644 changelog/3240.changed.md delete mode 100644 changelog/3240.fixed.md delete mode 100644 changelog/3245.added.md delete mode 100644 changelog/3247.fixed.md delete mode 100644 changelog/3248.added.md delete mode 100644 changelog/3248.changed.md delete mode 100644 changelog/3252.added.md delete mode 100644 changelog/3252.changed.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 11bafc5b3..63a2caa66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,240 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 +## [0.0.98] - 2025-12-17 + +### Added + +- Added `RimeNonJsonTTSService` which supports non-JSON streaming mode. This + new class supports websocket streaming for the Arcana model. + (PR [#3085](https://github.com/pipecat-ai/pipecat/pull/3085)) + +- Added additional functionality related to "thinking", for Google and + Anthropic LLMs. + + 1. New typed parameters for Google and Anthropic LLMs that control the + models' thinking behavior (like how much thinking to do, and whether to + output thoughts or thought summaries): + - `AnthropicLLMService.ThinkingConfig` + - `GoogleLLMService.ThinkingConfig` + 2. 
New frames for representing thoughts output by LLMs: + - `LLMThoughtStartFrame` + - `LLMThoughtTextFrame` + - `LLMThoughtEndFrame` + 3. A generic mechanism for recording LLM thoughts to context, used + specifically to support Anthropic, whose thought signatures are expected + to appear alongside the text of the thoughts within assistant context + messages. See: + - `LLMThoughtEndFrame.signature` + - `LLMAssistantAggregator` handling of the above field + - `AnthropicLLMAdapter` handling of `"thought"` context messages + 4. Google-specific logic for inserting thought signatures into the context, + to help maintain thinking continuity in a chain of LLM calls. See: + - `GoogleLLMService` sending `LLMMessagesAppendFrame`s to add + LLM-specific + `"thought_signature"` messages to context + - `GeminiLLMAdapter` handling of `"thought_signature"` messages + 5. An expansion of `TranscriptProcessor` to process LLM thoughts in + addition to user and assistant utterances. See: + - `TranscriptProcessor(process_thoughts=True)` (defaults to `False`) + - `ThoughtTranscriptionMessage`, which is now also emitted with the + `"on_transcript_update"` event + (PR [#3175](https://github.com/pipecat-ai/pipecat/pull/3175)) + +- Data and control frames can now be marked as non-interruptible by using the + `UninterruptibleFrame` mixin. Frames marked as `UninterruptibleFrame` will + not be interrupted during processing, and any queued frames of this type will + be retained in the internal queues. This is useful when you need ordered + frames (data or control) that should not be discarded or cancelled due to + interruptions. + (PR [#3189](https://github.com/pipecat-ai/pipecat/pull/3189)) + +- Added `on_conversation_detected` event to `VoicemailDetector`. + (PR [#3207](https://github.com/pipecat-ai/pipecat/pull/3207)) + +- Added `x-goog-api-client` header with Pipecat's version to all Google + services' requests. 
+ (PR [#3208](https://github.com/pipecat-ai/pipecat/pull/3208)) + +- Added support for the HeyGen LiveAvatar API (see https://www.liveavatar.com/). + (PR [#3210](https://github.com/pipecat-ai/pipecat/pull/3210)) + +- Added to `AWSNovaSonicLLMService` functionality related to the new (and now + default) Nova 2 Sonic model (`"amazon.nova-2-sonic-v1:0"`): + - Added the `endpointing_sensitivity` parameter to control how quickly the + model decides the user has stopped speaking. + - Made the assistant-response-trigger hack a no-op. It's only needed for + the older Nova Sonic model. + (PR [#3212](https://github.com/pipecat-ai/pipecat/pull/3212)) + +- [Ultravox Realtime](https://docs.ultravox.ai) is now a supported + speech-to-speech service. + - Added `UltravoxRealtimeLLMService` for the integration. + - Added `49-ultravox-realtime.py` example (with tool calling). + (PR [#3227](https://github.com/pipecat-ai/pipecat/pull/3227)) + +- Added Daily PSTN dial-in support to the development runner with `--dialin` + flag. This includes: + + - `/daily-dialin-webhook` endpoint that handles incoming Daily PSTN webhooks + - Automatic Daily room creation with SIP configuration + - `DialinSettings` and `DailyDialinRequest` types in `pipecat.runner.types` + for type-safe dial-in data + - The runner now mimics Pipecat Cloud's dial-in webhook handling for local + development + (PR [#3235](https://github.com/pipecat-ai/pipecat/pull/3235)) + +- Add Gladia session id to logs for `GladiaSTTService`. + (PR [#3236](https://github.com/pipecat-ai/pipecat/pull/3236)) + +- Added `InworldHttpTTSService` which uses Inworld's HTTP based TTS service in + either streaming or non-streaming mode. Note: This class was previously named + `InworldTTSService`. + (PR [#3239](https://github.com/pipecat-ai/pipecat/pull/3239)) + +- Added `language_hints_strict` parameter to `SonioxSTTService` to strictly + enforce language hints. This ensures that transcription occurs in the + specified language. 
+ (PR [#3245](https://github.com/pipecat-ai/pipecat/pull/3245)) + +- Added Pipecat library version info to the `about` field in the `bot-ready` + RTVI message. + (PR [#3248](https://github.com/pipecat-ai/pipecat/pull/3248)) + +- Added `VisionFullResponseStartFrame`, `VisionFullResponseEndFrame` and + `VisionTextFrame`. These are used by vision services similar to LLM + services. + (PR [#3252](https://github.com/pipecat-ai/pipecat/pull/3252)) + +### Changed + +- `FunctionCallInProgressFrame` and `FunctionCallResultFrame` have changed from + system frames to a control frame and a data frame, respectively, and are + now both marked as `UninterruptibleFrame`. + (PR [#3189](https://github.com/pipecat-ai/pipecat/pull/3189)) + +- `UserBotLatencyLogObserver` now uses `VADUserStartedSpeakingFrame` and + `VADUserStoppedSpeakingFrame` to determine latency from user stopped speaking + to bot started speaking. + (PR [#3206](https://github.com/pipecat-ai/pipecat/pull/3206)) + +- Updated `HeyGenVideoService` and `HeyGenTransport` to support both HeyGen + APIs (Interactive Avatar and Live Avatar). + Using them is as simple as specifying the `service_type` when creating the + `HeyGenVideoService` and the `HeyGenTransport`: + ```python + heyGen = HeyGenVideoService( + api_key=os.getenv("HEYGEN_LIVE_AVATAR_API_KEY"), + service_type=ServiceType.LIVE_AVATAR, + session=session, + ) + ``` + (PR [#3210](https://github.com/pipecat-ai/pipecat/pull/3210)) + +- Made `"amazon.nova-2-sonic-v1:0"` the new default model for + `AWSNovaSonicLLMService`. + (PR [#3212](https://github.com/pipecat-ai/pipecat/pull/3212)) + +- Updated the `run_inference` methods in the LLM service classes + (`AnthropicLLMService`, `AWSBedrockLLMService`, `GoogleLLMService`, and + `OpenAILLMService` and its base classes) to use the provided LLM + configuration parameters. 
+ (PR [#3214](https://github.com/pipecat-ai/pipecat/pull/3214)) + +- Updated default models for: + - `GeminiLiveLLMService` to `gemini-2.5-flash-native-audio-preview-12-2025`. + - `GeminiLiveVertexLLMService` to `gemini-live-2.5-flash-native-audio`. + (PR [#3228](https://github.com/pipecat-ai/pipecat/pull/3228)) + +- Changed the `reason` field in `EndFrame`, `CancelFrame`, `EndTaskFrame`, and + `CancelTaskFrame` from `str` to `Any` to indicate that it can hold values + other than strings. + (PR [#3231](https://github.com/pipecat-ai/pipecat/pull/3231)) + +- Updated websocket STT services to use the `WebsocketSTTService` base class. + This base class manages the websocket connection and handles reconnects. + Updated services: + - `AssemblyAISTTService` + - `AWSTranscribeSTTService` + - `GladiaSTTService` + - `SonioxSTTService` + (PR [#3236](https://github.com/pipecat-ai/pipecat/pull/3236)) + +- Changed Inworld's TTS service implementations: + - Previously, the HTTP implementation was named `InworldTTSService`. That + has been moved to `InworldHttpTTSService`. This service now supports + word-timestamp alignment data in both streaming and non-streaming modes. + - Updated the `InworldTTSService` class to use Inworld's Websocket API. + This class now has support for word-timestamp alignment data and tracks + contexts for each user turn. + (PR [#3239](https://github.com/pipecat-ai/pipecat/pull/3239)) + +- ⚠️ Breaking change: `WordTTSService.start_word_timestamps()` and + `WordTTSService.reset_word_timestamps()` are now async. + (PR [#3240](https://github.com/pipecat-ai/pipecat/pull/3240)) + +- Updated the current RTVI version to 1.1.0 to reflect recent additions and + deprecations. 
+ - New RTVI Messages: `send-text` and `bot-output` + - Deprecated Messages: `append-to-context` and `bot-transcription` + (PR [#3248](https://github.com/pipecat-ai/pipecat/pull/3248)) + +- `MoondreamService` now pushes `VisionFullResponseStartFrame`, + `VisionFullResponseEndFrame` and `VisionTextFrame`. + (PR [#3252](https://github.com/pipecat-ai/pipecat/pull/3252)) + +### Deprecated + +- `FalSmartTurnAnalyzer` and `LocalSmartTurnAnalyzer` are deprecated and will + be removed in a future version. Use `LocalSmartTurnAnalyzerV3` instead. + (PR [#3219](https://github.com/pipecat-ai/pipecat/pull/3219)) + +### Removed + +- Removed the deprecated VLLM-based open source Ultravox STT service. + (PR [#3227](https://github.com/pipecat-ai/pipecat/pull/3227)) + +### Fixed + +- Fixed a bug in `AWSNovaSonicLLMService` where we would mishandle cancelled + tool calls in the context, resulting in errors. + (PR [#3212](https://github.com/pipecat-ai/pipecat/pull/3212)) + +- Better support conversation history with Gemini 2.5 Flash Image (model + "gemini-2.5-flash-image"). Prior to this fix, the model had no memory of + previous images it had generated, so it wouldn't be able to iterate on + them. + (PR [#3224](https://github.com/pipecat-ai/pipecat/pull/3224)) + +- Support conversations with Gemini 3 Pro Image (model + "gemini-3-pro-image-preview"). Prior to this fix, after the model generated + an image the conversation would not be able to progress. + (PR [#3224](https://github.com/pipecat-ai/pipecat/pull/3224)) + +- Fixed an issue where `ElevenLabsHttpTTSService` was not updating + voice settings when receiving a `TTSUpdateSettingsFrame`. + (PR [#3226](https://github.com/pipecat-ai/pipecat/pull/3226)) + +- Fixed the return type for `SmallWebRTCRequestHandler.handle_web_request()` + function. 
+ (PR [#3230](https://github.com/pipecat-ai/pipecat/pull/3230)) + +- Fix a bug in LLM context audio content handling + (PR [#3234](https://github.com/pipecat-ai/pipecat/pull/3234)) + +- In `GladiaSTTService`, reset the `_bytes_sent` counter on connecting the + websocket. This avoids unnecessary audio buffer trimming. + (PR [#3236](https://github.com/pipecat-ai/pipecat/pull/3236)) + +- Fixed a TTS service word-timestamp issue that could cause generated + `TTSTextFrame` instances to have an incorrect pts (`pts = -1`). + (PR [#3240](https://github.com/pipecat-ai/pipecat/pull/3240)) + +- Fixed an issue in `SimpleTextAggregator` where spaces were not being stripped + before returning the aggregation. This resulted in an extra space for TTS + services that don't support word-timestamp alignment data. + (PR [#3247](https://github.com/pipecat-ai/pipecat/pull/3247)) + ## [0.0.97] - 2025-12-05 ### Added diff --git a/changelog/3085.added.md b/changelog/3085.added.md deleted file mode 100644 index c1effe44b..000000000 --- a/changelog/3085.added.md +++ /dev/null @@ -1,2 +0,0 @@ -- Added `RimeNonJsonTTSService` which supports non-JSON streaming mode. This new class supports websocket streaming for the Arcana model. - diff --git a/changelog/3175.added.md b/changelog/3175.added.md deleted file mode 100644 index 60557f84e..000000000 --- a/changelog/3175.added.md +++ /dev/null @@ -1,29 +0,0 @@ -- Added additional functionality related to "thinking", for Google and Anthropic - LLMs. - - 1. New typed parameters for Google and Anthropic LLMs that control the - models' thinking behavior (like how much thinking to do, and whether to - output thoughts or thought summaries): - - `AnthropicLLMService.ThinkingConfig` - - `GoogleLLMService.ThinkingConfig` - 2. New frames for representing thoughts output by LLMs: - - `LLMThoughtStartFrame` - - `LLMThoughtTextFrame` - - `LLMThoughtEndFrame` - 3. 
A generic mechanism for recording LLM thoughts to context, used - specifically to support Anthropic, whose thought signatures are expected to - appear alongside the text of the thoughts within assistant context - messages. See: - - `LLMThoughtEndFrame.signature` - - `LLMAssistantAggregator` handling of the above field - - `AnthropicLLMAdapter` handling of `"thought"` context messages - 4. Google-specific logic for inserting thought signatures into the context, - to help maintain thinking continuity in a chain of LLM calls. See: - - `GoogleLLMService` sending `LLMMessagesAppendFrame`s to add LLM-specific - `"thought_signature"` messages to context - - `GeminiLLMAdapter` handling of `"thought_signature"` messages - 5. An expansion of `TranscriptProcessor` to process LLM thoughts in addition - to user and assistant utterances. See: - - `TranscriptProcessor(process_thoughts=True)` (defaults to `False`) - - `ThoughtTranscriptionMessage`, which is now also emitted with the - `"on_transcript_update"` event diff --git a/changelog/3189.added.md b/changelog/3189.added.md deleted file mode 100644 index 5dbcb6058..000000000 --- a/changelog/3189.added.md +++ /dev/null @@ -1,6 +0,0 @@ -- Data and control frames can now be marked as non-interruptible by using the - `UninterruptibleFrame` mixin. Frames marked as `UninterruptibleFrame` will not - be interrupted during processing, and any queued frames of this type will be - retained in the internal queues. This is useful when you need ordered frames - (data or control) that should not be discarded or cancelled due to - interruptions. diff --git a/changelog/3189.changed.md b/changelog/3189.changed.md deleted file mode 100644 index f8f24a856..000000000 --- a/changelog/3189.changed.md +++ /dev/null @@ -1,3 +0,0 @@ -- `FunctionCallInProgressFrame` and `FunctionCallResultFrame` have changed from - system frames to a control frame and a data frame, respectively, and are now - both marked as `UninterruptibleFrame`. 
diff --git a/changelog/3206.changed.md b/changelog/3206.changed.md deleted file mode 100644 index 01c3e007a..000000000 --- a/changelog/3206.changed.md +++ /dev/null @@ -1,3 +0,0 @@ -- `UserBotLatencyLogObserver` now uses `VADUserStartedSpeakingFrame` and -`VADUserStoppedSpeakingFrame` to determine latency from user stopped speaking -to bot started speaking. diff --git a/changelog/3207.added.md b/changelog/3207.added.md deleted file mode 100644 index 3b480a561..000000000 --- a/changelog/3207.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `on_conversation_detected` event to `VoicemaiDetector`. diff --git a/changelog/3208.added.md b/changelog/3208.added.md deleted file mode 100644 index 0cda1dafb..000000000 --- a/changelog/3208.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `x-goog-api-client` header with Pipecat's version to all Google services' requests. diff --git a/changelog/3210.added.md b/changelog/3210.added.md deleted file mode 100644 index 7c994438d..000000000 --- a/changelog/3210.added.md +++ /dev/null @@ -1,2 +0,0 @@ -- Added support for the HeyGen LiveAvatar API - (see https://www.liveavatar.com/). \ No newline at end of file diff --git a/changelog/3210.changed.md b/changelog/3210.changed.md deleted file mode 100644 index 17ec79a62..000000000 --- a/changelog/3210.changed.md +++ /dev/null @@ -1,10 +0,0 @@ -- Updated `HeyGenVideoService` and `HeyGenTransport` to support both HeyGen APIs (Interactive Avatar and Live Avatar). 
- Using them is as simple as specifying the `service_type` when creating the `HeyGenVideoService` and the `HeyGenTransport`: - ```python - heyGen = HeyGenVideoService( - api_key=os.getenv("HEYGEN_LIVE_AVATAR_API_KEY"), - service_type=ServiceType.LIVE_AVATAR, - session=session, - ) - ``` - diff --git a/changelog/3212.added.md b/changelog/3212.added.md deleted file mode 100644 index 042f926e3..000000000 --- a/changelog/3212.added.md +++ /dev/null @@ -1,6 +0,0 @@ -- Added to `AWSNovaSonicLLMService` functionality related to the new (and now - default) Nova 2 Sonic model (`"amazon.nova-2-sonic-v1:0"`): - - Added the `endpointing_sensitivity` parameter to control how quickly the - model decides the user has stopped speaking. - - Made the assistant-response-trigger hack a no-op. It's only needed for the - older Nova Sonic model. diff --git a/changelog/3212.changed.md b/changelog/3212.changed.md deleted file mode 100644 index b63fc16ce..000000000 --- a/changelog/3212.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Made `"amazon.nova-2-sonic-v1:0"` the new default model for `AWSNovaSonicLLMService`. diff --git a/changelog/3212.fixed.md b/changelog/3212.fixed.md deleted file mode 100644 index 73b4acac7..000000000 --- a/changelog/3212.fixed.md +++ /dev/null @@ -1,2 +0,0 @@ -- Fixed a bug in `AWSNovaSonicLLMService` where we would mishandle cancelled - tool calls in the context, resulting in errors. diff --git a/changelog/3214.changed.md b/changelog/3214.changed.md deleted file mode 100644 index 203f675d8..000000000 --- a/changelog/3214.changed.md +++ /dev/null @@ -1 +0,0 @@ -- Updated the `run_inference` methods in the LLM service classes (`AnthropicLLMService`, `AWSBedrockLLMService`, `GoogleLLMService`, and `OpenAILLMService` and its base classes) to use the provided LLM configuration parameters. 
diff --git a/changelog/3219.deprecated.md b/changelog/3219.deprecated.md deleted file mode 100644 index 65dc3aa17..000000000 --- a/changelog/3219.deprecated.md +++ /dev/null @@ -1 +0,0 @@ -- `FalSmartTurnAnalyzer` and `LocalSmartTurnAnalyzer` are deprecated and will be removed in a future version. Use `LocalSmartTurnAnalyzerV3` instead. diff --git a/changelog/3224.fixed.2.md b/changelog/3224.fixed.2.md deleted file mode 100644 index ad54a9572..000000000 --- a/changelog/3224.fixed.2.md +++ /dev/null @@ -1,3 +0,0 @@ -- Better support conversation history with Gemini 2.5 Flash Image (model - "gemini-2.5-flash-image"). Prior to this fix, the model had no memory of - previous images it had generated, so it wouldn't be able to iterate on them. diff --git a/changelog/3224.fixed.md b/changelog/3224.fixed.md deleted file mode 100644 index ddae072cf..000000000 --- a/changelog/3224.fixed.md +++ /dev/null @@ -1,3 +0,0 @@ -- Support conversations with Gemini 3 Pro Image (model - "gemini-3-pro-image-preview"). Prior to this fix, after the model generated - an image the conversation would not be able to progress. diff --git a/changelog/3226.fixed.md b/changelog/3226.fixed.md deleted file mode 100644 index e71ec52c2..000000000 --- a/changelog/3226.fixed.md +++ /dev/null @@ -1,2 +0,0 @@ -- Fixed an issue where `ElevenLabsHttpTTSService` was not updating - voice settings when receiving a `TTSUpdateSettingsFrame`. diff --git a/changelog/3227.added.md b/changelog/3227.added.md deleted file mode 100644 index ba197ace0..000000000 --- a/changelog/3227.added.md +++ /dev/null @@ -1,4 +0,0 @@ -- [Ultravox Realtime](https://docs.ultravox.ai) is now a supported speech-to-speech - service. - - Added `UltravoxRealtimeLLMService` for the integration. - - Added `49-ultravox-realtime.py` example (with tool calling). 
diff --git a/changelog/3227.removed.md b/changelog/3227.removed.md deleted file mode 100644 index e54a7725e..000000000 --- a/changelog/3227.removed.md +++ /dev/null @@ -1 +0,0 @@ -- Removed the deprecated VLLM-based open source Ultravox STT service. diff --git a/changelog/3228.changed.md b/changelog/3228.changed.md deleted file mode 100644 index 5444eef28..000000000 --- a/changelog/3228.changed.md +++ /dev/null @@ -1,4 +0,0 @@ -- Updated default models for: - - - `GeminiLiveLLMService` to `gemini-2.5-flash-native-audio-preview-12-2025`. - - `GeminiLiveVertexLLMService` to `gemini-live-2.5-flash-native-audio`. diff --git a/changelog/3230.fixed.md b/changelog/3230.fixed.md deleted file mode 100644 index e42f914e7..000000000 --- a/changelog/3230.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed the return type for `SmallWebRTCRequestHandler.handle_web_request()` function. diff --git a/changelog/3231.changed.md b/changelog/3231.changed.md deleted file mode 100644 index 02de3cf8f..000000000 --- a/changelog/3231.changed.md +++ /dev/null @@ -1,3 +0,0 @@ -- Changed the `reason` field in `EndFrame`, `CancelFrame`, `EndTaskFrame`, and - `CancelTaskFrame` from `str` to `Any` to indicate that it can hold values - other than strings. diff --git a/changelog/3234.fixed.md b/changelog/3234.fixed.md deleted file mode 100644 index 022241279..000000000 --- a/changelog/3234.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fix a bug in LLM context audio content handling \ No newline at end of file diff --git a/changelog/3235.added.md b/changelog/3235.added.md deleted file mode 100644 index ed0eb378e..000000000 --- a/changelog/3235.added.md +++ /dev/null @@ -1,6 +0,0 @@ -- Added Daily PSTN dial-in support to the development runner with `--dialin` flag. 
This includes: - - - `/daily-dialin-webhook` endpoint that handles incoming Daily PSTN webhooks - - Automatic Daily room creation with SIP configuration - - `DialinSettings` and `DailyDialinRequest` types in `pipecat.runner.types` for type-safe dial-in data - - The runner now mimics Pipecat Cloud's dial-in webhook handling for local development diff --git a/changelog/3236.added.md b/changelog/3236.added.md deleted file mode 100644 index ace865739..000000000 --- a/changelog/3236.added.md +++ /dev/null @@ -1 +0,0 @@ -- Add Gladia session id to logs for `GladiaSTTService`. diff --git a/changelog/3236.changed.md b/changelog/3236.changed.md deleted file mode 100644 index 7c5015ead..000000000 --- a/changelog/3236.changed.md +++ /dev/null @@ -1,6 +0,0 @@ -- Updated websocket STT services to use the `WebsocketSTTService` base class. This base class manages the websocket connection and handles reconnects. Updated services: - - - `AssemblyAISTTService` - - `AWSTranscribeSTTService` - - `GladiaSTTService` - - `SonioxSTTService` diff --git a/changelog/3236.fixed.md b/changelog/3236.fixed.md deleted file mode 100644 index 7814e80f8..000000000 --- a/changelog/3236.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- In `GladiaSTTService`, reset the `_bytes_sent` counter on connecting the websocket. This avoids unnecessary audio buffer trimming. diff --git a/changelog/3239.added.md b/changelog/3239.added.md deleted file mode 100644 index f91a6a903..000000000 --- a/changelog/3239.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `InworldHttpTTSService` which uses Inworld's HTTP based TTS service in either streaming or non-streaming mode. Note: This class was previously named `InworldTTSService`. diff --git a/changelog/3239.changed.md b/changelog/3239.changed.md deleted file mode 100644 index 7a72d85a1..000000000 --- a/changelog/3239.changed.md +++ /dev/null @@ -1,8 +0,0 @@ -- Changed Inworld's TTS service implementations: - - - Previously, the HTTP implementation was named `InworldTTSService`. 
That has - been moved to `InworldHttpTTSService`. This service now supports - word-timestamp alignment data in both streaming and non-streaming modes. - - Updated the `InworldTTSService` class to use Inworld's Websocket API. This - class now has support for word-timestamp alignment data and tracks contexts - for each user turn. diff --git a/changelog/3240.changed.md b/changelog/3240.changed.md deleted file mode 100644 index a5ee377c9..000000000 --- a/changelog/3240.changed.md +++ /dev/null @@ -1,2 +0,0 @@ -- ⚠️ Breaking change: `WordTTSService.start_word_timestamps()` and - `WordTTSService.reset_word_timestamps()` are now async. diff --git a/changelog/3240.fixed.md b/changelog/3240.fixed.md deleted file mode 100644 index caff36b55..000000000 --- a/changelog/3240.fixed.md +++ /dev/null @@ -1,2 +0,0 @@ -- Fixed a TTS service word-timestamp issue that could cause generated - `TTSTextFrame` instances to have an incorrect pts (`pts = -1`). diff --git a/changelog/3245.added.md b/changelog/3245.added.md deleted file mode 100644 index b24736ae9..000000000 --- a/changelog/3245.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added `language_hints_strict` parameter to `SonioxSTTService` to strictly enforces language hints. This ensures that transcription occurs in the specified language. diff --git a/changelog/3247.fixed.md b/changelog/3247.fixed.md deleted file mode 100644 index a3b5d80f7..000000000 --- a/changelog/3247.fixed.md +++ /dev/null @@ -1 +0,0 @@ -- Fixed an issue in `SimpleTextAggreagtor` where spaces were not being stripped before returning the aggregation. This resulted in an extra space for TTS services that don't support word-timestamp alignment data. diff --git a/changelog/3248.added.md b/changelog/3248.added.md deleted file mode 100644 index f3ef6d316..000000000 --- a/changelog/3248.added.md +++ /dev/null @@ -1 +0,0 @@ -- Added Pipecat library version info to the `about` field in the `bot-ready` RTVI message. 
diff --git a/changelog/3248.changed.md b/changelog/3248.changed.md deleted file mode 100644 index e2ea6ba32..000000000 --- a/changelog/3248.changed.md +++ /dev/null @@ -1,3 +0,0 @@ -- Updated the current RTVI version to 1.1.0 to reflect recent additions and deprecations. - - New RTVI Messages: `send-text` and `bot-output` - - Deprecated Messages: `append-to-context` and `bot-transcription` \ No newline at end of file diff --git a/changelog/3252.added.md b/changelog/3252.added.md deleted file mode 100644 index c6f85b713..000000000 --- a/changelog/3252.added.md +++ /dev/null @@ -1,2 +0,0 @@ -- Added `VisionFullResponseStartFrame`, `VisionFullResponseEndFrame` and - `VisionTextFrame`. This are used by vision services similar to LLM services. diff --git a/changelog/3252.changed.md b/changelog/3252.changed.md deleted file mode 100644 index 11cdca2ff..000000000 --- a/changelog/3252.changed.md +++ /dev/null @@ -1,2 +0,0 @@ -- `MoondreamService` now pushes `VisionFullResponseStartFrame`, - `VisionFullResponseEndFrame` and `VisionTextFrame`.