diff --git a/CHANGELOG.md b/CHANGELOG.md index 7bf00326d..9badb6c70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `DailyTransport` now supports setting the audio bitrate to improve audio + quality through the `DailyParams.audio_out_bitrate` parameter. The new + default is 96kbps. + +- `DailyTransport` now uses the number of audio output channels (1 or 2) to set + mono or stereo audio when needed. + +- Interruption support has been added to `TwilioFrameSerializer` when using + `FastAPIWebsocketTransport`. + - Added new `LmntTTSService` text-to-speech service. (see https://www.lmnt.com/) @@ -32,6 +42,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 big chunk (i.e. from when the user starts speaking until the user stops speaking) instead of a continous stream. +### Fixed + +- `StartFrame` should be the first frame every processor receives to avoid + situations where things are not initialized (because initialization happens on + `StartFrame`) and other frames come in, resulting in undesired behavior. + +### Performance + +- `obj_id()` and `obj_count()` now use `itertools.count`, avoiding the need for + `threading.Lock`. 
+ ## [0.0.41] - 2024-08-22 ### Added diff --git a/examples/dialin-chatbot/requirements.txt b/examples/dialin-chatbot/requirements.txt index 38a0a93b0..e59a9c3d2 100644 --- a/examples/dialin-chatbot/requirements.txt +++ b/examples/dialin-chatbot/requirements.txt @@ -3,3 +3,4 @@ fastapi uvicorn python-dotenv twilio +python-multipart diff --git a/src/pipecat/processors/aggregators/llm_response.py b/src/pipecat/processors/aggregators/llm_response.py index 7c38e62ad..ab0552578 100644 --- a/src/pipecat/processors/aggregators/llm_response.py +++ b/src/pipecat/processors/aggregators/llm_response.py @@ -109,7 +109,7 @@ class LLMResponseAggregator(FrameProcessor): await self.push_frame(frame, direction) elif isinstance(frame, self._accumulator_frame): if self._aggregating: - self._aggregation += f" {frame.text}" + self._aggregation += f" {frame.text}" if self._aggregation else frame.text # We have recevied a complete sentence, so if we have seen the # end frame and we were still aggregating, it means we should # send the aggregation. 
diff --git a/src/pipecat/transports/base_transport.py b/src/pipecat/transports/base_transport.py index 72e609263..083aeac37 100644 --- a/src/pipecat/transports/base_transport.py +++ b/src/pipecat/transports/base_transport.py @@ -32,6 +32,7 @@ class TransportParams(BaseModel): audio_out_is_live: bool = False audio_out_sample_rate: int = 16000 audio_out_channels: int = 1 + audio_out_bitrate: int = 96000 audio_in_enabled: bool = False audio_in_sample_rate: int = 16000 audio_in_channels: int = 1 diff --git a/src/pipecat/transports/services/daily.py b/src/pipecat/transports/services/daily.py index bb6032fa4..7cf330b9e 100644 --- a/src/pipecat/transports/services/daily.py +++ b/src/pipecat/transports/services/daily.py @@ -366,6 +366,12 @@ class DailyTransportClient(EventHandler): } }, } + }, + "microphone": { + "sendSettings": { + "channelConfig": "stereo" if self._params.audio_out_channels == 2 else "mono", + "bitrate": self._params.audio_out_bitrate, + } } }, })