Compare commits
6 Commits
aleix/audi
...
vp-rtvi-er
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5d67006121 | ||
|
|
8c6a441064 | ||
|
|
fddc058ce2 | ||
|
|
89750086c5 | ||
|
|
e69406c7e2 | ||
|
|
878ae42d84 |
18
CHANGELOG.md
18
CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- Added `lexicon_names` parameter to `AWSPollyTTSService.InputParams`.
|
||||
|
||||
- Added reconnection logic and audio buffer management to `GladiaSTTService`.
|
||||
|
||||
- Added Polish support to `AWSTranscribeSTTService`.
|
||||
@@ -37,21 +39,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- Upgraded `daily-python` to 0.19.3.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `AudioBufferProcessor` parameter `user_continuos_stream` is deprecated.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an `AudioBufferProcessor` issue that was causing crackling on the audio
|
||||
stream with lower sample rate (due to upsampling the other stream). We now
|
||||
record with the lowest sample rate to avoid upsampling.
|
||||
- Fixed function calling in `AWSNovaSonicLLMService`.
|
||||
|
||||
- Fixed an issue that would cause multiple `PipelineTask.on_idle_timeout`
|
||||
events to be triggered repeatedly.
|
||||
|
||||
- Fixed an `AudioBufferProcessor` issue that was causing user and bot speech to
|
||||
not be synchronized during recordings.
|
||||
- Fixed an issue that was causing user and bot speech to not be synchronized
|
||||
during recordings.
|
||||
|
||||
- Fixed an issue where voice settings weren't applied to `ElevenLabsTTSService`.
|
||||
|
||||
@@ -63,6 +59,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
- Fixed an issue where `GoogleLLMService`'s TTFB value was incorrect.
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `AudioBufferProcessor` parameter `user_continuos_stream` is deprecated.
|
||||
|
||||
### Other
|
||||
|
||||
- Rename `14e-function-calling-gemini.py` to `14e-function-calling-google.py`.
|
||||
|
||||
@@ -7,8 +7,6 @@
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.utils import create_default_resampler, interleave_stereo_audio, mix_audio
|
||||
from pipecat.frames.frames import (
|
||||
AudioRawFrame,
|
||||
@@ -183,14 +181,7 @@ class AudioBufferProcessor(FrameProcessor):
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
def _update_sample_rate(self, frame: StartFrame):
|
||||
# Record to the minimum sample rate to avoid possible upsampling
|
||||
# artifacts.
|
||||
min_sample_rate = min(frame.audio_in_sample_rate, frame.audio_out_sample_rate)
|
||||
if frame.audio_in_sample_rate != frame.audio_out_sample_rate:
|
||||
logger.debug(
|
||||
f"{self} Input and output sample rates don't match, recording with smaller sample rate: {min_sample_rate} (this might get fixed in the future)"
|
||||
)
|
||||
self._sample_rate = self._init_sample_rate or min_sample_rate
|
||||
self._sample_rate = self._init_sample_rate or frame.audio_out_sample_rate
|
||||
self._audio_buffer_size_1s = self._sample_rate * 2
|
||||
|
||||
async def _process_recording(self, frame: Frame):
|
||||
|
||||
@@ -400,6 +400,8 @@ class RTVIObserverParams:
|
||||
"""
|
||||
Parameters for configuring RTVI Observer behavior.
|
||||
|
||||
Protip: Set these all to `False` if the bot will talk to another bot.
|
||||
|
||||
Attributes:
|
||||
bot_llm_enabled (bool): Indicates if the bot's LLM messages should be sent.
|
||||
bot_tts_enabled (bool): Indicates if the bot's TTS messages should be sent.
|
||||
@@ -802,7 +804,7 @@ class RTVIProcessor(FrameProcessor):
|
||||
await self._message_queue.put(message)
|
||||
except ValidationError as e:
|
||||
await self.send_error(f"Invalid RTVI transport message: {e}")
|
||||
logger.warning(f"Invalid RTVI transport message: {e}")
|
||||
logger.warning(f"Invalid RTVI transport message '{transport_message}': {e}")
|
||||
|
||||
async def _handle_message(self, message: RTVIMessage):
|
||||
try:
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from typing import AsyncGenerator, Optional
|
||||
from typing import AsyncGenerator, List, Optional
|
||||
|
||||
from loguru import logger
|
||||
from pydantic import BaseModel
|
||||
@@ -115,6 +115,7 @@ class AWSPollyTTSService(TTSService):
|
||||
pitch: Optional[str] = None
|
||||
rate: Optional[str] = None
|
||||
volume: Optional[str] = None
|
||||
lexicon_names: Optional[List[str]] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -147,6 +148,7 @@ class AWSPollyTTSService(TTSService):
|
||||
"pitch": params.pitch,
|
||||
"rate": params.rate,
|
||||
"volume": params.volume,
|
||||
"lexicon_names": params.lexicon_names,
|
||||
}
|
||||
|
||||
self._resampler = create_default_resampler()
|
||||
@@ -235,6 +237,7 @@ class AWSPollyTTSService(TTSService):
|
||||
"Engine": self._settings["engine"],
|
||||
# AWS only supports 8000 and 16000 for PCM. We select 16000.
|
||||
"SampleRate": "16000",
|
||||
"LexiconNames": self._settings["lexicon_names"],
|
||||
}
|
||||
|
||||
# Filter out None values
|
||||
|
||||
@@ -25,6 +25,7 @@ from pipecat.frames.frames import (
|
||||
CancelFrame,
|
||||
EndFrame,
|
||||
Frame,
|
||||
FunctionCallFromLLM,
|
||||
InputAudioRawFrame,
|
||||
InterimTranscriptionFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
@@ -804,12 +805,16 @@ class AWSNovaSonicLLMService(LLMService):
|
||||
# Call tool function
|
||||
if self.has_function(function_name):
|
||||
if function_name in self._functions.keys() or None in self._functions.keys():
|
||||
await self.call_function(
|
||||
context=self._context,
|
||||
tool_call_id=tool_call_id,
|
||||
function_name=function_name,
|
||||
arguments=arguments,
|
||||
)
|
||||
function_calls_llm = [
|
||||
FunctionCallFromLLM(
|
||||
context=self._context,
|
||||
tool_call_id=tool_call_id,
|
||||
function_name=function_name,
|
||||
arguments=arguments,
|
||||
)
|
||||
]
|
||||
|
||||
await self.run_function_calls(function_calls_llm)
|
||||
else:
|
||||
raise AWSNovaSonicUnhandledFunctionException(
|
||||
f"The LLM tried to call a function named '{function_name}', but there isn't a callback registered for that function."
|
||||
|
||||
Reference in New Issue
Block a user