rtvi: minor comment add

Merge pull request #2035 from smokyabdulrahman/feat/aws-polly-lexicon-names-support
Support AWS Polly Lexicon Names parameter
2025-06-20 11:12:40 -05:00 · 2025-06-20 10:03:27 -04:00 · 2025-06-20 14:15:24 +01:00 · 2025-06-20 09:47:46 +01:00 · 2025-06-19 14:42:47 -07:00 · 2025-06-19 14:26:34 -07:00
5 changed files with 28 additions and 27 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+- Added `lexicon_names` parameter to `AWSPollyTTSService.InputParams`.
+
 - Added reconnection logic and audio buffer management to `GladiaSTTService`.

 - Added Polish support to `AWSTranscribeSTTService`.
@@ -37,21 +39,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 - Upgraded `daily-python` to 0.19.3.

-### Deprecated
-
- `AudioBufferProcessor` parameter `user_continuos_stream` is deprecated.
-
 ### Fixed

- Fixed an `AudioBufferProcessor` issue that was causing crackling on the audio
-  stream with lower sample rate (due to upsampling the other stream). We now
-  record with the lowest sample rate to avoid upsampling.
+- Fixed function calling in `AWSNovaSonicLLMService`.

 - Fixed an issue that would cause multiple `PipelineTask.on_idle_timeout`
  events to be triggered repeatedly.

- Fixed an `AudioBufferProcessor` issue that was causing user and bot speech to
-  not be synchronized during recordings.
+- Fixed an issue that was causing user and bot speech to not be synchronized
+  during recordings.

 - Fixed an issue where voice settings weren't applied to ElevenLabsTTSService.

@@ -63,6 +59,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 - Fixed an issue where `GoogleLLMService`'s TTFB value was incorrect.

+### Deprecated
+
+- `AudioBufferProcessor` parameter `user_continuous_stream` is deprecated.
+
 ### Other

 - Rename `14e-function-calling-gemini.py` to `14e-function-calling-google.py`.
--- a/src/pipecat/processors/audio/audio_buffer_processor.py
+++ b/src/pipecat/processors/audio/audio_buffer_processor.py
@@ -7,8 +7,6 @@
 import time
 from typing import Optional

-from loguru import logger
-
 from pipecat.audio.utils import create_default_resampler, interleave_stereo_audio, mix_audio
 from pipecat.frames.frames import (
    AudioRawFrame,
@@ -183,14 +181,7 @@ class AudioBufferProcessor(FrameProcessor):
        await self.push_frame(frame, direction)

    def _update_sample_rate(self, frame: StartFrame):
-        # Record to the minimum sample rate to avoid possible downsampling
-        # artifacts.
-        min_sample_rate = min(frame.audio_in_sample_rate, frame.audio_out_sample_rate)
-        if frame.audio_in_sample_rate != frame.audio_out_sample_rate:
-            logger.debug(
-                f"{self} Input and output sample rates don't match, recording with smaller sample rate: {min_sample_rate} (this might get fixed in the future)"
-            )
-        self._sample_rate = self._init_sample_rate or min_sample_rate
+        self._sample_rate = self._init_sample_rate or frame.audio_out_sample_rate
        self._audio_buffer_size_1s = self._sample_rate * 2

    async def _process_recording(self, frame: Frame):
--- a/src/pipecat/processors/frameworks/rtvi.py
+++ b/src/pipecat/processors/frameworks/rtvi.py
@@ -400,6 +400,8 @@ class RTVIObserverParams:
    """
    Parameters for configuring RTVI Observer behavior.

+    Pro tip: Set all of these to `False` if the bot will talk to another bot.
+
    Attributes:
        bot_llm_enabled (bool): Indicates if the bot's LLM messages should be sent.
        bot_tts_enabled (bool): Indicates if the bot's TTS messages should be sent.
@@ -802,7 +804,7 @@ class RTVIProcessor(FrameProcessor):
            await self._message_queue.put(message)
        except ValidationError as e:
            await self.send_error(f"Invalid RTVI transport message: {e}")
-            logger.warning(f"Invalid RTVI transport message: {e}")
+            logger.warning(f"Invalid RTVI transport message '{transport_message}': {e}")

    async def _handle_message(self, message: RTVIMessage):
        try:
--- a/src/pipecat/services/aws/tts.py
+++ b/src/pipecat/services/aws/tts.py
@@ -6,7 +6,7 @@

 import asyncio
 import os
-from typing import AsyncGenerator, Optional
+from typing import AsyncGenerator, List, Optional

 from loguru import logger
 from pydantic import BaseModel
@@ -115,6 +115,7 @@ class AWSPollyTTSService(TTSService):
        pitch: Optional[str] = None
        rate: Optional[str] = None
        volume: Optional[str] = None
+        lexicon_names: Optional[List[str]] = None

    def __init__(
        self,
@@ -147,6 +148,7 @@ class AWSPollyTTSService(TTSService):
            "pitch": params.pitch,
            "rate": params.rate,
            "volume": params.volume,
+            "lexicon_names": params.lexicon_names,
        }

        self._resampler = create_default_resampler()
@@ -235,6 +237,7 @@ class AWSPollyTTSService(TTSService):
                "Engine": self._settings["engine"],
                # AWS only supports 8000 and 16000 for PCM. We select 16000.
                "SampleRate": "16000",
+                "LexiconNames": self._settings["lexicon_names"],
            }

            # Filter out None values
--- a/src/pipecat/services/aws_nova_sonic/aws.py
+++ b/src/pipecat/services/aws_nova_sonic/aws.py
@@ -25,6 +25,7 @@ from pipecat.frames.frames import (
    CancelFrame,
    EndFrame,
    Frame,
+    FunctionCallFromLLM,
    InputAudioRawFrame,
    InterimTranscriptionFrame,
    LLMFullResponseEndFrame,
@@ -804,12 +805,16 @@ class AWSNovaSonicLLMService(LLMService):
        # Call tool function
        if self.has_function(function_name):
            if function_name in self._functions.keys() or None in self._functions.keys():
-                await self.call_function(
-                    context=self._context,
-                    tool_call_id=tool_call_id,
-                    function_name=function_name,
-                    arguments=arguments,
-                )
+                function_calls_llm = [
+                    FunctionCallFromLLM(
+                        context=self._context,
+                        tool_call_id=tool_call_id,
+                        function_name=function_name,
+                        arguments=arguments,
+                    )
+                ]
+
+                await self.run_function_calls(function_calls_llm)
        else:
            raise AWSNovaSonicUnhandledFunctionException(
                f"The LLM tried to call a function named '{function_name}', but there isn't a callback registered for that function."
Author	SHA1	Message	Date
vipyne	5d67006121	rtvi: minor comment add	2025-06-20 11:12:40 -05:00
Mark Backman	8c6a441064	Merge pull request #2035 from smokyabdulrahman/feat/aws-polly-lexicon-names-support Support AWS Polly Lexicon Names parameter	2025-06-20 10:03:27 -04:00
Alrahma	fddc058ce2	add CHANGELOG entry	2025-06-20 14:15:24 +01:00
Alrahma	89750086c5	Support AWS Polly Lexicon Names parameter Documentation reference [AWS Managing Lexicons](https://docs.aws.amazon.com/polly/latest/dg/managing-lexicons.html)	2025-06-20 09:47:46 +01:00
Aleix Conchillo Flaqué	e69406c7e2	Merge pull request #2032 from pipecat-ai/aleix/aws-nova-sonic-function-calls AWSNovaSonicLLMService: fix function calling	2025-06-19 14:42:47 -07:00
Aleix Conchillo Flaqué	878ae42d84	AWSNovaSonicLLMService: fix function calling	2025-06-19 14:26:34 -07:00