Merge pull request #3007 from pipecat-ai/aleix/pipecat-0.0.93

update CHANGELOG for 0.0.93
2025-11-07 13:25:25 -08:00
parent 9b1192ca9b 4cf9e1409e
commit 16e2d5b998
9 changed files with 44 additions and 36 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,11 +5,15 @@ All notable changes to **Pipecat** will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## [Unreleased]
+## [0.0.93] - 2025-11-07

 ### Added

- Added support for passing in a `ToolsSchem` in lieu of a list of provider-
+- Added support for Sarvam Speech-to-Text service (`SarvamSTTService`) with
+  streaming WebSocket support for `saarika` (STT) and `saaras` (STT-translate)
+  models.
+
+- Added support for passing in a `ToolsSchema` in lieu of a list of provider-
  specific dicts when initializing `OpenAIRealtimeLLMService` or when updating
  it using `LLMUpdateSettingsFrame`.

@@ -84,6 +88,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Changed

+- Updated `simli-ai` to 0.1.25.
+
 - Improved `concatenate_aggregated_text()` to handle one-word outputs from OpenAI
  Realtime and Gemini Live. Text fragments are now correctly concatenated
  without spaces when these patterns are detected.
@@ -114,6 +120,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  supported languages before Pipecat's service classes are updated, while still
  providing guidance on verified languages.

+### Removed
+
+- Removed `needs_mcp_alternate_schema()` from `LLMService`. The mechanism that
+  relied on it went away.
+
 ### Fixed

 - Restore backwards compatibility for vision/image features (broken in 0.0.92)
@@ -137,18 +148,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed `GoogleLLMService` token counting to avoid double-counting tokens when
  Gemini sends usage metadata across multiple streaming chunks.

-### Removed
-
- Removed `needs_mcp_alternate_schema()` from `LLMService`. The mechanism that
-  relied on it went away.
-
 ## [0.0.92] - 2025-10-31 🎃 "The Haunted Edition" 👻

 ### Added

- Added supprt for Sarvam Speech-to-Text service (`SarvamSTTService`) with streaming WebSocket
-  support for `saarika` (STT) and `saaras` (STT-translate) models.
-
 - Added a new `DeepgramHttpTTSService`, which delivers a meaningful reduction
  in latency when compared to the `DeepgramTTSService`.

--- a/examples/foundational/07-interruptible-cartesia-http.py
+++ b/examples/foundational/07-interruptible-cartesia-http.py
@@ -21,8 +21,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
+from pipecat.services.cartesia.stt import CartesiaSTTService
 from pipecat.services.cartesia.tts import CartesiaHttpTTSService
-from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.openai.llm import OpenAILLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -59,7 +59,7 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
+    stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))

    tts = CartesiaHttpTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
--- a/examples/foundational/07-interruptible.py
+++ b/examples/foundational/07-interruptible.py
@@ -21,8 +21,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
 from pipecat.runner.types import RunnerArguments
 from pipecat.runner.utils import create_transport
-from pipecat.services.cartesia.stt import CartesiaSTTService
 from pipecat.services.cartesia.tts import CartesiaTTSService
+from pipecat.services.deepgram.stt import DeepgramSTTService
 from pipecat.services.openai.llm import OpenAILLMService
 from pipecat.transports.base_transport import BaseTransport, TransportParams
 from pipecat.transports.daily.transport import DailyParams
@@ -58,7 +58,7 @@ transport_params = {
 async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info(f"Starting bot")

-    stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
+    stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))

    tts = CartesiaTTSService(
        api_key=os.getenv("CARTESIA_API_KEY"),
--- a/examples/foundational/14n-function-calling-perplexity.py
+++ b/examples/foundational/14n-function-calling-perplexity.py
@@ -77,7 +77,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    messages = [
        {
            "role": "user",
-            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+            "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but try to be brief.",
        },
    ]

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -99,9 +99,9 @@ local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "tor
 local-smart-turn-v3 = [ "transformers", "onnxruntime>=1.20.1,<2" ]
 remote-smart-turn = []
 silero = [ "onnxruntime>=1.20.1,<2" ]
-simli = [ "simli-ai~=0.1.10"]
+simli = [ "simli-ai~=0.1.25"]
 soniox = [ "pipecat-ai[websockets-base]" ]
-soundfile = [ "soundfile~=0.13.0" ]
+soundfile = [ "soundfile~=0.13.1" ]
 speechmatics = [ "speechmatics-rt>=0.5.0" ]
 strands = [ "strands-agents>=1.9.1,<2" ]
 tavus=[]
--- a/scripts/evals/eval.py
+++ b/scripts/evals/eval.py
@@ -244,10 +244,10 @@ async def run_eval_pipeline(

    llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))

-    llm.register_function("assert_eval", eval_runner.assert_eval)
+    llm.register_function("eval_function", eval_runner.assert_eval)

    eval_function = FunctionSchema(
-        name="assert_eval",
+        name="eval_function",
        description="Called when the user answers a question.",
        properties={
            "result": {
@@ -272,13 +272,15 @@ async def run_eval_pipeline(
        example_prompt, example_image = eval_config.prompt

    common_system_prompt = (
-        "The user might say things other than the answer and that's allowed. "
-        f"You should only call the eval function when the user: {eval_config.eval}"
+        "You should only call the eval function if:\n"
+        "- The user explicitly attempts to answer the question, AND\n"
+        f"- Their answer can be cleanly evaluated using: {eval_config.eval}\n"
+        "Ignore greetings, comments, non-answers, or requests for clarification."
    )
    if eval_config.eval_speaks_first:
-        system_prompt = f"You are an LLM eval, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
+        system_prompt = f"You are an evaluation agent, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
    else:
-        system_prompt = f"You are an LLM eval, be extremly brief. Your goal is to first ask one question: {example_prompt}. {common_system_prompt}"
+        system_prompt = f"You are an evaluation agent, be extremly brief. First, ask one question: {example_prompt}. {common_system_prompt}"

    messages = [
        {
--- a/scripts/evals/run-release-evals.py
+++ b/scripts/evals/run-release-evals.py
@@ -180,7 +180,7 @@ TESTS_26 = [
    ("26-gemini-live.py", EVAL_SIMPLE_MATH),
    ("26a-gemini-live-transcription.py", EVAL_SIMPLE_MATH),
    ("26b-gemini-live-function-calling.py", EVAL_WEATHER),
-    ("26c-gemini-live-video.py", EVAL_SIMPLE_MATH),
+    ("26c-gemini-live-video.py", EVAL_VISION_CAMERA),
    ("26e-gemini-live-google-search.py", EVAL_ONLINE_SEARCH),
    ("26h-gemini-live-vertex-function-calling.py", EVAL_WEATHER),
    # Currently not working.
--- a/src/pipecat/services/simli/video.py
+++ b/src/pipecat/services/simli/video.py
@@ -158,14 +158,17 @@ class SimliVideoService(FrameProcessor):

    async def _start_connection(self):
        """Start the connection to Simli service and begin processing tasks."""
-        if not self._initialized:
-            await self._simli_client.Initialize()
-            self._initialized = True
+        try:
+            if not self._initialized:
+                await self._simli_client.Initialize()
+                self._initialized = True

-        # Create task to consume and process audio and video
-        await self._simli_client.sendSilence()
-        self._audio_task = self.create_task(self._consume_and_process_audio())
-        self._video_task = self.create_task(self._consume_and_process_video())
+            # Create task to consume and process audio and video
+            await self._simli_client.sendSilence()
+            self._audio_task = self.create_task(self._consume_and_process_audio())
+            self._video_task = self.create_task(self._consume_and_process_video())
+        except Exception as e:
+            logger.error(f"{self}: unable to start connection: {e}")

    async def _consume_and_process_audio(self):
        """Consume audio frames from Simli and push them downstream."""
--- a/uv.lock
+++ b/uv.lock
@@ -4727,8 +4727,8 @@ requires-dist = [
    { name = "resampy", specifier = "~=0.4.3" },
    { name = "sarvamai", marker = "extra == 'sarvam'", specifier = "==0.1.21" },
    { name = "sentry-sdk", marker = "extra == 'sentry'", specifier = ">=2.28.0,<3" },
-    { name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.10" },
-    { name = "soundfile", marker = "extra == 'soundfile'", specifier = "~=0.13.0" },
+    { name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.25" },
+    { name = "soundfile", marker = "extra == 'soundfile'", specifier = "~=0.13.1" },
    { name = "soxr", specifier = "~=0.5.0" },
    { name = "speechmatics-rt", marker = "extra == 'speechmatics'", specifier = ">=0.5.0" },
    { name = "strands-agents", marker = "extra == 'strands'", specifier = ">=1.9.1,<2" },
@@ -6496,7 +6496,7 @@ wheels = [

 [[package]]
 name = "simli-ai"
-version = "0.1.19"
+version = "0.1.25"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "aiortc" },
@@ -6505,9 +6505,9 @@ dependencies = [
    { name = "numpy" },
    { name = "websockets" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/14/cf/bd31b76e00d2770a65081701108a39df2267cb585b0c2a000f71de790ee9/simli_ai-0.1.19.tar.gz", hash = "sha256:2ab8c6ec1e232dbf38c77d3920fe88b01acc7ba8d76b865fb5a3f4af968e3172", size = 12682, upload-time = "2025-09-23T14:14:30.263Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/64/6a/b28f90baf76f6a60865985f6233ff44abc72d45b66b76658bff3961e20a7/simli_ai-0.1.25.tar.gz", hash = "sha256:7a00b3426dc26a6a421641072c3e49014b7950c621cf4544152f35c58d13fcff", size = 13182, upload-time = "2025-11-06T16:27:08.862Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/63/86/fe2ed1b9d067634c4e0178c33080655c5c1f5b503fec20ac2af699238afb/simli_ai-0.1.19-py3-none-any.whl", hash = "sha256:35bcff89945dcb5f6171996d16d627e64981888c3134bdec7ce925680a17e058", size = 13233, upload-time = "2025-09-23T14:14:27.756Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/57/ae1032fd88214ea4ee6d3028c817c12a999eb90a67766bbab31e9819385a/simli_ai-0.1.25-py3-none-any.whl", hash = "sha256:7d01f65321dc9052f25e15d0463af6a20a86c6d37d9a7b3a2c4b01cbec0a54ed", size = 13651, upload-time = "2025-11-06T16:27:07.765Z" },
 ]

 [[package]]