Merge pull request #3007 from pipecat-ai/aleix/pipecat-0.0.93

update CHANGELOG for 0.0.93
This commit is contained in:
Aleix Conchillo Flaqué
2025-11-07 13:25:25 -08:00
committed by GitHub
9 changed files with 44 additions and 36 deletions

View File

@@ -5,11 +5,15 @@ All notable changes to **Pipecat** will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
## [0.0.93] - 2025-11-07
### Added
- Added support for passing in a `ToolsSchem` in lieu of a list of provider-
- Added support for Sarvam Speech-to-Text service (`SarvamSTTService`) with
streaming WebSocket support for `saarika` (STT) and `saaras` (STT-translate)
models.
- Added support for passing in a `ToolsSchema` in lieu of a list of provider-
specific dicts when initializing `OpenAIRealtimeLLMService` or when updating
it using `LLMUpdateSettingsFrame`.
@@ -84,6 +88,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Updated `simli-ai` to 0.1.25.
- Improved `concatenate_aggregated_text()` to handle one-word outputs from OpenAI
Realtime and Gemini Live. Text fragments are now correctly concatenated
without spaces when these patterns are detected.
@@ -114,6 +120,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
supported languages before Pipecat's service classes are updated, while still
providing guidance on verified languages.
### Removed
- Removed `needs_mcp_alternate_schema()` from `LLMService`. The mechanism that
relied on it went away.
### Fixed
- Restore backwards compatibility for vision/image features (broken in 0.0.92)
@@ -137,18 +148,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fixed `GoogleLLMService` token counting to avoid double-counting tokens when
Gemini sends usage metadata across multiple streaming chunks.
### Removed
- Removed `needs_mcp_alternate_schema()` from `LLMService`. The mechanism that
relied on it went away.
## [0.0.92] - 2025-10-31 🎃 "The Haunted Edition" 👻
### Added
- Added supprt for Sarvam Speech-to-Text service (`SarvamSTTService`) with streaming WebSocket
support for `saarika` (STT) and `saaras` (STT-translate) models.
- Added a new `DeepgramHttpTTSService`, which delivers a meaningful reduction
in latency when compared to the `DeepgramTTSService`.

View File

@@ -21,8 +21,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.stt import CartesiaSTTService
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
@@ -59,7 +59,7 @@ transport_params = {
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
tts = CartesiaHttpTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),

View File

@@ -21,8 +21,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.stt import CartesiaSTTService
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.deepgram.stt import DeepgramSTTService
from pipecat.services.openai.llm import OpenAILLMService
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams
@@ -58,7 +58,7 @@ transport_params = {
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
logger.info(f"Starting bot")
stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
tts = CartesiaTTSService(
api_key=os.getenv("CARTESIA_API_KEY"),

View File

@@ -77,7 +77,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
messages = [
{
"role": "user",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but try to be brief.",
},
]

View File

@@ -99,9 +99,9 @@ local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "tor
local-smart-turn-v3 = [ "transformers", "onnxruntime>=1.20.1,<2" ]
remote-smart-turn = []
silero = [ "onnxruntime>=1.20.1,<2" ]
simli = [ "simli-ai~=0.1.10"]
simli = [ "simli-ai~=0.1.25"]
soniox = [ "pipecat-ai[websockets-base]" ]
soundfile = [ "soundfile~=0.13.0" ]
soundfile = [ "soundfile~=0.13.1" ]
speechmatics = [ "speechmatics-rt>=0.5.0" ]
strands = [ "strands-agents>=1.9.1,<2" ]
tavus=[]

View File

@@ -244,10 +244,10 @@ async def run_eval_pipeline(
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
llm.register_function("assert_eval", eval_runner.assert_eval)
llm.register_function("eval_function", eval_runner.assert_eval)
eval_function = FunctionSchema(
name="assert_eval",
name="eval_function",
description="Called when the user answers a question.",
properties={
"result": {
@@ -272,13 +272,15 @@ async def run_eval_pipeline(
example_prompt, example_image = eval_config.prompt
common_system_prompt = (
"The user might say things other than the answer and that's allowed. "
f"You should only call the eval function when the user: {eval_config.eval}"
"You should only call the eval function if:\n"
"- The user explicitly attempts to answer the question, AND\n"
f"- Their answer can be cleanly evaluated using: {eval_config.eval}\n"
"Ignore greetings, comments, non-answers, or requests for clarification."
)
if eval_config.eval_speaks_first:
system_prompt = f"You are an LLM eval, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
system_prompt = f"You are an evaluation agent, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
else:
system_prompt = f"You are an LLM eval, be extremly brief. Your goal is to first ask one question: {example_prompt}. {common_system_prompt}"
system_prompt = f"You are an evaluation agent, be extremly brief. First, ask one question: {example_prompt}. {common_system_prompt}"
messages = [
{

View File

@@ -180,7 +180,7 @@ TESTS_26 = [
("26-gemini-live.py", EVAL_SIMPLE_MATH),
("26a-gemini-live-transcription.py", EVAL_SIMPLE_MATH),
("26b-gemini-live-function-calling.py", EVAL_WEATHER),
("26c-gemini-live-video.py", EVAL_SIMPLE_MATH),
("26c-gemini-live-video.py", EVAL_VISION_CAMERA),
("26e-gemini-live-google-search.py", EVAL_ONLINE_SEARCH),
("26h-gemini-live-vertex-function-calling.py", EVAL_WEATHER),
# Currently not working.

View File

@@ -158,14 +158,17 @@ class SimliVideoService(FrameProcessor):
async def _start_connection(self):
"""Start the connection to Simli service and begin processing tasks."""
if not self._initialized:
await self._simli_client.Initialize()
self._initialized = True
try:
if not self._initialized:
await self._simli_client.Initialize()
self._initialized = True
# Create task to consume and process audio and video
await self._simli_client.sendSilence()
self._audio_task = self.create_task(self._consume_and_process_audio())
self._video_task = self.create_task(self._consume_and_process_video())
# Create task to consume and process audio and video
await self._simli_client.sendSilence()
self._audio_task = self.create_task(self._consume_and_process_audio())
self._video_task = self.create_task(self._consume_and_process_video())
except Exception as e:
logger.error(f"{self}: unable to start connection: {e}")
async def _consume_and_process_audio(self):
"""Consume audio frames from Simli and push them downstream."""

10
uv.lock generated
View File

@@ -4727,8 +4727,8 @@ requires-dist = [
{ name = "resampy", specifier = "~=0.4.3" },
{ name = "sarvamai", marker = "extra == 'sarvam'", specifier = "==0.1.21" },
{ name = "sentry-sdk", marker = "extra == 'sentry'", specifier = ">=2.28.0,<3" },
{ name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.10" },
{ name = "soundfile", marker = "extra == 'soundfile'", specifier = "~=0.13.0" },
{ name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.25" },
{ name = "soundfile", marker = "extra == 'soundfile'", specifier = "~=0.13.1" },
{ name = "soxr", specifier = "~=0.5.0" },
{ name = "speechmatics-rt", marker = "extra == 'speechmatics'", specifier = ">=0.5.0" },
{ name = "strands-agents", marker = "extra == 'strands'", specifier = ">=1.9.1,<2" },
@@ -6496,7 +6496,7 @@ wheels = [
[[package]]
name = "simli-ai"
version = "0.1.19"
version = "0.1.25"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "aiortc" },
@@ -6505,9 +6505,9 @@ dependencies = [
{ name = "numpy" },
{ name = "websockets" },
]
sdist = { url = "https://files.pythonhosted.org/packages/14/cf/bd31b76e00d2770a65081701108a39df2267cb585b0c2a000f71de790ee9/simli_ai-0.1.19.tar.gz", hash = "sha256:2ab8c6ec1e232dbf38c77d3920fe88b01acc7ba8d76b865fb5a3f4af968e3172", size = 12682, upload-time = "2025-09-23T14:14:30.263Z" }
sdist = { url = "https://files.pythonhosted.org/packages/64/6a/b28f90baf76f6a60865985f6233ff44abc72d45b66b76658bff3961e20a7/simli_ai-0.1.25.tar.gz", hash = "sha256:7a00b3426dc26a6a421641072c3e49014b7950c621cf4544152f35c58d13fcff", size = 13182, upload-time = "2025-11-06T16:27:08.862Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/63/86/fe2ed1b9d067634c4e0178c33080655c5c1f5b503fec20ac2af699238afb/simli_ai-0.1.19-py3-none-any.whl", hash = "sha256:35bcff89945dcb5f6171996d16d627e64981888c3134bdec7ce925680a17e058", size = 13233, upload-time = "2025-09-23T14:14:27.756Z" },
{ url = "https://files.pythonhosted.org/packages/ac/57/ae1032fd88214ea4ee6d3028c817c12a999eb90a67766bbab31e9819385a/simli_ai-0.1.25-py3-none-any.whl", hash = "sha256:7d01f65321dc9052f25e15d0463af6a20a86c6d37d9a7b3a2c4b01cbec0a54ed", size = 13651, upload-time = "2025-11-06T16:27:07.765Z" },
]
[[package]]