Merge pull request #3007 from pipecat-ai/aleix/pipecat-0.0.93
update CHANGELOG for 0.0.93
This commit is contained in:
23
CHANGELOG.md
23
CHANGELOG.md
@@ -5,11 +5,15 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
## [0.0.93] - 2025-11-07
|
||||
|
||||
### Added
|
||||
|
||||
- Added support for passing in a `ToolsSchema` in lieu of a list of provider-
|
||||
- Added support for Sarvam Speech-to-Text service (`SarvamSTTService`) with
|
||||
streaming WebSocket support for `saarika` (STT) and `saaras` (STT-translate)
|
||||
models.
|
||||
|
||||
- Added support for passing in a `ToolsSchema` in lieu of a list of provider-
|
||||
specific dicts when initializing `OpenAIRealtimeLLMService` or when updating
|
||||
it using `LLMUpdateSettingsFrame`.
|
||||
|
||||
@@ -84,6 +88,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Changed
|
||||
|
||||
- Updated `simli-ai` to 0.1.25.
|
||||
|
||||
- Improved `concatenate_aggregated_text()` to handle one-word outputs from OpenAI
|
||||
Realtime and Gemini Live. Text fragments are now correctly concatenated
|
||||
without spaces when these patterns are detected.
|
||||
@@ -114,6 +120,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
supported languages before Pipecat's service classes are updated, while still
|
||||
providing guidance on verified languages.
|
||||
|
||||
### Removed
|
||||
|
||||
- Removed `needs_mcp_alternate_schema()` from `LLMService`. The mechanism that
|
||||
relied on it went away.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Restore backwards compatibility for vision/image features (broken in 0.0.92)
|
||||
@@ -137,18 +148,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Fixed `GoogleLLMService` token counting to avoid double-counting tokens when
|
||||
Gemini sends usage metadata across multiple streaming chunks.
|
||||
|
||||
### Removed
|
||||
|
||||
- Removed `needs_mcp_alternate_schema()` from `LLMService`. The mechanism that
|
||||
relied on it went away.
|
||||
|
||||
## [0.0.92] - 2025-10-31 🎃 "The Haunted Edition" 👻
|
||||
|
||||
### Added
|
||||
|
||||
- Added support for Sarvam Speech-to-Text service (`SarvamSTTService`) with streaming WebSocket
|
||||
support for `saarika` (STT) and `saaras` (STT-translate) models.
|
||||
|
||||
- Added a new `DeepgramHttpTTSService`, which delivers a meaningful reduction
|
||||
in latency when compared to the `DeepgramTTSService`.
|
||||
|
||||
|
||||
@@ -21,8 +21,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.stt import CartesiaSTTService
|
||||
from pipecat.services.cartesia.tts import CartesiaHttpTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
@@ -59,7 +59,7 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
|
||||
|
||||
tts = CartesiaHttpTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
|
||||
@@ -21,8 +21,8 @@ from pipecat.processors.aggregators.llm_context import LLMContext
|
||||
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
||||
from pipecat.runner.types import RunnerArguments
|
||||
from pipecat.runner.utils import create_transport
|
||||
from pipecat.services.cartesia.stt import CartesiaSTTService
|
||||
from pipecat.services.cartesia.tts import CartesiaTTSService
|
||||
from pipecat.services.deepgram.stt import DeepgramSTTService
|
||||
from pipecat.services.openai.llm import OpenAILLMService
|
||||
from pipecat.transports.base_transport import BaseTransport, TransportParams
|
||||
from pipecat.transports.daily.transport import DailyParams
|
||||
@@ -58,7 +58,7 @@ transport_params = {
|
||||
async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
logger.info(f"Starting bot")
|
||||
|
||||
stt = CartesiaSTTService(api_key=os.getenv("CARTESIA_API_KEY"))
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
|
||||
@@ -77,7 +77,7 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way, but try to be brief.",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -99,9 +99,9 @@ local-smart-turn = [ "coremltools>=8.0", "transformers", "torch>=2.5.0,<3", "tor
|
||||
local-smart-turn-v3 = [ "transformers", "onnxruntime>=1.20.1,<2" ]
|
||||
remote-smart-turn = []
|
||||
silero = [ "onnxruntime>=1.20.1,<2" ]
|
||||
simli = [ "simli-ai~=0.1.10"]
|
||||
simli = [ "simli-ai~=0.1.25"]
|
||||
soniox = [ "pipecat-ai[websockets-base]" ]
|
||||
soundfile = [ "soundfile~=0.13.0" ]
|
||||
soundfile = [ "soundfile~=0.13.1" ]
|
||||
speechmatics = [ "speechmatics-rt>=0.5.0" ]
|
||||
strands = [ "strands-agents>=1.9.1,<2" ]
|
||||
tavus=[]
|
||||
|
||||
@@ -244,10 +244,10 @@ async def run_eval_pipeline(
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"))
|
||||
|
||||
llm.register_function("assert_eval", eval_runner.assert_eval)
|
||||
llm.register_function("eval_function", eval_runner.assert_eval)
|
||||
|
||||
eval_function = FunctionSchema(
|
||||
name="assert_eval",
|
||||
name="eval_function",
|
||||
description="Called when the user answers a question.",
|
||||
properties={
|
||||
"result": {
|
||||
@@ -272,13 +272,15 @@ async def run_eval_pipeline(
|
||||
example_prompt, example_image = eval_config.prompt
|
||||
|
||||
common_system_prompt = (
|
||||
"The user might say things other than the answer and that's allowed. "
|
||||
f"You should only call the eval function when the user: {eval_config.eval}"
|
||||
"You should only call the eval function if:\n"
|
||||
"- The user explicitly attempts to answer the question, AND\n"
|
||||
f"- Their answer can be cleanly evaluated using: {eval_config.eval}\n"
|
||||
"Ignore greetings, comments, non-answers, or requests for clarification."
|
||||
)
|
||||
if eval_config.eval_speaks_first:
|
||||
system_prompt = f"You are an LLM eval, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
|
||||
system_prompt = f"You are an evaluation agent, be extremly brief. You will start the conversation by saying: '{example_prompt}'. {common_system_prompt}"
|
||||
else:
|
||||
system_prompt = f"You are an LLM eval, be extremly brief. Your goal is to first ask one question: {example_prompt}. {common_system_prompt}"
|
||||
system_prompt = f"You are an evaluation agent, be extremly brief. First, ask one question: {example_prompt}. {common_system_prompt}"
|
||||
|
||||
messages = [
|
||||
{
|
||||
|
||||
@@ -180,7 +180,7 @@ TESTS_26 = [
|
||||
("26-gemini-live.py", EVAL_SIMPLE_MATH),
|
||||
("26a-gemini-live-transcription.py", EVAL_SIMPLE_MATH),
|
||||
("26b-gemini-live-function-calling.py", EVAL_WEATHER),
|
||||
("26c-gemini-live-video.py", EVAL_SIMPLE_MATH),
|
||||
("26c-gemini-live-video.py", EVAL_VISION_CAMERA),
|
||||
("26e-gemini-live-google-search.py", EVAL_ONLINE_SEARCH),
|
||||
("26h-gemini-live-vertex-function-calling.py", EVAL_WEATHER),
|
||||
# Currently not working.
|
||||
|
||||
@@ -158,14 +158,17 @@ class SimliVideoService(FrameProcessor):
|
||||
|
||||
async def _start_connection(self):
|
||||
"""Start the connection to Simli service and begin processing tasks."""
|
||||
if not self._initialized:
|
||||
await self._simli_client.Initialize()
|
||||
self._initialized = True
|
||||
try:
|
||||
if not self._initialized:
|
||||
await self._simli_client.Initialize()
|
||||
self._initialized = True
|
||||
|
||||
# Create task to consume and process audio and video
|
||||
await self._simli_client.sendSilence()
|
||||
self._audio_task = self.create_task(self._consume_and_process_audio())
|
||||
self._video_task = self.create_task(self._consume_and_process_video())
|
||||
# Create task to consume and process audio and video
|
||||
await self._simli_client.sendSilence()
|
||||
self._audio_task = self.create_task(self._consume_and_process_audio())
|
||||
self._video_task = self.create_task(self._consume_and_process_video())
|
||||
except Exception as e:
|
||||
logger.error(f"{self}: unable to start connection: {e}")
|
||||
|
||||
async def _consume_and_process_audio(self):
|
||||
"""Consume audio frames from Simli and push them downstream."""
|
||||
|
||||
10
uv.lock
generated
10
uv.lock
generated
@@ -4727,8 +4727,8 @@ requires-dist = [
|
||||
{ name = "resampy", specifier = "~=0.4.3" },
|
||||
{ name = "sarvamai", marker = "extra == 'sarvam'", specifier = "==0.1.21" },
|
||||
{ name = "sentry-sdk", marker = "extra == 'sentry'", specifier = ">=2.28.0,<3" },
|
||||
{ name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.10" },
|
||||
{ name = "soundfile", marker = "extra == 'soundfile'", specifier = "~=0.13.0" },
|
||||
{ name = "simli-ai", marker = "extra == 'simli'", specifier = "~=0.1.25" },
|
||||
{ name = "soundfile", marker = "extra == 'soundfile'", specifier = "~=0.13.1" },
|
||||
{ name = "soxr", specifier = "~=0.5.0" },
|
||||
{ name = "speechmatics-rt", marker = "extra == 'speechmatics'", specifier = ">=0.5.0" },
|
||||
{ name = "strands-agents", marker = "extra == 'strands'", specifier = ">=1.9.1,<2" },
|
||||
@@ -6496,7 +6496,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "simli-ai"
|
||||
version = "0.1.19"
|
||||
version = "0.1.25"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "aiortc" },
|
||||
@@ -6505,9 +6505,9 @@ dependencies = [
|
||||
{ name = "numpy" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/14/cf/bd31b76e00d2770a65081701108a39df2267cb585b0c2a000f71de790ee9/simli_ai-0.1.19.tar.gz", hash = "sha256:2ab8c6ec1e232dbf38c77d3920fe88b01acc7ba8d76b865fb5a3f4af968e3172", size = 12682, upload-time = "2025-09-23T14:14:30.263Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/64/6a/b28f90baf76f6a60865985f6233ff44abc72d45b66b76658bff3961e20a7/simli_ai-0.1.25.tar.gz", hash = "sha256:7a00b3426dc26a6a421641072c3e49014b7950c621cf4544152f35c58d13fcff", size = 13182, upload-time = "2025-11-06T16:27:08.862Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/63/86/fe2ed1b9d067634c4e0178c33080655c5c1f5b503fec20ac2af699238afb/simli_ai-0.1.19-py3-none-any.whl", hash = "sha256:35bcff89945dcb5f6171996d16d627e64981888c3134bdec7ce925680a17e058", size = 13233, upload-time = "2025-09-23T14:14:27.756Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ac/57/ae1032fd88214ea4ee6d3028c817c12a999eb90a67766bbab31e9819385a/simli_ai-0.1.25-py3-none-any.whl", hash = "sha256:7d01f65321dc9052f25e15d0463af6a20a86c6d37d9a7b3a2c4b01cbec0a54ed", size = 13651, upload-time = "2025-11-06T16:27:07.765Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Reference in New Issue
Block a user