Compare commits
156 Commits
hush/firew
...
mb/flush-a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
10e31958fd | ||
|
|
373ffc48b6 | ||
|
|
5ae6229c03 | ||
|
|
615cbe966a | ||
|
|
3caccab608 | ||
|
|
78d3c77369 | ||
|
|
44b3e6cefa | ||
|
|
20ea073398 | ||
|
|
154fe65011 | ||
|
|
61f534ca34 | ||
|
|
a91c26785f | ||
|
|
d7e93551d2 | ||
|
|
06c742a2ad | ||
|
|
55b0797fd5 | ||
|
|
21443b9a08 | ||
|
|
4b167a3c3d | ||
|
|
2df77430aa | ||
|
|
2d114b15f9 | ||
|
|
26000b616d | ||
|
|
710eebab09 | ||
|
|
532423eb4c | ||
|
|
bb29e50adb | ||
|
|
4048d6782b | ||
|
|
76d36a312b | ||
|
|
2a75373c04 | ||
|
|
a840b0e815 | ||
|
|
ebcde719a6 | ||
|
|
5c912927bb | ||
|
|
0e55db054e | ||
|
|
5967ac0d4f | ||
|
|
1451483cf7 | ||
|
|
c14b85c12b | ||
|
|
9f3c0219d7 | ||
|
|
ec36fef26e | ||
|
|
5f1848d24b | ||
|
|
d6867bd12f | ||
|
|
17a1f30572 | ||
|
|
8e0dc1f256 | ||
|
|
b9100beee3 | ||
|
|
b8bc3d2565 | ||
|
|
3213e85b7d | ||
|
|
de3bcd64c4 | ||
|
|
ad7f1eec12 | ||
|
|
29310b4e92 | ||
|
|
2f4d36a146 | ||
|
|
6c9bb782b1 | ||
|
|
010d9103d4 | ||
|
|
12131eb7c5 | ||
|
|
80b830322a | ||
|
|
8db9d16174 | ||
|
|
1c92fab1fb | ||
|
|
974717d1b9 | ||
|
|
59fb631390 | ||
|
|
4824220260 | ||
|
|
55a338614d | ||
|
|
f033046963 | ||
|
|
6018fc068c | ||
|
|
d5b634301f | ||
|
|
a37eb1049d | ||
|
|
803ea9d8bc | ||
|
|
499bc25217 | ||
|
|
53d403af4b | ||
|
|
a0a8ea1641 | ||
|
|
26c68ccd7c | ||
|
|
fa010c8644 | ||
|
|
d58f398bc4 | ||
|
|
11383a86a1 | ||
|
|
daa52ff8df | ||
|
|
a5f41e22f7 | ||
|
|
530bb5233d | ||
|
|
4a64e09f6c | ||
|
|
74582bb8d5 | ||
|
|
1ca2101e3a | ||
|
|
e80311c323 | ||
|
|
2f24c422b6 | ||
|
|
0d0b9fddef | ||
|
|
1753cc99f4 | ||
|
|
4f8b036abe | ||
|
|
f83c89c202 | ||
|
|
bb89a036e5 | ||
|
|
b994a03466 | ||
|
|
27161f8e3b | ||
|
|
8acf9a488b | ||
|
|
96c6aeaada | ||
|
|
6722aae598 | ||
|
|
66564392a6 | ||
|
|
f258f5ab66 | ||
|
|
f8f0578c3d | ||
|
|
aa60a413f3 | ||
|
|
3e66f2378d | ||
|
|
9a50f33e36 | ||
|
|
4bd5e9c0a7 | ||
|
|
12092c8715 | ||
|
|
92cc6d39f2 | ||
|
|
34a50033cb | ||
|
|
e60b65228b | ||
|
|
e74864335b | ||
|
|
27a088a457 | ||
|
|
cfe72143b8 | ||
|
|
36a729cbfe | ||
|
|
d2f006682c | ||
|
|
fb7fe540f5 | ||
|
|
1ec68bd071 | ||
|
|
4536d03e82 | ||
|
|
699704732c | ||
|
|
376d969a77 | ||
|
|
68789dfcf0 | ||
|
|
fe9fc61c4e | ||
|
|
6028f0f23a | ||
|
|
e9a0959e28 | ||
|
|
f66be2cfa7 | ||
|
|
f818bed58f | ||
|
|
07b9be5308 | ||
|
|
40c2452d6e | ||
|
|
30cdd1b71a | ||
|
|
2110b79507 | ||
|
|
fc544fa61c | ||
|
|
976fe95304 | ||
|
|
408270b647 | ||
|
|
1dfb75bc9d | ||
|
|
cefc2a1088 | ||
|
|
3b9b9200ea | ||
|
|
d6f29a0f4b | ||
|
|
5b762d11ef | ||
|
|
2f3e2da6b9 | ||
|
|
45058d4a94 | ||
|
|
5b637bd826 | ||
|
|
2d4fd7e903 | ||
|
|
b5662520aa | ||
|
|
af45c170b5 | ||
|
|
65f548b2ec | ||
|
|
b29ab8c608 | ||
|
|
d6dc37f0b6 | ||
|
|
12bce2e8c0 | ||
|
|
4acf7296e0 | ||
|
|
98706d429c | ||
|
|
41720b1a13 | ||
|
|
3ef4245166 | ||
|
|
3bb0797922 | ||
|
|
7c7b4c52af | ||
|
|
01f083b7fc | ||
|
|
91fcaebe25 | ||
|
|
9c5fe5c85e | ||
|
|
7e5e167a4b | ||
|
|
d04c4b36f3 | ||
|
|
a811e53626 | ||
|
|
df57202a05 | ||
|
|
69e6f3fdb7 | ||
|
|
6809254963 | ||
|
|
81093d3bed | ||
|
|
d9a67164f6 | ||
|
|
d0f67fc189 | ||
|
|
6e3f96aa83 | ||
|
|
293677588d | ||
|
|
a5cdd5f1b8 | ||
|
|
5f937b8479 |
@@ -1,7 +1,8 @@
|
||||
repos:
|
||||
- repo: local
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.9.7
|
||||
hooks:
|
||||
- id: ruff-format-hook
|
||||
name: Check ruff formatting
|
||||
entry: sh scripts/pre-commit.sh
|
||||
language: system
|
||||
- id: ruff
|
||||
language_version: python3
|
||||
args: [ --select, I, ]
|
||||
- id: ruff-format
|
||||
|
||||
175
CHANGELOG.md
175
CHANGELOG.md
@@ -5,10 +5,114 @@ All notable changes to **Pipecat** will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Added a `flush_audio()` method to `AzureTTSService`, `FishTTSService`,
|
||||
`PlayHTTTSService`, and `LMNTTTSService`.
|
||||
|
||||
- Added `set_language()` and `set_model()` to `AzureSTTService`,
|
||||
`AssemblySTTService`, and `GladiaSTTService`.
|
||||
|
||||
- Added `on_user_turn_audio_data` and `on_bot_turn_audio_data` to
|
||||
`AudioBufferProcessor`. This gives the ability to grab the audio of only that
|
||||
turn for both the user and the bot.
|
||||
|
||||
- Added new base class `BaseObject` which is now the base class of
|
||||
`FrameProcessor`, `PipelineRunner`, `PipelineTask` and `BaseTransport`. The
|
||||
new `BaseObject` adds supports for event handlers.
|
||||
|
||||
- Added support for a unified format for specifying function calling across all
|
||||
LLM services.
|
||||
|
||||
```python
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
```
|
||||
|
||||
- Added `speech_threshold` parameter to `GladiaSTTService`.
|
||||
|
||||
- Allow passing user (`user_kwargs`) and assistant (`assistant_kwargs`) context
|
||||
aggregator parameters when using `create_context_aggregator()`. The values are
|
||||
passed as a mapping that will then be converted to arguments.
|
||||
|
||||
- Added `speed` as an `InputParam` for both `ElevenLabsTTSService` and
|
||||
`ElevenLabsHttpTTSService`.
|
||||
|
||||
- Added new `LLMFullResponseAggregator` to aggregate full LLM completions. At
|
||||
every completion the `on_completion` event handler is triggered.
|
||||
|
||||
- Added a new frame, `RTVIServerMessageFrame`, and RTVI message
|
||||
`RTVIServerMessage` which provides a generic mechanism for sending custom
|
||||
messages from server to client. The `RTVIServerMessageFrame` is processed by
|
||||
the `RTVIObserver` and will be delivered to the client's `onServerMessage`
|
||||
callback or `ServerMessage` event.
|
||||
|
||||
- Added `GoogleLLMOpenAIBetaService` for Google LLM integration with an
|
||||
OpenAI-compatible interface. Added foundational example
|
||||
`14o-function-calling-gemini-openai-format.py`.
|
||||
|
||||
- Added `AzureRealtimeBetaLLMService` to support Azure's OpeanAI Realtime API. Added
|
||||
foundational example `19a-azure-realtime-beta.py`.
|
||||
|
||||
### Changed
|
||||
|
||||
- Moved `flush_audio()` from the `TTSService` base class to
|
||||
`WebsocketTTSService`.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed an issue in `GoogleSTTService`, where it didn't have a `set_language`
|
||||
function. This required a name change from `set_languages` to `set_language`.
|
||||
|
||||
## [0.0.58] - 2025-02-26
|
||||
|
||||
### Added
|
||||
|
||||
- Added track-specific audio event `on_track_audio_data` to
|
||||
`AudioBufferProcessor` for accessing separate input and output audio tracks.
|
||||
|
||||
- Pipecat version will now be logged on every application startup. This will
|
||||
help us identify what version we are running in case of any issues.
|
||||
|
||||
- Added a new `StopFrame` which can be used to stop a pipeline task while
|
||||
keeping the frame processors running. The frame processors could then be used
|
||||
in a different pipeline. The difference between a `StopFrame` and a
|
||||
`StopTaskFrame` is that, as with `EndFrame` and `EndTaskFrame`, the
|
||||
`StopFrame` is pushed from the task and the `StopTaskFrame` is pushed upstream
|
||||
inside the pipeline by any processor.
|
||||
|
||||
- Added a new `PipelineTask` parameter `observers` that replaces the previous
|
||||
`PipelineParams.observers`.
|
||||
|
||||
- Added a new `PipelineTask` parameter `check_dangling_tasks` to enable or
|
||||
disable checking for frame processors' dangling tasks when the Pipeline
|
||||
finishes running.
|
||||
|
||||
- Added new `on_completion_timeout` event for LLM services (all OpenAI-based
|
||||
services, Anthropic and Google). Note that this event will only get triggered
|
||||
if LLM timeouts are setup and if the timeout was reached. It can be useful to
|
||||
retrigger another completion and see if the timeout was just a blip.
|
||||
|
||||
- Added new log observers `LLMLogObserver` and `TranscriptionLogObserver` that
|
||||
can be useful for debugging your pipelines.
|
||||
|
||||
- Added `room_url` property to `DailyTransport`.
|
||||
|
||||
- Added `addons` argument to `DeepgramSTTService`.
|
||||
@@ -17,6 +121,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Changed
|
||||
|
||||
- ⚠️ `PipelineTask` now requires keyword arguments (except for the first one for
|
||||
the pipeline).
|
||||
|
||||
- Updated `PlayHTHttpTTSService` to take a `voice_engine` and `protocol` input
|
||||
in the constructor. The previous method of providing a `voice_engine` input
|
||||
that contains the engine and protocol is deprecated by PlayHT.
|
||||
|
||||
- The base `TTSService` class now strips leading newlines before sending text
|
||||
to the TTS provider. This change is to solve issues where some TTS providers,
|
||||
like Azure, would not output text due to newlines.
|
||||
|
||||
- `GrokLLMSService` now uses `grok-2` as the default model.
|
||||
|
||||
- `AnthropicLLMService` now uses `claude-3-7-sonnet-20250219` as the default
|
||||
model.
|
||||
|
||||
- `RimeHttpTTSService` needs an `aiohttp.ClientSession` to be passed to the
|
||||
constructor as all the other HTTP-based services.
|
||||
|
||||
- `RimeHttpTTSService` doesn't use a default voice anymore.
|
||||
|
||||
- `DeepgramSTTService` now uses the new `nova-3` model by default. If you want
|
||||
to use the previous model you can pass `LiveOptions(model="nova-2-general")`.
|
||||
(see https://deepgram.com/learn/introducing-nova-3-speech-to-text-api)
|
||||
@@ -25,8 +150,53 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
||||
```
|
||||
|
||||
### Deprecated
|
||||
|
||||
- `PipelineParams.observers` is now deprecated, you the new `PipelineTask`
|
||||
parameter `observers`.
|
||||
|
||||
### Removed
|
||||
|
||||
- Remove `TransportParams.audio_out_is_live` since it was not being used at all.
|
||||
|
||||
### Fixed
|
||||
|
||||
- Fixed a `GoogleLLMService` that was causing an exception when sending inline
|
||||
audio in some cases.
|
||||
|
||||
- Fixed an `AudioContextWordTTSService` issue that would cause an `EndFrame` to
|
||||
disconnect from the TTS service before audio from all the contexts was
|
||||
received. This affected services like Cartesia and Rime.
|
||||
|
||||
- Fixed an issue that was not allowing to pass an `OpenAILLMContext` to create
|
||||
`GoogleLLMService`'s context aggregators.
|
||||
|
||||
- Fixed a `ElevenLabsTTSService`, `FishAudioTTSService`, `LMNTTTSService` and
|
||||
`PlayHTTTSService` issue that was resulting in audio requested before an
|
||||
interruption being played after an interruption.
|
||||
|
||||
- Fixed `match_endofsentence` support for ellipses.
|
||||
|
||||
- Fixed an issue that would cause undesired interruptions via
|
||||
`EmulateUserStartedSpeakingFrame` when only interim transcriptions (i.e. no
|
||||
final transcriptions) where received.
|
||||
|
||||
- Fixed an issue where `EndTaskFrame` was not triggering
|
||||
`on_client_disconnected` or closing the WebSocket in FastAPI.
|
||||
|
||||
- Fixed an issue in `DeepgramSTTService` where the `sample_rate` passed to the
|
||||
`LiveOptions` was not being used, causing the service to use the default
|
||||
sample rate of pipeline.
|
||||
|
||||
- Fixed a context aggregator issue that would not append the LLM text response
|
||||
to the context if a function call happened in the same LLM turn.
|
||||
|
||||
- Fixed an issue that was causing HTTP TTS services to push `TTSStoppedFrame`
|
||||
more than once.
|
||||
|
||||
- Fixed a `FishAudioTTSService` issue where `TTSStoppedFrame` was not being
|
||||
pushed.
|
||||
|
||||
- Fixed an issue that `start_callback` was not invoked for some LLM services.
|
||||
|
||||
- Fixed an issue that would cause `DeepgramSTTService` to stop working after an
|
||||
@@ -40,6 +210,9 @@ stt = DeepgramSTTService(..., live_options=LiveOptions(model="nova-2-general"))
|
||||
|
||||
- Added Gemini support to `examples/phone-chatbot`.
|
||||
|
||||
- Added foundational example `34-audio-recording.py` showing how to use the
|
||||
AudioBufferProcessor callbacks to save merged and track recordings.
|
||||
|
||||
## [0.0.57] - 2025-02-14
|
||||
|
||||
### Added
|
||||
|
||||
@@ -3,10 +3,10 @@ coverage~=7.6.12
|
||||
grpcio-tools~=1.67.1
|
||||
pip-tools~=7.4.1
|
||||
pre-commit~=4.0.1
|
||||
pyright~=1.1.393
|
||||
pyright~=1.1.394
|
||||
pytest~=8.3.4
|
||||
pytest-asyncio~=0.25.2
|
||||
ruff~=0.9.5
|
||||
pytest-asyncio~=0.25.3
|
||||
ruff~=0.9.7
|
||||
setuptools~=70.0.0
|
||||
setuptools_scm~=8.1.0
|
||||
python-dotenv~=1.0.1
|
||||
|
||||
@@ -18,6 +18,9 @@ AZURE_DALLE_API_KEY=...
|
||||
AZURE_DALLE_ENDPOINT=https://...
|
||||
AZURE_DALLE_MODEL=...
|
||||
|
||||
# Cartesia
|
||||
CARTESIA_API_KEY=...
|
||||
|
||||
# Daily
|
||||
DAILY_API_KEY=...
|
||||
DAILY_SAMPLE_ROOM_URL=https://...
|
||||
|
||||
@@ -17,7 +17,7 @@ from runner import configure
|
||||
from pipecat.frames.frames import AudioRawFrame, EndFrame, OutputAudioRawFrame, TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
|
||||
@@ -113,13 +113,13 @@ async def main():
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
audio_buffer_processor, # captures audio into a buffer
|
||||
canonical, # uploads audio buffer to Canonical AI for metrics
|
||||
audio_buffer_processor, # captures audio into a buffer
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -32,10 +32,16 @@ load_dotenv(override=True)
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
# Create the recordings directory if it doesn't exist
|
||||
os.makedirs("recordings", exist_ok=True)
|
||||
|
||||
async def save_audio(audio: bytes, sample_rate: int, num_channels: int):
|
||||
|
||||
async def save_audio(audio: bytes, sample_rate: int, num_channels: int, name: str):
|
||||
if len(audio) > 0:
|
||||
filename = f"conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
|
||||
filename = os.path.join(
|
||||
"recordings",
|
||||
f"{name}_conversation_recording{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.wav",
|
||||
)
|
||||
with io.BytesIO() as buffer:
|
||||
with wave.open(buffer, "wb") as wf:
|
||||
wf.setsampwidth(2)
|
||||
@@ -110,7 +116,7 @@ async def main():
|
||||
|
||||
# NOTE: Watch out! This will save all the conversation in memory. You
|
||||
# can pass `buffer_size` to get periodic callbacks.
|
||||
audiobuffer = AudioBufferProcessor()
|
||||
audiobuffer = AudioBufferProcessor(enable_turn_audio=True)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
@@ -124,11 +130,19 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@audiobuffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
await save_audio(audio, sample_rate, num_channels)
|
||||
await save_audio(audio, sample_rate, num_channels, "full")
|
||||
|
||||
@audiobuffer.event_handler("on_user_turn_audio_data")
|
||||
async def on_user_turn_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
await save_audio(audio, sample_rate, num_channels, "user")
|
||||
|
||||
@audiobuffer.event_handler("on_bot_turn_audio_data")
|
||||
async def on_bot_turn_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
await save_audio(audio, sample_rate, num_channels, "bot")
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -70,7 +70,7 @@ async def main(room_url: str, token: str):
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -62,7 +62,7 @@ async def main(room_url: str, token: str):
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -18,8 +18,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineTask
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -34,7 +33,9 @@ async def main():
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024),
|
||||
TkTransportParams(
|
||||
camera_out_enabled=True, camera_out_width=1024, camera_out_height=1024
|
||||
),
|
||||
)
|
||||
|
||||
imagegen = FalImageGenService(
|
||||
|
||||
@@ -44,7 +44,8 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(
|
||||
Pipeline([imagegen, transport.output()]), PipelineParams(enable_metrics=True)
|
||||
Pipeline([imagegen, transport.output()]),
|
||||
params=PipelineParams(enable_metrics=True),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
|
||||
@@ -30,8 +30,7 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.cartesia import CartesiaHttpTTSService
|
||||
from pipecat.services.fal import FalImageGenService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkOutputTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -152,7 +151,7 @@ async def main():
|
||||
|
||||
transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
TkTransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1024,
|
||||
|
||||
@@ -105,7 +105,10 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(enable_metrics=True, enable_usage_metrics=True),
|
||||
params=PipelineParams(
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
|
||||
@@ -127,7 +127,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -76,7 +76,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -79,7 +79,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -103,7 +103,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -81,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
103
examples/foundational/07d-interruptible-elevenlabs-http.py
Normal file
103
examples/foundational/07d-interruptible-elevenlabs-http.py
Normal file
@@ -0,0 +1,103 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.elevenlabs import ElevenLabsHttpTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsHttpTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -75,7 +75,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -77,7 +77,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -83,7 +83,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -81,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -81,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -75,7 +75,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -21,6 +21,7 @@ from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.gladia import GladiaSTTService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transcriptions.language import Language
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -80,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -71,7 +71,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -88,7 +88,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -81,7 +81,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -79,7 +79,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -80,7 +80,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -76,7 +76,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
103
examples/foundational/07q-interruptible-rime-http.py
Normal file
103
examples/foundational/07q-interruptible-rime-http.py
Normal file
@@ -0,0 +1,103 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.services.rime import RimeHttpTTSService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = RimeHttpTTSService(
|
||||
api_key=os.getenv("RIME_API_KEY", ""),
|
||||
voice_id="rex",
|
||||
aiohttp_session=session,
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -251,7 +251,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -74,7 +74,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -78,7 +78,11 @@ async def main():
|
||||
runner = PipelineRunner()
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline, PipelineParams(audio_in_sample_rate=24000, audio_out_sample_rate=24000)
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=24000,
|
||||
audio_out_sample_rate=24000,
|
||||
),
|
||||
)
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
@@ -24,8 +24,7 @@ from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.transports.base_transport import TransportParams
|
||||
from pipecat.transports.local.tk import TkLocalTransport
|
||||
from pipecat.transports.local.tk import TkLocalTransport, TkTransportParams
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
@@ -67,7 +66,7 @@ async def main():
|
||||
|
||||
tk_transport = TkLocalTransport(
|
||||
tk_root,
|
||||
TransportParams(
|
||||
TkTransportParams(
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_is_live=True,
|
||||
@@ -83,7 +82,11 @@ async def main():
|
||||
pipeline = Pipeline([daily_transport.input(), MirrorProcessor(), tk_transport.output()])
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline, PipelineParams(audio_in_sample_rate=24000, audio_out_sample_rate=24000)
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
audio_in_sample_rate=24000,
|
||||
audio_out_sample_rate=24000,
|
||||
),
|
||||
)
|
||||
|
||||
async def run_tk():
|
||||
|
||||
@@ -76,7 +76,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -29,22 +30,14 @@ logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def start_fetch_products(function_name, llm, context):
|
||||
async def start_fetch_weather(function_name, llm, context):
|
||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
||||
await llm.push_frame(TTSSpeakFrame("I'll take a look!"))
|
||||
logger.debug(f"Starting fetch_products_from_api with function_name: {function_name}")
|
||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
||||
|
||||
|
||||
async def fetch_products_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
logger.debug(f"args for fetch_products_from_api: {args}")
|
||||
# In the real world you'd fetch the products from an API. We're hardcoding them here.
|
||||
product = args["product"]
|
||||
if product == "vacuums":
|
||||
await result_callback({"vacuums": ["Dyson V11", "Roomba i7"]})
|
||||
elif product == "tvs":
|
||||
await result_callback({"tvs": ["Samsung 65 inch", "LG 55 inch"]})
|
||||
else:
|
||||
await result_callback({"error": "Unknown product"})
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def main():
|
||||
@@ -71,32 +64,30 @@ async def main():
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
|
||||
# Register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_products_from_api, start_callback=start_fetch_products)
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_products",
|
||||
"description": "Get the list of products available.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"product": {
|
||||
"type": "string",
|
||||
"enum": ["vacuums", "tvs"],
|
||||
"description": "The type of product to show.",
|
||||
}
|
||||
},
|
||||
"required": ["product"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful customer service agent named Hailey in a video call. Your goal is to sell vacuums or tvs. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
"content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
|
||||
},
|
||||
]
|
||||
|
||||
@@ -116,7 +107,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -13,6 +13,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -59,22 +61,18 @@ async def main():
|
||||
)
|
||||
llm.register_function("get_weather", get_weather)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
weather_function = FunctionSchema(
|
||||
name="get_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
}
|
||||
]
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
|
||||
# todo: test with very short initial user message
|
||||
|
||||
@@ -99,7 +97,13 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -13,6 +13,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -72,36 +74,29 @@ async def main():
|
||||
llm.register_function("get_weather", get_weather)
|
||||
llm.register_function("get_image", get_image)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather in a given location",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
}
|
||||
},
|
||||
"required": ["location"],
|
||||
weather_function = FunctionSchema(
|
||||
name="get_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "get_image",
|
||||
"description": "Get an image from the video stream.",
|
||||
"input_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
"required": ["question"],
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
get_image_function = FunctionSchema(
|
||||
name="get_image",
|
||||
description="Get an image from the video stream.",
|
||||
properties={
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
]
|
||||
required=["question"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||
|
||||
# todo: test with very short initial user message
|
||||
|
||||
@@ -153,7 +148,13 @@ If you need to use a tool, simply use the tool. Do not tell the user the tool yo
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -69,30 +70,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
@@ -66,47 +67,34 @@ async def main():
|
||||
llm.register_function("get_weather", get_weather)
|
||||
llm.register_function("get_image", get_image)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
),
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_image",
|
||||
"description": "Get an image from the video stream.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question to ask the AI to generate an image of",
|
||||
},
|
||||
},
|
||||
"required": ["question"],
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
),
|
||||
]
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
get_image_function = FunctionSchema(
|
||||
name="get_image",
|
||||
description="Get an image from the video stream.",
|
||||
properties={
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
required=["question"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||
|
||||
system_prompt = """\
|
||||
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
||||
|
||||
@@ -13,6 +13,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -73,45 +75,34 @@ async def main():
|
||||
llm.register_function("get_weather", get_weather, start_fetch_weather)
|
||||
llm.register_function("get_image", get_image)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"function_declarations": [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "get_image",
|
||||
"description": "Get and image from the camera or video stream.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question to to use when running inference on the acquired image.",
|
||||
},
|
||||
},
|
||||
"required": ["question"],
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
]
|
||||
weather_function = FunctionSchema(
|
||||
name="get_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
get_image_function = FunctionSchema(
|
||||
name="get_image",
|
||||
description="Get an image from the video stream.",
|
||||
properties={
|
||||
"question": {
|
||||
"type": "string",
|
||||
"description": "The question that the user is asking about the image.",
|
||||
}
|
||||
},
|
||||
required=["question"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function, get_image_function])
|
||||
|
||||
system_prompt = """\
|
||||
You are a helpful assistant who converses with a user and answers questions. Respond concisely to general questions.
|
||||
@@ -152,7 +143,7 @@ indicate you should use the get_image tool are:
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -68,30 +69,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"unit": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -116,7 +110,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -66,30 +67,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -113,7 +107,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -70,30 +71,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -117,7 +111,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -69,30 +70,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -116,7 +110,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -69,30 +70,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Returns the current weather at a location, if one is specified, and defaults to the user's location.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The location to find the weather of, or if not provided, it's the default location.",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "Whether to use SI or USCS units (celsius or fahrenheit).",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -116,7 +110,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -66,30 +67,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather for a specific location. You MUST use this function whenever asked about weather.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Use fahrenheit for US locations, celsius for others.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -123,7 +117,7 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -66,30 +67,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather for a specific location. You MUST use this function whenever asked about weather.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Use fahrenheit for US locations, celsius for others.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -123,7 +117,7 @@ Start by asking me for my location. Then, use 'get_weather_current' to give me a
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -11,9 +11,10 @@ import sys
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from openai.types.chat import ChatCompletionToolParam
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -70,30 +71,23 @@ async def main():
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(None, fetch_weather_from_api, start_callback=start_fetch_weather)
|
||||
|
||||
tools = [
|
||||
ChatCompletionToolParam(
|
||||
type="function",
|
||||
function={
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
)
|
||||
]
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
@@ -117,7 +111,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -83,7 +83,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -0,0 +1,131 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import TTSSpeakFrame
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.google import GoogleLLMOpenAIBetaService
|
||||
from pipecat.services.openai import OpenAILLMContext
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def start_fetch_weather(function_name, llm, context):
|
||||
"""Push a frame to the LLM; this is handy when the LLM response might take a while."""
|
||||
await llm.push_frame(TTSSpeakFrame("Let me check on that."))
|
||||
logger.debug(f"Starting fetch_weather_from_api with function_name: {function_name}")
|
||||
|
||||
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
await result_callback({"conditions": "nice", "temperature": "75"})
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
),
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
api_key=os.getenv("ELEVENLABS_API_KEY", ""),
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
llm = GoogleLLMOpenAIBetaService(api_key=os.getenv("GEMINI_API_KEY"))
|
||||
# Register a function_name of None to get all functions
|
||||
# sent to the same callback with an additional function_name parameter.
|
||||
llm.register_function(
|
||||
"get_current_weather", fetch_weather_from_api, start_callback=start_fetch_weather
|
||||
)
|
||||
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
tools = ToolsSchema(standard_tools=[weather_function])
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Start a conversation with 'Hey there' to get the current weather.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages, tools)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -133,7 +133,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -126,7 +126,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -85,7 +85,13 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True, enable_metrics=True))
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
),
|
||||
)
|
||||
|
||||
# When a participant joins, start transcription for that participant so the
|
||||
# bot can "hear" and respond to them.
|
||||
|
||||
@@ -108,7 +108,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
|
||||
@@ -38,7 +38,6 @@ async def main():
|
||||
"GStreamer",
|
||||
DailyParams(
|
||||
audio_out_enabled=True,
|
||||
audio_out_is_live=True,
|
||||
camera_out_enabled=True,
|
||||
camera_out_width=1280,
|
||||
camera_out_height=720,
|
||||
|
||||
@@ -154,7 +154,7 @@ Remember, your responses should be short. Just one or two sentences, usually."""
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
179
examples/foundational/19a-azure-realtime-beta.py
Normal file
179
examples/foundational/19a-azure-realtime-beta.py
Normal file
@@ -0,0 +1,179 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
import aiohttp
|
||||
import websockets
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.services.openai_realtime_beta import (
|
||||
AzureRealtimeBetaLLMService,
|
||||
InputAudioTranscription,
|
||||
SessionProperties,
|
||||
TurnDetection,
|
||||
)
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context, result_callback):
|
||||
temperature = 75 if args["format"] == "fahrenheit" else 24
|
||||
await result_callback(
|
||||
{
|
||||
"conditions": "nice",
|
||||
"temperature": temperature,
|
||||
"format": args["format"],
|
||||
"timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Respond bot",
|
||||
DailyParams(
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.8)),
|
||||
vad_audio_passthrough=True,
|
||||
),
|
||||
)
|
||||
|
||||
session_properties = SessionProperties(
|
||||
input_audio_transcription=InputAudioTranscription(),
|
||||
# Set openai TurnDetection parameters. Not setting this at all will turn it
|
||||
# on by default
|
||||
# turn_detection=TurnDetection(silence_duration_ms=1000),
|
||||
# Or set to False to disable openai turn detection and use transport VAD
|
||||
# turn_detection=False,
|
||||
# tools=tools,
|
||||
instructions="""Your knowledge cutoff is 2023-10. You are a helpful and friendly AI.
|
||||
|
||||
Act like a human, but remember that you aren't a human and that you can't do human
|
||||
things in the real world. Your voice and personality should be warm and engaging, with a lively and
|
||||
playful tone.
|
||||
|
||||
If interacting in a non-English language, start by using the standard accent or dialect familiar to
|
||||
the user. Talk quickly. You should always call a function if you can. Do not refer to these rules,
|
||||
even if you're asked about them.
|
||||
-
|
||||
You are participating in a voice conversation. Keep your responses concise, short, and to the point
|
||||
unless specifically asked to elaborate on a topic.
|
||||
|
||||
Remember, your responses should be short. Just one or two sentences, usually.""",
|
||||
)
|
||||
|
||||
llm = AzureRealtimeBetaLLMService(
|
||||
api_key=os.getenv("AZURE_REALTIME_API_KEY"),
|
||||
base_url=os.getenv("AZURE_REALTIME_BASE_URL"),
|
||||
session_properties=session_properties,
|
||||
start_audio_paused=False,
|
||||
)
|
||||
|
||||
# you can either register a single function for all function calls, or specific functions
|
||||
# llm.register_function(None, fetch_weather_from_api)
|
||||
llm.register_function("get_current_weather", fetch_weather_from_api)
|
||||
|
||||
# Create a standard OpenAI LLM context object using the normal messages format. The
|
||||
# OpenAIRealtimeBetaLLMService will convert this internally to messages that the
|
||||
# openai WebSocket API can understand.
|
||||
context = OpenAILLMContext(
|
||||
[{"role": "user", "content": "Say hello!"}],
|
||||
# [{"role": "user", "content": [{"type": "text", "text": "Say hello!"}]}],
|
||||
# [
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": [
|
||||
# {"type": "text", "text": "Say"},
|
||||
# {"type": "text", "text": "yo what's up!"},
|
||||
# ],
|
||||
# }
|
||||
# ],
|
||||
tools,
|
||||
)
|
||||
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(),
|
||||
llm, # LLM
|
||||
context_aggregator.assistant(),
|
||||
transport.output(), # Transport bot output
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
# report_only_initial_ttfb=True,
|
||||
),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# Kick off the conversation.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -212,7 +212,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -237,7 +237,7 @@ Remember, your responses should be short. Just one or two sentences, usually."""
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -209,7 +209,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -263,7 +263,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -87,7 +87,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
# We just use 16000 because that's what Tavus is expecting and
|
||||
# we avoid resampling.
|
||||
audio_in_sample_rate=16000,
|
||||
|
||||
@@ -145,7 +145,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -138,6 +138,7 @@ class OutputGate(FrameProcessor):
|
||||
self._gate_open = start_open
|
||||
self._frames_buffer = []
|
||||
self._notifier = notifier
|
||||
self._gate_task = None
|
||||
|
||||
def close_gate(self):
|
||||
self._gate_open = False
|
||||
@@ -178,10 +179,13 @@ class OutputGate(FrameProcessor):
|
||||
|
||||
async def _start(self):
|
||||
self._frames_buffer = []
|
||||
self._gate_task = self.create_task(self._gate_task_handler())
|
||||
if not self._gate_task:
|
||||
self._gate_task = self.create_task(self._gate_task_handler())
|
||||
|
||||
async def _stop(self):
|
||||
await self.cancel_task(self._gate_task)
|
||||
if self._gate_task:
|
||||
await self.cancel_task(self._gate_task)
|
||||
self._gate_task = None
|
||||
|
||||
async def _gate_task_handler(self):
|
||||
while True:
|
||||
@@ -351,7 +355,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -342,6 +342,7 @@ class OutputGate(FrameProcessor):
|
||||
self._gate_open = start_open
|
||||
self._frames_buffer = []
|
||||
self._notifier = notifier
|
||||
self._gate_task = None
|
||||
|
||||
def close_gate(self):
|
||||
self._gate_open = False
|
||||
@@ -382,10 +383,13 @@ class OutputGate(FrameProcessor):
|
||||
|
||||
async def _start(self):
|
||||
self._frames_buffer = []
|
||||
self._gate_task = self.create_task(self._gate_task_handler())
|
||||
if not self._gate_task:
|
||||
self._gate_task = self.create_task(self._gate_task_handler())
|
||||
|
||||
async def _stop(self):
|
||||
await self.cancel_task(self._gate_task)
|
||||
if self._gate_task:
|
||||
await self.cancel_task(self._gate_task)
|
||||
self._gate_task = None
|
||||
|
||||
async def _gate_task_handler(self):
|
||||
while True:
|
||||
@@ -560,7 +564,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -23,12 +23,9 @@ from pipecat.frames.frames import (
|
||||
FunctionCallInProgressFrame,
|
||||
FunctionCallResultFrame,
|
||||
InputAudioRawFrame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMMessagesFrame,
|
||||
StartFrame,
|
||||
StartInterruptionFrame,
|
||||
StopInterruptionFrame,
|
||||
SystemFrame,
|
||||
TextFrame,
|
||||
TranscriptionFrame,
|
||||
@@ -39,7 +36,7 @@ from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.llm_response import LLMResponseAggregator
|
||||
from pipecat.processors.aggregators.llm_response import LLMAssistantResponseAggregator
|
||||
from pipecat.processors.aggregators.openai_llm_context import (
|
||||
OpenAILLMContext,
|
||||
OpenAILLMContextFrame,
|
||||
@@ -391,7 +388,7 @@ class AudioAccumulator(FrameProcessor):
|
||||
)
|
||||
self._user_speaking = False
|
||||
context = GoogleLLMContext()
|
||||
context.add_audio_frames_message(text="Audio follows", audio_frames=self._audio_frames)
|
||||
context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self.push_frame(OpenAILLMContextFrame(context=context))
|
||||
elif isinstance(frame, InputAudioRawFrame):
|
||||
# Append the audio frame to our buffer. Treat the buffer as a ring buffer, dropping the oldest
|
||||
@@ -434,7 +431,11 @@ class CompletenessCheck(FrameProcessor):
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, UserStartedSpeakingFrame):
|
||||
if isinstance(frame, (EndFrame, CancelFrame)):
|
||||
if self._idle_task:
|
||||
await self.cancel_task(self._idle_task)
|
||||
self._idle_task = None
|
||||
elif isinstance(frame, UserStartedSpeakingFrame):
|
||||
if self._idle_task:
|
||||
await self.cancel_task(self._idle_task)
|
||||
elif isinstance(frame, TextFrame) and frame.text.startswith("YES"):
|
||||
@@ -476,19 +477,11 @@ class CompletenessCheck(FrameProcessor):
|
||||
self._idle_task = None
|
||||
|
||||
|
||||
class UserAggregatorBuffer(LLMResponseAggregator):
|
||||
class LLMAggregatorBuffer(LLMAssistantResponseAggregator):
|
||||
"""Buffers the output of the transcription LLM. Used by the bot output gate."""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(
|
||||
messages=None,
|
||||
role=None,
|
||||
start_frame=LLMFullResponseStartFrame,
|
||||
end_frame=LLMFullResponseEndFrame,
|
||||
accumulator_frame=TextFrame,
|
||||
handle_interruptions=True,
|
||||
expect_stripped_words=False,
|
||||
)
|
||||
super().__init__(expect_stripped_words=False)
|
||||
self._transcription = ""
|
||||
|
||||
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
||||
@@ -546,7 +539,7 @@ class OutputGate(FrameProcessor):
|
||||
self,
|
||||
notifier: BaseNotifier,
|
||||
context: OpenAILLMContext,
|
||||
user_transcription_buffer: "UserAggregatorBuffer",
|
||||
llm_transcription_buffer: LLMAggregatorBuffer,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
@@ -554,7 +547,8 @@ class OutputGate(FrameProcessor):
|
||||
self._frames_buffer = []
|
||||
self._notifier = notifier
|
||||
self._context = context
|
||||
self._transcription_buffer = user_transcription_buffer
|
||||
self._transcription_buffer = llm_transcription_buffer
|
||||
self._gate_task = None
|
||||
|
||||
def close_gate(self):
|
||||
self._gate_open = False
|
||||
@@ -602,10 +596,13 @@ class OutputGate(FrameProcessor):
|
||||
|
||||
async def _start(self):
|
||||
self._frames_buffer = []
|
||||
self._gate_task = self.create_task(self._gate_task_handler())
|
||||
if not self._gate_task:
|
||||
self._gate_task = self.create_task(self._gate_task_handler())
|
||||
|
||||
async def _stop(self):
|
||||
await self.cancel_task(self._gate_task)
|
||||
if self._gate_task:
|
||||
await self.cancel_task(self._gate_task)
|
||||
self._gate_task = None
|
||||
|
||||
async def _gate_task_handler(self):
|
||||
while True:
|
||||
@@ -697,10 +694,10 @@ async def main():
|
||||
|
||||
conversation_audio_context_assembler = ConversationAudioContextAssembler(context=context)
|
||||
|
||||
user_aggregator_buffer = UserAggregatorBuffer()
|
||||
llm_aggregator_buffer = LLMAggregatorBuffer()
|
||||
|
||||
bot_output_gate = OutputGate(
|
||||
notifier=notifier, context=context, user_transcription_buffer=user_aggregator_buffer
|
||||
notifier=notifier, context=context, llm_transcription_buffer=llm_aggregator_buffer
|
||||
)
|
||||
|
||||
pipeline = Pipeline(
|
||||
@@ -721,7 +718,7 @@ async def main():
|
||||
],
|
||||
[
|
||||
tx_llm,
|
||||
user_aggregator_buffer,
|
||||
llm_aggregator_buffer,
|
||||
],
|
||||
)
|
||||
],
|
||||
@@ -740,7 +737,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -87,7 +87,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -122,7 +122,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -354,7 +354,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -63,7 +63,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -89,7 +89,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -14,6 +14,8 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.adapters.schemas.function_schema import FunctionSchema
|
||||
from pipecat.adapters.schemas.tools_schema import AdapterType, ToolsSchema
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.audio.vad.vad_analyzer import VADParams
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
@@ -41,32 +43,6 @@ async def fetch_weather_from_api(function_name, tool_call_id, args, llm, context
|
||||
)
|
||||
|
||||
|
||||
tools = [
|
||||
{
|
||||
"function_declarations": [
|
||||
{
|
||||
"name": "get_current_weather",
|
||||
"description": "Get the current weather",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the users location.",
|
||||
},
|
||||
},
|
||||
"required": ["location", "format"],
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
system_instruction = """
|
||||
You are a helpful assistant who can answer questions and use tools.
|
||||
|
||||
@@ -95,6 +71,27 @@ async def main():
|
||||
),
|
||||
)
|
||||
|
||||
weather_function = FunctionSchema(
|
||||
name="get_current_weather",
|
||||
description="Get the current weather",
|
||||
properties={
|
||||
"location": {
|
||||
"type": "string",
|
||||
"description": "The city and state, e.g. San Francisco, CA",
|
||||
},
|
||||
"format": {
|
||||
"type": "string",
|
||||
"enum": ["celsius", "fahrenheit"],
|
||||
"description": "The temperature unit to use. Infer this from the user's location.",
|
||||
},
|
||||
},
|
||||
required=["location", "format"],
|
||||
)
|
||||
search_tool = {"google_search": {}}
|
||||
tools = ToolsSchema(
|
||||
standard_tools=[weather_function], custom_tools={AdapterType.GEMINI: [search_tool]}
|
||||
)
|
||||
|
||||
llm = GeminiMultimodalLiveLLMService(
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
system_instruction=system_instruction,
|
||||
@@ -120,7 +117,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -79,7 +79,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -106,7 +106,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024-2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -34,7 +34,7 @@ search_tool = {"google_search": {}}
|
||||
tools = [search_tool]
|
||||
|
||||
system_instruction = """
|
||||
You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so avoid using special characters or overly complex formatting.
|
||||
You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so avoid using special characters or overly complex formatting.
|
||||
|
||||
Always use the google search API to retrieve the latest news. You must also use it to check which day is today.
|
||||
|
||||
@@ -93,7 +93,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -83,7 +83,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
),
|
||||
|
||||
@@ -150,7 +150,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -150,7 +150,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -178,7 +178,7 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
@@ -18,12 +18,10 @@ from pipecat.frames.frames import (
|
||||
BotStartedSpeakingFrame,
|
||||
BotStoppedSpeakingFrame,
|
||||
Frame,
|
||||
LLMFullResponseEndFrame,
|
||||
LLMFullResponseStartFrame,
|
||||
LLMTextFrame,
|
||||
StartInterruptionFrame,
|
||||
)
|
||||
from pipecat.observers.base_observer import BaseObserver
|
||||
from pipecat.observers.loggers.llm_log_observer import LLMLogObserver
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
@@ -73,38 +71,6 @@ class DebugObserver(BaseObserver):
|
||||
logger.info(f"🤖 BOT STOP SPEAKING: {src} {arrow} {dst} at {time_sec:.2f}s")
|
||||
|
||||
|
||||
class LLMLogObserver(BaseObserver):
|
||||
"""Observer to log LLM activity to the console.
|
||||
|
||||
Logs all frame instances of:
|
||||
- LLMFullResponseStartFrame (only from LLM service)
|
||||
- LLMTextFrame
|
||||
- LLMFullResponseEndFrame (only from LLM service)
|
||||
|
||||
This allows you to track when the LLM starts responding, what it generates, and when it finishes.
|
||||
Log format: [LLM EVENT]: [details] at [timestamp]s
|
||||
"""
|
||||
|
||||
async def on_push_frame(
|
||||
self,
|
||||
src: FrameProcessor,
|
||||
dst: FrameProcessor,
|
||||
frame: Frame,
|
||||
direction: FrameDirection,
|
||||
timestamp: int,
|
||||
):
|
||||
time_sec = timestamp / 1_000_000_000
|
||||
|
||||
# Only log start/end frames from OpenAILLMService
|
||||
if isinstance(frame, (LLMFullResponseStartFrame, LLMFullResponseEndFrame)):
|
||||
if isinstance(src, OpenAILLMService):
|
||||
event = "START" if isinstance(frame, LLMFullResponseStartFrame) else "END"
|
||||
logger.info(f"🧠 LLM {event} RESPONSE at {time_sec:.2f}s")
|
||||
# Log all LLMTextFrames
|
||||
elif isinstance(frame, LLMTextFrame):
|
||||
logger.info(f"🧠 LLM GENERATING: {frame.text!r} at {time_sec:.2f}s")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
@@ -151,13 +117,13 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
report_only_initial_ttfb=True,
|
||||
observers=[DebugObserver(), LLMLogObserver()],
|
||||
),
|
||||
observers=[DebugObserver(), LLMLogObserver()],
|
||||
)
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
|
||||
@@ -32,7 +32,7 @@ async def main():
|
||||
|
||||
pipeline = Pipeline([NullProcessor()])
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(enable_heartbeats=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(enable_heartbeats=True))
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (c) 2024, Daily
|
||||
# Copyright (c) 2024-2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
@@ -38,7 +38,7 @@ search_tool = {"google_search_retrieval": {}}
|
||||
tools = [search_tool]
|
||||
|
||||
system_instruction = """
|
||||
You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so avoid using special characters or overly complex formatting.
|
||||
You are an expert at providing the most recent news from any place. Your responses will be converted to audio, so avoid using special characters or overly complex formatting.
|
||||
|
||||
Always use the google search API to retrieve the latest news. You must also use it to check which day is today.
|
||||
|
||||
@@ -117,7 +117,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -230,7 +230,7 @@ Your response will be turned into speech so use only simple words and punctuatio
|
||||
)
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
enable_metrics=True,
|
||||
enable_usage_metrics=True,
|
||||
|
||||
186
examples/foundational/34-audio-recording.py
Normal file
186
examples/foundational/34-audio-recording.py
Normal file
@@ -0,0 +1,186 @@
|
||||
#
|
||||
# Copyright (c) 2024–2025, Daily
|
||||
#
|
||||
# SPDX-License-Identifier: BSD 2-Clause License
|
||||
#
|
||||
|
||||
"""Audio Recording Example with Pipecat.
|
||||
|
||||
This example demonstrates how to record audio from a conversation between a user and an AI assistant,
|
||||
saving both merged and individual audio tracks. It showcases the AudioBufferProcessor's capabilities
|
||||
to handle both combined and separate audio streams.
|
||||
|
||||
The example:
|
||||
1. Sets up a basic conversation with an AI assistant
|
||||
2. Records the entire conversation
|
||||
3. Saves three separate WAV files:
|
||||
- A merged recording of both participants
|
||||
- Individual recording of user audio
|
||||
- Individual recording of assistant audio
|
||||
|
||||
Example usage (run from pipecat root directory):
|
||||
$ pip install "pipecat-ai[daily,openai,cartesia,silero]"
|
||||
$ pip install -r dev-requirements.txt
|
||||
$ python examples/foundational/34-audio-recording.py
|
||||
|
||||
Requirements:
|
||||
- OpenAI API key (for GPT-4)
|
||||
- Cartesia API key (for text-to-speech)
|
||||
- Daily API key (for video/audio transport)
|
||||
|
||||
Environment variables (.env file):
|
||||
OPENAI_API_KEY=your_openai_key
|
||||
CARTESIA_API_KEY=your_cartesia_key
|
||||
DAILY_API_KEY=your_daily_key
|
||||
|
||||
The recordings will be saved in a 'recordings' directory with timestamps:
|
||||
recordings/
|
||||
merged_20240315_123456.wav (Combined audio)
|
||||
user_20240315_123456.wav (User audio only)
|
||||
bot_20240315_123456.wav (Bot audio only)
|
||||
|
||||
Note:
|
||||
This example requires the AudioBufferProcessor with track-specific audio support,
|
||||
which provides both 'on_audio_data' and 'on_track_audio_data' events for
|
||||
handling merged and separate audio tracks respectively.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import datetime
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import wave
|
||||
|
||||
import aiofiles
|
||||
import aiohttp
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from runner import configure
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.aggregators.openai_llm_context import OpenAILLMContext
|
||||
from pipecat.processors.audio.audio_buffer_processor import AudioBufferProcessor
|
||||
from pipecat.services.cartesia import CartesiaTTSService
|
||||
from pipecat.services.openai import OpenAILLMService
|
||||
from pipecat.transports.services.daily import DailyParams, DailyTransport
|
||||
|
||||
load_dotenv(override=True)
|
||||
logger.remove(0)
|
||||
logger.add(sys.stderr, level="DEBUG")
|
||||
|
||||
|
||||
async def save_audio_file(audio: bytes, filename: str, sample_rate: int, num_channels: int):
|
||||
"""Save audio data to a WAV file."""
|
||||
if len(audio) > 0:
|
||||
with io.BytesIO() as buffer:
|
||||
with wave.open(buffer, "wb") as wf:
|
||||
wf.setsampwidth(2)
|
||||
wf.setnchannels(num_channels)
|
||||
wf.setframerate(sample_rate)
|
||||
wf.writeframes(audio)
|
||||
async with aiofiles.open(filename, "wb") as file:
|
||||
await file.write(buffer.getvalue())
|
||||
logger.info(f"Audio saved to {filename}")
|
||||
|
||||
|
||||
async def main():
|
||||
async with aiohttp.ClientSession() as session:
|
||||
(room_url, token) = await configure(session)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Recording bot",
|
||||
DailyParams(
|
||||
# audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
transcription_enabled=True,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True, # Enable audio passthrough for recording
|
||||
),
|
||||
)
|
||||
|
||||
tts = CartesiaTTSService(
|
||||
api_key=os.getenv("CARTESIA_API_KEY"),
|
||||
voice_id="79a125e8-cd45-4c13-8a67-188112f4dd22",
|
||||
)
|
||||
|
||||
llm = OpenAILLMService(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4")
|
||||
|
||||
# Create audio buffer processor
|
||||
audiobuffer = AudioBufferProcessor()
|
||||
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": "You are a helpful assistant demonstrating audio recording capabilities. Keep your responses brief and clear.",
|
||||
},
|
||||
]
|
||||
|
||||
context = OpenAILLMContext(messages)
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(),
|
||||
context_aggregator.user(),
|
||||
llm,
|
||||
tts,
|
||||
transport.output(),
|
||||
audiobuffer, # Add audio buffer to pipeline
|
||||
context_aggregator.assistant(),
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
await audiobuffer.start_recording()
|
||||
messages.append(
|
||||
{
|
||||
"role": "system",
|
||||
"content": "Greet the user and explain that this conversation will be recorded.",
|
||||
}
|
||||
)
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await audiobuffer.stop_recording()
|
||||
await task.cancel()
|
||||
|
||||
# Handler for merged audio
|
||||
@audiobuffer.event_handler("on_audio_data")
|
||||
async def on_audio_data(buffer, audio, sample_rate, num_channels):
|
||||
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
filename = f"recordings/merged_{timestamp}.wav"
|
||||
os.makedirs("recordings", exist_ok=True)
|
||||
await save_audio_file(audio, filename, sample_rate, num_channels)
|
||||
|
||||
# Handler for separate tracks
|
||||
@audiobuffer.event_handler("on_track_audio_data")
|
||||
async def on_track_audio_data(buffer, user_audio, bot_audio, sample_rate, num_channels):
|
||||
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
os.makedirs("recordings", exist_ok=True)
|
||||
|
||||
# Save user audio
|
||||
user_filename = f"recordings/user_{timestamp}.wav"
|
||||
await save_audio_file(user_audio, user_filename, sample_rate, 1)
|
||||
|
||||
# Save bot audio
|
||||
bot_filename = f"recordings/bot_{timestamp}.wav"
|
||||
await save_audio_file(bot_audio, bot_filename, sample_rate, 1)
|
||||
|
||||
runner = PipelineRunner()
|
||||
await runner.run(task)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
@@ -92,10 +92,8 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
params=PipelineParams(
|
||||
allow_interruptions=True,
|
||||
observers=[rtvi.observer()],
|
||||
),
|
||||
params=PipelineParams(allow_interruptions=True),
|
||||
observers=[rtvi.observer()],
|
||||
)
|
||||
|
||||
@rtvi.event_handler("on_client_ready")
|
||||
|
||||
@@ -140,10 +140,8 @@ async def main():
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(
|
||||
allow_interruptions=True,
|
||||
observers=[GoogleRTVIObserver(rtvi)],
|
||||
),
|
||||
params=PipelineParams(allow_interruptions=True),
|
||||
observers=[GoogleRTVIObserver(rtvi)],
|
||||
)
|
||||
|
||||
@rtvi.event_handler("on_client_ready")
|
||||
|
||||
@@ -346,7 +346,7 @@ async def main():
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=False))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=False))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
@@ -106,12 +106,12 @@ curl -X POST "http://localhost:7860/daily_start_bot" \
|
||||
-d '{"dialoutNumber": "+18057145330", "detectVoicemail": true}'
|
||||
```
|
||||
|
||||
### New! Using Gemini with Daily
|
||||
### New! Using Gemini 2.0 Flash Lite with Daily
|
||||
|
||||
We have introduced a new example file that uses Gemini. You can find the code within bot_daily_gemini.py.
|
||||
If you want to spin up a Gemini-based bot for this demo, instead of an OpenAI-based bot, call the same properties above but on the `daily_gemini_start_bot` endpoint instead.
|
||||
We have introduced support for Google's Gemini 2.0 Flash Lite model in this example. This lightweight model offers faster response times and reduced costs while maintaining good conversational capabilities.
|
||||
|
||||
For example:
|
||||
**Quick Start**
|
||||
To use the Gemini-based bot instead of OpenAI:
|
||||
|
||||
```shell
|
||||
curl -X POST "http://localhost:7860/daily_gemini_start_bot" \ py pipecat
|
||||
@@ -119,7 +119,27 @@ curl -X POST "http://localhost:7860/daily_gemini_start_bot" \
|
||||
-d '{"detectVoicemail": true}'
|
||||
```
|
||||
|
||||
Any request body properties supported by `/daily_start_bot` (such as "detectVoicemail", "dialoutnumber", etc) can also be passed to `/daily_gemini_start_bot`. The only difference is that calling the Gemini endpoint will start a Gemini bot session.
|
||||
All request body parameters supported by /daily_start_bot (such as detectVoicemail, dialoutNumber, etc.) are also compatible with /daily_gemini_start_bot.
|
||||
|
||||
This example uses context switching to help steer the bot in the right direction. As Flash Lite is a smaller model, breaking the prompt down into smaller piece helps to improve the bot's accuracy.
|
||||
|
||||
For example, instead of giving one large prompt like:
|
||||
|
||||
```python
|
||||
system_instruction="""You are a chatbot that needs to detect if you're talking to a voicemail system or human, then either leave a message or have a conversation. If it's voicemail, say "Hello, this is a message..." and hang up. If it's a human, introduce yourself and be helpful until they say goodbye."""
|
||||
```
|
||||
|
||||
We break it into stages:
|
||||
|
||||
First prompt focuses only on detection: "Determine if this is voicemail or human"
|
||||
After detection, we switch to a new context: either "Leave this specific voicemail message" or "Have a conversation with the human".
|
||||
|
||||
**Implementation Details**
|
||||
The implementation is available in bot_daily_gemini.py and features:
|
||||
|
||||
- Staged prompting approach: Breaking down complex tasks into smaller, more focused prompts to improve the lightweight model's performance
|
||||
- Dynamic context switching: The bot can change its behavior in real-time based on what it detects (voicemail vs. human caller)
|
||||
- Function-based architecture: Uses function calling to trigger context switches and call termination
|
||||
|
||||
### More information
|
||||
|
||||
|
||||
@@ -49,7 +49,11 @@ async def main(
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
|
||||
dialin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
|
||||
# We don't want to specify dial-in settings if we're not dialing in
|
||||
dialin_settings = None
|
||||
if callId and callDomain:
|
||||
dialin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
@@ -96,6 +100,13 @@ async def main(
|
||||
- **"Please leave a message after the beep."**
|
||||
- **"No one is available to take your call."**
|
||||
- **"Record your message after the tone."**
|
||||
- **"Please leave a message after the beep"**
|
||||
- **"You have reached voicemail for..."**
|
||||
- **"You have reached [phone number]"**
|
||||
- **"[phone number] is unavailable"**
|
||||
- **"The person you are trying to reach..."**
|
||||
- **"The number you have dialed..."**
|
||||
- **"Your call has been forwarded to an automated voice messaging system"**
|
||||
- **Any phrase that suggests an answering machine or voicemail.**
|
||||
- **ASSUME IT IS A VOICEMAIL. DO NOT WAIT FOR MORE CONFIRMATION.**
|
||||
- **IF THE CALL SAYS "PLEASE LEAVE A MESSAGE AFTER THE BEEP", WAIT FOR THE BEEP BEFORE LEAVING A MESSAGE.**
|
||||
@@ -139,7 +150,7 @@ async def main(
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
if dialout_number:
|
||||
logger.debug("dialout number detected; doing dialout")
|
||||
|
||||
@@ -13,15 +13,29 @@ from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
|
||||
from pipecat.audio.vad.silero import SileroVADAnalyzer
|
||||
from pipecat.frames.frames import EndTaskFrame
|
||||
from pipecat.frames.frames import (
|
||||
EndFrame,
|
||||
EndTaskFrame,
|
||||
InputAudioRawFrame,
|
||||
StopTaskFrame,
|
||||
TranscriptionFrame,
|
||||
UserStartedSpeakingFrame,
|
||||
UserStoppedSpeakingFrame,
|
||||
)
|
||||
from pipecat.pipeline.pipeline import Pipeline
|
||||
from pipecat.pipeline.runner import PipelineRunner
|
||||
from pipecat.pipeline.task import PipelineParams, PipelineTask
|
||||
from pipecat.processors.frame_processor import FrameDirection
|
||||
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
|
||||
from pipecat.services.ai_services import LLMService
|
||||
from pipecat.services.deepgram import DeepgramSTTService
|
||||
from pipecat.services.elevenlabs import ElevenLabsTTSService
|
||||
from pipecat.services.google import GoogleLLMContext, GoogleLLMService
|
||||
from pipecat.transports.services.daily import DailyDialinSettings, DailyParams, DailyTransport
|
||||
from pipecat.services.google import GoogleLLMService
|
||||
from pipecat.services.google.google import GoogleLLMContext
|
||||
from pipecat.transports.services.daily import (
|
||||
DailyDialinSettings,
|
||||
DailyParams,
|
||||
DailyTransport,
|
||||
)
|
||||
|
||||
load_dotenv(override=True)
|
||||
|
||||
@@ -32,31 +46,139 @@ logger.add(sys.stderr, level="DEBUG")
|
||||
daily_api_key = os.getenv("DAILY_API_KEY", "")
|
||||
daily_api_url = os.getenv("DAILY_API_URL", "https://api.daily.co/v1")
|
||||
|
||||
system_message = None
|
||||
|
||||
|
||||
class UserAudioCollector(FrameProcessor):
|
||||
"""This FrameProcessor collects audio frames in a buffer, then adds them to the
|
||||
LLM context when the user stops speaking.
|
||||
"""
|
||||
|
||||
def __init__(self, context, user_context_aggregator):
|
||||
super().__init__()
|
||||
self._context = context
|
||||
self._user_context_aggregator = user_context_aggregator
|
||||
self._audio_frames = []
|
||||
self._start_secs = 0.2 # this should match VAD start_secs (hardcoding for now)
|
||||
self._user_speaking = False
|
||||
|
||||
async def process_frame(self, frame, direction):
|
||||
await super().process_frame(frame, direction)
|
||||
|
||||
if isinstance(frame, TranscriptionFrame):
|
||||
# We could gracefully handle both audio input and text/transcription input ...
|
||||
# but let's leave that as an exercise to the reader. :-)
|
||||
return
|
||||
if isinstance(frame, UserStartedSpeakingFrame):
|
||||
self._user_speaking = True
|
||||
elif isinstance(frame, UserStoppedSpeakingFrame):
|
||||
self._user_speaking = False
|
||||
self._context.add_audio_frames_message(audio_frames=self._audio_frames)
|
||||
await self._user_context_aggregator.push_frame(
|
||||
self._user_context_aggregator.get_context_frame()
|
||||
)
|
||||
elif isinstance(frame, InputAudioRawFrame):
|
||||
if self._user_speaking:
|
||||
self._audio_frames.append(frame)
|
||||
else:
|
||||
# Append the audio frame to our buffer. Treat the buffer as a ring buffer, dropping the oldest
|
||||
# frames as necessary. Assume all audio frames have the same duration.
|
||||
self._audio_frames.append(frame)
|
||||
frame_duration = len(frame.audio) / 16 * frame.num_channels / frame.sample_rate
|
||||
buffer_duration = frame_duration * len(self._audio_frames)
|
||||
while buffer_duration > self._start_secs:
|
||||
self._audio_frames.pop(0)
|
||||
buffer_duration -= frame_duration
|
||||
|
||||
await self.push_frame(frame, direction)
|
||||
|
||||
|
||||
class ContextSwitcher:
|
||||
def __init__(self, llm, context_aggregator):
|
||||
self._llm = llm
|
||||
self._context_aggregator = context_aggregator
|
||||
|
||||
async def switch_context(self, system_instruction):
|
||||
"""Switch the context to a new system instruction based on what the bot hears."""
|
||||
# Create messages with updated system instruction
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": system_instruction,
|
||||
}
|
||||
]
|
||||
|
||||
# Update context with new messages
|
||||
self._context_aggregator.set_messages(messages)
|
||||
# Get the context frame with the updated messages
|
||||
context_frame = self._context_aggregator.get_context_frame()
|
||||
# Trigger LLM response by pushing a context frame
|
||||
await self._llm.push_frame(context_frame)
|
||||
|
||||
|
||||
class FunctionHandlers:
|
||||
def __init__(self, context_switcher):
|
||||
self.context_switcher = context_switcher
|
||||
|
||||
async def voicemail_response(
|
||||
self,
|
||||
function_name,
|
||||
tool_call_id,
|
||||
args,
|
||||
llm: LLMService,
|
||||
context,
|
||||
result_callback,
|
||||
):
|
||||
"""Function the bot can call to leave a voicemail message."""
|
||||
message = """You are Chatbot leaving a voicemail message. Say EXACTLY this message and nothing else:
|
||||
|
||||
"Hello, this is a message for Pipecat example user. This is Chatbot. Please call back on 123-456-7891. Thank you."
|
||||
|
||||
After saying this message, call the terminate_call function."""
|
||||
|
||||
await self.context_switcher.switch_context(system_instruction=message)
|
||||
await result_callback("Leaving a voicemail message")
|
||||
|
||||
async def human_conversation(
|
||||
self,
|
||||
function_name,
|
||||
tool_call_id,
|
||||
args,
|
||||
llm: LLMService,
|
||||
context,
|
||||
result_callback,
|
||||
):
|
||||
"""Function the bot can when it detects it's talking to a human."""
|
||||
await llm.push_frame(StopTaskFrame(), FrameDirection.UPSTREAM)
|
||||
|
||||
|
||||
async def terminate_call(
|
||||
function_name, tool_call_id, args, llm: LLMService, context, result_callback
|
||||
function_name,
|
||||
tool_call_id,
|
||||
args,
|
||||
llm: LLMService,
|
||||
context,
|
||||
result_callback,
|
||||
call_state=None,
|
||||
):
|
||||
"""Function the bot can call to terminate the call upon completion of a voicemail message."""
|
||||
await llm.queue_frame(EndTaskFrame(), FrameDirection.UPSTREAM)
|
||||
"""Function the bot can call to terminate the call upon completion of the call."""
|
||||
if call_state:
|
||||
call_state.bot_terminated_call = True
|
||||
await llm.push_frame(EndTaskFrame(), FrameDirection.UPSTREAM)
|
||||
|
||||
|
||||
async def main(
|
||||
room_url: str,
|
||||
token: str,
|
||||
callId: str,
|
||||
callDomain: str,
|
||||
callId: Optional[str],
|
||||
callDomain: Optional[str],
|
||||
detect_voicemail: bool,
|
||||
dialout_number: Optional[str],
|
||||
):
|
||||
# dialin_settings are only needed if Daily's SIP URI is used
|
||||
# If you are handling this via Twilio, Telnyx, set this to None
|
||||
# and handle call-forwarding when on_dialin_ready fires.
|
||||
dialin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
DailyParams(
|
||||
dialin_settings = None
|
||||
if callId and callDomain:
|
||||
dialin_settings = DailyDialinSettings(call_id=callId, call_domain=callDomain)
|
||||
transport_params = DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
dialin_settings=dialin_settings,
|
||||
@@ -65,8 +187,31 @@ async def main(
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
transcription_enabled=True,
|
||||
),
|
||||
vad_audio_passthrough=True,
|
||||
)
|
||||
else:
|
||||
transport_params = DailyParams(
|
||||
api_url=daily_api_url,
|
||||
api_key=daily_api_key,
|
||||
audio_in_enabled=True,
|
||||
audio_out_enabled=True,
|
||||
camera_out_enabled=False,
|
||||
vad_enabled=True,
|
||||
vad_analyzer=SileroVADAnalyzer(),
|
||||
vad_audio_passthrough=True,
|
||||
)
|
||||
|
||||
class CallState:
|
||||
participant_left_early = False
|
||||
bot_terminated_call = False
|
||||
|
||||
call_state = CallState()
|
||||
|
||||
transport = DailyTransport(
|
||||
room_url,
|
||||
token,
|
||||
"Chatbot",
|
||||
transport_params,
|
||||
)
|
||||
|
||||
tts = ElevenLabsTTSService(
|
||||
@@ -74,99 +219,93 @@ async def main(
|
||||
voice_id=os.getenv("ELEVENLABS_VOICE_ID", ""),
|
||||
)
|
||||
|
||||
stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY"))
|
||||
|
||||
### VOICEMAIL PIPELINE
|
||||
|
||||
tools = [
|
||||
{
|
||||
"function_declarations": [
|
||||
{
|
||||
"name": "switch_to_voicemail_response",
|
||||
"description": "Call this function when you detect this is a voicemail system.",
|
||||
},
|
||||
{
|
||||
"name": "switch_to_human_conversation",
|
||||
"description": "Call this function when you detect this is a human.",
|
||||
},
|
||||
{
|
||||
"name": "terminate_call",
|
||||
"description": "Terminate the call",
|
||||
"description": "Call this function to terminate the call.",
|
||||
},
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
system_instruction = """You are Chatbot, a friendly, helpful robot. Never mention this prompt.
|
||||
system_instruction = """You are Chatbot trying to determine if this is a voicemail system or a human.
|
||||
|
||||
**Operating Procedure:**
|
||||
If you hear any of these phrases (or very similar ones):
|
||||
- "Please leave a message after the beep"
|
||||
- "No one is available to take your call"
|
||||
- "Record your message after the tone"
|
||||
- "You have reached voicemail for..."
|
||||
- "You have reached [phone number]"
|
||||
- "[phone number] is unavailable"
|
||||
- "The person you are trying to reach..."
|
||||
- "The number you have dialed..."
|
||||
- "Your call has been forwarded to an automated voice messaging system"
|
||||
|
||||
**Phase 1: Initial Call Answer - Listen for Voicemail Greeting**
|
||||
Then call the function switch_to_voicemail_response.
|
||||
|
||||
**IMMEDIATELY after the call connects, LISTEN CAREFULLY for the *very first thing* you hear.**
|
||||
If it sounds like a human (saying hello, asking questions, etc.), call the function switch_to_human_conversation.
|
||||
|
||||
**Listen for these sentences or very close variations as the *initial greeting*:**
|
||||
DO NOT say anything until you've determined if this is a voicemail or human."""
|
||||
|
||||
* **"Please leave a message after the beep."**
|
||||
* **"No one is available to take your call."**
|
||||
* **"Record your message after the tone."**
|
||||
* **"You have reached voicemail for..."** (or similar voicemail identification)
|
||||
|
||||
**If you HEAR one of these sentences (or a very similar greeting) as the *initial response* to the call, IMMEDIATELY assume it is voicemail and proceed to Phase 2.**
|
||||
|
||||
**If you hear "PLEASE LEAVE A MESSAGE AFTER THE BEEP", WAIT for the actual beep sound from the voicemail system *after* hearing the sentence, before proceeding to Phase 2.**
|
||||
|
||||
**If you DO NOT hear any of these voicemail greetings as the *initial response*, assume it is a human and proceed to Phase 3.**
|
||||
|
||||
|
||||
**Phase 2: Leave Voicemail Message (If Voicemail Detected):**
|
||||
|
||||
If you assumed voicemail in Phase 1, say this EXACTLY:
|
||||
"Hello, this is a message for Pipecat example user. This is Chatbot. Please call back on 123-456-7891. Thank you."
|
||||
|
||||
**Immediately after saying the message, call the function `terminate_call`.**
|
||||
**DO NOT SAY ANYTHING ELSE. SILENCE IS REQUIRED AFTER `terminate_call`.**
|
||||
|
||||
|
||||
**Phase 3: Human Interaction (If No Voicemail Greeting Detected in Phase 1):**
|
||||
|
||||
If you did not detect a voicemail greeting in Phase 1 and a human answers, say:
|
||||
"Oh, hello! I'm a friendly chatbot. Is there anything I can help you with?"
|
||||
|
||||
Keep your responses **short and helpful.**
|
||||
|
||||
If the human is finished, say:
|
||||
"Okay, thank you! Have a great day!"
|
||||
|
||||
**Then, immediately call the function `terminate_call`.**
|
||||
|
||||
|
||||
**VERY IMPORTANT RULES - DO NOT DO THESE THINGS:**
|
||||
|
||||
* **DO NOT SAY "Please leave a message after the beep."**
|
||||
* **DO NOT SAY "No one is available to take your call."**
|
||||
* **DO NOT SAY "Record your message after the tone."**
|
||||
* **DO NOT SAY ANY voicemail greeting yourself.**
|
||||
* **Only check for voicemail greetings in Phase 1, *immediately after the call connects*.**
|
||||
* **After voicemail or human interaction, ALWAYS call `terminate_call` immediately.**
|
||||
* **Do not speak after calling `terminate_call`.**
|
||||
* Your speech will be audio, so use simple language without special characters.
|
||||
"""
|
||||
|
||||
llm = GoogleLLMService(
|
||||
model="models/gemini-2.0-flash-exp",
|
||||
voicemail_detection_llm = GoogleLLMService(
|
||||
model="models/gemini-2.0-flash-lite",
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
system_instruction=system_instruction,
|
||||
tools=tools,
|
||||
)
|
||||
llm.register_function("terminate_call", terminate_call)
|
||||
|
||||
context = GoogleLLMContext()
|
||||
voicemail_detection_context = GoogleLLMContext()
|
||||
voicemail_detection_context_aggregator = voicemail_detection_llm.create_context_aggregator(
|
||||
voicemail_detection_context
|
||||
)
|
||||
context_switcher = ContextSwitcher(
|
||||
voicemail_detection_llm, voicemail_detection_context_aggregator.user()
|
||||
)
|
||||
handlers = FunctionHandlers(context_switcher)
|
||||
|
||||
context_aggregator = llm.create_context_aggregator(context)
|
||||
|
||||
pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
context_aggregator.user(), # User responses
|
||||
llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
voicemail_detection_llm.register_function(
|
||||
"switch_to_voicemail_response", handlers.voicemail_response
|
||||
)
|
||||
voicemail_detection_llm.register_function(
|
||||
"switch_to_human_conversation", handlers.human_conversation
|
||||
)
|
||||
voicemail_detection_llm.register_function(
|
||||
"terminate_call",
|
||||
lambda *args, **kwargs: terminate_call(*args, **kwargs, call_state=call_state),
|
||||
)
|
||||
|
||||
task = PipelineTask(
|
||||
pipeline,
|
||||
PipelineParams(allow_interruptions=True),
|
||||
voicemail_detection_audio_collector = UserAudioCollector(
|
||||
voicemail_detection_context, voicemail_detection_context_aggregator.user()
|
||||
)
|
||||
|
||||
voicemail_detection_pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
voicemail_detection_audio_collector, # Collect audio frames
|
||||
voicemail_detection_context_aggregator.user(), # User responses
|
||||
voicemail_detection_llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
voicemail_detection_context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
voicemail_detection_pipeline_task = PipelineTask(
|
||||
voicemail_detection_pipeline,
|
||||
params=PipelineParams(allow_interruptions=True),
|
||||
)
|
||||
|
||||
if dialout_number:
|
||||
@@ -200,25 +339,116 @@ If the human is finished, say:
|
||||
# machine to say something like 'Leave a message after the beep', or for the user to say 'Hello?'.
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
logger.debug("Detect voicemail; capturing participant transcription")
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
else:
|
||||
logger.debug("no dialout number; assuming dialin")
|
||||
logger.debug("+++++ No dialout number; assuming dialin")
|
||||
|
||||
# Different handlers for dialin
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
# This event is not firing for some reason
|
||||
await transport.capture_participant_transcription(participant["id"])
|
||||
# For the dialin case, we want the bot to answer the phone and greet the user. We
|
||||
# can prompt the bot to speak by putting the context into the pipeline.
|
||||
await task.queue_frames([context_aggregator.user().get_context_frame()])
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await task.cancel()
|
||||
dialin_instructions = """Always call the function switch_to_human_conversation"""
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": dialin_instructions,
|
||||
}
|
||||
]
|
||||
voicemail_detection_context_aggregator.user().set_messages(messages)
|
||||
await voicemail_detection_pipeline_task.queue_frames(
|
||||
[voicemail_detection_context_aggregator.user().get_context_frame()]
|
||||
)
|
||||
|
||||
runner = PipelineRunner()
|
||||
|
||||
await runner.run(task)
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
call_state.participant_left_early = True
|
||||
await voicemail_detection_pipeline_task.queue_frame(EndFrame())
|
||||
|
||||
print("!!! starting voicemail detection pipeline")
|
||||
await runner.run(voicemail_detection_pipeline_task)
|
||||
print("!!! Done with voicemail detection pipeline")
|
||||
|
||||
if call_state.participant_left_early or call_state.bot_terminated_call:
|
||||
if call_state.participant_left_early:
|
||||
print("!!! Participant left early; terminating call")
|
||||
elif call_state.bot_terminated_call:
|
||||
print("!!! Bot terminated call; not proceeding to human conversation")
|
||||
return
|
||||
|
||||
### HUMAN CONVERSATION PIPELINE
|
||||
|
||||
human_conversation_system_instruction = """You are Chatbot talking to a human. Be friendly and helpful.
|
||||
|
||||
Start with: "Hello! I'm a friendly chatbot. How can I help you today?"
|
||||
|
||||
Keep your responses brief and to the point. Listen to what the person says.
|
||||
|
||||
When the person indicates they're done with the conversation by saying something like:
|
||||
- "Goodbye"
|
||||
- "That's all"
|
||||
- "I'm done"
|
||||
- "Thank you, that's all I needed"
|
||||
|
||||
THEN say: "Thank you for chatting. Goodbye!" and call the terminate_call function."""
|
||||
|
||||
human_conversation_llm = GoogleLLMService(
|
||||
model="models/gemini-2.0-flash-001",
|
||||
api_key=os.getenv("GOOGLE_API_KEY"),
|
||||
system_instruction=human_conversation_system_instruction,
|
||||
tools=tools,
|
||||
)
|
||||
human_conversation_context = GoogleLLMContext()
|
||||
|
||||
human_conversation_context_aggregator = human_conversation_llm.create_context_aggregator(
|
||||
human_conversation_context
|
||||
)
|
||||
|
||||
human_conversation_llm.register_function(
|
||||
"terminate_call",
|
||||
lambda *args, **kwargs: terminate_call(*args, **kwargs, call_state=call_state),
|
||||
)
|
||||
|
||||
human_conversation_pipeline = Pipeline(
|
||||
[
|
||||
transport.input(), # Transport user input
|
||||
stt,
|
||||
human_conversation_context_aggregator.user(), # User responses
|
||||
human_conversation_llm, # LLM
|
||||
tts, # TTS
|
||||
transport.output(), # Transport bot output
|
||||
human_conversation_context_aggregator.assistant(), # Assistant spoken responses
|
||||
]
|
||||
)
|
||||
|
||||
human_conversation_pipeline_task = PipelineTask(
|
||||
human_conversation_pipeline,
|
||||
params=PipelineParams(allow_interruptions=True),
|
||||
)
|
||||
|
||||
@transport.event_handler("on_participant_left")
|
||||
async def on_participant_left(transport, participant, reason):
|
||||
await voicemail_detection_pipeline_task.queue_frame(EndFrame())
|
||||
await human_conversation_pipeline_task.queue_frame(EndFrame())
|
||||
|
||||
print("!!! starting human conversation pipeline")
|
||||
human_conversation_context_aggregator.user().set_messages(
|
||||
[
|
||||
{
|
||||
"role": "system",
|
||||
"content": human_conversation_system_instruction,
|
||||
}
|
||||
]
|
||||
)
|
||||
await human_conversation_pipeline_task.queue_frames(
|
||||
[human_conversation_context_aggregator.user().get_context_frame()]
|
||||
)
|
||||
await runner.run(human_conversation_pipeline_task)
|
||||
|
||||
print("!!! Done with human conversation pipeline")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -77,7 +77,7 @@ async def main(room_url: str, token: str, callId: str, sipUri: str):
|
||||
]
|
||||
)
|
||||
|
||||
task = PipelineTask(pipeline, PipelineParams(allow_interruptions=True))
|
||||
task = PipelineTask(pipeline, params=PipelineParams(allow_interruptions=True))
|
||||
|
||||
@transport.event_handler("on_first_participant_joined")
|
||||
async def on_first_participant_joined(transport, participant):
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user