Merge pull request #69 from daily-co/add-github-linting-workflow

github: add linting workflow
This commit is contained in:
Aleix Conchillo Flaqué
2024-03-19 02:46:50 +08:00
committed by GitHub
45 changed files with 432 additions and 341 deletions

32
.github/workflows/lint.yaml vendored Normal file
View File

@@ -0,0 +1,32 @@
name: lint
on:
workflow_dispatch:
push:
branches:
- main
pull_request:
branches:
- "**"
paths-ignore:
- "docs/**"
concurrency:
group: build-lint-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
autopep8:
name: "Formatting lints"
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v4
- name: autopep8
id: autopep8
uses: peter-evans/autopep8@v2
with:
args: --exit-code -r -d -a -a src/
- name: Fail if autopep8 requires changes
if: steps.autopep8.outputs.exit-code == 2
run: exit 1

View File

@@ -57,7 +57,8 @@ class ResponseAggregator(FrameProcessor):
# Sometimes VAD triggers quickly on and off. If we don't get any transcription,
# it creates empty LLM message queue frames
if len(self.aggregation) > 0:
self.messages.append({"role": self._role, "content": self.aggregation})
self.messages.append(
{"role": self._role, "content": self.aggregation})
self.aggregation = ""
yield self._end_frame()
yield LLMMessagesQueueFrame(self.messages)
@@ -110,7 +111,8 @@ class LLMContextAggregator(AIService):
self.pass_through = pass_through
async def process_frame(self, frame: Frame) -> AsyncGenerator[Frame, None]:
# We don't do anything with non-text frames, pass it along to next in the pipeline.
# We don't do anything with non-text frames, pass it along to next in
# the pipeline.
if not isinstance(frame, TextFrame):
yield frame
return
@@ -132,7 +134,8 @@ class LLMContextAggregator(AIService):
# though we check it above
self.sentence += frame.text
if self.sentence.endswith((".", "?", "!")):
self.messages.append({"role": self.role, "content": self.sentence})
self.messages.append(
{"role": self.role, "content": self.sentence})
self.sentence = ""
yield LLMMessagesQueueFrame(self.messages)
else:
@@ -144,17 +147,24 @@ class LLMContextAggregator(AIService):
class LLMUserContextAggregator(LLMContextAggregator):
def __init__(
self, messages: list[dict], bot_participant_id=None, complete_sentences=True
):
self,
messages: list[dict],
bot_participant_id=None,
complete_sentences=True):
super().__init__(
messages, "user", bot_participant_id, complete_sentences, pass_through=False
)
messages,
"user",
bot_participant_id,
complete_sentences,
pass_through=False)
class LLMAssistantContextAggregator(LLMContextAggregator):
def __init__(
self, messages: list[dict], bot_participant_id=None, complete_sentences=True
):
self,
messages: list[dict],
bot_participant_id=None,
complete_sentences=True):
super().__init__(
messages,
"assistant",
@@ -328,7 +338,8 @@ class ParallelPipeline(FrameProcessor):
continue
seen_ids.add(id(frame))
# Skip passing along EndParallelPipeQueueFrame, because we use them for our own flow control.
# Skip passing along EndParallelPipeQueueFrame, because we use them
# for our own flow control.
if not isinstance(frame, EndPipeFrame):
yield frame

View File

@@ -13,7 +13,7 @@ class ControlFrame(Frame):
# Control frames should contain no instance data, so
# equality is based solely on the class.
def __eq__(self, other):
return type(other) == self.__class__
return isinstance(other, self.__class__)
class StartFrame(ControlFrame):

View File

@@ -6,7 +6,8 @@ from dailyai.pipeline.pipeline import Pipeline
class SequentialMergePipeline(Pipeline):
"""This class merges the sink queues from a list of pipelines. Frames from
each pipeline's sink are merged in the order of pipelines in the list."""
def __init__(self, pipelines:List[Pipeline]):
def __init__(self, pipelines: List[Pipeline]):
super().__init__([])
self.pipelines = pipelines
@@ -14,7 +15,9 @@ class SequentialMergePipeline(Pipeline):
for pipeline in self.pipelines:
while True:
frame = await pipeline.sink.get()
if isinstance(frame, EndFrame) or isinstance(frame, EndPipeFrame):
if isinstance(
frame, EndFrame) or isinstance(
frame, EndPipeFrame):
break
await self.sink.put(frame)

View File

@@ -69,7 +69,10 @@ class OpenAIContextAggregator(FrameProcessor):
else:
yield frame
def string_aggregator(self, frame: Frame, aggregation: str | None) -> str | None:
def string_aggregator(
self,
frame: Frame,
aggregation: str | None) -> str | None:
if not isinstance(frame, TextFrame):
raise TypeError(
"Frame must be a TextFrame instance to be aggregated by a string aggregator."
@@ -94,7 +97,7 @@ class OpenAIUserContextAggregator(OpenAIContextAggregator):
class OpenAIAssistantContextAggregator(OpenAIContextAggregator):
def __init__(self, context:OpenAILLMContext):
def __init__(self, context: OpenAILLMContext):
super().__init__(
context,
aggregator=self.string_aggregator,

View File

@@ -89,7 +89,8 @@ class Pipeline:
):
break
except asyncio.CancelledError:
# this means there's been an interruption, do any cleanup necessary here.
# this means there's been an interruption, do any cleanup necessary
# here.
for processor in self.processors:
await processor.interrupted()
pass
@@ -107,4 +108,3 @@ class Pipeline:
yield final_frame
else:
yield initial_frame

View File

@@ -28,6 +28,7 @@ class AIService(FrameProcessor):
def __init__(self):
self.logger = logging.getLogger("dailyai")
class LLMService(AIService):
"""This class is a no-op but serves as a base class for LLM services."""
@@ -76,7 +77,8 @@ class TTSService(AIService):
async for audio_chunk in self.run_tts(text):
yield AudioFrame(audio_chunk)
# note we pass along the text frame *after* the audio, so the text frame is completed after the audio is processed.
# note we pass along the text frame *after* the audio, so the text
# frame is completed after the audio is processed.
yield TextFrame(text)

View File

@@ -9,7 +9,11 @@ from dailyai.services.ai_services import LLMService
class AnthropicLLMService(LLMService):
def __init__(self, api_key, model="claude-3-opus-20240229", max_tokens=1024):
def __init__(
self,
api_key,
model="claude-3-opus-20240229",
max_tokens=1024):
super().__init__()
self.client = AsyncAnthropic(api_key=api_key)
self.model = model

View File

@@ -44,8 +44,7 @@ class AzureTTSService(TTSService):
"<mstts:express-as style='lyrical' styledegree='2' role='SeniorFemale'>"
"<prosody rate='1.05'>"
f"{sentence}"
"</prosody></mstts:express-as></voice></speak> "
)
"</prosody></mstts:express-as></voice></speak> ")
result = await asyncio.to_thread(self.speech_synthesizer.speak_ssml, (ssml))
self.logger.info("Got azure tts result")
if result.reason == ResultReason.SynthesizingAudioCompleted:
@@ -55,16 +54,22 @@ class AzureTTSService(TTSService):
elif result.reason == ResultReason.Canceled:
cancellation_details = result.cancellation_details
self.logger.info(
"Speech synthesis canceled: {}".format(cancellation_details.reason)
)
"Speech synthesis canceled: {}".format(
cancellation_details.reason))
if cancellation_details.reason == CancellationReason.Error:
self.logger.info(
"Error details: {}".format(cancellation_details.error_details)
)
"Error details: {}".format(
cancellation_details.error_details))
class AzureLLMService(BaseOpenAILLMService):
def __init__(self, *, api_key, endpoint, api_version="2023-12-01-preview", model):
def __init__(
self,
*,
api_key,
endpoint,
api_version="2023-12-01-preview",
model):
self._endpoint = endpoint
self._api_version = api_version
@@ -101,7 +106,9 @@ class AzureImageGenServiceREST(ImageGenService):
async def run_image_gen(self, sentence) -> tuple[str, bytes]:
url = f"{self._azure_endpoint}openai/images/generations:submit?api-version={self._api_version}"
headers = {"api-key": self._api_key, "Content-Type": "application/json"}
headers = {
"api-key": self._api_key,
"Content-Type": "application/json"}
body = {
# Enter your prompt text here
"prompt": sentence,
@@ -112,7 +119,8 @@ class AzureImageGenServiceREST(ImageGenService):
url, headers=headers, json=body
) as submission:
# We never get past this line, because this header isn't
# defined on a 429 response, but something is eating our exceptions!
# defined on a 429 response, but something is eating our
# exceptions!
operation_location = submission.headers["operation-location"]
status = ""
attempts_left = 120
@@ -130,8 +138,7 @@ class AzureImageGenServiceREST(ImageGenService):
status = json_response["status"]
image_url = (
json_response["result"]["data"][0]["url"] if json_response else None
)
json_response["result"]["data"][0]["url"] if json_response else None)
if not image_url:
raise Exception("Image generation failed")
# Load the image from the url

View File

@@ -127,12 +127,14 @@ class BaseTransportService:
self._logger: logging.Logger = logging.getLogger()
async def run(self, pipeline:Pipeline | None=None, override_pipeline_source_queue=True):
async def run(self, pipeline: Pipeline | None = None, override_pipeline_source_queue=True):
self._prerun()
async_output_queue_marshal_task = asyncio.create_task(self._marshal_frames())
async_output_queue_marshal_task = asyncio.create_task(
self._marshal_frames())
self._camera_thread = threading.Thread(target=self._run_camera, daemon=True)
self._camera_thread = threading.Thread(
target=self._run_camera, daemon=True)
self._camera_thread.start()
self._frame_consumer_thread = threading.Thread(
@@ -182,7 +184,7 @@ class BaseTransportService:
if self._vad_enabled:
self._vad_thread.join()
async def run_pipeline(self, pipeline:Pipeline, override_pipeline_source_queue=True):
async def run_pipeline(self, pipeline: Pipeline, override_pipeline_source_queue=True):
pipeline.set_sink(self.send_queue)
if override_pipeline_source_queue:
pipeline.set_source(self.receive_queue)
@@ -217,7 +219,8 @@ class BaseTransportService:
break
if post_processor:
post_process_task = asyncio.create_task(post_process(post_processor))
post_process_task = asyncio.create_task(
post_process(post_processor))
started = False
@@ -244,7 +247,7 @@ class BaseTransportService:
await asyncio.gather(pipeline_task, post_process_task)
async def say(self, text:str, tts:TTSService):
async def say(self, text: str, tts: TTSService):
"""Say a phrase. Use with caution; this bypasses any running pipelines."""
async for frame in tts.process_frame(TextFrame(text)):
await self.send_queue.put(frame)
@@ -290,7 +293,8 @@ class BaseTransportService:
audio_chunk = self.read_audio_frames(self._vad_samples)
audio_int16 = np.frombuffer(audio_chunk, np.int16)
audio_float32 = int2float(audio_int16)
new_confidence = model(torch.from_numpy(audio_float32), 16000).item()
new_confidence = model(
torch.from_numpy(audio_float32), 16000).item()
speaking = new_confidence > 0.5
if speaking:
@@ -320,8 +324,8 @@ class BaseTransportService:
):
if self._loop:
asyncio.run_coroutine_threadsafe(
self.receive_queue.put(UserStartedSpeakingFrame()), self._loop
)
self.receive_queue.put(
UserStartedSpeakingFrame()), self._loop)
# self.interrupt()
self._vad_state = VADState.SPEAKING
self._vad_starting_count = 0
@@ -331,8 +335,8 @@ class BaseTransportService:
):
if self._loop:
asyncio.run_coroutine_threadsafe(
self.receive_queue.put(UserStoppedSpeakingFrame()), self._loop
)
self.receive_queue.put(
UserStoppedSpeakingFrame()), self._loop)
self._vad_state = VADState.QUIET
self._vad_stopping_count = 0
@@ -370,7 +374,9 @@ class BaseTransportService:
self.receive_queue.put(frame), self._loop
)
asyncio.run_coroutine_threadsafe(self.receive_queue.put(EndFrame()), self._loop)
asyncio.run_coroutine_threadsafe(
self.receive_queue.put(
EndFrame()), self._loop)
def _set_image(self, image: bytes):
self._images = itertools.cycle([image])
@@ -378,7 +384,7 @@ class BaseTransportService:
def _set_images(self, images: list[bytes], start_frame=0):
self._images = itertools.cycle(images)
def send_app_message(self, message: Any, participantId:str|None):
def send_app_message(self, message: Any, participantId: str | None):
""" Child classes should override this to send a custom message to the room. """
pass
@@ -401,17 +407,18 @@ class BaseTransportService:
largest_write_size = 8000
while True:
try:
frames_or_frame: Frame | list[Frame] = self._threadsafe_send_queue.get()
frames_or_frame: Frame | list[Frame] = self._threadsafe_send_queue.get(
)
if (
isinstance(frames_or_frame, AudioFrame)
and len(frames_or_frame.data) > largest_write_size
):
# subdivide large audio frames to enable interruption
frames = []
for i in range(0, len(frames_or_frame.data), largest_write_size):
frames.append(
AudioFrame(frames_or_frame.data[i : i + largest_write_size])
)
for i in range(0, len(frames_or_frame.data),
largest_write_size):
frames.append(AudioFrame(
frames_or_frame.data[i: i + largest_write_size]))
elif isinstance(frames_or_frame, Frame):
frames: list[Frame] = [frames_or_frame]
elif isinstance(frames_or_frame, list):
@@ -430,7 +437,8 @@ class BaseTransportService:
)
return
# if interrupted, we just pull frames off the queue and discard them
# if interrupted, we just pull frames off the queue and
# discard them
if not self._is_interrupted.is_set():
if frame:
if isinstance(frame, AudioFrame):
@@ -441,14 +449,16 @@ class BaseTransportService:
len(b) % smallest_write_size
)
if truncated_length:
self.write_frame_to_mic(bytes(b[:truncated_length]))
self.write_frame_to_mic(
bytes(b[:truncated_length]))
b = b[truncated_length:]
elif isinstance(frame, ImageFrame):
self._set_image(frame.image)
elif isinstance(frame, SpriteFrame):
self._set_images(frame.images)
elif isinstance(frame, SendAppMessageFrame):
self.send_app_message(frame.message, frame.participantId)
self.send_app_message(
frame.message, frame.participantId)
elif len(b):
self.write_frame_to_mic(bytes(b))
b = bytearray()
@@ -457,7 +467,8 @@ class BaseTransportService:
# can cause static in the audio stream.
if len(b):
truncated_length = len(b) - (len(b) % 160)
self.write_frame_to_mic(bytes(b[:truncated_length]))
self.write_frame_to_mic(
bytes(b[:truncated_length]))
b = bytearray()
if isinstance(frame, StartFrame):
@@ -479,5 +490,6 @@ class BaseTransportService:
b = bytearray()
except Exception as e:
self._logger.error(f"Exception in frame_consumer: {e}, {len(b)}")
self._logger.error(
f"Exception in frame_consumer: {e}, {len(b)}")
raise e

View File

@@ -48,7 +48,8 @@ class DailyTransportService(BaseTransportService, EventHandler):
start_transcription: bool = False,
**kwargs,
):
super().__init__(**kwargs) # This will call BaseTransportService.__init__ method, not EventHandler
# This will call BaseTransportService.__init__ method, not EventHandler
super().__init__(**kwargs)
self._room_url: str = room_url
self._bot_name: str = bot_name
@@ -83,9 +84,11 @@ class DailyTransportService(BaseTransportService, EventHandler):
for handler in self._event_handlers[event_name]:
if inspect.iscoroutinefunction(handler):
if self._loop:
future = asyncio.run_coroutine_threadsafe(handler(*args, **kwargs), self._loop)
future = asyncio.run_coroutine_threadsafe(
handler(*args, **kwargs), self._loop)
# wait for the coroutine to finish. This will also raise any exceptions raised by the coroutine.
# wait for the coroutine to finish. This will also
# raise any exceptions raised by the coroutine.
future.result()
else:
raise Exception(
@@ -98,7 +101,8 @@ class DailyTransportService(BaseTransportService, EventHandler):
def add_event_handler(self, event_name: str, handler):
if not event_name.startswith("on_"):
raise Exception(f"Event handler {event_name} must start with 'on_'")
raise Exception(
f"Event handler {event_name} must start with 'on_'")
methods = inspect.getmembers(self, predicate=inspect.ismethod)
if event_name not in [method[0] for method in methods]:
@@ -111,7 +115,8 @@ class DailyTransportService(BaseTransportService, EventHandler):
handler, self)]
setattr(self, event_name, partial(self._patch_method, event_name))
else:
self._event_handlers[event_name].append(types.MethodType(handler, self))
self._event_handlers[event_name].append(
types.MethodType(handler, self))
def event_handler(self, event_name: str):
def decorator(handler):
@@ -148,8 +153,7 @@ class DailyTransportService(BaseTransportService, EventHandler):
if self._camera_enabled:
self.camera: VirtualCameraDevice = Daily.create_camera_device(
"camera", width=self._camera_width, height=self._camera_height, color_format="RGB"
)
"camera", width=self._camera_width, height=self._camera_height, color_format="RGB")
if self._speaker_enabled or self._vad_enabled:
self._speaker: VirtualSpeakerDevice = Daily.create_speaker_device(
@@ -249,7 +253,7 @@ class DailyTransportService(BaseTransportService, EventHandler):
if len(self.client.participants()) < self._min_others_count + 1:
self._stop_threads.set()
def on_app_message(self, message:Any, sender:str):
def on_app_message(self, message: Any, sender: str):
if self._loop:
frame = ReceivedAppMessageFrame(message, sender)
print(frame)
@@ -265,8 +269,10 @@ class DailyTransportService(BaseTransportService, EventHandler):
elif "session_id" in message:
participantId = message["session_id"]
if self._my_participant_id and participantId != self._my_participant_id:
frame = TranscriptionQueueFrame(message["text"], participantId, message["timestamp"])
asyncio.run_coroutine_threadsafe(self.receive_queue.put(frame), self._loop)
frame = TranscriptionQueueFrame(
message["text"], participantId, message["timestamp"])
asyncio.run_coroutine_threadsafe(
self.receive_queue.put(frame), self._loop)
def on_transcription_error(self, message):
self._logger.error(f"Transcription error: {message}")

View File

@@ -25,7 +25,9 @@ class DeepgramAIService(TTSService):
self.logger.info(f"Running deepgram tts for {sentence}")
base_url = "https://api.beta.deepgram.com/v1/speak"
request_url = f"{base_url}?model={self._voice}&encoding=linear16&container=none&sample_rate={self._sample_rate}"
headers = {"authorization": f"token {self._api_key}", "Content-Type": "application/json"}
headers = {
"authorization": f"token {self._api_key}",
"Content-Type": "application/json"}
data = {"text": sentence}
async with self._aiohttp_session.post(

View File

@@ -9,7 +9,12 @@ from dailyai.services.ai_services import TTSService
class DeepgramTTSService(TTSService):
def __init__(self, *, aiohttp_session, api_key, voice="alpha-asteria-en-v2"):
def __init__(
self,
*,
aiohttp_session,
api_key,
voice="alpha-asteria-en-v2"):
super().__init__()
self._voice = voice

View File

@@ -28,7 +28,9 @@ class ElevenLabsTTSService(TTSService):
async def run_tts(self, sentence) -> AsyncGenerator[bytes, None]:
url = f"https://api.elevenlabs.io/v1/text-to-speech/{self._voice_id}/stream"
payload = {"text": sentence, "model_id": self._model}
querystring = {"output_format": "pcm_16000", "optimize_streaming_latency": 2}
querystring = {
"output_format": "pcm_16000",
"optimize_streaming_latency": 2}
headers = {
"xi-api-key": self._api_key,
"Content-Type": "application/json",

View File

@@ -33,7 +33,7 @@ class FalImageGenService(ImageGenService):
def get_image_url(sentence, size):
handler = fal.apps.submit(
"110602490-fast-sdxl",
#"fal-ai/fast-sdxl",
# "fal-ai/fast-sdxl",
arguments={"prompt": sentence},
)
for event in handler.iter_events():

View File

@@ -15,13 +15,15 @@ class LocalTransportService(BaseTransportService):
self._tk_root = kwargs.get("tk_root") or None
if self._camera_enabled and not self._tk_root:
raise ValueError("If camera is enabled, a tkinter root must be provided")
raise ValueError(
"If camera is enabled, a tkinter root must be provided")
if self._speaker_enabled:
self._speaker_buffer_pending = bytearray()
async def _write_frame_to_tkinter(self, frame: bytes):
data = f"P6 {self._camera_width} {self._camera_height} 255 ".encode() + frame
data = f"P6 {self._camera_width} {self._camera_height} 255 ".encode() + \
frame
photo = tk.PhotoImage(
width=self._camera_width,
height=self._camera_height,
@@ -29,7 +31,8 @@ class LocalTransportService(BaseTransportService):
format="PPM")
self._image_label.config(image=photo)
# This holds a reference to the photo, preventing it from being garbage collected.
# This holds a reference to the photo, preventing it from being garbage
# collected.
self._image_label.image = photo # type: ignore
def write_frame_to_camera(self, frame: bytes):
@@ -61,8 +64,13 @@ class LocalTransportService(BaseTransportService):
if self._camera_enabled:
# Start with a neutral gray background.
array = np.ones((1024, 1024, 3)) * 128
data = f"P5 {1024} {1024} 255 ".encode() + array.astype(np.uint8).tobytes()
photo = tk.PhotoImage(width=1024, height=1024, data=data, format="PPM")
data = f"P5 {1024} {1024} 255 ".encode(
) + array.astype(np.uint8).tobytes()
photo = tk.PhotoImage(
width=1024,
height=1024,
data=data,
format="PPM")
self._image_label = tk.Label(self._tk_root, image=photo)
self._image_label.pack()

View File

@@ -110,7 +110,8 @@ class BaseOpenAILLMService(LLMService):
yield LLMFunctionStartFrame(function_name=tool_call.function.name)
if tool_call.function and tool_call.function.arguments:
# Keep iterating through the response to collect all the argument fragments and
# yield a complete LLMFunctionCallFrame after run_llm_async completes
# yield a complete LLMFunctionCallFrame after run_llm_async
# completes
arguments += tool_call.function.arguments
elif chunk.choices[0].delta.content:
yield TextFrame(chunk.choices[0].delta.content)

View File

@@ -16,7 +16,8 @@ class OpenAILLMContext:
tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN
):
self.messages: List[ChatCompletionMessageParam] = messages if messages else []
self.messages: List[ChatCompletionMessageParam] = messages if messages else [
]
self.tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = tool_choice
self.tools: List[ChatCompletionToolParam] | NotGiven = tools
@@ -25,13 +26,13 @@ class OpenAILLMContext:
context = OpenAILLMContext()
for message in messages:
context.add_message({
"content":message["content"],
"role":message["role"],
"name":message["name"] if "name" in message else message["role"]
"content": message["content"],
"role": message["role"],
"name": message["name"] if "name" in message else message["role"]
})
return context
#def __deepcopy__(self, memo):
# def __deepcopy__(self, memo):
def add_message(self, message: ChatCompletionMessageParam):
self.messages.append(message)
@@ -44,9 +45,10 @@ class OpenAILLMContext:
):
self.tool_choice = tool_choice
def set_tools(self, tools:List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN):
def set_tools(
self,
tools: List[ChatCompletionToolParam] | NotGiven = NOT_GIVEN):
if tools != NOT_GIVEN and len(tools) == 0:
tools = NOT_GIVEN
self.tools = tools

View File

@@ -17,7 +17,10 @@ class CloudflareAIService(AIService):
# base endpoint, used by the others
def run(self, model, input):
response = requests.post(f"{self.api_base_url}{model}", headers=self.headers, json=input)
response = requests.post(
f"{self.api_base_url}{model}",
headers=self.headers,
json=input)
return response.json()
# https://developers.cloudflare.com/workers-ai/models/llm/
@@ -41,7 +44,8 @@ class CloudflareAIService(AIService):
# https://developers.cloudflare.com/workers-ai/models/sentiment-analysis/
def run_text_sentiment(self, sentence):
return self.run("@cf/huggingface/distilbert-sst-2-int8", {"text": sentence})
return self.run("@cf/huggingface/distilbert-sst-2-int8",
{"text": sentence})
# https://developers.cloudflare.com/workers-ai/models/image-classification/
def run_image_classification(self, image_url):

View File

@@ -10,7 +10,7 @@ from openai.types.chat import (
ChatCompletionSystemMessageParam,
)
if __name__=="__main__":
if __name__ == "__main__":
async def test_chat():
llm = AzureLLMService(
api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
@@ -19,8 +19,7 @@ if __name__=="__main__":
)
context = OpenAILLMContext()
message: ChatCompletionSystemMessageParam = ChatCompletionSystemMessageParam(
content="Please tell the world hello.", name="system", role="system"
)
content="Please tell the world hello.", name="system", role="system")
context.add_message(message)
frame = OpenAILLMContextFrame(context)
async for s in llm.process_frame(frame):

View File

@@ -9,13 +9,12 @@ from openai.types.chat import (
)
from dailyai.services.ollama_ai_services import OLLamaLLMService
if __name__=="__main__":
if __name__ == "__main__":
async def test_chat():
llm = OLLamaLLMService()
context = OpenAILLMContext()
message: ChatCompletionSystemMessageParam = ChatCompletionSystemMessageParam(
content="Please tell the world hello.", name="system", role="system"
)
content="Please tell the world hello.", name="system", role="system")
context.add_message(message)
frame = OpenAILLMContextFrame(context)
async for s in llm.process_frame(frame):

View File

@@ -18,7 +18,7 @@ if __name__ == "__main__":
tools = [
ChatCompletionToolParam(
type="function",
function= {
function={
"name": "get_current_weather",
"description": "Get the current weather",
"parameters": {
@@ -30,15 +30,17 @@ if __name__ == "__main__":
},
"format": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
"enum": [
"celsius",
"fahrenheit"],
"description": "The temperature unit to use. Infer this from the users location.",
},
},
"required": ["location", "format"],
"required": [
"location",
"format"],
},
}
)
]
})]
api_key = os.getenv("OPENAI_API_KEY")
@@ -70,8 +72,7 @@ if __name__ == "__main__":
)
context = OpenAILLMContext()
message: ChatCompletionSystemMessageParam = ChatCompletionSystemMessageParam(
content="Please tell the world hello.", name="system", role="system"
)
content="Please tell the world hello.", name="system", role="system")
context.add_message(message)
frame = OpenAILLMContextFrame(context)
async for s in llm.process_frame(frame):

View File

@@ -45,10 +45,9 @@ class TestDailyFrameAggregators(unittest.IsolatedAsyncioTestCase):
async def test_gated_accumulator(self):
gated_aggregator = GatedAggregator(
gate_open_fn=lambda frame: isinstance(frame, ImageFrame),
gate_close_fn=lambda frame: isinstance(frame, LLMResponseStartFrame),
start_open=False,
)
gate_open_fn=lambda frame: isinstance(
frame, ImageFrame), gate_close_fn=lambda frame: isinstance(
frame, LLMResponseStartFrame), start_open=False, )
frames = [
LLMResponseStartFrame(),
@@ -76,12 +75,14 @@ class TestDailyFrameAggregators(unittest.IsolatedAsyncioTestCase):
async def test_parallel_pipeline(self):
async def slow_add(sleep_time:float, name:str, x: str):
async def slow_add(sleep_time: float, name: str, x: str):
await asyncio.sleep(sleep_time)
return ":".join([x, name])
pipe1_annotation = StatelessTextTransformer(functools.partial(slow_add, 0.1, 'pipe1'))
pipe2_annotation = StatelessTextTransformer(functools.partial(slow_add, 0.2, 'pipe2'))
pipe1_annotation = StatelessTextTransformer(
functools.partial(slow_add, 0.1, 'pipe1'))
pipe2_annotation = StatelessTextTransformer(
functools.partial(slow_add, 0.2, 'pipe2'))
sentence_aggregator = SentenceAggregator()
add_dots = StatelessTextTransformer(lambda x: x + ".")

View File

@@ -32,7 +32,8 @@ async def main(room_url):
pipeline = Pipeline([tts])
# Register an event handler so we can play the audio when the participant joins.
# Register an event handler so we can play the audio when the
# participant joins.
@transport.event_handler("on_participant_joined")
async def on_participant_joined(transport, participant):
if participant["info"]["isLocal"]:

View File

@@ -10,6 +10,7 @@ logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
logger = logging.getLogger("dailyai")
logger.setLevel(logging.DEBUG)
async def main():
async with aiohttp.ClientSession() as session:
meeting_duration_minutes = 1

View File

@@ -33,17 +33,16 @@ async def main(room_url):
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
messages = [
{
"role": "system",
"content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.",
}
]
}]
pipeline= Pipeline([llm, tts])
pipeline = Pipeline([llm, tts])
@transport.event_handler("on_first_other_participant_joined")
async def on_first_other_participant_joined(transport):

View File

@@ -40,7 +40,8 @@ async def main(room_url):
async def on_first_other_participant_joined(transport):
# Note that we do not put an EndFrame() item in the pipeline for this demo.
# This means that the bot will stay in the channel until it times out.
# An EndFrame() in the pipeline would cause the transport to shut down.
# An EndFrame() in the pipeline would cause the transport to shut
# down.
await pipeline.queue_frames(
[TextFrame("a cat in the style of picasso")]
)

View File

@@ -39,9 +39,8 @@ async def main():
)
image_task = asyncio.create_task(
imagegen.run_to_queue(
transport.send_queue, [TextFrame("a cat in the style of picasso")]
)
)
transport.send_queue, [
TextFrame("a cat in the style of picasso")]))
async def run_tk():
while not transport._stop_threads.is_set():

View File

@@ -49,7 +49,8 @@ async def main(room_url: str):
voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
)
messages = [{"role": "system", "content": "tell the user a joke about llamas"}]
messages = [{"role": "system",
"content": "tell the user a joke about llamas"}]
# Start a task to run the LLM to create a joke, and convert the LLM output to audio frames. This task
# will run in parallel with generating and speaking the audio for static text, so there's no delay to
@@ -65,7 +66,8 @@ async def main(room_url: str):
]
)
merge_pipeline = SequentialMergePipeline([simple_tts_pipeline, llm_pipeline])
merge_pipeline = SequentialMergePipeline(
[simple_tts_pipeline, llm_pipeline])
await asyncio.gather(
transport.run(merge_pipeline),

View File

@@ -79,8 +79,8 @@ async def main(room_url):
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
imagegen = FalImageGenService(
image_size="square_hd",
@@ -90,10 +90,9 @@ async def main(room_url):
)
gated_aggregator = GatedAggregator(
gate_open_fn=lambda frame: isinstance(frame, ImageFrame),
gate_close_fn=lambda frame: isinstance(frame, LLMResponseStartFrame),
start_open=False,
)
gate_open_fn=lambda frame: isinstance(
frame, ImageFrame), gate_close_fn=lambda frame: isinstance(
frame, LLMResponseStartFrame), start_open=False, )
sentence_aggregator = SentenceAggregator()
month_prepender = MonthPrepender()

View File

@@ -38,8 +38,8 @@ async def main(room_url):
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
dalle = FalImageGenService(
image_size="1024x1024",
@@ -49,7 +49,8 @@ async def main(room_url):
)
# Get a complete audio chunk from the given text. Splitting this into its own
# coroutine lets us ensure proper ordering of the audio chunks on the send queue.
# coroutine lets us ensure proper ordering of the audio chunks on the
# send queue.
async def get_all_audio(text):
all_audio = bytearray()
async for audio in tts.run_tts(text):
@@ -71,7 +72,8 @@ async def main(room_url):
to_speak = f"{month}: {image_description}"
audio_task = asyncio.create_task(get_all_audio(to_speak))
image_task = asyncio.create_task(dalle.run_image_gen(image_description))
image_task = asyncio.create_task(
dalle.run_image_gen(image_description))
(audio, image_data) = await asyncio.gather(audio_task, image_task)
return {
@@ -100,7 +102,8 @@ async def main(room_url):
async def show_images():
# This will play the months in the order they're completed. The benefit
# is we'll have as little delay as possible before the first month, and
# likely no delay between months, but the months won't display in order.
# likely no delay between months, but the months won't display in
# order.
for month_data_task in asyncio.as_completed(month_tasks):
data = await month_data_task
if data:
@@ -122,7 +125,9 @@ async def main(room_url):
tk_root.update_idletasks()
await asyncio.sleep(0.1)
month_tasks = [asyncio.create_task(get_month_data(month)) for month in months]
month_tasks = [
asyncio.create_task(
get_month_data(month)) for month in months]
await asyncio.gather(transport.run(), show_images(), run_tk())
@@ -130,8 +135,11 @@ async def main(room_url):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=True, help="URL of the Daily room to join"
)
"-u",
"--url",
type=str,
required=True,
help="URL of the Daily room to join")
args, unknown = parser.parse_known_args()

View File

@@ -41,8 +41,8 @@ async def main(room_url: str, token):
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
fl = FrameLogger("Inner")
fl2 = FrameLogger("Outer")
messages = [
@@ -52,7 +52,8 @@ async def main(room_url: str, token):
},
]
tma_in = LLMUserContextAggregator(messages, transport._my_participant_id)
tma_in = LLMUserContextAggregator(
messages, transport._my_participant_id)
tma_out = LLMAssistantContextAggregator(
messages, transport._my_participant_id
)
@@ -70,7 +71,8 @@ async def main(room_url: str, token):
@transport.event_handler("on_first_other_participant_joined")
async def on_first_other_participant_joined(transport):
# Kick off the conversation.
messages.append({"role": "system", "content": "Please introduce yourself to the user."})
messages.append(
{"role": "system", "content": "Please introduce yourself to the user."})
await pipeline.queue_frames([LLMMessagesQueueFrame(messages)])
transport.transcription_settings["extra"]["endpointing"] = True

View File

@@ -61,8 +61,8 @@ async def main(room_url: str, token):
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
img = FalImageGenService(
image_size="1024x1024",
@@ -97,14 +97,15 @@ async def main(room_url: str, token):
},
]
tma_in = LLMUserContextAggregator(messages, transport._my_participant_id)
tma_in = LLMUserContextAggregator(
messages, transport._my_participant_id)
tma_out = LLMAssistantContextAggregator(
messages, transport._my_participant_id
)
image_sync_aggregator = ImageSyncAggregator(
os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
)
os.path.join(
os.path.dirname(__file__), "assets", "speaking.png"), os.path.join(
os.path.dirname(__file__), "assets", "waiting.png"), )
await tts.run_to_queue(
transport.send_queue,
image_sync_aggregator.run(

View File

@@ -42,8 +42,8 @@ async def main(room_url: str, token):
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
pipeline = Pipeline([FrameLogger(), llm, FrameLogger(), tts])

View File

@@ -56,7 +56,8 @@ talking_list = [sprites["sc-default.png"], sprites["sc-talk.png"]]
talking = [random.choice(talking_list) for x in range(30)]
talking_frame = SpriteFrame(images=talking)
# TODO: Support "thinking" as soon as we get a valid transcript, while LLM is processing
# TODO: Support "thinking" as soon as we get a valid transcript, while LLM
# is processing
thinking_list = [
sprites["sc-think-1.png"],
sprites["sc-think-2.png"],
@@ -130,8 +131,8 @@ async def main(room_url: str, token):
transport._camera_height = 1280
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
tts = ElevenLabsTTSService(
aiohttp_session=session,
@@ -155,7 +156,8 @@ async def main(room_url: str, token):
},
]
tma_in = LLMUserContextAggregator(messages, transport._my_participant_id)
tma_in = LLMUserContextAggregator(
messages, transport._my_participant_id)
tma_out = LLMAssistantContextAggregator(
messages, transport._my_participant_id
)

View File

@@ -81,8 +81,8 @@ async def main(room_url: str, token):
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
tts = ElevenLabsTTSService(
aiohttp_session=session,
@@ -103,7 +103,8 @@ async def main(room_url: str, token):
},
]
tma_in = LLMUserContextAggregator(messages, transport._my_participant_id)
tma_in = LLMUserContextAggregator(
messages, transport._my_participant_id)
tma_out = LLMAssistantContextAggregator(
messages, transport._my_participant_id
)

View File

@@ -57,8 +57,11 @@ async def main(room_url: str):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=True, help="URL of the Daily room to join"
)
"-u",
"--url",
type=str,
required=True,
help="URL of the Daily room to join")
args, unknown = parser.parse_known_args()
asyncio.run(main(args.url))

View File

@@ -45,14 +45,17 @@ async def main(room_url: str, token):
print(f"finder: {finder}")
if finder >= 0:
async for audio in tts.run_tts(f"Resetting."):
transport.output_queue.put(Frame(FrameType.AUDIO_FRAME, audio))
transport.output_queue.put(
Frame(FrameType.AUDIO_FRAME, audio))
sentence = ""
continue
# todo: we could differentiate between transcriptions from different participants
# todo: we could differentiate between transcriptions from
# different participants
sentence += f" {message['text']}"
print(f"sentence is now: {sentence}")
# TODO: Cache this audio
phrase = random.choice(["OK.", "Got it.", "Sure.", "You bet.", "Sure thing."])
phrase = random.choice(
["OK.", "Got it.", "Sure.", "You bet.", "Sure thing."])
async for audio in tts.run_tts(phrase):
transport.output_queue.put(Frame(FrameType.AUDIO_FRAME, audio))
img_result = img.run_image_gen(sentence, "1024x1024")
@@ -82,8 +85,11 @@ async def main(room_url: str, token):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Simple Daily Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=True, help="URL of the Daily room to join"
)
"-u",
"--url",
type=str,
required=True,
help="URL of the Daily room to join")
parser.add_argument(
"-k",
"--apikey",
@@ -94,20 +100,25 @@ if __name__ == "__main__":
args, unknown = parser.parse_known_args()
# Create a meeting token for the given room with an expiration 1 hour in the future.
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
room_name: str = urllib.parse.urlparse(args.url).path[1:]
expiration: float = time.time() + 60 * 60
res: requests.Response = requests.post(
f"https://api.daily.co/v1/meeting-tokens",
headers={"Authorization": f"Bearer {args.apikey}"},
headers={
"Authorization": f"Bearer {args.apikey}"},
json={
"properties": {"room_name": room_name, "is_owner": True, "exp": expiration}
},
"properties": {
"room_name": room_name,
"is_owner": True,
"exp": expiration}},
)
if res.status_code != 200:
raise Exception(f"Failed to create meeting token: {res.status_code} {res.text}")
raise Exception(
f"Failed to create meeting token: {res.status_code} {res.text}")
token: str = res.json()["token"]

View File

@@ -24,7 +24,8 @@ def get_meeting_token(room_name, daily_api_key, token_expiry):
'is_owner': True,
'exp': token_expiry}})
if res.status_code != 200:
return jsonify({'error': 'Unable to create meeting token', 'detail': res.text}), 500
return jsonify(
{'error': 'Unable to create meeting token', 'detail': res.text}), 500
meeting_token = res.json()['token']
return meeting_token

View File

@@ -14,14 +14,16 @@ load_dotenv()
app = Flask(__name__)
CORS(app)
print(f"I loaded an environment, and my FAL_KEY_ID is {os.getenv('FAL_KEY_ID')}")
print(
f"I loaded an environment, and my FAL_KEY_ID is {os.getenv('FAL_KEY_ID')}")
def start_bot(bot_path, args=None):
daily_api_key = os.getenv("DAILY_API_KEY")
api_path = os.getenv("DAILY_API_PATH") or "https://api.daily.co/v1"
timeout = int(os.getenv("DAILY_ROOM_TIMEOUT") or os.getenv("DAILY_BOT_MAX_DURATION") or 300)
timeout = int(os.getenv("DAILY_ROOM_TIMEOUT")
or os.getenv("DAILY_BOT_MAX_DURATION") or 300)
exp = time.time() + timeout
res = requests.post(
f"{api_path}/rooms",
@@ -59,14 +61,13 @@ def start_bot(bot_path, args=None):
extra_args = ""
proc = subprocess.Popen(
[
f"python {bot_path} -u {room_url} -t {meeting_token} -k {daily_api_key} {extra_args}"
],
[f"python {bot_path} -u {room_url} -t {meeting_token} -k {daily_api_key} {extra_args}"],
shell=True,
bufsize=1,
)
# Don't return until the bot has joined the room, but wait for at most 2 seconds.
# Don't return until the bot has joined the room, but wait for at most 2
# seconds.
attempts = 0
while attempts < 20:
time.sleep(0.1)
@@ -82,11 +83,13 @@ def start_bot(bot_path, args=None):
# Additional client config
config = {}
if os.getenv("CLIENT_VAD_TIMEOUT_SEC"):
config['vad_timeout_sec'] = float(os.getenv("DAILY_CLIENT_VAD_TIMEOUT_SEC"))
config['vad_timeout_sec'] = float(
os.getenv("DAILY_CLIENT_VAD_TIMEOUT_SEC"))
else:
config['vad_timeout_sec'] = 1.5
# return jsonify({"room_url": room_url, "token": meeting_token, "config": config}), 200
# return jsonify({"room_url": room_url, "token": meeting_token, "config":
# config}), 200
return redirect(room_url, code=301)

View File

@@ -109,8 +109,8 @@ async def main(room_url: str, token):
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
ta = TalkingAnimation()
ai = AnimationInitializer()

View File

@@ -61,161 +61,101 @@ for file in sound_files:
sounds[file] = audio_file.readframes(-1)
steps = [
{
"prompt": "Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function.",
"run_async": False,
"failed": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function.",
"tools": [
{
"type": "function",
"function": {
"name": "verify_birthday",
"description": "Use this function to verify the user has provided their correct birthday.",
"parameters": {
"type": "object",
"properties": {
"birthday": {
"type": "string",
"description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.",
}
},
},
},
}
],
},
{
"prompt": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages.",
"run_async": True,
"tools": [
{
"type": "function",
"function": {
"name": "list_prescriptions",
"description": "Once the user has provided a list of their prescription medications, call this function.",
"parameters": {
"type": "object",
"properties": {
"prescriptions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"medication": {
"type": "string",
"description": "The medication's name",
},
"dosage": {
"type": "string",
"description": "The prescription's dosage",
},
},
},
}
},
},
},
}
],
},
{
"prompt": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function.",
"run_async": True,
"tools": [
{
"type": "function",
"function": {
"name": "list_allergies",
"description": "Once the user has provided a list of their allergies, call this function.",
"parameters": {
"type": "object",
"properties": {
"allergies": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "What the user is allergic to",
}
},
},
}
},
},
},
}
],
},
{
"prompt": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function.",
"run_async": True,
"tools": [
{
"type": "function",
"function": {
"name": "list_conditions",
"description": "Once the user has provided a list of their medical conditions, call this function.",
"parameters": {
"type": "object",
"properties": {
"conditions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The user's medical condition",
}
},
},
}
},
},
},
},
],
},
{
"prompt": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function.",
"run_async": True,
"tools": [
{
"type": "function",
"function": {
"name": "list_visit_reasons",
"description": "Once the user has provided a list of the reasons they are visiting a doctor today, call this function.",
"parameters": {
"type": "object",
"properties": {
"visit_reasons": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The user's reason for visiting the doctor",
}
},
},
}
},
},
},
}
],
},
{
"prompt": "Now, thank the user and end the conversation.",
"run_async": True,
"tools": [],
},
{"prompt": "", "run_async": True, "tools": []},
]
steps = [{"prompt": "Start by introducing yourself. Then, ask the user to confirm their identity by telling you their birthday, including the year. When they answer with their birthday, call the verify_birthday function.",
"run_async": False,
"failed": "The user provided an incorrect birthday. Ask them for their birthday again. When they answer, call the verify_birthday function.",
"tools": [{"type": "function",
"function": {"name": "verify_birthday",
"description": "Use this function to verify the user has provided their correct birthday.",
"parameters": {"type": "object",
"properties": {"birthday": {"type": "string",
"description": "The user's birthdate, including the year. The user can provide it in any format, but convert it to YYYY-MM-DD format to call this function.",
}},
},
},
}],
},
{"prompt": "Next, thank the user for confirming their identity, then ask the user to list their current prescriptions. Each prescription needs to have a medication name and a dosage. Do not call the list_prescriptions function with any unknown dosages.",
"run_async": True,
"tools": [{"type": "function",
"function": {"name": "list_prescriptions",
"description": "Once the user has provided a list of their prescription medications, call this function.",
"parameters": {"type": "object",
"properties": {"prescriptions": {"type": "array",
"items": {"type": "object",
"properties": {"medication": {"type": "string",
"description": "The medication's name",
},
"dosage": {"type": "string",
"description": "The prescription's dosage",
},
},
},
}},
},
},
}],
},
{"prompt": "Next, ask the user if they have any allergies. Once they have listed their allergies or confirmed they don't have any, call the list_allergies function.",
"run_async": True,
"tools": [{"type": "function",
"function": {"name": "list_allergies",
"description": "Once the user has provided a list of their allergies, call this function.",
"parameters": {"type": "object",
"properties": {"allergies": {"type": "array",
"items": {"type": "object",
"properties": {"name": {"type": "string",
"description": "What the user is allergic to",
}},
},
}},
},
},
}],
},
{"prompt": "Now ask the user if they have any medical conditions the doctor should know about. Once they've answered the question, call the list_conditions function.",
"run_async": True,
"tools": [{"type": "function",
"function": {"name": "list_conditions",
"description": "Once the user has provided a list of their medical conditions, call this function.",
"parameters": {"type": "object",
"properties": {"conditions": {"type": "array",
"items": {"type": "object",
"properties": {"name": {"type": "string",
"description": "The user's medical condition",
}},
},
}},
},
},
},
],
},
{"prompt": "Finally, ask the user the reason for their doctor visit today. Once they answer, call the list_visit_reasons function.",
"run_async": True,
"tools": [{"type": "function",
"function": {"name": "list_visit_reasons",
"description": "Once the user has provided a list of the reasons they are visiting a doctor today, call this function.",
"parameters": {"type": "object",
"properties": {"visit_reasons": {"type": "array",
"items": {"type": "object",
"properties": {"name": {"type": "string",
"description": "The user's reason for visiting the doctor",
}},
},
}},
},
},
}],
},
{"prompt": "Now, thank the user and end the conversation.",
"run_async": True,
"tools": [],
},
{"prompt": "",
"run_async": True,
"tools": []},
]
current_step = 0
@@ -299,10 +239,12 @@ class ChecklistProcessor(AIService):
elif isinstance(frame, LLMFunctionCallFrame):
if frame.function_name and frame.arguments:
print(f"--> Calling function: {frame.function_name} with arguments:")
print(
f"--> Calling function: {frame.function_name} with arguments:")
pretty_json = re.sub(
"\n", "\n ", json.dumps(json.loads(frame.arguments), indent=2)
)
"\n", "\n ", json.dumps(
json.loads(
frame.arguments), indent=2))
print(f"--> {pretty_json}\n")
if frame.function_name not in self._functions:
raise Exception(

View File

@@ -146,7 +146,8 @@ class StoryProcessor(FrameProcessor):
self._story.append(self._text)
yield StoryPageFrame(self._text)
else:
# After the prompt thing, we'll catch an LLM end to get the last bit
# After the prompt thing, we'll catch an LLM end to get the
# last bit
pass
elif isinstance(frame, LLMResponseEndFrame):
yield ImageFrame(None, images["grandma-writing.png"])
@@ -251,7 +252,8 @@ async def main(room_url: str, token):
}
]
lca = LLMAssistantContextAggregator(messages)
local_pipeline = Pipeline([llm, lca, tts], sink=transport.send_queue)
local_pipeline = Pipeline(
[llm, lca, tts], sink=transport.send_queue)
await local_pipeline.queue_frames(
[
ImageFrame(None, images["grandma-listening.png"]),

View File

@@ -30,7 +30,8 @@ It also isn't saving what the user or bot says into the context object for use i
"""
# We need to use a custom service here to yield LLM frames without saving any context
# We need to use a custom service here to yield LLM frames without saving
# any context
class TranslationProcessor(FrameProcessor):
def __init__(self, language):
self._language = language
@@ -68,8 +69,8 @@ async def main(room_url: str, token):
voice="es-ES-AlvaroNeural",
)
llm = OpenAILLMService(
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"), model="gpt-4-turbo-preview"
)
api_key=os.getenv("OPENAI_CHATGPT_API_KEY"),
model="gpt-4-turbo-preview")
sa = SentenceAggregator()
tp = TranslationProcessor("Spanish")
pipeline = Pipeline([sa, tp, llm, tts])

View File

@@ -11,8 +11,11 @@ load_dotenv()
def configure():
parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
parser.add_argument(
"-u", "--url", type=str, required=False, help="URL of the Daily room to join"
)
"-u",
"--url",
type=str,
required=False,
help="URL of the Daily room to join")
parser.add_argument(
"-k",
"--apikey",
@@ -33,20 +36,25 @@ def configure():
if not key:
raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
# Create a meeting token for the given room with an expiration 1 hour in the future.
# Create a meeting token for the given room with an expiration 1 hour in
# the future.
room_name: str = urllib.parse.urlparse(url).path[1:]
expiration: float = time.time() + 60 * 60
res: requests.Response = requests.post(
f"https://api.daily.co/v1/meeting-tokens",
headers={"Authorization": f"Bearer {key}"},
headers={
"Authorization": f"Bearer {key}"},
json={
"properties": {"room_name": room_name, "is_owner": True, "exp": expiration}
},
"properties": {
"room_name": room_name,
"is_owner": True,
"exp": expiration}},
)
if res.status_code != 200:
raise Exception(f"Failed to create meeting token: {res.status_code} {res.text}")
raise Exception(
f"Failed to create meeting token: {res.status_code} {res.text}")
token: str = res.json()["token"]