diff --git a/examples/foundational/19-openai-realtime.py b/examples/foundational/19-openai-realtime.py index 0d59782f1..d10fbc129 100644 --- a/examples/foundational/19-openai-realtime.py +++ b/examples/foundational/19-openai-realtime.py @@ -37,7 +37,10 @@ from pipecat.services.openai.realtime.events import ( SemanticTurnDetection, SessionProperties, ) -from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService +from pipecat.services.openai.realtime.llm import ( + OpenAIRealtimeLLMService, + OpenAIRealtimeLLMSettings, +) from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -137,22 +140,10 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - session_properties = SessionProperties( - audio=AudioConfiguration( - input=AudioInput( - transcription=InputAudioTranscription(), - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - turn_detection=SemanticTurnDetection(), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - noise_reduction=InputAudioNoiseReduction(type="near_field"), - ) - ), - # In this example we provide tools through the context, but you could - # alternatively provide them here. - # tools=tools, - instructions="""You are a helpful and friendly AI. + llm = OpenAIRealtimeLLMService( + api_key=os.getenv("OPENAI_API_KEY"), + settings=OpenAIRealtimeLLMSettings( + system_instruction="""You are a helpful and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and @@ -166,11 +157,23 @@ You are participating in a voice conversation. Keep your responses concise, shor unless specifically asked to elaborate on a topic. Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""", - ) - - llm = OpenAIRealtimeLLMService( - api_key=os.getenv("OPENAI_API_KEY"), - session_properties=session_properties, + session_properties=SessionProperties( + audio=AudioConfiguration( + input=AudioInput( + transcription=InputAudioTranscription(), + # Set openai TurnDetection parameters. Not setting this at all will turn it + # on by default + turn_detection=SemanticTurnDetection(), + # Or set to False to disable openai turn detection and use transport VAD + # turn_detection=False, + noise_reduction=InputAudioNoiseReduction(type="near_field"), + ) + ), + # In this example we provide tools through the context, but you could + # alternatively provide them here. + # tools=tools, + ), + ), ) # you can either register a single function for all function calls, or specific functions diff --git a/examples/foundational/19a-azure-realtime.py b/examples/foundational/19a-azure-realtime.py index 6883a1f09..8d52a1c12 100644 --- a/examples/foundational/19a-azure-realtime.py +++ b/examples/foundational/19a-azure-realtime.py @@ -30,6 +30,7 @@ from pipecat.services.openai.realtime.events import ( InputAudioTranscription, SessionProperties, ) +from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMSettings from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -111,19 +112,11 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - session_properties = SessionProperties( - audio=AudioConfiguration( - input=AudioInput( - transcription=InputAudioTranscription(model="whisper-1"), - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - # turn_detection=TurnDetection(silence_duration_ms=1000), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - ) - ), - # tools=tools, - instructions="""You are a helpful and friendly AI. + llm = AzureRealtimeLLMService( + api_key=os.getenv("AZURE_REALTIME_API_KEY"), + base_url=os.getenv("AZURE_REALTIME_BASE_URL"), + settings=OpenAIRealtimeLLMSettings( + system_instruction="""You are a helpful and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and @@ -141,12 +134,20 @@ You have access to the following tools: - get_restaurant_recommendation: Get a restaurant recommendation for a given location. Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""", - ) - - llm = AzureRealtimeLLMService( - api_key=os.getenv("AZURE_REALTIME_API_KEY"), - base_url=os.getenv("AZURE_REALTIME_BASE_URL"), - session_properties=session_properties, + session_properties=SessionProperties( + audio=AudioConfiguration( + input=AudioInput( + transcription=InputAudioTranscription(model="whisper-1"), + # Set openai TurnDetection parameters. Not setting this at all will turn it + # on by default + # turn_detection=TurnDetection(silence_duration_ms=1000), + # Or set to False to disable openai turn detection and use transport VAD + # turn_detection=False, + ) + ), + # tools=tools, + ), + ), ) # you can either register a single function for all function calls, or specific functions diff --git a/examples/foundational/19b-openai-realtime-text.py b/examples/foundational/19b-openai-realtime-text.py index 98dc81c3b..e03381642 100644 --- a/examples/foundational/19b-openai-realtime-text.py +++ b/examples/foundational/19b-openai-realtime-text.py @@ -32,7 +32,10 @@ from pipecat.services.openai.realtime.events import ( SemanticTurnDetection, SessionProperties, ) -from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService +from pipecat.services.openai.realtime.llm import ( + OpenAIRealtimeLLMService, + OpenAIRealtimeLLMSettings, +) from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -113,21 +116,10 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - session_properties = SessionProperties( - audio=AudioConfiguration( - input=AudioInput( - transcription=InputAudioTranscription(), - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - turn_detection=SemanticTurnDetection(), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - noise_reduction=InputAudioNoiseReduction(type="near_field"), - ) - ), - output_modalities=["text"], - # tools=tools, - instructions="""You are a helpful and friendly AI. + llm = OpenAIRealtimeLLMService( + api_key=os.getenv("OPENAI_API_KEY"), + settings=OpenAIRealtimeLLMSettings( + system_instruction="""You are a helpful and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and @@ -145,11 +137,22 @@ You have access to the following tools: - get_restaurant_recommendation: Get a restaurant recommendation for a given location. Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""", - ) - - llm = OpenAIRealtimeLLMService( - api_key=os.getenv("OPENAI_API_KEY"), - session_properties=session_properties, + session_properties=SessionProperties( + audio=AudioConfiguration( + input=AudioInput( + transcription=InputAudioTranscription(), + # Set openai TurnDetection parameters. Not setting this at all will turn it + # on by default + turn_detection=SemanticTurnDetection(), + # Or set to False to disable openai turn detection and use transport VAD + # turn_detection=False, + noise_reduction=InputAudioNoiseReduction(type="near_field"), + ) + ), + output_modalities=["text"], + # tools=tools, + ), + ), ) tts = CartesiaTTSService( diff --git a/examples/foundational/19c-openai-realtime-live-video.py b/examples/foundational/19c-openai-realtime-live-video.py index 3f091f712..31088ff0f 100644 --- a/examples/foundational/19c-openai-realtime-live-video.py +++ b/examples/foundational/19c-openai-realtime-live-video.py @@ -32,7 +32,10 @@ from pipecat.services.openai.realtime.events import ( SemanticTurnDetection, SessionProperties, ) -from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService +from pipecat.services.openai.realtime.llm import ( + OpenAIRealtimeLLMService, + OpenAIRealtimeLLMSettings, +) from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams @@ -60,22 +63,10 @@ transport_params = { async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): logger.info(f"Starting bot") - session_properties = SessionProperties( - audio=AudioConfiguration( - input=AudioInput( - transcription=InputAudioTranscription(), - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - turn_detection=SemanticTurnDetection(), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - noise_reduction=InputAudioNoiseReduction(type="near_field"), - ) - ), - # In this example we provide tools through the context, but you could - # alternatively provide them here. - # tools=tools, - instructions="""You are a helpful and friendly AI. + llm = OpenAIRealtimeLLMService( + api_key=os.getenv("OPENAI_API_KEY"), + settings=OpenAIRealtimeLLMSettings( + system_instruction="""You are a helpful and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and @@ -89,11 +80,23 @@ You are participating in a voice conversation. Keep your responses concise, shor unless specifically asked to elaborate on a topic. Remember, your responses should be short. Just one or two sentences, usually. Respond in English.""", - ) - - llm = OpenAIRealtimeLLMService( - api_key=os.getenv("OPENAI_API_KEY"), - session_properties=session_properties, + session_properties=SessionProperties( + audio=AudioConfiguration( + input=AudioInput( + transcription=InputAudioTranscription(), + # Set openai TurnDetection parameters. Not setting this at all will turn it + # on by default + turn_detection=SemanticTurnDetection(), + # Or set to False to disable openai turn detection and use transport VAD + # turn_detection=False, + noise_reduction=InputAudioNoiseReduction(type="near_field"), + ) + ), + # In this example we provide tools through the context, but you could + # alternatively provide them here. + # tools=tools, + ), + ), ) # Create a standard OpenAI LLM context object using the normal messages format. The diff --git a/examples/foundational/20b-persistent-context-openai-realtime.py b/examples/foundational/20b-persistent-context-openai-realtime.py index a24ff39e5..b85f8c319 100644 --- a/examples/foundational/20b-persistent-context-openai-realtime.py +++ b/examples/foundational/20b-persistent-context-openai-realtime.py @@ -33,7 +33,10 @@ from pipecat.services.openai.realtime.events import ( SessionProperties, TurnDetection, ) -from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService +from pipecat.services.openai.realtime.llm import ( + OpenAIRealtimeLLMService, + OpenAIRealtimeLLMSettings, +) from pipecat.transports.base_transport import BaseTransport, TransportParams from pipecat.transports.daily.transport import DailyParams from pipecat.transports.websocket.fastapi import FastAPIWebsocketParams @@ -173,19 +176,10 @@ async def run_bot(transport: BaseTransport, runner_args: RunnerArguments): stt = DeepgramSTTService(api_key=os.getenv("DEEPGRAM_API_KEY")) - session_properties = SessionProperties( - audio=AudioConfiguration( - input=AudioInput( - transcription=InputAudioTranscription(), - # Set openai TurnDetection parameters. Not setting this at all will turn it - # on by default - turn_detection=TurnDetection(silence_duration_ms=1000), - # Or set to False to disable openai turn detection and use transport VAD - # turn_detection=False, - ) - ), - # tools=tools, - instructions="""Your knowledge cutoff is 2023-10. You are a helpful and friendly AI. + llm = OpenAIRealtimeLLMService( + api_key=os.getenv("OPENAI_API_KEY"), + settings=OpenAIRealtimeLLMSettings( + system_instruction="""Your knowledge cutoff is 2023-10. You are a helpful and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and @@ -199,11 +193,20 @@ You are participating in a voice conversation. Keep your responses concise, shor unless specifically asked to elaborate on a topic. Remember, your responses should be short. Just one or two sentences, usually.""", - ) - - llm = OpenAIRealtimeLLMService( - api_key=os.getenv("OPENAI_API_KEY"), - session_properties=session_properties, + session_properties=SessionProperties( + audio=AudioConfiguration( + input=AudioInput( + transcription=InputAudioTranscription(), + # Set openai TurnDetection parameters. Not setting this at all will turn it + # on by default + turn_detection=TurnDetection(silence_duration_ms=1000), + # Or set to False to disable openai turn detection and use transport VAD + # turn_detection=False, + ) + ), + # tools=tools, + ), + ), ) # you can either register a single function for all function calls, or specific functions