Formatting adjusted and the encoding selection moved from TelnyxFrameSerializer to the websocket_endpoint function in server.py

This commit is contained in:
Rafal Skorski
2025-01-30 12:52:30 +01:00
parent 9c22bd8df1
commit b93e4ab9cb
5 changed files with 24 additions and 22 deletions

View File

@@ -76,7 +76,7 @@ This project is a FastAPI-based chatbot that integrates with Telnyx to handle We
```
- In `templates/streams.xml`, replace `<your server url>` with your ngrok URL (without `https://`)
- The final URL should look like: `wss://abc123.ngrok.io/ws`
- The encoding (`bidirectionalCodec`) should be `PCMU` or `PCMA` depending on your needs. Based on selected encoding, set the outbound_encoding in the `TelnyxFrameSerializer` in `src/pipecat/serializers/telnyx.py`
- The encoding (`bidirectionalCodec`) should be `PCMU` or `PCMA` depending on your needs. Based on the selected encoding, set the `outbound_encoding` in `server.py` when the bot is initialized.
- The inbound encoding can be controlled from the application configuration for inbound calls and dial/transfer commands for outbound calls.
## Running the Application

View File

@@ -34,7 +34,7 @@ logger.remove(0)
logger.add(sys.stderr, level="DEBUG")
async def run_bot(websocket_client, stream_id, encoding):
async def run_bot(websocket_client, stream_id, outbound_encoding, inbound_encoding):
transport = FastAPIWebsocketTransport(
websocket=websocket_client,
params=FastAPIWebsocketParams(
@@ -43,7 +43,7 @@ async def run_bot(websocket_client, stream_id, encoding):
vad_enabled=True,
vad_analyzer=SileroVADAnalyzer(),
vad_audio_passthrough=True,
serializer=TelnyxFrameSerializer(stream_id, encoding),
serializer=TelnyxFrameSerializer(stream_id, outbound_encoding, inbound_encoding),
),
)
@@ -55,9 +55,7 @@ async def run_bot(websocket_client, stream_id, encoding):
api_key=os.getenv("ELEVENLABS_API_KEY"),
voice_id="CwhRBWXzGAHq8TQ4Fs17",
output_format="pcm_24000",
params=ElevenLabsTTSService.InputParams(
language=Language.EN
)
params=ElevenLabsTTSService.InputParams(language=Language.EN),
)
messages = [

View File

@@ -16,13 +16,11 @@ app.add_middleware(
allow_headers=["*"],
)
@app.post("/")
async def start_call():
print("POST TeXML")
return HTMLResponse(content=open("templates/streams.xml").read(), media_type="application/xml")
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
await websocket.accept()
@@ -31,10 +29,9 @@ async def websocket_endpoint(websocket: WebSocket):
call_data = json.loads(await start_data.__anext__())
print(call_data, flush=True)
stream_id = call_data["stream_id"]
encoding = call_data["start"]["media_format"]["encoding"]
outbound_encoding = call_data["start"]["media_format"]["encoding"]
print("WebSocket connection accepted")
await run_bot(websocket, stream_id, encoding)
await run_bot(websocket, stream_id, outbound_encoding, "PCMU")
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8765)

View File

@@ -84,6 +84,7 @@ def ulaw_to_pcm(ulaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
return out_pcm_bytes
def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
# Resample
in_pcm_bytes = resample_audio(pcm_bytes, in_sample_rate, out_sample_rate)
@@ -93,8 +94,8 @@ def pcm_to_ulaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
return ulaw_bytes
def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> bytes:
def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) -> bytes:
# Convert a-law to PCM
in_pcm_bytes = audioop.alaw2lin(alaw_bytes, 2)
@@ -103,6 +104,7 @@ def alaw_to_pcm(alaw_bytes: bytes, in_sample_rate: int, out_sample_rate: int) ->
return out_pcm_bytes
def pcm_to_alaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
# Resample
in_pcm_bytes = resample_audio(pcm_bytes, in_sample_rate, out_sample_rate)
@@ -110,4 +112,4 @@ def pcm_to_alaw(pcm_bytes: bytes, in_sample_rate: int, out_sample_rate: int):
# Convert PCM to A-law
alaw_bytes = audioop.lin2alaw(in_pcm_bytes, 2)
return alaw_bytes
return alaw_bytes

View File

@@ -28,9 +28,10 @@ class TelnyxFrameSerializer(FrameSerializer):
inbound_encoding: str = "PCMU"
outbound_encoding: str = "PCMU"
def __init__(self, stream_id: str, encoding: str, params: InputParams = InputParams()):
def __init__(self, stream_id: str, outbound_encoding: str, inbound_encoding: str, params: InputParams = InputParams()):
self._stream_id = stream_id
params.inbound_encoding = encoding
params.outbound_encoding = outbound_encoding
params.inbound_encoding = inbound_encoding
self._params = params
@property
@@ -41,10 +42,14 @@ class TelnyxFrameSerializer(FrameSerializer):
if isinstance(frame, AudioRawFrame):
data = frame.audio
if self._params.outbound_encoding == "PCMU":
serialized_data = pcm_to_ulaw(data, frame.sample_rate, self._params.telnyx_sample_rate)
elif self._params.outbound_encoding == "PCMA":
serialized_data = pcm_to_alaw(data, frame.sample_rate, self._params.telnyx_sample_rate)
if self._params.inbound_encoding == "PCMU":
serialized_data = pcm_to_ulaw(
data, frame.sample_rate, self._params.telnyx_sample_rate
)
elif self._params.inbound_encoding == "PCMA":
serialized_data = pcm_to_alaw(
data, frame.sample_rate, self._params.telnyx_sample_rate
)
else:
raise ValueError(f"Unsupported encoding: {self._params.encoding}")
@@ -67,11 +72,11 @@ class TelnyxFrameSerializer(FrameSerializer):
payload_base64 = message["media"]["payload"]
payload = base64.b64decode(payload_base64)
if self._params.inbound_encoding == "PCMU":
if self._params.outbound_encoding == "PCMU":
deserialized_data = ulaw_to_pcm(
payload, self._params.telnyx_sample_rate, self._params.sample_rate
)
elif self._params.inbound_encoding == "PCMA":
elif self._params.outbound_encoding == "PCMA":
deserialized_data = alaw_to_pcm(
payload, self._params.telnyx_sample_rate, self._params.sample_rate
)