update CHANGELOG for 0.0.17

update linux-py3.10-requirements.txt
Merge pull request #150 from pipecat-ai/khk-gemini
2024-05-19 19:27:20 -07:00 · 2024-05-19 19:27:04 -07:00 · 2024-05-20 10:24:31 +08:00 · 2024-05-19 12:44:45 -07:00 · 2024-05-19 12:33:57 -07:00 · 2024-05-19 11:13:39 -07:00
310 changed files with 17806 additions and 3293 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,30 @@
+# flyctl launch added from .gitignore
+**/.vscode
+**/env
+**/__pycache__
+**/*~
+**/venv
+**/#*#
+
+# Distribution / packaging
+**/.Python
+**/build
+**/develop-eggs
+**/dist
+**/downloads
+**/eggs
+**/.eggs
+**/lib
+**/lib64
+**/parts
+**/sdist
+**/var
+**/wheels
+**/share/python-wheels
+**/*.egg-info
+**/.installed.cfg
+**/*.egg
+**/MANIFEST
+**/.DS_Store
+**/.env
+fly.toml
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -0,0 +1,44 @@
+name: build  # Checks that the package builds and installs.
+
+on:
+  workflow_dispatch:  # allows manual runs
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - '**'
+    paths-ignore:
+      - 'docs/**'
+
+concurrency:
+  cancel-in-progress: true  # a newer run for the same PR/ref cancels the older one
+  group: build-${{ github.event.pull_request.number || github.ref }}
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    name: Build and Install
+    steps:
+      - uses: actions/checkout@v4
+      - id: setup_python
+        name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"  # quoted so YAML does not read it as the float 3.1
+      - name: Setup virtual environment
+        run: |
+          python -m venv .venv
+      - name: Install basic Python dependencies
+        run: |
+          source .venv/bin/activate
+          python -m pip install --upgrade pip
+          pip install -r dev-requirements.txt
+      - name: Build project
+        run: |
+          source .venv/bin/activate
+          python -m build
+      - name: Install project and other Python dependencies
+        run: |
+          source .venv/bin/activate
+          pip install --editable .
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,44 @@
+name: lint  # Checks that src/ is formatted according to autopep8.
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - "**"
+    paths-ignore:
+      - "docs/**"
+
+concurrency:
+  group: build-lint-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run for the same PR/ref cancels the older one
+
+jobs:
+  autopep8:
+    name: "Formatting lints"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Setup virtual environment
+        run: |
+          python -m venv .venv
+      - name: Install development Python dependencies
+        run: |
+          source .venv/bin/activate
+          python -m pip install --upgrade pip
+          pip install -r dev-requirements.txt
+      - name: autopep8
+        id: autopep8
+        # With --exit-code, autopep8 exits with status 2 when it would change
+        # any file. A non-zero exit already fails this step and the job, so no
+        # separate "fail" step is needed.
+        run: |
+          source .venv/bin/activate
+          autopep8 --max-line-length 100 --exit-code -r -d --exclude "*_pb2.py" -a -a src/
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -0,0 +1,84 @@
+name: publish  # Manually builds a git ref and publishes it to PyPI and Test PyPI.
+
+on:
+  workflow_dispatch:
+    inputs:
+      gitref:
+        type: string
+        description: "what git ref to build"
+        required: true
+
+jobs:
+  build:
+    name: "Build and upload wheels"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.gitref }}
+      - name: Set up Python
+        id: setup_python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Setup virtual environment
+        run: |
+          python -m venv .venv
+      - name: Install basic Python dependencies
+        run: |
+          source .venv/bin/activate
+          python -m pip install --upgrade pip
+          pip install -r dev-requirements.txt
+      - name: Build project
+        run: |
+          source .venv/bin/activate
+          python -m build
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels  # downloaded under the same name by both publish jobs
+          path: ./dist
+
+  publish-to-pypi:
+    name: "Publish to PyPI"
+    runs-on: ubuntu-latest
+    needs: [ build ]
+    environment:
+      name: pypi
+      url: https://pypi.org/p/pipecat-ai
+    permissions:
+      id-token: write  # lets the publish action request an OIDC token
+    steps:
+      - name: Download wheels
+        uses: actions/download-artifact@v4
+        with:
+          name: wheels
+          path: ./dist
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true
+          print-hash: true
+
+  publish-to-test-pypi:
+    name: "Publish to Test PyPI"
+    runs-on: ubuntu-latest
+    needs: [ build ]
+    environment:
+      name: testpypi
+      url: https://test.pypi.org/p/pipecat-ai
+    permissions:
+      id-token: write  # lets the publish action request an OIDC token
+    steps:
+      - name: Download wheels
+        uses: actions/download-artifact@v4
+        with:
+          name: wheels
+          path: ./dist
+      - name: Publish to Test PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true
+          print-hash: true
+          repository-url: https://test.pypi.org/legacy/
--- a/.github/workflows/publish_test.yaml
+++ b/.github/workflows/publish_test.yaml
@@ -0,0 +1,63 @@
+name: publish-test  # Builds every push to main and publishes it to Test PyPI.
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+
+jobs:
+  build:
+    name: "Build and upload wheels"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+        with:
+          # No `ref` is set: this workflow declares no inputs, so the triggering commit is used.
+          fetch-tags: true  # presumably so setuptools_scm can derive the version - confirm
+          fetch-depth: 100
+      - name: Set up Python
+        id: setup_python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Setup virtual environment
+        run: |
+          python -m venv .venv
+      - name: Install basic Python dependencies
+        run: |
+          source .venv/bin/activate
+          python -m pip install --upgrade pip
+          pip install -r dev-requirements.txt
+      - name: Build project
+        run: |
+          source .venv/bin/activate
+          python -m build
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels  # downloaded under the same name by the publish job
+          path: ./dist
+
+  publish-to-pypi:
+    name: "Publish to Test PyPI"
+    runs-on: ubuntu-latest
+    needs: [ build ]
+    environment:
+      name: testpypi
+      url: https://test.pypi.org/p/pipecat-ai
+    permissions:
+      id-token: write  # lets the publish action request an OIDC token
+    steps:
+      - name: Download wheels
+        uses: actions/download-artifact@v4
+        with:
+          name: wheels
+          path: ./dist
+      - name: Publish to Test PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          verbose: true
+          print-hash: true
+          repository-url: https://test.pypi.org/legacy/
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -0,0 +1,49 @@
+name: test  # Runs the pytest suite (including doctests) on pushes and pull requests.
+
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - "**"
+    paths-ignore:
+      - "docs/**"
+
+concurrency:
+  group: build-test-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run for the same PR/ref cancels the older one
+
+jobs:
+  test:
+    name: "Unit and Integration Tests"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        id: setup_python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - name: Cache virtual environment
+        uses: actions/cache@v3
+        with:
+          # The key hashes linux-py3.10-requirements.txt and dev-requirements.txt,
+          # the two files installed below, so the cache changes when either does.
+          key: venv-${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('linux-py3.10-requirements.txt') }}-${{ hashFiles('dev-requirements.txt') }}
+          path: .venv
+      - name: Install system packages
+        run: sudo apt-get update && sudo apt-get install -y portaudio19-dev
+      - name: Setup virtual environment
+        run: |
+          python -m venv .venv
+      - name: Install basic Python dependencies
+        run: |
+          source .venv/bin/activate
+          python -m pip install --upgrade pip
+          pip install -r linux-py3.10-requirements.txt -r dev-requirements.txt
+      - name: Test with pytest
+        run: |
+          source .venv/bin/activate
+          pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,8 @@
 env/
 __pycache__/
 *~
+venv
+.venv
 #*#

 # Distribution / packaging
@@ -25,3 +27,4 @@ share/python-wheels/
 MANIFEST
 .DS_Store
 .env
+fly.toml
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,271 @@
+# Changelog
+
+All notable changes to **pipecat** will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.0.17] - 2024-05-19
+
+### Added
+
+- Added `google.generativeai` model support, including vision. This new `google`
+  service defaults to using `gemini-1.5-flash-latest`. Example in
+  `examples/foundational/12a-describe-video-gemini-flash.py`.
+
+- Added vision support to `openai` service. Example in
+  `examples/foundational/12a-describe-video-gemini-flash.py`.
+
+- Added initial interruptions support. The assistant contexts (or aggregators)
+  should now be placed after the output transport. This way, only the completed
+  spoken context is added to the assistant context.
+
+- Added `VADParams` so you can control voice confidence level and others.
+
+- `VADAnalyzer` now uses an exponential smoothed volume to improve speech
+  detection. This is useful when voice confidence is high (because there's
+  someone talking near you) but volume is low.
+
+### Fixed
+
+- Fixed an issue where TTSService was not pushing TextFrames downstream.
+
+- Fixed issues with Ctrl-C program termination.
+
+- Fixed an issue that was causing `StopTaskFrame` to actually not exit the
+  `PipelineTask`.
+
+## [0.0.16] - 2024-05-16
+
+### Fixed
+
+- `DailyTransport`: don't publish camera and audio tracks if not enabled.
+
+- Fixed an issue in `BaseInputTransport` that was causing frames pushed
+  downstream to not be pushed in the right order.
+
+## [0.0.15] - 2024-05-15
+
+### Fixed
+
+- Quick hot fix for receiving `DailyTransportMessage`.
+
+## [0.0.14] - 2024-05-15
+
+### Added
+
+- Added `DailyTransport` event `on_participant_left`.
+
+- Added support for receiving `DailyTransportMessage`.
+
+### Fixed
+
+- Images are now resized to the size of the output camera. This was causing
+  images to not be displayed.
+
+- Fixed an issue in `DailyTransport` that would not allow the input processor to
+  shutdown if no participant ever joined the room.
+
+- Fixed base transports start and stop. In some situations processors would halt
+  or not shutdown properly.
+
+## [0.0.13] - 2024-05-14
+
+### Changed
+
+- `MoondreamService` argument `model_id` is now `model`.
+
+- `VADAnalyzer` arguments have been renamed for more clarity.
+
+### Fixed
+
+- Fixed an issue with `DailyInputTransport` and `DailyOutputTransport` that
+  could cause some threads to not start properly.
+
+- Fixed `STTService`. Add `max_silence_secs` and `max_buffer_secs` to handle
+  better what's being passed to the STT service. Also add exponential smoothing
+  to the RMS.
+
+- Fixed `WhisperSTTService`. Add `no_speech_prob` to avoid garbage output text.
+
+## [0.0.12] - 2024-05-14
+
+### Added
+
+- Added `DailyTranscriptionSettings` to be able to specify transcription
+  settings much easier (e.g. language).
+
+### Other
+
+- Updated `simple-chatbot` with Spanish.
+
+- Add missing dependencies in some of the examples.
+
+## [0.0.11] - 2024-05-13
+
+### Added
+
+- Allow stopping pipeline tasks with new `StopTaskFrame`.
+
+### Changed
+
+- TTS, STT and image generation service now use `AsyncGenerator`.
+
+### Fixed
+
+- `DailyTransport`: allow registering for participant transcriptions even if
+  input transport is not initialized yet.
+
+### Other
+
+- Updated `storytelling-chatbot`.
+
+## [0.0.10] - 2024-05-13
+
+### Added
+
+- Added Intel GPU support to `MoondreamService`.
+
+- Added support for sending transport messages (e.g. to communicate with an app
+  at the other end of the transport).
+
+- Added `FrameProcessor.push_error()` to easily send an `ErrorFrame` upstream.
+
+### Fixed
+
+- Fixed Azure services (TTS and image generation).
+
+### Other
+
+- Updated `simple-chatbot`, `moondream-chatbot` and `translation-chatbot`
+  examples.
+
+## [0.0.9] - 2024-05-12
+
+### Changed
+
+Many things have changed in this version. Many of the main ideas such as frames,
+processors, services and transports are still there but some things have changed
+a bit.
+
+- `Frame`s describe the basic units for processing. For example, text, image or
+  audio frames. Or control frames to indicate a user has started or stopped
+  speaking.
+
+- `FrameProcessor`s process frames (e.g. they convert a `TextFrame` to an
+  `ImageRawFrame`) and push new frames downstream or upstream to their linked
+  peers.
+
+- `FrameProcessor`s can be linked together. The easiest way is to use the
+  `Pipeline` which is a container for processors. Linking processors allows
+  frames to travel upstream or downstream easily.
+
+- `Transport`s are a way to send or receive frames. There can be local
+  transports (e.g. local audio or native apps), network transports
+  (e.g. websocket) or service transports (e.g. https://daily.co).
+
+- `Pipeline`s are just a processor container for other processors.
+
+- A `PipelineTask` knows how to run a pipeline.
+
+- A `PipelineRunner` can run one or more tasks and it is also used, for example,
+  to capture Ctrl-C from the user.
+
+## [0.0.8] - 2024-04-11
+
+### Added
+
+- Added `FireworksLLMService`.
+
+- Added `InterimTranscriptionFrame` and enable interim results in
+  `DailyTransport` transcriptions.
+
+### Changed
+
+- `FalImageGenService` now uses new `fal_client` package.
+
+### Fixed
+
+- `FalImageGenService`: use `asyncio.to_thread` to not block main loop when
+  generating images.
+
+- Allow `TranscriptionFrame` after an end frame (transcriptions can be delayed
+  and received after `UserStoppedSpeakingFrame`).
+
+## [0.0.7] - 2024-04-10
+
+### Added
+
+- Add `use_cpu` argument to `MoondreamService`.
+
+## [0.0.6] - 2024-04-10
+
+### Added
+
+- Added `FalImageGenService.InputParams`.
+
+- Added `URLImageFrame` and `UserImageFrame`.
+
+- Added `UserImageRequestFrame` and allow requesting an image from a participant.
+
+- Added base `VisionService` and `MoondreamService`
+
+### Changed
+
+- Don't pass `image_size` to `ImageGenService`, images should have their own size.
+
+- `ImageFrame` now receives a tuple `(width,height)` to specify the size.
+
+- `on_first_other_participant_joined` now gets a participant argument.
+
+### Fixed
+
+- Check if camera, speaker and microphone are enabled before writing to them.
+
+### Performance
+
+- `DailyTransport` only subscribes to the desired participant video track.
+
+## [0.0.5] - 2024-04-06
+
+### Changed
+
+- Use `camera_bitrate` and `camera_framerate`.
+
+- Increase `camera_framerate` to 30 by default.
+
+### Fixed
+
+- Fixed `LocalTransport.read_audio_frames`.
+
+## [0.0.4] - 2024-04-04
+
+### Added
+
+- Added project optional dependencies `[silero,openai,...]`.
+
+### Changed
+
+- Moved transports to their own directory.
+
+- Use `OPENAI_API_KEY` instead of `OPENAI_CHATGPT_API_KEY`.
+
+### Fixed
+
+- Don't write to microphone/speaker if not enabled.
+
+### Other
+
+- Added live translation example.
+
+- Fix foundational examples.
+
+## [0.0.3] - 2024-03-13
+
+### Other
+
+- Added `storybot` and `chatbot` examples.
+
+## [0.0.2] - 2024-03-12
+
+Initial public release.
--- a/CHANGELOG.md.template
+++ b/CHANGELOG.md.template
@@ -0,0 +1,62 @@
+# Changelog
+
+All notable changes to the **&lt;project name&gt;** SDK will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+Please make sure to add your changes to the appropriate categories:
+
+## [Unreleased]
+
+### Added
+
+<!-- for new functionality -->
+
+- n/a
+
+### Changed
+
+<!-- for changed functionality -->
+
+- n/a
+
+### Deprecated
+
+<!-- for soon-to-be removed functionality -->
+
+- n/a
+
+### Removed
+
+<!-- for removed functionality -->
+
+- n/a
+
+### Fixed
+
+<!-- for fixed bugs -->
+
+- n/a
+
+### Performance
+
+<!-- for performance-relevant changes -->
+
+- n/a
+
+### Security
+
+<!-- for security-relevant changes -->
+
+- n/a
+
+### Other
+
+<!-- for everything else -->
+
+- n/a
+
+## [0.1.0] - YYYY-MM-DD
+
+Initial release.
--- a/40
+++ b/40
@@ -0,0 +1,40 @@
+# setup
+FROM python:3.11.5
+
+WORKDIR /app
+COPY requirements.txt /app/
+COPY *.py /app/
+COPY pyproject.toml /app/
+
+COPY src/ /app/src/
+COPY examples/ /app/examples/
+
+# Install the requirements, then build and install the package itself.
+RUN ls --recursive /app/
+RUN pip3 install --upgrade -r requirements.txt
+RUN python -m build .
+RUN pip3 install .
+RUN pip3 install gunicorn
+# If running on Ubuntu, Azure TTS requires some extra config
+# https://learn.microsoft.com/en-us/azure/ai-services/speech-service/quickstarts/setup-platform?pivots=programming-language-python&tabs=linux%2Cubuntu%2Cdotnetcli%2Cdotnet%2Cjre%2Cmaven%2Cnodejs%2Cmac%2Cpypi
+
+RUN wget -O - https://www.openssl.org/source/openssl-1.1.1w.tar.gz | tar zxf -
+WORKDIR /app/openssl-1.1.1w
+RUN ./config --prefix=/usr/local
+RUN make -j $(nproc)
+RUN make install_sw install_ssldirs
+RUN ldconfig -v
+ENV SSL_CERT_DIR=/etc/ssl/certs
+
+#ENV LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
+RUN apt-get update \
+    && apt-get -y install build-essential libssl-dev ca-certificates libasound2 wget \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+EXPOSE 8000
+# run
+CMD ["gunicorn", "--workers=2", "--log-level", "debug", "--chdir", "examples/server", "--capture-output", "daily-bot-manager:app", "--bind=0.0.0.0:8000"]
--- a/24
+++ b/24
@@ -0,0 +1,24 @@
+BSD 2-Clause License
+
+Copyright (c) 2024, Kwindla Hultman Kramer
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/README.md
+++ b/README.md
@@ -1,55 +1,221 @@
-# dailyai SDK
+<div align="center">
+ <img alt="pipecat" width="300px" height="auto" src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/pipecat.png">
+</div>

-This SDK can help you build applications that participate in WebRTC meetings and use various AI services to interact with other participants.
+# Pipecat

-## Build/Install
+[![PyPI](https://img.shields.io/pypi/v/pipecat-ai)](https://pypi.org/project/pipecat-ai) [![Discord](https://img.shields.io/discord/1239284677165056021
+)](https://discord.gg/pipecat)
+
+`pipecat` is a framework for building voice (and multimodal) conversational agents. Things like personal coaches, meeting assistants, [story-telling toys for kids](https://storytelling-chatbot.fly.dev/), customer support bots, [intake flows](https://www.youtube.com/watch?v=lDevgsp9vn0), and snarky social companions.
+
+Take a look at some example apps:
+
+<p float="left">
+    <a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/simple-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/simple-chatbot/image.png" width="280" /></a>&nbsp;
+    <a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/storytelling-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/storytelling-chatbot/image.png" width="280" /></a>
+    <br/>
+    <a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/translation-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/translation-chatbot/image.png" width="280" /></a>&nbsp;
+    <a href="https://github.com/pipecat-ai/pipecat/tree/main/examples/moondream-chatbot"><img src="https://raw.githubusercontent.com/pipecat-ai/pipecat/main/examples/moondream-chatbot/image.png" width="280" /></a>
+</p>
+
+## Getting started with voice agents
+
+You can get started with Pipecat running on your local machine, then move your agent processes to the cloud when you’re ready. You can also add a 📞 telephone number, 🖼️ image output, 📺 video input, use different LLMs, and more.
+
+```shell
+# install the module
+pip install pipecat-ai
+
+# set up an .env file with API keys
+cp dot-env.template .env
+```
+
+By default, in order to minimize dependencies, only the basic framework functionality is available. Some third-party AI services require additional dependencies that you can install with:
+
+```shell
+pip install "pipecat-ai[option,...]"
+```
+
+Your project may or may not need these, so they're made available as optional requirements. Here is a list:
+
+- **AI services**: `anthropic`, `azure`, `deepgram`, `google`, `fal`, `moondream`, `openai`, `playht`, `silero`, `whisper`
+- **Transports**: `local`, `websocket`, `daily`
+
+## Code examples
+
+- [foundational](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational) — small snippets that build on each other, introducing one or two concepts at a time
+- [example apps](https://github.com/pipecat-ai/pipecat/tree/main/examples/) — complete applications that you can use as starting points for development
+
+## A simple voice agent running locally
+
+Here is a very basic Pipecat bot that greets a user when they join a real-time session. We'll use [Daily](https://daily.co) for real-time media transport, and [ElevenLabs](https://elevenlabs.io/) for text-to-speech.
+
+```python
+#app.py
+
+import asyncio
+import aiohttp
+
+from pipecat.frames.frames import EndFrame, TextFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.task import PipelineTask
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+async def main():
+  async with aiohttp.ClientSession() as session:
+    # Use Daily as a real-time media transport (WebRTC)
+    transport = DailyTransport(
+      room_url=...,
+      token=...,
+      "Bot Name",
+      DailyParams(audio_out_enabled=True))
+
+    # Use Eleven Labs for Text-to-Speech
+    tts = ElevenLabsTTSService(
+      aiohttp_session=session,
+      api_key=...,
+      voice_id=...,
+      )
+
+    # Simple pipeline that will process text to speech and output the result
+    pipeline = Pipeline([tts, transport.output()])
+
+    # Create Pipecat processor that can run one or more pipelines tasks
+    runner = PipelineRunner()
+
+    # Assign the task callable to run the pipeline
+    task = PipelineTask(pipeline)
+
+    # Register an event handler to play audio when a
+    # participant joins the transport WebRTC session
+    @transport.event_handler("on_participant_joined")
+    async def on_new_participant_joined(transport, participant):
+      participant_name = participant["info"]["userName"] or ''
+      # Queue a TextFrame that will get spoken by the TTS service (Eleven Labs)
+      await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
+
+    # Run the pipeline task
+    await runner.run(task)
+
+if __name__ == "__main__":
+  asyncio.run(main())
+```
+
+Run it with:
+
+```shell
+python app.py
+```
+
+Daily provides a prebuilt WebRTC user interface. Whilst the app is running, you can visit `https://<yourdomain>.daily.co/<room_url>` and listen to the bot say hello!
+
+
+## WebRTC for production use
+
+WebSockets are fine for server-to-server communication or for initial development. But for production use, you’ll need client-server audio to use a protocol designed for real-time media transport. (For an explanation of the difference between WebSockets and WebRTC, see [this post.](https://www.daily.co/blog/how-to-talk-to-an-llm-with-your-voice/#webrtc))
+
+One way to get up and running quickly with WebRTC is to sign up for a Daily developer account. Daily gives you SDKs and global infrastructure for audio (and video) routing. Every account gets 10,000 audio/video/transcription minutes free each month.
+
+Sign up [here](https://dashboard.daily.co/u/signup) and [create a room](https://docs.daily.co/reference/rest-api/rooms) in the developer Dashboard.
+
+## What is VAD?
+
+Voice Activity Detection &mdash; very important for knowing when a user has finished speaking to your bot. If you are not using press-to-talk, and want Pipecat to detect when the user has finished talking, VAD is an essential component for a natural feeling conversation.
+
+Pipecat makes use of WebRTC VAD by default when using a WebRTC transport layer. Optionally, you can use Silero VAD for improved accuracy at the cost of higher CPU usage.
+
+```shell
+pip install pipecat-ai[silero]
+```
+
+The first time you run your bot with Silero, startup may take a while whilst it downloads and caches the model in the background. You can check the progress of this in the console.
+
+
+## Hacking on the framework itself

 _Note that you may need to set up a virtual environment before following the instructions below. For instance, you might need to run the following from the root of the repo:_

-```
-python3 -m venv env
-source env/bin/activate
+```shell
+python3 -m venv venv
+source venv/bin/activate
 ```

 From the root of this repo, run the following:

-```
-pip install -r requirements.txt
+```shell
+pip install -r dev-requirements.txt -r {env}-requirements.txt
 python -m build
 ```

 This builds the package. To use the package locally (eg to run sample files), run

-```
+```shell
 pip install --editable .
 ```

 If you want to use this package from another directory, you can run:

-```
+```shell
 pip install path_to_this_repo
 ```

-## Running the samples
+### Running tests

-Tou can run the simple sample like so:
+From the root directory, run:

-```
-python src/samples/theoretical-to-real/01-say-one-thing.py -u <url of your Daily meeting> -k <your Daily API Key>
+```shell
+pytest --doctest-modules --ignore-glob="*to_be_updated*" src tests
 ```

-Note that the sample uses Azure's TTS and LLM services. You'll need to set the following environment variables for the sample to work:
+## Setting up your editor

-```
-AZURE_SPEECH_SERVICE_KEY
-AZURE_SPEECH_SERVICE_REGION
-AZURE_CHATGPT_KEY
-AZURE_CHATGPT_ENDPOINT
-AZURE_CHATGPT_DEPLOYMENT_ID
+This project uses strict [PEP 8](https://peps.python.org/pep-0008/) formatting.
+
+### Emacs
+
+You can use [use-package](https://github.com/jwiegley/use-package) to install [py-autopep8](https://codeberg.org/ideasman42/emacs-py-autopep8) package and configure `autopep8` arguments:
+
+```elisp
+(use-package py-autopep8
+  :ensure t
+  :defer t
+  :hook ((python-mode . py-autopep8-mode))
+  :config
+  (setq py-autopep8-options '("-a" "-a" "--max-line-length=100"))) ; one string per autopep8 argument
 ```

-If you have those environment variables stored in an .env file, you can quickly load them into your terminal's environment by running this:
+`autopep8` was installed in the `venv` environment described before, so you should be able to use [pyvenv-auto](https://github.com/ryotaro612/pyvenv-auto) to automatically load that environment inside Emacs.
+
+```elisp
+(use-package pyvenv-auto
+  :ensure t
+  :defer t
+  :hook ((python-mode . pyvenv-auto-run)))

-```bash
-export $(grep -v '^#' .env | xargs)
 ```
+
+### Visual Studio Code
+
+Install the
+[autopep8](https://marketplace.visualstudio.com/items?itemName=ms-python.autopep8) extension. Then edit the user settings (_Ctrl-Shift-P_ `Open User Settings (JSON)`) and set it as the default Python formatter, enable formatting on save and configure `autopep8` arguments:
+
+```json
+"[python]": {
+    "editor.defaultFormatter": "ms-python.autopep8",
+    "editor.formatOnSave": true
+},
+"autopep8.args": [
+    "-a",
+    "-a",
+    "--max-line-length=100"
+],
+```
+
+## Getting help
+
+➡️ [Join our Discord](https://discord.gg/pipecat)
+
+➡️ [Reach us on X](https://x.com/pipecat_ai)
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -0,0 +1,6 @@
+autopep8~=2.1.0
+build~=1.2.1
+pip-tools~=7.4.1
+pytest~=8.2.0
+setuptools~=69.5.1
+setuptools_scm~=8.1.0
--- a/docs/README.md
+++ b/docs/README.md
@@ -0,0 +1,10 @@
+# Pipecat Docs
+
+## [Architecture Overview](architecture.md)
+
+Learn about the thinking behind the framework's design.
+
+## [A Frame's Progress](frame-progress.md)
+
+See how a Frame is processed through a Transport, a Pipeline, and a series of Frame Processors.
+
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -0,0 +1,17 @@
+# Pipecat architecture guide
+
+## Frames
+
+Frames can represent discrete chunks of data, for instance a chunk of text, a chunk of audio, or an image. They can also be used for control flow, for instance a frame that indicates that there is no more data available, or that a user started or stopped talking. They can also represent more complex data structures, such as a message array used for an LLM completion.
+
+## FrameProcessors
+
+Frame processors operate on frames. Every frame processor implements a `process_frame` method that consumes one frame and produces zero or more frames. Frame processors can do simple transforms, such as concatenating text fragments into sentences, or they can treat frames as input for an AI Service, and emit chat completions based on message arrays or transform text into audio or images.
+
+## Pipelines
+
+Pipelines are lists of frame processors linked together. Frame processors can push frames upstream or downstream to their peers. A very simple pipeline might chain an LLM frame processor to a text-to-speech frame processor, with a transport as an output.
+
+## Transports
+
+Transports provide input and output frame processors to receive or send frames respectively. For example, the `DailyTransport` does this with a WebRTC session joined to a Daily.co room.
--- a/docs/frame-progress.md
+++ b/docs/frame-progress.md
@@ -0,0 +1,46 @@
+# A Frame's Progress
+
+1. A user says “Hello, LLM” and the cloud transcription service delivers a transcription to the Transport.
+![A transcript frame arrives](images/frame-progress-01.png)
+
+2. The Transport places a Transcription frame in the Pipeline’s source queue.
+![Frame in source queue](images/frame-progress-02.png)
+
+3. The Pipeline passes the Transcription frame to the first Frame Processor in its list, the LLM User Message Aggregator.
+![To UMA](images/frame-progress-03.png)
+
+4. The LLM User Message Aggregator updates the LLM Context with a `{“user”: “Hello LLM”}` message.
+![Update context](images/frame-progress-04.png)
+
+5. The LLM User Message Aggregator yields an LLM Message Frame, containing the updated LLM Context. The Pipeline passes this frame to the LLM Frame Processor.
+![LLM Message Frame](images/frame-progress-05.png)
+
+6. The LLM Frame Processor creates a streaming chat completion based on the LLM context and yields the first chunk of a response, a Text Frame with the value “Hi, ”. The Pipeline passes this frame to the TTS Frame Processor. The TTS Frame Processor aggregates this response but doesn’t yield anything yet, because it’s waiting for a full sentence.
+![LLM yields Text](images/frame-progress-06.png)
+
+7. The LLM Frame Processor yields another Text Frame with the value “there.”. The Pipeline passes this frame to the TTS Frame Processor.
+![LLM yields more Text](images/frame-progress-07.png)
+
+8. The TTS Frame Processor now has a full sentence, so it starts streaming audio based on “Hi, there.” It yields the first chunk of streaming audio as an Audio frame, which the Pipeline passes to the LLM Assistant Message Aggregator.
+![TTS yields Audio](images/frame-progress-08.png)
+
+9. The LLM Assistant Message Aggregator doesn’t do anything with Audio frames, so it immediately yields the frame, unchanged. This is the convention for all Frame Processors: frames that the processor doesn’t process should be immediately yielded.
+![pass-through](images/frame-progress-09.png)
+
+10. The Pipeline places the first Audio frame in its sink queue, which is being watched by the Transport. Since the frame is now in a queue, the Pipeline can continue processing other frames. Note that the source and sink queues form a sort of “boundary of concurrent processing” between a Pipeline and the outside world. In a Pipeline, Frames are processed sequentially; once a Frame is on a queue it can be processed in parallel with the frames being processed by the Pipeline. TODO: link to a more in-depth section about this.
+![sink queue](images/frame-progress-10.png)
+
+11. The TTS Frame Processor yields another Audio frame as the Transport transmits the first Audio frame.
+![parallel audio](images/frame-progress-11.png)
+
+12. As before, the LLM Assistant Message Aggregator immediately yields the Audio frame and the Pipeline places the Audio frame in the sink queue.
+![sink queue 2](images/frame-progress-12.png)
+
+13. The TTS Frame Processor has no more frames to yield. The LLM Frame Processor emits an LLM Response End Frame, which the Pipeline passes to the TTS Frame Processor.
+![response end](images/frame-progress-13.png)
+
+14. The TTS Frame Processor immediately yields the LLM Response End Frame, so the Pipeline passes it along to the LLM Assistant Message Aggregator. The LLM Assistant Message Aggregator updates the LLM Context with the full response from the LLM. TODO: this walkthrough omits that the TTS Frame Processor also yields the Text frames that the LLM emitted, so that the LLM Assistant Message Aggregator can accumulate them.
+![response end](images/frame-progress-14.png)
+
+15. The system is quiet, and waiting for the next message from the Transport.
+![system idle](images/frame-progress-15.png)
--- a/docs/images/frame-progress-01.png
+++ b/docs/images/frame-progress-01.png
--- a/docs/images/frame-progress-02.png
+++ b/docs/images/frame-progress-02.png
--- a/docs/images/frame-progress-03.png
+++ b/docs/images/frame-progress-03.png
--- a/docs/images/frame-progress-04.png
+++ b/docs/images/frame-progress-04.png
--- a/docs/images/frame-progress-05.png
+++ b/docs/images/frame-progress-05.png
--- a/docs/images/frame-progress-06.png
+++ b/docs/images/frame-progress-06.png
--- a/docs/images/frame-progress-07.png
+++ b/docs/images/frame-progress-07.png
--- a/docs/images/frame-progress-08.png
+++ b/docs/images/frame-progress-08.png
--- a/docs/images/frame-progress-09.png
+++ b/docs/images/frame-progress-09.png
--- a/docs/images/frame-progress-10.png
+++ b/docs/images/frame-progress-10.png
--- a/docs/images/frame-progress-11.png
+++ b/docs/images/frame-progress-11.png
--- a/docs/images/frame-progress-12.png
+++ b/docs/images/frame-progress-12.png
--- a/docs/images/frame-progress-13.png
+++ b/docs/images/frame-progress-13.png
--- a/docs/images/frame-progress-14.png
+++ b/docs/images/frame-progress-14.png
--- a/docs/images/frame-progress-15.png
+++ b/docs/images/frame-progress-15.png
--- a/dot-env.template
+++ b/dot-env.template
@@ -0,0 +1,35 @@
+# Anthropic
+ANTHROPIC_API_KEY=...
+
+# Azure
+AZURE_SPEECH_REGION=...
+AZURE_SPEECH_API_KEY=...
+
+AZURE_CHATGPT_API_KEY=...
+AZURE_CHATGPT_ENDPOINT=https://...
+AZURE_CHATGPT_MODEL=...
+
+AZURE_DALLE_API_KEY=...
+AZURE_DALLE_ENDPOINT=https://...
+AZURE_DALLE_MODEL=...
+
+# Daily
+DAILY_API_KEY=...
+DAILY_SAMPLE_ROOM_URL=https://...
+
+# ElevenLabs
+ELEVENLABS_API_KEY=...
+ELEVENLABS_VOICE_ID=...
+
+# Fal
+FAL_KEY=...
+
+# Fireworks
+FIREWORKS_API_KEY=...
+
+# PlayHT
+PLAY_HT_USER_ID=...
+PLAY_HT_API_KEY=...
+
+# OpenAI
+OPENAI_API_KEY=...
--- a/examples/README.md
+++ b/examples/README.md
@@ -0,0 +1,84 @@
+
+
+# Pipecat &mdash; Examples
+
+## Foundational snippets
+Small snippets that build on each other, introducing one or two concepts at a time.
+
+➡️ [Take a look](https://github.com/pipecat-ai/pipecat/tree/main/examples/foundational)
+
+## Chatbot examples
+Collection of self-contained real-time voice and video AI demo applications built with Pipecat.
+
+### Quickstart
+
+Each project has its own set of dependencies and configuration variables. They intentionally avoid sharing code across projects &mdash; you can grab whichever demo folder you want to work with as a starting point.
+
+We recommend you start with a virtual environment:
+
+```shell
+cd pipecat-ai/examples/simple-chatbot
+
+python -m venv venv
+
+source venv/bin/activate
+
+pip install -r requirements.txt
+```
+
+Next, follow the steps in the README for each demo.
+
+ℹ️ Make sure you `pip install -r requirements.txt` for each demo project, so you can be sure to have the necessary service dependencies that extend the functionality of Pipecat. You can read more about the framework architecture [here](https://github.com/pipecat-ai/pipecat/tree/main/docs).
+
+## Projects:
+
+| Project                                      | Description                                                                                                                                | Services                                       |
+| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------- |
+| [Simple Chatbot](simple-chatbot)             | Basic voice-driven conversational bot. A good starting point for learning the flow of the framework.                                       | Deepgram, OpenAI, Daily, Daily Prebuilt UI            |
+| [Storytelling Chatbot](storytelling-chatbot) | Stitches together multiple third-party services to create a collaborative storytime experience.                                            | Deepgram, ElevenLabs, OpenAI, Fal, Daily, Custom UI   |
+| [Translation Chatbot](translation-chatbot)   | Listens for user speech, then translates that speech to Spanish and speaks the translation back. Demonstrates multi-participant use-cases. | Deepgram, Azure, OpenAI, Daily, Daily Prebuilt UI     |
+| [Moondream Chatbot](moondream-chatbot)       | Demonstrates how to add vision capabilities to GPT4. **Note: works best with a GPU**                                                       | Deepgram, OpenAI, Moondream, Daily, Daily Prebuilt UI |
+| Function-calling Chatbot (TBC)               | A chatbot that can call functions in response to user input                                                                                | Deepgram, OpenAI, Fireworks, Daily, Daily Prebuilt UI |
+
+> [!IMPORTANT]
+> These example projects use Daily as a WebRTC transport and can be joined using their hosted Prebuilt UI.
+> It provides a quick way to join a real-time session with your bot and test your ideas without building any frontend code. If you'd like to see an example of a custom UI, try Storybot.
+
+
+## FAQ
+
+### Deployment
+
+For each of these demos we've included a `Dockerfile`. Out of the box, this should provide everything needed to get the respective demo running on a VM:
+
+```shell
+docker build -t username/app:tag .
+
+docker run -p 7860:7860 --env-file ./.env username/app:tag
+
+docker push ...
+```
+
+### SSL
+
+If you're working with a custom UI (such as with the Storytelling Chatbot), it's important to ensure your deployment platform supports HTTPS, as accessing user devices such as mics and webcams requires SSL.
+
+If you try to run a custom UI without SSL, you may see an error in the console telling you that `navigator` is undefined, or no devices are available.
+
+### Are these examples production ready?
+
+Yes, kind of.
+
+These demos attempt to keep things simple and are unopinionated regarding environment or scalability.
+
+We're using FastAPI to spawn a subprocess for the bots / agents &mdash; useful for small tests, but not so great for production grade apps with many concurrent users. You can see how this works in each project's `start` endpoint in `server.py`.
+
+Creating virtualized worker pools and on-demand instances is out of scope for these examples, but we hope to add some examples to this repo soon!
+
+For projects that have CUDA as a requirement, such as Moondream Chatbot, be sure to deploy to a GPU-powered platform (such as [fly.io](https://fly.io) or [Runpod](https://runpod.io)).
+
+## Getting help
+
+➡️ [Join our Discord](https://discord.gg/pipecat)
+
+➡️ [Reach us on X](https://x.com/pipecat_ai)
--- a/examples/foundational/01-say-one-thing.py
+++ b/examples/foundational/01-say-one-thing.py
@@ -0,0 +1,56 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import EndFrame, TextFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.task import PipelineTask
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main(room_url):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url, None, "Say One Thing", DailyParams(audio_out_enabled=True))
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        runner = PipelineRunner()
+
+        task = PipelineTask(Pipeline([tts, transport.output()]))
+
+        # Register an event handler so we can play the audio when the
+        # participant joins.
+        @transport.event_handler("on_participant_joined")
+        async def on_new_participant_joined(transport, participant):
+            participant_name = participant["info"]["userName"] or ''
+            await task.queue_frames([TextFrame(f"Hello there, {participant_name}!"), EndFrame()])
+
+        await runner.run(task)
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url))
--- a/examples/foundational/01a-local-audio.py
+++ b/examples/foundational/01a-local-audio.py
@@ -0,0 +1,53 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import EndFrame, TextFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.transports.base_transport import TransportParams
+from pipecat.transports.local.audio import LocalAudioTransport
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main():
+    async with aiohttp.ClientSession() as session:
+        transport = LocalAudioTransport(TransportParams(audio_out_enabled=True))
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        pipeline = Pipeline([tts, transport.output()])
+
+        task = PipelineTask(pipeline)
+
+        async def say_something():
+            await asyncio.sleep(1)
+            await task.queue_frames([TextFrame("Hello there!"), EndFrame()])
+
+        runner = PipelineRunner()
+
+        await asyncio.gather(runner.run(task), say_something())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/examples/foundational/02-llm-say-one-thing.py
+++ b/examples/foundational/02-llm-say-one-thing.py
@@ -0,0 +1,68 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import EndFrame, LLMMessagesFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.openai import OpenAILLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main(room_url):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            None,
+            "Say One Thing From an LLM",
+            DailyParams(audio_out_enabled=True))
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model="gpt-4-turbo-preview")
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are an LLM in a WebRTC session, and this is a 'hello world' demo. Say hello to the world.",
+            }]
+
+        runner = PipelineRunner()
+
+        task = PipelineTask(Pipeline([llm, tts, transport.output()]))
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            await task.queue_frames([LLMMessagesFrame(messages), EndFrame()])
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url))
--- a/examples/foundational/03-still-frame.py
+++ b/examples/foundational/03-still-frame.py
@@ -0,0 +1,68 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import TextFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.services.fal import FalImageGenService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main(room_url):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            None,
+            "Show a still frame image",
+            DailyParams(
+                camera_out_enabled=True,
+                camera_out_width=1024,
+                camera_out_height=1024
+            )
+        )
+
+        imagegen = FalImageGenService(
+            params=FalImageGenService.InputParams(
+                image_size="square_hd"
+            ),
+            aiohttp_session=session,
+            key=os.getenv("FAL_KEY"),
+        )
+
+        runner = PipelineRunner()
+
+        task = PipelineTask(Pipeline([imagegen, transport.output()]))
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            # Note that we do not put an EndFrame() item in the pipeline for this demo.
+            # This means that the bot will stay in the channel until it times out.
+            # An EndFrame() in the pipeline would cause the transport to shut
+            # down.
+            await task.queue_frames([TextFrame("a cat in the style of picasso")])
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url))
--- a/examples/foundational/03a-local-still-frame.py
+++ b/examples/foundational/03a-local-still-frame.py
@@ -0,0 +1,68 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+import tkinter as tk
+
+from pipecat.frames.frames import TextFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.services.fal import FalImageGenService
+from pipecat.transports.base_transport import TransportParams
+from pipecat.transports.local.tk import TkLocalTransport
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main():
+    async with aiohttp.ClientSession() as session:
+        tk_root = tk.Tk()
+        tk_root.title("Picasso Cat")
+
+        transport = TkLocalTransport(
+            tk_root,
+            TransportParams(
+                camera_out_enabled=True,
+                camera_out_width=1024,
+                camera_out_height=1024))
+
+        imagegen = FalImageGenService(
+            params=FalImageGenService.InputParams(
+                image_size="square_hd"
+            ),
+            aiohttp_session=session,
+            key=os.getenv("FAL_KEY"),
+        )
+
+        pipeline = Pipeline([imagegen, transport.output()])
+
+        task = PipelineTask(pipeline)
+        await task.queue_frames([TextFrame("a cat in the style of picasso")])
+
+        runner = PipelineRunner()
+
+        async def run_tk():
+            while runner.is_active():
+                tk_root.update()
+                tk_root.update_idletasks()
+                await asyncio.sleep(0.1)
+
+        await asyncio.gather(runner.run(task), run_tk())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/examples/foundational/04-utterance-and-speech.py
+++ b/examples/foundational/04-utterance-and-speech.py
@@ -0,0 +1,86 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import aiohttp
+import asyncio
+import os
+import sys
+
+from pipecat.pipeline.merge_pipeline import SequentialMergePipeline
+from pipecat.pipeline.pipeline import Pipeline
+
+from pipecat.frames.frames import EndPipeFrame, LLMMessagesFrame, TextFrame
+from pipecat.pipeline.task import PipelineTask
+from pipecat.services.azure import AzureLLMService, AzureTTSService
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.transport_services import TransportServiceOutput
+from pipecat.services.transports.daily_transport import DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main(room_url: str):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(room_url, None, "Static And Dynamic Speech")
+
+        meeting = TransportServiceOutput(transport, mic_enabled=True)
+
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"),
+        )
+        azure_tts = AzureTTSService(
+            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+            region=os.getenv("AZURE_SPEECH_REGION"),
+        )
+
+        elevenlabs_tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        messages = [{"role": "system",
+                     "content": "tell the user a joke about llamas"}]
+
+        # Start a task to run the LLM to create a joke, and convert the LLM
+        # output to audio frames. This task will run in parallel with generating
+        # and speaking the audio for static text, so there's no delay to speak
+        # the LLM response.
+        llm_pipeline = Pipeline([llm, elevenlabs_tts])
+        llm_task = PipelineTask(llm_pipeline)
+        await llm_task.queue_frames([LLMMessagesFrame(messages), EndPipeFrame()])
+
+        simple_tts_pipeline = Pipeline([azure_tts])
+        await simple_tts_pipeline.queue_frames(
+            [
+                TextFrame("My friend the LLM is going to tell a joke about llamas."),
+                EndPipeFrame(),
+            ]
+        )
+
+        merge_pipeline = SequentialMergePipeline(
+            [simple_tts_pipeline, llm_pipeline])
+
+        await asyncio.gather(
+            transport.run(merge_pipeline),
+            simple_tts_pipeline.run_pipeline(),
+            llm_pipeline.run_pipeline(),
+        )
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url))
--- a/examples/foundational/05-sync-speech-and-image.py
+++ b/examples/foundational/05-sync-speech-and-image.py
@@ -0,0 +1,164 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from dataclasses import dataclass
+
+from pipecat.frames.frames import (
+    AppFrame,
+    EndFrame,
+    Frame,
+    ImageRawFrame,
+    LLMFullResponseStartFrame,
+    LLMMessagesFrame,
+    TextFrame
+)
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.processors.aggregators.gated import GatedAggregator
+from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
+from pipecat.processors.aggregators.sentence import SentenceAggregator
+from pipecat.processors.aggregators.parallel_task import ParallelTask
+from pipecat.services.openai import OpenAILLMService
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.fal import FalImageGenService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+@dataclass
+class MonthFrame(AppFrame):
+    month: str
+
+    def __str__(self):
+        return f"{self.name}(month: {self.month})"
+
+
+class MonthPrepender(FrameProcessor):
+    def __init__(self):
+        super().__init__()
+        self.most_recent_month = "Placeholder, month frame not yet received"
+        self.prepend_to_next_text_frame = False
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        if isinstance(frame, MonthFrame):
+            self.most_recent_month = frame.month
+        elif self.prepend_to_next_text_frame and isinstance(frame, TextFrame):
+            await self.push_frame(TextFrame(f"{self.most_recent_month}: {frame.text}"))
+            self.prepend_to_next_text_frame = False
+        elif isinstance(frame, LLMFullResponseStartFrame):
+            self.prepend_to_next_text_frame = True
+            await self.push_frame(frame)
+        else:
+            await self.push_frame(frame, direction)
+
+
+async def main(room_url):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            None,
+            "Month Narration Bot",
+            DailyParams(
+                audio_out_enabled=True,
+                camera_out_enabled=True,
+                camera_out_width=1024,
+                camera_out_height=1024
+            )
+        )
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model="gpt-4-turbo-preview")
+
+        imagegen = FalImageGenService(
+            params=FalImageGenService.InputParams(
+                image_size="square_hd"
+            ),
+            aiohttp_session=session,
+            key=os.getenv("FAL_KEY"),
+        )
+
+        gated_aggregator = GatedAggregator(
+            gate_open_fn=lambda frame: isinstance(frame, ImageRawFrame),
+            gate_close_fn=lambda frame: isinstance(frame, LLMFullResponseStartFrame),
+            start_open=False
+        )
+
+        sentence_aggregator = SentenceAggregator()
+        month_prepender = MonthPrepender()
+        llm_full_response_aggregator = LLMFullResponseAggregator()
+
+        pipeline = Pipeline([
+            llm,                     # LLM
+            sentence_aggregator,     # Aggregates LLM output into full sentences
+            ParallelTask(            # Run pipelines in parallel aggregating the result
+                [month_prepender, tts],                   # Create "Month: sentence" and output audio
+                [llm_full_response_aggregator, imagegen]  # Aggregate full LLM response
+            ),
+            gated_aggregator,        # Queues everything until an image is available
+            transport.output()       # Transport output
+        ])
+
+        frames = []
+        for month in [
+            "January",
+            "February",
+            "March",
+            "April",
+            "May",
+            "June",
+            "July",
+            "August",
+            "September",
+            "October",
+            "November",
+            "December",
+        ]:
+            messages = [
+                {
+                    "role": "system",
+                    "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.",
+                }
+            ]
+            frames.append(MonthFrame(month=month))
+            frames.append(LLMMessagesFrame(messages))
+
+        frames.append(EndFrame())
+
+        runner = PipelineRunner()
+
+        task = PipelineTask(pipeline)
+
+        await task.queue_frames(frames)
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url))
--- a/examples/foundational/05a-local-sync-speech-and-image.py
+++ b/examples/foundational/05a-local-sync-speech-and-image.py
@@ -0,0 +1,168 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import aiohttp
+import asyncio
+import os
+import sys
+
+import tkinter as tk
+
+from pipecat.frames.frames import AudioRawFrame, Frame, URLImageRawFrame, LLMMessagesFrame, TextFrame
+from pipecat.pipeline.parallel_pipeline import ParallelPipeline
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.llm_response import LLMFullResponseAggregator
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.openai import OpenAILLMService
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.fal import FalImageGenService
+from pipecat.transports.base_transport import TransportParams
+from pipecat.transports.local.tk import TkLocalTransport
+
+from loguru import logger
+
+from dotenv import load_dotenv
+
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main():
+    async with aiohttp.ClientSession() as session:
+        tk_root = tk.Tk()
+        tk_root.title("Calendar")
+
+        runner = PipelineRunner()
+
+        async def get_month_data(month):
+            messages = [{"role": "system", "content": f"Describe a nature photograph suitable for use in a calendar, for the month of {month}. Include only the image description with no preamble. Limit the description to one sentence, please.", }]
+
+            class ImageDescription(FrameProcessor):
+                def __init__(self):
+                    super().__init__()
+                    self.text = ""
+
+                async def process_frame(self, frame: Frame, direction: FrameDirection):
+                    if isinstance(frame, TextFrame):
+                        self.text = frame.text
+                    await self.push_frame(frame, direction)
+
+            class AudioGrabber(FrameProcessor):  # Sink: accumulates all audio into a single frame.
+                def __init__(self):
+                    super().__init__()
+                    self.audio = bytearray()
+                    self.frame = None  # Set on first audio frame; None if none ever arrives.
+
+                async def process_frame(self, frame: Frame, direction: FrameDirection):
+                    if isinstance(frame, AudioRawFrame):
+                        self.audio.extend(frame.audio)  # Rebuild the combined frame on every chunk.
+                        self.frame = AudioRawFrame(bytes(self.audio), frame.sample_rate, frame.num_channels)
+
+            class ImageGrabber(FrameProcessor):
+                def __init__(self):
+                    super().__init__()
+                    self.frame = None
+
+                async def process_frame(self, frame: Frame, direction: FrameDirection):
+                    if isinstance(frame, URLImageRawFrame):
+                        self.frame = frame
+
+            llm = OpenAILLMService(
+                api_key=os.getenv("OPENAI_API_KEY"),
+                model="gpt-4-turbo-preview")
+
+            tts = ElevenLabsTTSService(
+                aiohttp_session=session,
+                api_key=os.getenv("ELEVENLABS_API_KEY"),
+                voice_id=os.getenv("ELEVENLABS_VOICE_ID"))
+
+            imagegen = FalImageGenService(
+                params=FalImageGenService.InputParams(
+                    image_size="square_hd"
+                ),
+                aiohttp_session=session,
+                key=os.getenv("FAL_KEY"))
+
+            aggregator = LLMFullResponseAggregator()
+
+            description = ImageDescription()
+
+            audio_grabber = AudioGrabber()
+
+            image_grabber = ImageGrabber()
+
+            pipeline = Pipeline([
+                llm,
+                aggregator,
+                description,
+                ParallelPipeline([tts, audio_grabber],
+                                 [imagegen, image_grabber])
+            ])
+
+            task = PipelineTask(pipeline)
+            await task.queue_frame(LLMMessagesFrame(messages))
+            await task.stop_when_done()
+
+            await runner.run(task)
+
+            return {
+                "month": month,
+                "text": description.text,
+                "image": image_grabber.frame,
+                "audio": audio_grabber.frame,
+            }
+
+        transport = TkLocalTransport(
+            tk_root,
+            TransportParams(
+                audio_out_enabled=True,
+                camera_out_enabled=True,
+                camera_out_width=1024,
+                camera_out_height=1024))
+
+        pipeline = Pipeline([transport.output()])
+
+        task = PipelineTask(pipeline)
+
+        # We only enable a couple of months (the rest are commented out) as we
+        # create tasks all at once and we might get rate limited otherwise.
+        months: list[str] = [
+            "January",
+            "February",
+            # "March",
+            # "April",
+            # "May",
+        ]
+
+        # We create one task per month. This will be executed concurrently.
+        month_tasks = [asyncio.create_task(get_month_data(month)) for month in months]
+
+        # Now we wait for each month task in the order they're completed. The
+        # benefit is we'll have as little delay as possible before the first
+        # month, and likely no delay between months, but the months won't
+        # display in order.
+        async def show_images(month_tasks):
+            for month_data_task in asyncio.as_completed(month_tasks):
+                data = await month_data_task
+                await task.queue_frames([data["image"], data["audio"]])
+
+            await runner.stop_when_done()
+
+        async def run_tk():
+            while True:
+                tk_root.update()
+                tk_root.update_idletasks()
+                await asyncio.sleep(0.1)
+
+        await asyncio.gather(runner.run(task), show_images(month_tasks), run_tk())
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/examples/foundational/06-listen-and-respond.py
+++ b/examples/foundational/06-listen-and-respond.py
@@ -0,0 +1,101 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import LLMMessagesFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.llm_response import (
+    LLMAssistantResponseAggregator,
+    LLMUserResponseAggregator,
+)
+from pipecat.processors.logger import FrameLogger
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.openai import OpenAILLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+from pipecat.vad.silero import SileroVADAnalyzer
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main(room_url: str, token):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Respond bot",
+            DailyParams(
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer()
+            )
+        )
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model="gpt-4-turbo-preview")
+
+        fl_in = FrameLogger("Inner")
+        fl_out = FrameLogger("Outer")
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+        tma_in = LLMUserResponseAggregator(messages)
+        tma_out = LLMAssistantResponseAggregator(messages)
+
+        pipeline = Pipeline([
+            fl_in,
+            transport.input(),
+            tma_in,
+            llm,
+            fl_out,
+            tts,
+            transport.output(),
+            tma_out
+        ])
+
+        task = PipelineTask(pipeline)
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            transport.capture_participant_transcription(participant["id"])
+            # Kick off the conversation.
+            messages.append(
+                {"role": "system", "content": "Please introduce yourself to the user."})
+            await task.queue_frames([LLMMessagesFrame(messages)])
+
+        runner = PipelineRunner()
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/06a-image-sync.py
+++ b/examples/foundational/06a-image-sync.py
@@ -0,0 +1,123 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from PIL import Image
+
+from pipecat.frames.frames import ImageRawFrame, Frame, SystemFrame, TextFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.llm_context import (
+    LLMAssistantContextAggregator,
+    LLMUserContextAggregator,
+)
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.openai import OpenAILLMService
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.transports.services.daily import DailyTransport
+
+from pipecat.transports.services.daily import DailyParams
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+class ImageSyncAggregator(FrameProcessor):
+    def __init__(self, speaking_path: str, waiting_path: str):
+        super().__init__()
+        self._speaking_image = Image.open(speaking_path)
+        self._speaking_image_format = self._speaking_image.format
+        self._speaking_image_bytes = self._speaking_image.tobytes()
+
+        self._waiting_image = Image.open(waiting_path)
+        self._waiting_image_format = self._waiting_image.format
+        self._waiting_image_bytes = self._waiting_image.tobytes()
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        if not isinstance(frame, SystemFrame):
+            await self.push_frame(ImageRawFrame(image=self._speaking_image_bytes, size=(1024, 1024), format=self._speaking_image_format))
+            await self.push_frame(frame)
+            await self.push_frame(ImageRawFrame(image=self._waiting_image_bytes, size=(1024, 1024), format=self._waiting_image_format))
+        else:
+            await self.push_frame(frame)
+
+
+async def main(room_url: str, token):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Respond bot",
+            DailyParams(
+                audio_out_enabled=True,
+                camera_out_width=1024,
+                camera_out_height=1024,
+                transcription_enabled=True
+            )
+        )
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model="gpt-4-turbo-preview")
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        tma_in = LLMUserContextAggregator(messages)
+        tma_out = LLMAssistantContextAggregator(messages)
+
+        image_sync_aggregator = ImageSyncAggregator(
+            os.path.join(os.path.dirname(__file__), "assets", "speaking.png"),
+            os.path.join(os.path.dirname(__file__), "assets", "waiting.png"),
+        )
+
+        pipeline = Pipeline([
+            transport.input(),
+            image_sync_aggregator,
+            tma_in,
+            llm,
+            tts,
+            transport.output(),
+            tma_out
+        ])
+
+        task = PipelineTask(pipeline)
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            participant_name = participant["info"]["userName"] or ''
+            transport.capture_participant_transcription(participant["id"])
+            await task.queue_frames([TextFrame(f"Hi, this is {participant_name}.")])
+
+        runner = PipelineRunner()
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/07-interruptible.py
+++ b/examples/foundational/07-interruptible.py
@@ -0,0 +1,94 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import LLMMessagesFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.llm_response import (
+    LLMAssistantResponseAggregator, LLMUserResponseAggregator)
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.openai import OpenAILLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+from pipecat.vad.silero import SileroVADAnalyzer
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main(room_url: str, token):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Respond bot",
+            DailyParams(
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer()
+            )
+        )
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model="gpt-4-turbo-preview")
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        tma_in = LLMUserResponseAggregator(messages)
+        tma_out = LLMAssistantResponseAggregator(messages)
+
+        pipeline = Pipeline([
+            transport.input(),   # Transport user input
+            tma_in,              # User responses
+            llm,                 # LLM
+            tts,                 # TTS
+            transport.output(),  # Transport bot output
+            tma_out              # Assistant spoken responses
+        ])
+
+        task = PipelineTask(pipeline, allow_interruptions=True)
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            transport.capture_participant_transcription(participant["id"])
+            # Kick off the conversation.
+            messages.append(
+                {"role": "system", "content": "Please introduce yourself to the user."})
+            await task.queue_frames([LLMMessagesFrame(messages)])
+
+        runner = PipelineRunner()
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/08-bots-arguing.py
+++ b/examples/foundational/08-bots-arguing.py
@@ -0,0 +1,148 @@
+from typing import Tuple
+import aiohttp
+import asyncio
+import logging
+import os
+from pipecat.pipeline.aggregators import SentenceAggregator
+from pipecat.pipeline.pipeline import Pipeline
+
+from pipecat.transports.daily_transport import DailyTransport
+from pipecat.services.azure_ai_services import AzureLLMService, AzureTTSService
+from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
+from pipecat.services.fal_ai_services import FalImageGenService
+from pipecat.pipeline.frames import AudioFrame, EndFrame, ImageFrame, LLMMessagesFrame, TextFrame
+
+from runner import configure
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logging.basicConfig(format=f"%(levelno)s %(asctime)s %(message)s")
+logger = logging.getLogger("pipecat")
+logger.setLevel(logging.DEBUG)
+
+
+async def main(room_url: str):
+    """Let two LLM personas debate each other, with speech and images.
+
+    Two message histories are kept, one per bot. Each bot's reply is appended
+    to its own history as an "assistant" message and to the other bot's
+    history as a "user" message, so they keep answering one another.
+
+    NOTE(review): unlike the sibling examples, this one configures
+    DailyTransport through keyword arguments and drives Pipeline with
+    explicit source/sink queues -- confirm these APIs still exist.
+
+    Args:
+        room_url: URL of the Daily room to join. No token is used; None is
+            passed to the transport in its place.
+    """
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            None,
+            "Respond bot",
+            duration_minutes=10,
+            mic_enabled=True,
+            mic_sample_rate=16000,
+            camera_enabled=True,
+            camera_width=1024,
+            camera_height=1024,
+        )
+
+        llm = AzureLLMService(
+            api_key=os.getenv("AZURE_CHATGPT_API_KEY"),
+            endpoint=os.getenv("AZURE_CHATGPT_ENDPOINT"),
+            model=os.getenv("AZURE_CHATGPT_MODEL"),
+        )
+        tts1 = AzureTTSService(
+            api_key=os.getenv("AZURE_SPEECH_API_KEY"),
+            region=os.getenv("AZURE_SPEECH_REGION"),
+        )
+        # NOTE(review): tts2 is created but never referenced again in this
+        # function; both bots are voiced through tts1 below.
+        tts2 = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id="jBpfuIE2acCO8z3wKNLl",
+        )
+        dalle = FalImageGenService(
+            params=FalImageGenService.InputParams(
+                image_size="1024x1024"
+            ),
+            aiohttp_session=session,
+            key=os.getenv("FAL_KEY"),
+        )
+
+        # Conversation history as seen by bot 1 (argues "is a sandwich").
+        bot1_messages = [
+            {
+                "role": "system",
+                "content": "You are a stern librarian. You strongly believe that a hot dog is a sandwich. Start by stating this fact in a few sentences, then be prepared to debate this with the user. You shouldn't ever compromise on the fundamental truth that a hot dog is a sandwich. Your responses should only be a few sentences long.",
+            },
+        ]
+        # Conversation history as seen by bot 2 (argues "is not a sandwich").
+        bot2_messages = [
+            {
+                "role": "system",
+                "content": "You are a silly cat, and you strongly believe that a hot dog is not a sandwich. Debate this with the user, only responding with a few sentences. Don't ever accept that a hot dog is a sandwich.",
+            },
+        ]
+
+        async def get_text_and_audio(messages) -> Tuple[str, bytearray]:
+            """Run the messages through LLM -> sentence aggregator -> TTS.
+
+            A throwaway pipeline is fed an LLMMessagesFrame followed by an
+            EndFrame and run to completion. The sink queue is then drained:
+            text frames are concatenated into the reply text and audio frames
+            into a single byte buffer.
+
+            Returns:
+                A (message, audio) tuple with the full reply text and audio.
+            """
+            source_queue = asyncio.Queue()
+            sink_queue = asyncio.Queue()
+            sentence_aggregator = SentenceAggregator()
+            pipeline = Pipeline(
+                [llm, sentence_aggregator, tts1], source_queue, sink_queue
+            )
+
+            await source_queue.put(LLMMessagesFrame(messages))
+            await source_queue.put(EndFrame())
+            await pipeline.run_pipeline()
+
+            message = ""
+            all_audio = bytearray()
+            # The pipeline has finished, so everything it produced is already
+            # sitting in the sink queue and can be read without waiting.
+            while sink_queue.qsize():
+                frame = sink_queue.get_nowait()
+                if isinstance(frame, TextFrame):
+                    message += frame.text
+                elif isinstance(frame, AudioFrame):
+                    all_audio.extend(frame.audio)
+
+            return (message, all_audio)
+
+        async def get_bot1_statement():
+            """Get bot 1's next reply, record it in both histories, return its audio."""
+            message, audio = await get_text_and_audio(bot1_messages)
+
+            bot1_messages.append({"role": "assistant", "content": message})
+            bot2_messages.append({"role": "user", "content": message})
+
+            return audio
+
+        async def get_bot2_statement():
+            """Get bot 2's next reply, record it in both histories, return its audio."""
+            message, audio = await get_text_and_audio(bot2_messages)
+
+            bot2_messages.append({"role": "assistant", "content": message})
+            bot1_messages.append({"role": "user", "content": message})
+
+            return audio
+
+        async def argue():
+            """Alternate between the two bots for 100 rounds.
+
+            In each round the bot's statement and an image for that bot are
+            produced concurrently, then queued on the transport together.
+            """
+            for i in range(100):
+                print(f"In iteration {i}")
+
+                bot1_description = "A woman conservatively dressed as a librarian in a library surrounded by books, cartoon, serious, highly detailed"
+
+                (audio1, image_data1) = await asyncio.gather(
+                    get_bot1_statement(), dalle.run_image_gen(bot1_description)
+                )
+                # NOTE(review): indexes 1 and 2 of the image result are
+                # presumably the image payload and its size -- confirm against
+                # run_image_gen's return value.
+                await transport.send_queue.put(
+                    [
+                        ImageFrame(image_data1[1], image_data1[2]),
+                        AudioFrame(audio1),
+                    ]
+                )
+
+                bot2_description = "A cat dressed in a hot dog costume, cartoon, bright colors, funny, highly detailed"
+
+                (audio2, image_data2) = await asyncio.gather(
+                    get_bot2_statement(), dalle.run_image_gen(bot2_description)
+                )
+                await transport.send_queue.put(
+                    [
+                        ImageFrame(image_data2[1], image_data2[2]),
+                        AudioFrame(audio2),
+                    ]
+                )
+
+        # Run the transport and the debate loop side by side.
+        await asyncio.gather(transport.run(), argue())
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url))
--- a/examples/foundational/09-mirror.py
+++ b/examples/foundational/09-mirror.py
@@ -0,0 +1,53 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import sys
+
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.transports.services.daily import DailyTransport, DailyParams
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main(room_url, token):
+    transport = DailyTransport(
+        room_url, token, "Test",
+        DailyParams(
+            audio_in_enabled=True,
+            audio_out_enabled=True,
+            camera_out_enabled=True,
+            camera_out_width=1280,
+            camera_out_height=720
+        )
+    )
+
+    @transport.event_handler("on_first_participant_joined")
+    async def on_first_participant_joined(transport, participant):
+        transport.capture_participant_video(participant["id"])
+
+    pipeline = Pipeline([transport.input(), transport.output()])
+
+    runner = PipelineRunner()
+
+    task = PipelineTask(pipeline)
+
+    await runner.run(task)
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/09a-local-mirror.py
+++ b/examples/foundational/09a-local-mirror.py
@@ -0,0 +1,65 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import sys
+
+import tkinter as tk
+
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.transports.base_transport import TransportParams
+from pipecat.transports.local.tk import TkLocalTransport
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+async def main(room_url, token):
+    tk_root = tk.Tk()
+    tk_root.title("Local Mirror")
+
+    daily_transport = DailyTransport(room_url, token, "Test", DailyParams(audio_in_enabled=True))
+
+    tk_transport = TkLocalTransport(
+        tk_root,
+        TransportParams(
+            audio_out_enabled=True,
+            camera_out_enabled=True,
+            camera_out_width=1280,
+            camera_out_height=720))
+
+    @daily_transport.event_handler("on_first_participant_joined")
+    async def on_first_participant_joined(transport, participant):
+        transport.capture_participant_video(participant["id"])
+
+    pipeline = Pipeline([daily_transport.input(), tk_transport.output()])
+
+    runner = PipelineRunner()
+
+    async def run_tk():
+        while runner.is_active():
+            tk_root.update()
+            tk_root.update_idletasks()
+            await asyncio.sleep(0.1)
+
+    task = PipelineTask(pipeline)
+
+    await asyncio.gather(runner.run(task), run_tk())
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/10-wake-word.py
+++ b/examples/foundational/10-wake-word.py
@@ -0,0 +1,189 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import random
+import sys
+
+from PIL import Image
+
+from pipecat.frames.frames import (
+    Frame,
+    SystemFrame,
+    TextFrame,
+    ImageRawFrame,
+    SpriteFrame,
+    TranscriptionFrame,
+)
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.llm_context import (
+    LLMUserContextAggregator,
+    LLMAssistantContextAggregator,
+)
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.openai import OpenAILLMService
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+sprites = {}
+image_files = [
+    "sc-default.png",
+    "sc-talk.png",
+    "sc-listen-1.png",
+    "sc-think-1.png",
+    "sc-think-2.png",
+    "sc-think-3.png",
+    "sc-think-4.png",
+]
+
+script_dir = os.path.dirname(__file__)
+
+for file in image_files:
+    # Build the full path to the image file
+    full_path = os.path.join(script_dir, "assets", file)
+    # Get the filename without the extension to use as the dictionary key
+    filename = os.path.splitext(os.path.basename(full_path))[0]
+    # Open the image and convert it to bytes
+    with Image.open(full_path) as img:
+        sprites[file] = ImageRawFrame(image=img.tobytes(), size=img.size, format=img.format)
+
+# When the bot isn't talking, show a static image of the cat listening
+quiet_frame = sprites["sc-listen-1.png"]
+
+# When the bot is talking, build an animation from two sprites
+talking_list = [sprites["sc-default.png"], sprites["sc-talk.png"]]
+talking = [random.choice(talking_list) for x in range(30)]
+talking_frame = SpriteFrame(talking)
+
+# TODO: Support "thinking" as soon as we get a valid transcript, while LLM
+# is processing
+thinking_list = [
+    sprites["sc-think-1.png"],
+    sprites["sc-think-2.png"],
+    sprites["sc-think-3.png"],
+    sprites["sc-think-4.png"],
+]
+thinking_frame = SpriteFrame(thinking_list)
+
+
+class NameCheckFilter(FrameProcessor):
+    def __init__(self, names: list[str]):
+        super().__init__()
+        self._names = names
+        self._sentence = ""
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        if isinstance(frame, SystemFrame):
+            await self.push_frame(frame, direction)
+            return
+
+        content: str = ""
+
+        # TODO: split up transcription by participant
+        if isinstance(frame, TranscriptionFrame):
+            content = frame.text
+            self._sentence += content
+            if self._sentence.endswith((".", "?", "!")):
+                if any(name in self._sentence for name in self._names):
+                    await self.push_frame(TextFrame(self._sentence))
+                    self._sentence = ""
+                else:
+                    self._sentence = ""
+        else:
+            await self.push_frame(frame, direction)
+
+
+class ImageSyncAggregator(FrameProcessor):
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        await self.push_frame(talking_frame)
+        await self.push_frame(frame)
+        await self.push_frame(quiet_frame)
+
+
+async def main(room_url: str, token):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Santa Cat",
+            DailyParams(
+                audio_out_enabled=True,
+                camera_out_enabled=True,
+                camera_out_width=720,
+                camera_out_height=1280,
+                camera_out_framerate=10,
+                transcription_enabled=True
+            )
+        )
+
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model="gpt-4-turbo-preview")
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id="jBpfuIE2acCO8z3wKNLl",
+        )
+        isa = ImageSyncAggregator()
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are Santa Cat, a cat that lives in Santa's workshop at the North Pole. You should be clever, and a bit sarcastic. You should also tell jokes every once in a while.  Your responses should only be a few sentences long.",
+            },
+        ]
+
+        tma_in = LLMUserContextAggregator(messages)
+        tma_out = LLMAssistantContextAggregator(messages)
+        ncf = NameCheckFilter(["Santa Cat", "Santa"])
+
+        pipeline = Pipeline([
+            transport.input(),
+            isa,
+            ncf,
+            tma_in,
+            llm,
+            tts,
+            transport.output(),
+            tma_out
+        ])
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            # Send some greeting at the beginning.
+            await tts.say("Hi! If you want to talk to me, just say 'hey Santa Cat'.")
+            transport.capture_participant_transcription(participant["id"])
+
+        async def starting_image():
+            await transport.send_image(quiet_frame)
+
+        runner = PipelineRunner()
+
+        task = PipelineTask(pipeline)
+
+        await asyncio.gather(runner.run(task), starting_image())
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/11-sound-effects.py
+++ b/examples/foundational/11-sound-effects.py
@@ -0,0 +1,142 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import aiohttp
+import asyncio
+import os
+import sys
+import wave
+
+from pipecat.frames.frames import (
+    Frame,
+    AudioRawFrame,
+    LLMFullResponseEndFrame,
+    LLMMessagesFrame,
+)
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.llm_context import (
+    LLMUserContextAggregator,
+    LLMAssistantContextAggregator,
+)
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.processors.logger import FrameLogger
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.openai import OpenAILLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+sounds = {}
+sound_files = ["ding1.wav", "ding2.wav"]
+
+script_dir = os.path.dirname(__file__)
+
+for file in sound_files:
+    # Build the full path to the image file
+    full_path = os.path.join(script_dir, "assets", file)
+    # Get the filename without the extension to use as the dictionary key
+    filename = os.path.splitext(os.path.basename(full_path))[0]
+    # Open the image and convert it to bytes
+    with wave.open(full_path) as audio_file:
+        sounds[file] = AudioRawFrame(audio_file.readframes(-1),
+                                     audio_file.getframerate(), audio_file.getnchannels())
+
+
+class OutboundSoundEffectWrapper(FrameProcessor):
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        if isinstance(frame, LLMFullResponseEndFrame):
+            await self.push_frame(sounds["ding1.wav"])
+            # In case anything else downstream needs it
+            await self.push_frame(frame, direction)
+        else:
+            await self.push_frame(frame, direction)
+
+
+class InboundSoundEffectWrapper(FrameProcessor):
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        if isinstance(frame, LLMMessagesFrame):
+            await self.push_frame(sounds["ding2.wav"])
+            # In case anything else downstream needs it
+            await self.push_frame(frame, direction)
+        else:
+            await self.push_frame(frame, direction)
+
+
+async def main(room_url: str, token):
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Respond bot",
+            DailyParams(audio_out_enabled=True, transcription_enabled=True)
+        )
+
+        llm = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model="gpt-4-turbo-preview")
+
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id="ErXwobaYiN019PkySvjV",
+        )
+
+        messages = [
+            {
+                "role": "system",
+                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio. Respond to what the user said in a creative and helpful way.",
+            },
+        ]
+
+        tma_in = LLMUserContextAggregator(messages)
+        tma_out = LLMAssistantContextAggregator(messages)
+        out_sound = OutboundSoundEffectWrapper()
+        in_sound = InboundSoundEffectWrapper()
+        fl = FrameLogger("LLM Out")
+        fl2 = FrameLogger("Transcription In")
+
+        pipeline = Pipeline([
+            transport.input(),
+            tma_in,
+            in_sound,
+            fl2,
+            llm,
+            fl,
+            tts,
+            out_sound,
+            transport.output(),
+            tma_out
+        ])
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            transport.capture_participant_transcription(participant["id"])
+            await tts.say("Hi, I'm listening!")
+            await transport.send_audio(sounds["ding1.wav"])
+
+        runner = PipelineRunner()
+
+        task = PipelineTask(pipeline)
+
+        await runner.run(task)
+
+
+if __name__ == "__main__":
+    (url, token) = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/12-describe-video.py
+++ b/examples/foundational/12-describe-video.py
@@ -0,0 +1,110 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.user_response import UserResponseAggregator
+from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.moondream import MoondreamService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+from pipecat.vad.silero import SileroVADAnalyzer
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+class UserImageRequester(FrameProcessor):
+    """Frame processor that requests a participant image whenever text arrives.
+
+    For each ``TextFrame`` seen while a participant id is set, a
+    ``UserImageRequestFrame`` for that participant is pushed upstream. The
+    incoming frame itself is always forwarded in its original direction.
+    """
+
+    def __init__(self, participant_id: str | None = None):
+        super().__init__()
+        self._participant_id = participant_id
+
+    def set_participant_id(self, participant_id: str):
+        """Set the participant whose image will be requested."""
+        self._participant_id = participant_id
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Request an image on text frames, then pass ``frame`` along."""
+        should_request = isinstance(frame, TextFrame) and bool(self._participant_id)
+        if should_request:
+            request = UserImageRequestFrame(self._participant_id)
+            await self.push_frame(request, FrameDirection.UPSTREAM)
+        await self.push_frame(frame, direction)
+
+
+async def main(room_url: str, token):
+    """Run a Daily bot that describes the participant's video with Moondream.
+
+    Args:
+        room_url: URL of the Daily room to join.
+        token: Meeting token passed to the Daily transport.
+    """
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Describe participant video",
+            DailyParams(
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer()
+            )
+        )
+
+        user_response = UserResponseAggregator()
+
+        image_requester = UserImageRequester()
+
+        vision_aggregator = VisionImageFrameAggregator()
+
+        # If you run into weird description, try with use_cpu=True
+        moondream = MoondreamService()
+
+        # Single TTS service, used by both the greeting and the pipeline.
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            # Greet the participant, start capturing their video and
+            # transcription, and tell the requester whose image to ask for.
+            await tts.say("Hi there! Feel free to ask me what I see.")
+            transport.capture_participant_video(participant["id"], framerate=0)
+            transport.capture_participant_transcription(participant["id"])
+            image_requester.set_participant_id(participant["id"])
+
+        pipeline = Pipeline([
+            transport.input(),
+            user_response,
+            image_requester,
+            vision_aggregator,
+            moondream,
+            tts,
+            transport.output()
+        ])
+
+        task = PipelineTask(pipeline)
+
+        runner = PipelineRunner()
+
+        await runner.run(task)
+
+if __name__ == "__main__":
+    # configure() yields the room URL and a meeting token for the bot.
+    url, token = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/12a-describe-video-gemini-flash.py
+++ b/examples/foundational/12a-describe-video-gemini-flash.py
@@ -0,0 +1,110 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.user_response import UserResponseAggregator
+from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.google import GoogleLLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+from pipecat.vad.silero import SileroVADAnalyzer
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+class UserImageRequester(FrameProcessor):
+    """Frame processor that requests a participant image whenever text arrives.
+
+    For each ``TextFrame`` seen while a participant id is set, a
+    ``UserImageRequestFrame`` for that participant is pushed upstream. The
+    incoming frame itself is always forwarded in its original direction.
+    """
+
+    def __init__(self, participant_id: str | None = None):
+        super().__init__()
+        self._participant_id = participant_id
+
+    def set_participant_id(self, participant_id: str):
+        """Set the participant whose image will be requested."""
+        self._participant_id = participant_id
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Request an image on text frames, then pass ``frame`` along."""
+        should_request = isinstance(frame, TextFrame) and bool(self._participant_id)
+        if should_request:
+            request = UserImageRequestFrame(self._participant_id)
+            await self.push_frame(request, FrameDirection.UPSTREAM)
+        await self.push_frame(frame, direction)
+
+
+async def main(room_url: str, token):
+    """Run a Daily bot that describes the participant's video.
+
+    Descriptions come from the Google LLM service configured with the
+    ``gemini-1.5-flash-latest`` model.
+
+    Args:
+        room_url: URL of the Daily room to join.
+        token: Meeting token passed to the Daily transport.
+    """
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Describe participant video",
+            DailyParams(
+                audio_in_enabled=True,  # This is so Silero VAD can get audio data
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer()
+            )
+        )
+
+        user_response = UserResponseAggregator()
+
+        image_requester = UserImageRequester()
+
+        vision_aggregator = VisionImageFrameAggregator()
+
+        google = GoogleLLMService(model="gemini-1.5-flash-latest")
+
+        # Single TTS service, used by both the greeting and the pipeline.
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            # Greet the participant, start capturing their video and
+            # transcription, and tell the requester whose image to ask for.
+            await tts.say("Hi there! Feel free to ask me what I see.")
+            transport.capture_participant_video(participant["id"], framerate=0)
+            transport.capture_participant_transcription(participant["id"])
+            image_requester.set_participant_id(participant["id"])
+
+        pipeline = Pipeline([
+            transport.input(),
+            user_response,
+            image_requester,
+            vision_aggregator,
+            google,
+            tts,
+            transport.output()
+        ])
+
+        task = PipelineTask(pipeline)
+
+        runner = PipelineRunner()
+
+        await runner.run(task)
+
+if __name__ == "__main__":
+    # configure() yields the room URL and a meeting token for the bot.
+    url, token = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/12b-describe-video-gpt-4o.py
+++ b/examples/foundational/12b-describe-video-gpt-4o.py
@@ -0,0 +1,112 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import aiohttp
+import os
+import sys
+
+from pipecat.frames.frames import Frame, TextFrame, UserImageRequestFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.aggregators.user_response import UserResponseAggregator
+from pipecat.processors.aggregators.vision_image_frame import VisionImageFrameAggregator
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.elevenlabs import ElevenLabsTTSService
+from pipecat.services.openai import OpenAILLMService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+from pipecat.vad.silero import SileroVADAnalyzer
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+class UserImageRequester(FrameProcessor):
+    """Frame processor that requests a participant image whenever text arrives.
+
+    For each ``TextFrame`` seen while a participant id is set, a
+    ``UserImageRequestFrame`` for that participant is pushed upstream. The
+    incoming frame itself is always forwarded in its original direction.
+    """
+
+    def __init__(self, participant_id: str | None = None):
+        super().__init__()
+        self._participant_id = participant_id
+
+    def set_participant_id(self, participant_id: str):
+        """Set the participant whose image will be requested."""
+        self._participant_id = participant_id
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Request an image on text frames, then pass ``frame`` along."""
+        should_request = isinstance(frame, TextFrame) and bool(self._participant_id)
+        if should_request:
+            request = UserImageRequestFrame(self._participant_id)
+            await self.push_frame(request, FrameDirection.UPSTREAM)
+        await self.push_frame(frame, direction)
+
+
+async def main(room_url: str, token):
+    """Run a Daily bot that describes the participant's video.
+
+    Descriptions come from the OpenAI LLM service configured with the
+    ``gpt-4o`` model.
+
+    Args:
+        room_url: URL of the Daily room to join.
+        token: Meeting token passed to the Daily transport.
+    """
+    async with aiohttp.ClientSession() as session:
+        transport = DailyTransport(
+            room_url,
+            token,
+            "Describe participant video",
+            DailyParams(
+                audio_out_enabled=True,
+                transcription_enabled=True,
+                vad_enabled=True,
+                vad_analyzer=SileroVADAnalyzer()
+            )
+        )
+
+        user_response = UserResponseAggregator()
+
+        image_requester = UserImageRequester()
+
+        vision_aggregator = VisionImageFrameAggregator()
+
+        openai = OpenAILLMService(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            model="gpt-4o"
+        )
+
+        # Single TTS service, used by both the greeting and the pipeline.
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        @transport.event_handler("on_first_participant_joined")
+        async def on_first_participant_joined(transport, participant):
+            # Greet the participant, start capturing their video and
+            # transcription, and tell the requester whose image to ask for.
+            await tts.say("Hi there! Feel free to ask me what I see.")
+            transport.capture_participant_video(participant["id"], framerate=0)
+            transport.capture_participant_transcription(participant["id"])
+            image_requester.set_participant_id(participant["id"])
+
+        pipeline = Pipeline([
+            transport.input(),
+            user_response,
+            image_requester,
+            vision_aggregator,
+            openai,
+            tts,
+            transport.output()
+        ])
+
+        task = PipelineTask(pipeline)
+
+        runner = PipelineRunner()
+
+        await runner.run(task)
+
+if __name__ == "__main__":
+    # configure() yields the room URL and a meeting token for the bot.
+    url, token = configure()
+    asyncio.run(main(url, token))
--- a/examples/foundational/13-whisper-transcription.py
+++ b/examples/foundational/13-whisper-transcription.py
@@ -0,0 +1,55 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import sys
+
+from pipecat.frames.frames import Frame, TranscriptionFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.whisper import WhisperSTTService
+from pipecat.transports.services.daily import DailyParams, DailyTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+class TranscriptionLogger(FrameProcessor):
+    """Prints the text of every ``TranscriptionFrame`` it receives.
+
+    This processor does not push any frame onward.
+    """
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        if not isinstance(frame, TranscriptionFrame):
+            return
+        print(f"Transcription: {frame.text}")
+
+
+async def main(room_url: str):
+    """Join ``room_url`` without a token and print Whisper transcriptions."""
+    daily_params = DailyParams(audio_in_enabled=True)
+    transport = DailyTransport(room_url, None, "Transcription bot", daily_params)
+
+    stt = WhisperSTTService()
+    printer = TranscriptionLogger()
+
+    # Transport input -> Whisper STT -> transcription printer.
+    task = PipelineTask(Pipeline([transport.input(), stt, printer]))
+
+    await PipelineRunner().run(task)
+
+
+if __name__ == "__main__":
+    # Only the room URL is handed to main(); the token is not used here.
+    url, token = configure()
+    asyncio.run(main(url))
--- a/examples/foundational/13a-whisper-local.py
+++ b/examples/foundational/13a-whisper-local.py
@@ -0,0 +1,55 @@
+#
+# Copyright (c) 2024, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+import asyncio
+import sys
+
+from pipecat.frames.frames import Frame, TranscriptionFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.pipeline.runner import PipelineRunner
+from pipecat.pipeline.task import PipelineTask
+from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
+from pipecat.services.whisper import WhisperSTTService
+from pipecat.transports.base_transport import TransportParams
+from pipecat.transports.local.audio import LocalAudioTransport
+
+from runner import configure
+
+from loguru import logger
+
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+logger.remove(0)
+logger.add(sys.stderr, level="DEBUG")
+
+
+class TranscriptionLogger(FrameProcessor):
+    """Prints the text of every ``TranscriptionFrame`` it receives.
+
+    This processor does not push any frame onward.
+    """
+
+    async def process_frame(self, frame: Frame, direction: FrameDirection):
+        if not isinstance(frame, TranscriptionFrame):
+            return
+        print(f"Transcription: {frame.text}")
+
+
+async def main(room_url: str | None = None):
+    """Transcribe local audio input with Whisper and print the results.
+
+    Args:
+        room_url: Unused. Kept, now optional, only so callers that still pass
+            a Daily room URL keep working; this example uses the local audio
+            transport.
+    """
+    transport = LocalAudioTransport(TransportParams(audio_in_enabled=True))
+
+    stt = WhisperSTTService()
+
+    tl = TranscriptionLogger()
+
+    pipeline = Pipeline([transport.input(), stt, tl])
+
+    task = PipelineTask(pipeline)
+
+    runner = PipelineRunner()
+
+    await runner.run(task)
+
+
+if __name__ == "__main__":
+    # No Daily room is involved, so configure() is not called: it would demand
+    # a Daily room URL and API key and create a meeting token for nothing.
+    asyncio.run(main())
--- a/examples/foundational/assets/ding1.wav
+++ b/examples/foundational/assets/ding1.wav
--- a/examples/foundational/assets/ding2.wav
+++ b/examples/foundational/assets/ding2.wav
--- a/src/samples/deprecated/static-sprite/sprites/intro.png
+++ b/src/samples/deprecated/static-sprite/sprites/intro.png
--- a/src/samples/deprecated/static-sprite/sprites/wait.png
+++ b/src/samples/deprecated/static-sprite/sprites/wait.png
--- a/examples/foundational/assets/sc-listen-2.png
+++ b/examples/foundational/assets/sc-listen-2.png
--- a/src/samples/deprecated/static-sprite/sprites/talk-1.png
+++ b/src/samples/deprecated/static-sprite/sprites/talk-1.png
--- a/src/samples/deprecated/static-sprite/sprites/talk-2.png
+++ b/src/samples/deprecated/static-sprite/sprites/talk-2.png
--- a/examples/foundational/assets/sc-think-2.png
+++ b/examples/foundational/assets/sc-think-2.png
--- a/examples/foundational/assets/sc-think-3.png
+++ b/examples/foundational/assets/sc-think-3.png
--- a/examples/foundational/assets/sc-think-4.png
+++ b/examples/foundational/assets/sc-think-4.png
--- a/examples/foundational/assets/speaking.png
+++ b/examples/foundational/assets/speaking.png
--- a/examples/foundational/assets/waiting.png
+++ b/examples/foundational/assets/waiting.png
--- a/examples/foundational/runner.py
+++ b/examples/foundational/runner.py
@@ -0,0 +1,58 @@
+import argparse
+import os
+import time
+import urllib
+import requests
+
+
+def configure():
+    """Resolve the Daily room URL and create an owner meeting token for it.
+
+    The room URL comes from ``-u/--url`` or ``DAILY_SAMPLE_ROOM_URL``; the API
+    key from ``-k/--apikey`` or ``DAILY_API_KEY``. Unknown command line
+    arguments are ignored.
+
+    Returns:
+        A ``(url, token)`` tuple; the token expires one hour from now.
+
+    Raises:
+        Exception: If the URL or API key is missing, or the Daily REST API
+            does not answer with HTTP 200.
+        requests.RequestException: If the token request fails or times out.
+    """
+    # `import urllib` alone does not guarantee the `urllib.parse` submodule is
+    # loaded, so import what is needed explicitly.
+    from urllib.parse import urlparse
+
+    parser = argparse.ArgumentParser(description="Daily AI SDK Bot Sample")
+    parser.add_argument(
+        "-u",
+        "--url",
+        type=str,
+        required=False,
+        help="URL of the Daily room to join")
+    parser.add_argument(
+        "-k",
+        "--apikey",
+        type=str,
+        required=False,
+        help="Daily API Key (needed to create an owner token for the room)",
+    )
+
+    # parse_known_args() lets the calling example accept extra options.
+    args, unknown = parser.parse_known_args()
+
+    url = args.url or os.getenv("DAILY_SAMPLE_ROOM_URL")
+    key = args.apikey or os.getenv("DAILY_API_KEY")
+
+    if not url:
+        raise Exception(
+            "No Daily room specified. use the -u/--url option from the command line, or set DAILY_SAMPLE_ROOM_URL in your environment to specify a Daily room URL.")
+
+    if not key:
+        raise Exception("No Daily API key specified. use the -k/--apikey option from the command line, or set DAILY_API_KEY in your environment to specify a Daily API key, available from https://dashboard.daily.co/developers.")
+
+    # Create a meeting token for the given room with an expiration 1 hour in
+    # the future.
+    # The room name is the URL path without its leading slash.
+    room_name: str = urlparse(url).path[1:]
+    expiration: float = time.time() + 60 * 60
+
+    res: requests.Response = requests.post(
+        "https://api.daily.co/v1/meeting-tokens",
+        headers={
+            "Authorization": f"Bearer {key}"},
+        json={
+            "properties": {
+                "room_name": room_name,
+                "is_owner": True,
+                "exp": expiration}},
+        # Without a timeout an unresponsive API would hang the example forever.
+        timeout=30,
+    )
+
+    if res.status_code != 200:
+        raise Exception(
+            f"Failed to create meeting token: {res.status_code} {res.text}")
+
+    token: str = res.json()["token"]
+
+    return (url, token)
--- a/examples/foundational/websocket-server/frames.proto
+++ b/examples/foundational/websocket-server/frames.proto
@@ -0,0 +1,25 @@
+syntax = "proto3";
+
+package pipecat_proto;
+
+// Plain text payload.
+message TextFrame {
+    string text = 1;
+}
+
+// Audio payload carried as raw bytes.
+message AudioFrame {
+    bytes audio = 1;
+}
+
+// Transcribed text plus the participant id and timestamp, all as strings.
+message TranscriptionFrame {
+    string text = 1;
+    string participant_id = 2;
+    string timestamp = 3;
+}
+
+// Envelope message: holds at most one of the frame types above.
+message Frame {
+    oneof frame {
+        TextFrame text = 1;
+        AudioFrame audio = 2;
+        TranscriptionFrame transcription = 3;
+    }
+}
--- a/examples/foundational/websocket-server/index.html
+++ b/examples/foundational/websocket-server/index.html
@@ -0,0 +1,134 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <script src="//cdn.jsdelivr.net/npm/protobufjs@7.X.X/dist/protobuf.min.js"></script>
+    <title>WebSocket Audio Stream</title>
+</head>
+
+<body>
+    <h1>WebSocket Audio Stream</h1>
+    <button id="startAudioBtn">Start Audio</button>
+    <button id="stopAudioBtn">Stop Audio</button>
+    <script>
+        // Sample rate (Hz) used for playback buffers and for the capture skip ratio.
+        const SAMPLE_RATE = 16000;
+        // Buffer size handed to createScriptProcessor().
+        const BUFFER_SIZE = 8192;
+        // Number of buffered capture entries required before a frame is sent.
+        const MIN_AUDIO_SIZE = 6400;
+
+        // Shared state, filled in by startAudio() / initWebSocket().
+        let audioContext;
+        let microphoneStream;
+        let scriptProcessor;
+        let source;
+        // protobuf type for pipecat_proto.Frame, set once frames.proto has loaded.
+        let frame;
+        // Decoded audio buffers waiting to be played, oldest first.
+        let audioChunks = [];
+        let isPlaying = false;
+        let ws;
+
+        // Load the schema and look up the Frame type used to encode and decode messages.
+        const proto = protobuf.load("frames.proto", (err, root) => {
+            if (err) throw err;
+            frame = root.lookupType("pipecat_proto.Frame");
+        });
+
+        function initWebSocket() {
+            // Open the socket, then attach logging and the incoming-message handler.
+            const socket = new WebSocket('ws://localhost:8765');
+
+            socket.addEventListener('open', () => console.log('WebSocket connection established.'));
+            socket.addEventListener('message', handleWebSocketMessage);
+            socket.addEventListener('close', (event) => console.log("WebSocket connection closed.", event.code, event.reason));
+            socket.addEventListener('error', (event) => console.error('WebSocket error:', event));
+
+            ws = socket;
+        }
+
+        async function handleWebSocketMessage(event) {
+            // Read the message payload as an ArrayBuffer and hand it to the decoder.
+            enqueueAudioFromProto(await event.data.arrayBuffer());
+        }
+
+        function enqueueAudioFromProto(arrayBuffer) {
+            // Decode one protobuf Frame; if it carries audio, convert the 16-bit
+            // samples to floats and queue them for playback.
+            // Returns false when the frame has no usable audio.
+            const parsedFrame = frame.decode(new Uint8Array(arrayBuffer));
+            // frames.proto names the bytes field of AudioFrame `audio`, so the
+            // payload is `parsedFrame.audio.audio` (the schema has no `data` field).
+            const pcmBytes = parsedFrame?.audio?.audio;
+            // Fewer than two bytes means zero samples, and createBuffer() rejects a zero length.
+            if (!pcmBytes || pcmBytes.length < 2) return false;
+
+            const frameCount = Math.floor(pcmBytes.length / 2);
+            const audioOutBuffer = audioContext.createBuffer(1, frameCount, SAMPLE_RATE);
+            const nowBuffering = audioOutBuffer.getChannelData(0);
+            // The decoded bytes may be a view into a larger buffer, so honour
+            // byteOffset/byteLength instead of reading `.buffer` from its start.
+            const view = new DataView(pcmBytes.buffer, pcmBytes.byteOffset, pcmBytes.byteLength);
+
+            for (let i = 0; i < frameCount; i++) {
+                // NOTE(review): little-endian samples assumed — confirm against the server.
+                nowBuffering[i] = view.getInt16(i * 2, true) / 32768.0;
+            }
+
+            audioChunks.push(audioOutBuffer);
+            if (!isPlaying) playNextChunk();
+        }
+
+        function playNextChunk() {
+            // Play the oldest queued buffer; when it ends, this function runs
+            // again until the queue is empty.
+            const nextBuffer = audioChunks.shift();
+            if (nextBuffer === undefined) {
+                isPlaying = false;
+                return;
+            }
+
+            isPlaying = true;
+            const player = audioContext.createBufferSource();
+            player.buffer = nextBuffer;
+            player.connect(audioContext.destination);
+            player.onended = playNextChunk;
+            player.start();
+        }
+
+        function startAudio() {
+            // Ask for the microphone, start capturing audio through a
+            // ScriptProcessor node, and stream encoded frames over the WebSocket.
+            if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
+                alert('getUserMedia is not supported in your browser.');
+                return;
+            }
+
+            navigator.mediaDevices.getUserMedia({ audio: true })
+                .then((stream) => {
+                    microphoneStream = stream;
+                    audioContext = new (window.AudioContext || window.webkitAudioContext)();
+                    scriptProcessor = audioContext.createScriptProcessor(BUFFER_SIZE, 1, 1);
+                    source = audioContext.createMediaStreamSource(stream);
+                    source.connect(scriptProcessor);
+                    scriptProcessor.connect(audioContext.destination);
+
+                    const audioBuffer = [];
+                    // Clamp to 1: for context rates below 2 * SAMPLE_RATE the floor
+                    // is 0, and a zero step would make the loop below never end.
+                    const skipRatio = Math.max(1, Math.floor(audioContext.sampleRate / (SAMPLE_RATE * 2)));
+
+                    scriptProcessor.onaudioprocess = (event) => {
+                        const rawLeftChannelData = event.inputBuffer.getChannelData(0);
+                        for (let i = 0; i < rawLeftChannelData.length; i += skipRatio) {
+                            // NOTE(review): each entry is a byte-swapped 16-bit value pushed
+                            // into an array used as a protobuf `bytes` field — confirm this
+                            // matches what the server expects.
+                            const normalized = ((rawLeftChannelData[i] * 32768.0) + 32768) % 65536 - 32768;
+                            const swappedBytes = ((normalized & 0xff) << 8) | ((normalized >> 8) & 0xff);
+                            audioBuffer.push(swappedBytes);
+                        }
+
+                        // Only send once the socket is open; send() throws while it is still connecting.
+                        const socketReady = ws && ws.readyState === WebSocket.OPEN;
+                        if (socketReady && audioBuffer.length >= MIN_AUDIO_SIZE) {
+                            const audioFrame = frame.create({ audio: { audio: audioBuffer.slice(0, MIN_AUDIO_SIZE) } });
+                            const encodedFrame = new Uint8Array(frame.encode(audioFrame).finish());
+                            ws.send(encodedFrame);
+                            audioBuffer.splice(0, MIN_AUDIO_SIZE);
+                        }
+                    };
+
+                    initWebSocket();
+                })
+                .catch((error) => console.error('Error accessing microphone:', error));
+        }
+
+        function stopAudio() {
+            // Tear down the connection and release every audio resource that
+            // startAudio() acquired. Does nothing if no socket is active.
+            if (!ws) return;
+
+            ws.close();
+            ws = undefined;
+            scriptProcessor.disconnect();
+            source.disconnect();
+
+            // Drop queued playback so a pending `onended` callback finds nothing to play.
+            audioChunks = [];
+            isPlaying = false;
+
+            // Stop the microphone tracks; otherwise the browser keeps capturing.
+            if (microphoneStream) {
+                microphoneStream.getTracks().forEach((track) => track.stop());
+                microphoneStream = undefined;
+            }
+
+            // startAudio() creates a new AudioContext each time, so close this one.
+            if (audioContext && audioContext.state !== 'closed') {
+                audioContext.close();
+            }
+        }
+
+        document.getElementById('startAudioBtn').addEventListener('click', startAudio);
+        document.getElementById('stopAudioBtn').addEventListener('click', stopAudio);
+    </script>
+</body>
+
+</html>
--- a/examples/foundational/websocket-server/sample.py
+++ b/examples/foundational/websocket-server/sample.py
@@ -0,0 +1,50 @@
+import asyncio
+import aiohttp
+import logging
+import os
+from pipecat.pipeline.frame_processor import FrameProcessor
+from pipecat.pipeline.frames import TextFrame, TranscriptionFrame
+from pipecat.pipeline.pipeline import Pipeline
+from pipecat.services.elevenlabs_ai_services import ElevenLabsTTSService
+from pipecat.transports.websocket_transport import WebsocketTransport
+from pipecat.services.whisper_ai_services import WhisperSTTService
+
+logging.basicConfig(format="%(levelno)s %(asctime)s %(message)s")
+logger = logging.getLogger("pipecat")
+logger.setLevel(logging.DEBUG)
+
+
+class WhisperTranscriber(FrameProcessor):
+    """Prints transcription frames and yields every other frame unchanged."""
+
+    async def process_frame(self, frame):
+        if not isinstance(frame, TranscriptionFrame):
+            yield frame
+            return
+        print(f"Transcribed: {frame.text}")
+
+
+async def main():
+    """Run a websocket pipeline: Whisper STT, transcript printer, ElevenLabs TTS."""
+    async with aiohttp.ClientSession() as session:
+        transport = WebsocketTransport(mic_enabled=True, speaker_enabled=True)
+        tts = ElevenLabsTTSService(
+            aiohttp_session=session,
+            api_key=os.getenv("ELEVENLABS_API_KEY"),
+            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
+        )
+
+        stt = WhisperSTTService()
+        transcriber = WhisperTranscriber()
+        pipeline = Pipeline([stt, transcriber, tts])
+
+        # Registered through transport.on_connection; queues a greeting text frame.
+        @transport.on_connection
+        async def queue_frame():
+            greeting = TextFrame("Hello there!")
+            await pipeline.queue_frames([greeting])
+
+        await transport.run(pipeline)
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/examples/moondream-chatbot/.dockerignore
+++ b/examples/moondream-chatbot/.dockerignore
@@ -0,0 +1,163 @@
+# flyctl launch added from .gitignore
+# Byte-compiled / optimized / DLL files
+**/__pycache__
+**/*.py[cod]
+**/*$py.class
+
+# C extensions
+**/*.so
+
+# Distribution / packaging
+**/.Python
+**/build
+**/develop-eggs
+**/dist
+**/downloads
+**/eggs
+**/.eggs
+**/lib
+**/lib64
+**/parts
+**/sdist
+**/var
+**/wheels
+**/share/python-wheels
+**/*.egg-info
+**/.installed.cfg
+**/*.egg
+**/MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+**/*.manifest
+**/*.spec
+
+# Installer logs
+**/pip-log.txt
+**/pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+**/htmlcov
+**/.tox
+**/.nox
+**/.coverage
+**/.coverage.*
+**/.cache
+**/nosetests.xml
+**/coverage.xml
+**/*.cover
+**/*.py,cover
+**/.hypothesis
+**/.pytest_cache
+**/cover
+
+# Translations
+**/*.mo
+**/*.pot
+
+# Django stuff:
+**/*.log
+**/local_settings.py
+**/db.sqlite3
+**/db.sqlite3-journal
+
+# Flask stuff:
+**/instance
+**/.webassets-cache
+
+# Scrapy stuff:
+**/.scrapy
+
+# Sphinx documentation
+**/docs/_build
+
+# PyBuilder
+**/.pybuilder
+**/target
+
+# Jupyter Notebook
+**/.ipynb_checkpoints
+
+# IPython
+**/profile_default
+**/ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+**/.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+**/__pypackages__
+
+# Celery stuff
+**/celerybeat-schedule
+**/celerybeat.pid
+
+# SageMath parsed files
+**/*.sage.py
+
+# Environments
+**/.env
+**/.venv
+**/env
+**/venv
+**/ENV
+**/env.bak
+**/venv.bak
+
+# Spyder project settings
+**/.spyderproject
+**/.spyproject
+
+# Rope project settings
+**/.ropeproject
+
+# mkdocs documentation
+site
+
+# mypy
+**/.mypy_cache
+**/.dmypy.json
+**/dmypy.json
+
+# Pyre type checker
+**/.pyre
+
+# pytype static type analyzer
+**/.pytype
+
+# Cython debug symbols
+**/cython_debug
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+**/runpod.toml
+fly.toml
--- a/examples/moondream-chatbot/.gitignore
+++ b/examples/moondream-chatbot/.gitignore
@@ -0,0 +1,161 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+runpod.toml
--- a/examples/moondream-chatbot/Dockerfile
+++ b/examples/moondream-chatbot/Dockerfile
@@ -0,0 +1,25 @@
+# Image for the Moondream chatbot example server with NVIDIA CUDA libraries.
+FROM ubuntu:22.04
+
+# Register NVIDIA's CUDA apt repository. The cuda-keyring package is presumably
+# what provides the keyring file referenced by "signed-by" below.
+RUN apt-get update && apt-get install -y wget
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
+RUN dpkg -i cuda-keyring_1.1-1_all.deb
+
+RUN echo "deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" > /etc/apt/sources.list.d/cuda-ubuntu2204-x86_64.list
+
+# Keep "apt-get update" in the same layer as every install that relies on it,
+# so a cached update layer can never pair a stale index with a new install.
+RUN apt-get update && apt-get install -y \
+    python3 python3-pip \
+    cuda-nvcc-12-4 libcublas-12-4 libcudnn8
+
+# Application layout: Python sources in /app, plus assets/ and utils/ below it.
+RUN mkdir -p /app/assets /app/utils
+COPY *.py /app/
+COPY requirements.txt /app/
+COPY assets/* /app/assets/
+COPY utils/* /app/utils/
+
+WORKDIR /app
+RUN pip3 install -r requirements.txt
+
+# Port published by the "docker run -p 7860:7860" example in the README.
+EXPOSE 7860
+
+CMD ["python3", "server.py"]
--- a/examples/moondream-chatbot/Dockerfile.intel
+++ b/examples/moondream-chatbot/Dockerfile.intel
@@ -0,0 +1,76 @@
+FROM ubuntu:22.04
+
+# Root directories of the Intel OneAPI components referenced later in the build
+ENV DPCPPROOT=/opt/intel/oneapi/compiler/latest \
+    MKLROOT=/opt/intel/oneapi/mkl/latest \
+    CCLROOT=/opt/intel/oneapi/ccl/latest \
+    MPIROOT=/opt/intel/oneapi/mpi/latest
+
+# Base tooling needed before the Intel repositories can be added
+RUN apt-get update \
+ && apt-get install -y --no-install-recommends \
+        build-essential \
+        gnupg2 \
+        lsb-release \
+        pciutils \
+        python3-pip \
+        wget
+
+# Single layer that, in order:
+#   1. registers the Intel OneAPI apt repository together with its GPG key;
+#   2. on Ubuntu "jammy" only, registers the Intel GPU repository and installs the
+#      GPU runtime/driver packages (any other release aborts the build with exit 1);
+#   3. installs pinned versions of the OneAPI DPC++ compiler, MKL and oneCCL packages,
+#      then clears the apt cache and package lists;
+#   4. creates a "render" system group, adds root to it, and appends the OneAPI
+#      vars.sh sourcing lines and LD_LIBRARY_PATH export to root's ~/.bashrc.
+RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
+    /bin/bash -c ' \
+    . /etc/os-release && \
+    if [[ " jammy " =~ " ${VERSION_CODENAME} " ]]; then \
+        wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
+        echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu ${VERSION_CODENAME}/lts/2350 unified" | \
+        tee /etc/apt/sources.list.d/intel-gpu-${VERSION_CODENAME}.list && \
+        apt-get update && \
+        apt-get install -y --no-install-recommends intel-opencl-icd \
+            intel-level-zero-gpu level-zero intel-media-va-driver-non-free \
+            libmfx1 libmfxgen1 libvpl2 libegl-mesa0 libegl1-mesa \
+            libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
+            libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 \
+            libxatracker2 mesa-va-drivers mesa-vdpau-drivers \
+            mesa-vulkan-drivers va-driver-all; \
+    else \
+        echo "Ubuntu version ${VERSION_CODENAME} not supported. Exiting..."; \
+        exit 1; \
+    fi' && \
+    apt-get update && apt-get install -y --no-install-recommends \
+    intel-oneapi-dpcpp-cpp-2024.1=2024.1.0-963 intel-oneapi-mkl-devel=2024.1.0-691 \
+    intel-oneapi-ccl-devel=2021.12.0-309 && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    groupadd -r render && usermod -aG render root && \
+    echo "source ${DPCPPROOT}/env/vars.sh" >> ~/.bashrc && \
+    echo "source ${MKLROOT}/env/vars.sh" >> ~/.bashrc && \
+    echo "source ${CCLROOT}/env/vars.sh" >> ~/.bashrc && \
+    echo "source ${MPIROOT}/env/vars.sh" >> ~/.bashrc && \
+    echo "export LD_LIBRARY_PATH=${MKLROOT}/lib:${DPCPPROOT}/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH" >> ~/.bashrc
+
+WORKDIR /app
+# Copy only the files the server needs and keep the assets/ and utils/ layout.
+# A blanket "COPY . /app" would also bake a local .env (credentials) into the
+# image, and a single multi-source COPY would flatten both directories into /app.
+RUN mkdir -p /app/assets /app/utils
+COPY *.py requirements.txt /app/
+COPY assets/* /app/assets/
+COPY utils/* /app/utils/
+
+# Install the requirements, then replace the default torch build (and any
+# NVIDIA wheels it pulled in) with the Intel XPU builds.
+# "xargs -r" skips the uninstall when no nvidia-* package is present; without it
+# pip would be invoked with no package names and fail the build.
+RUN python3 -m pip install --no-cache-dir -r requirements.txt && \
+    python3 -m pip uninstall -y torch && \
+    python3 -m pip freeze | grep 'nvidia-' | xargs -r python3 -m pip uninstall -y && \
+    python3 -m pip install --no-cache-dir --force-reinstall torch==2.1.0.post2 torchvision==0.16.0.post2 torchaudio==2.1.0.post2 \
+    intel-extension-for-pytorch==2.1.30+xpu oneccl_bind_pt==2.1.300+xpu \
+    --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+
+# Generate the launcher script: it sources the OneAPI environment scripts,
+# extends LD_LIBRARY_PATH and starts the server. The single quotes keep the
+# ${...} references literal so they are resolved when the container runs.
+RUN printf '%s\n' \
+        '#!/bin/bash' \
+        'source ${DPCPPROOT}/env/vars.sh' \
+        'source ${MKLROOT}/env/vars.sh' \
+        'source ${CCLROOT}/env/vars.sh' \
+        'source ${MPIROOT}/env/vars.sh' \
+        'export LD_LIBRARY_PATH=${MKLROOT}/lib:${DPCPPROOT}/linux/compiler/lib/intel64_lin:$LD_LIBRARY_PATH' \
+        'python3 server.py' \
+        > /usr/local/bin/run_app.sh && \
+    chmod +x /usr/local/bin/run_app.sh && \
+    find / -type d -name "__pycache__" -exec rm -rf {} +
+
+EXPOSE 7860
+ENTRYPOINT ["/usr/local/bin/run_app.sh"]
--- a/examples/moondream-chatbot/README.md
+++ b/examples/moondream-chatbot/README.md
@@ -0,0 +1,44 @@
+# Moondream Chatbot
+
+<img src="image.png" width="420px">
+
+
+This app connects you to a chatbot powered by GPT-4, complete with animations generated by Stable Video Diffusion. The chatbot also has vision powers thanks to [Moondream](https://moondream.ai) so you can ask it, for example, "what do you see?".
+
+ℹ️ On first run, startup may take a while because the VAD (Voice Activity Detection) and vision models need to be downloaded.
+
+## Get started
+
+```bash
+python3 -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+
+cp env.example .env # and add your credentials
+
+```
+
+## Run the server
+
+```bash
+python server.py
+```
+
+Then, visit `http://localhost:7860/start` in your browser to start a chatbot
+session.
+
+## Build and test the Docker image
+
+```bash
+docker build -t moonbot .
+docker run --env-file .env -p 7860:7860 moonbot
+```
+
+### For Intel GPUs (Arc, Max and Flex series)
+
+```bash
+docker build -t moonbot -f Dockerfile.intel .
+docker run --env-file .env -p 7860:7860 --device /dev/dri moonbot
+```
+
+Once the container is running, visit `http://localhost:7860/start` again to start a chatbot session.
--- a/examples/moondream-chatbot/assets/robot01.png
+++ b/examples/moondream-chatbot/assets/robot01.png
--- a/examples/moondream-chatbot/assets/robot010.png
+++ b/examples/moondream-chatbot/assets/robot010.png
--- a/examples/moondream-chatbot/assets/robot011.png
+++ b/examples/moondream-chatbot/assets/robot011.png
--- a/examples/moondream-chatbot/assets/robot012.png
+++ b/examples/moondream-chatbot/assets/robot012.png
--- a/examples/moondream-chatbot/assets/robot013.png
+++ b/examples/moondream-chatbot/assets/robot013.png
--- a/examples/moondream-chatbot/assets/robot014.png
+++ b/examples/moondream-chatbot/assets/robot014.png
--- a/examples/moondream-chatbot/assets/robot015.png
+++ b/examples/moondream-chatbot/assets/robot015.png
--- a/examples/moondream-chatbot/assets/robot016.png
+++ b/examples/moondream-chatbot/assets/robot016.png
--- a/examples/moondream-chatbot/assets/robot017.png
+++ b/examples/moondream-chatbot/assets/robot017.png
--- a/examples/moondream-chatbot/assets/robot018.png
+++ b/examples/moondream-chatbot/assets/robot018.png
--- a/examples/moondream-chatbot/assets/robot019.png
+++ b/examples/moondream-chatbot/assets/robot019.png
--- a/examples/moondream-chatbot/assets/robot02.png
+++ b/examples/moondream-chatbot/assets/robot02.png
--- a/examples/moondream-chatbot/assets/robot020.png
+++ b/examples/moondream-chatbot/assets/robot020.png
--- a/examples/moondream-chatbot/assets/robot021.png
+++ b/examples/moondream-chatbot/assets/robot021.png
--- a/examples/moondream-chatbot/assets/robot022.png
+++ b/examples/moondream-chatbot/assets/robot022.png
--- a/examples/moondream-chatbot/assets/robot023.png
+++ b/examples/moondream-chatbot/assets/robot023.png
--- a/examples/moondream-chatbot/assets/robot024.png
+++ b/examples/moondream-chatbot/assets/robot024.png
--- a/examples/moondream-chatbot/assets/robot025.png
+++ b/examples/moondream-chatbot/assets/robot025.png
--- a/examples/moondream-chatbot/assets/robot03.png
+++ b/examples/moondream-chatbot/assets/robot03.png
--- a/examples/moondream-chatbot/assets/robot04.png
+++ b/examples/moondream-chatbot/assets/robot04.png
--- a/examples/moondream-chatbot/assets/robot05.png
+++ b/examples/moondream-chatbot/assets/robot05.png
--- a/examples/moondream-chatbot/assets/robot06.png
+++ b/examples/moondream-chatbot/assets/robot06.png
--- a/examples/moondream-chatbot/assets/robot07.png
+++ b/examples/moondream-chatbot/assets/robot07.png
--- a/examples/moondream-chatbot/assets/robot08.png
+++ b/examples/moondream-chatbot/assets/robot08.png
--- a/examples/moondream-chatbot/assets/robot09.png
+++ b/examples/moondream-chatbot/assets/robot09.png
--- a/Show More
+++ b/Show More