Gemini text input works. We translate from the OpenAILLMContext format on the fly in the GoogleLLMService implementation. This commit also implements image input (vision) in both the GoogleLLMService and the OpenAILLMService. Image input is a hack and needs to be revisited. OpenAI expects images to be uploaded as base64-encoded JPEGs. Google does not require the base64 encoding. Other than for images, we use the OpenAI format as our standard, but base64-encoding the images and then decoding them again in the GoogleLLMService feels wasteful.
59 lines
1.7 KiB
TOML
59 lines
1.7 KiB
TOML
# Build backend: setuptools, with setuptools_scm available at build time
# (the project version is declared dynamic rather than hard-coded).
[build-system]
requires = ["setuptools>=64", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"

# Core package metadata (PEP 621).
[project]
name = "pipecat-ai"
# The version is not written here; it is declared dynamic and supplied at build time.
dynamic = ["version"]
description = "An open source framework for voice (and multimodal) assistants"
license = { text = "BSD 2-Clause License" }
readme = "README.md"
# The floor is set by the pinned core dependencies below: numpy~=1.26.4
# supports Python 3.9 and newer only, so declaring anything lower would let
# installers start on interpreters where dependency resolution must fail.
requires-python = ">=3.9"
keywords = ["webrtc", "audio", "video", "ai"]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: BSD License",
    "Topic :: Communications :: Conferencing",
    "Topic :: Multimedia :: Sound/Audio",
    "Topic :: Multimedia :: Video",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Always-installed runtime dependencies. Each uses a three-component
# compatible-release pin (~=X.Y.Z), i.e. only newer patch releases are accepted.
dependencies = [
    "aiohttp~=3.9.5",
    "loguru~=0.7.0",
    "numpy~=1.26.4",
    "Pillow~=10.3.0",
    "typing-extensions~=4.11.0",
]

# Links published alongside the package metadata.
[project.urls]
Source = "https://github.com/pipecat-ai/pipecat"
Website = "https://pipecat.ai"

# Optional extras, one per integration, installed with
# `pip install "pipecat-ai[<extra>]"`. Kept in alphabetical order.
[project.optional-dependencies]
anthropic = ["anthropic~=0.25.7"]
azure = ["azure-cognitiveservices-speech~=1.37.0"]
daily = ["daily-python~=0.7.4"]
examples = ["python-dotenv~=1.0.0", "flask~=3.0.3", "flask_cors~=4.0.1"]
fal = ["fal-client~=0.4.0"]
# Same requirement as the `openai` extra; keep the two pins in sync.
fireworks = ["openai~=1.26.0"]
google = ["google-generativeai~=0.5.3"]
local = ["pyaudio~=0.2.0"]
moondream = ["einops~=0.8.0", "timm~=0.9.16", "transformers~=4.40.2"]
openai = ["openai~=1.26.0"]
playht = ["pyht~=0.0.28"]
silero = ["torch~=2.3.0", "torchaudio~=2.3.0"]
websocket = ["websockets~=12.0"]
whisper = ["faster-whisper~=1.0.2"]

# Package discovery for setuptools. Every setting in this table is optional;
# only the search root is overridden so packages are looked up under src/.
[tool.setuptools.packages.find]
where = ["src"]

# Put src/ on the import path for test runs, matching the package search
# root configured for setuptools.
[tool.pytest.ini_options]
pythonpath = ["src"]

# setuptools_scm configuration: never append a local version segment
# (the "+<something>" suffix) to the generated version string.
[tool.setuptools_scm]
local_scheme = "no-local-version"