Files
pipecat/docs/api/conf.py
2025-06-25 23:29:37 -04:00

336 lines
9.9 KiB
Python

import logging
import sys
from pathlib import Path
# Configure logging: INFO and above, each record prefixed with a timestamp and
# its level name.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# Shared logger used by verify_modules() and setup() further down in this file.
logger = logging.getLogger("sphinx-build")
# Add source directory to path.
# project_root is two directories above the folder containing this conf.py;
# <project_root>/src goes to the front of sys.path so the `pipecat` package
# that setup() documents (src/pipecat) is importable.
docs_dir = Path(__file__).parent
project_root = docs_dir.parent.parent
sys.path.insert(0, str(project_root / "src"))
# Project information
# NOTE: `copyright` shadows the Python builtin on purpose; it is the name of
# the Sphinx configuration value.
project = "pipecat-ai"
copyright = "2024, Daily"
author = "Daily"
# General configuration: Sphinx extensions to load.
extensions = [
    "sphinx.ext.autodoc",  # pull documentation from docstrings
    "sphinx.ext.napoleon",  # Google/NumPy docstring support
    "sphinx.ext.viewcode",  # links to highlighted source
    "sphinx.ext.intersphinx",  # cross-project references
]
# Napoleon settings: parse Google-style docstrings only, and do not pull the
# __init__ docstring into the class documentation.
napoleon_google_docstring = True
napoleon_numpy_docstring = False
napoleon_include_init_with_doc = False
# AutoDoc settings: defaults applied to every autodoc directive.
autodoc_default_options = {
    "members": True,  # document members
    "member-order": "bysource",  # keep source order rather than alphabetical
    "undoc-members": True,  # include members that have no docstring
    "exclude-members": "__weakref__,__init__",  # never list these two
    "no-index": True,  # do not add documented objects to the index
    "show-inheritance": True,  # list base classes under each class
}
# Mock imports for optional dependencies.
# Sphinx autodoc replaces the modules named here with mock objects so that
# pipecat modules depending on them can be imported without the packages being
# installed. verify_modules() also reads this list to decide which pipecat
# modules to skip.
#
# Each name appears exactly once (earlier revisions listed "riva", "livekit",
# "torch" and "transformers" twice).
#
# NOTE(review): the Sphinx documentation recommends listing only root packages.
# Dotted entries whose root is also listed (e.g. "riva.client") and
# attribute-style entries (e.g. "pydantic.BaseModel", "rtc.Room") are probably
# redundant; they are kept unchanged here - confirm before pruning.
autodoc_mock_imports = [
    "pyht",  # Base PlayHT package
    "pyht.async_client",  # PlayHT specific imports
    "pyht.client",
    "pyht.protos",
    "pyht.protos.api_pb2",
    "pipecat_ai_playht",  # PlayHT wrapper
    "aiortc",
    "aiortc.mediastreams",
    "cv2",
    "av",
    "pyneuphonic",
    "mem0",
    "mlx_whisper",
    "anthropic",
    "assemblyai",
    "boto3",
    "azure",
    "cartesia",
    "deepgram",
    "elevenlabs",
    "fal",
    "gladia",
    "google",
    "krisp",
    "langchain",
    "lmnt",
    "noisereduce",
    "openai",
    "openpipe",
    "simli",
    "soundfile",
    "pipecat_ai_krisp",
    "pyaudio",
    "_tkinter",
    "tkinter",
    "daily",
    "daily_python",
    "pydantic.BaseModel",
    "pydantic.Field",
    "pydantic._internal._model_construction",
    "pydantic._internal._fields",
    # Moondream dependencies
    "torch",
    "transformers",
    "intel_extension_for_pytorch",
    # Ultravox dependencies
    "huggingface_hub",
    "vllm",
    "vllm.engine.arg_utils",
    "transformers.AutoTokenizer",
    # Langchain dependencies
    "langchain_core",
    "langchain_core.messages",
    "langchain_core.runnables",
    "langchain_core.messages.AIMessageChunk",
    "langchain_core.runnables.Runnable",
    # LiveKit dependencies
    "livekit",
    "livekit.rtc",
    "livekit_api",
    "livekit_protocol",
    "tenacity",
    "tenacity.retry",
    "tenacity.stop_after_attempt",
    "tenacity.wait_exponential",
    "rtc",
    "rtc.Room",
    "rtc.RoomOptions",
    "rtc.AudioSource",
    "rtc.LocalAudioTrack",
    "rtc.TrackPublishOptions",
    "rtc.TrackSource",
    "rtc.AudioStream",
    "rtc.AudioFrameEvent",
    "rtc.AudioFrame",
    "rtc.Track",
    "rtc.TrackKind",
    "rtc.RemoteParticipant",
    "rtc.RemoteTrackPublication",
    "rtc.DataPacket",
    # Riva dependencies
    "riva",
    "riva.client",
    "riva.client.Auth",
    "riva.client.ASRService",
    "riva.client.StreamingRecognitionConfig",
    "riva.client.RecognitionConfig",
    "riva.client.AudioEncoding",
    "riva.client.proto.riva_tts_pb2",
    "riva.client.SpeechSynthesisService",
    # Local CoreML Smart Turn dependencies
    # (the "torch" and "transformers" roots are listed under Moondream above)
    "coremltools",
    "coremltools.models",
    "coremltools.models.MLModel",
    "torch.nn",
    "torch.nn.functional",
    "transformers.AutoFeatureExtractor",
    # Also add specific classes that are imported
    "AutoFeatureExtractor",
]
# HTML output settings
html_theme = "sphinx_rtd_theme"  # Read the Docs theme
html_static_path = ["_static"]  # static assets, relative to this directory
# autodoc (not HTML) setting: render type hints in the parameter/return
# descriptions instead of in the signature.
autodoc_typehints = "description"
html_show_sphinx = False  # hide the "Created using Sphinx" footer note
def verify_modules():
    """Try importing a fixed set of pipecat modules and log the outcome.

    Every candidate is addressed as ``pipecat.<category>[.<subcategory>].<name>``.
    A candidate is skipped when either its full dotted path or its bare leaf
    name appears in ``autodoc_mock_imports``. Otherwise it is imported; an
    ``ImportError``, ``TypeError`` or ``NameError`` is logged as a warning and
    the module is recorded as unavailable. Nothing is returned - the results
    are only written to the log.
    """
    layout = {
        "services": [
            "assemblyai",
            "aws",
            "cartesia",
            "deepgram",
            "google",
            "lmnt",
            "riva",
            "simli",
        ],
        "serializers": ["livekit"],
        "vad": ["silero", "vad_analyzer"],
        "transports": {
            "services": ["daily", "livekit"],
            "local": ["audio", "tk"],
            "network": ["fastapi_websocket", "websocket_server"],
        },
    }
    # Names that autodoc mocks are never imported for real.
    mocked = set(autodoc_mock_imports)

    def iter_candidates():
        """Yield (dotted_path, leaf_name) for every entry, in declaration order."""
        for category, entries in layout.items():
            if isinstance(entries, dict):
                # Nested layout: category -> subcategory -> module names.
                for subcategory, names in entries.items():
                    for name in names:
                        yield f"pipecat.{category}.{subcategory}.{name}", name
            else:
                # Flat layout: category -> module names.
                for name in entries:
                    yield f"pipecat.{category}.{name}", name

    unavailable = []
    for dotted, leaf in iter_candidates():
        if dotted in mocked or leaf in mocked:
            logger.info(f"Skipping import of mocked module: {dotted}")
            continue
        try:
            __import__(dotted)
            logger.info(f"Successfully imported {dotted}")
        except (ImportError, TypeError, NameError) as e:
            unavailable.append(dotted)
            logger.warning(f"Optional module not available: {dotted} - {e}")

    if unavailable:
        logger.warning(f"Some optional modules are not available: {unavailable}")
def clean_title(title: str) -> str:
    """Turn a generated module heading into a short, human-friendly title.

    The heading is expected to look like ``"pkg.sub.some_module module"``.
    Only the last dotted component is kept, underscores become spaces and
    each word is capitalized, except for known service names and acronyms
    which get their canonical spelling.

    Args:
        title: Heading text, e.g. ``"pipecat.services.openai module"``. RST
            backslash escapes such as ``vad\\_analyzer`` are tolerated.

    Returns:
        The cleaned title, e.g. ``"OpenAI"`` or ``"VAD Analyzer"``. An empty
        input yields an empty string.
    """
    # Remove everything after the first space (like 'module', 'package', etc.)
    dotted_name = title.split(" ")[0]
    # sphinx-apidoc RST-escapes its headings, so underscores arrive as "\_".
    # Drop the escapes first; otherwise each word keeps a trailing backslash,
    # which defeats the special-case lookup and leaks "\ " into the title.
    dotted_name = dotted_name.replace("\\", "")
    # Get the last part of the dot-separated path
    leaf = dotted_name.split(".")[-1]
    # Special cases for service names and common acronyms
    special_cases = {
        "ai": "AI",
        "aws": "AWS",
        "api": "API",
        "vad": "VAD",
        "assemblyai": "AssemblyAI",
        "deepgram": "Deepgram",
        "elevenlabs": "ElevenLabs",
        "openai": "OpenAI",
        "openpipe": "OpenPipe",
        "playht": "PlayHT",
        "xtts": "XTTS",
        "lmnt": "LMNT",
    }
    # No key contains an underscore, so the per-word lookup also covers the
    # case where the whole leaf is a special case.
    return " ".join(
        special_cases.get(word.lower(), word.capitalize()) for word in leaf.split("_")
    )
def setup(app):
    """Generate API documentation during Sphinx build.

    Deletes ``<conf dir>/api`` if it exists, regenerates it by running
    ``sphinx-apidoc`` over ``<project_root>/src/pipecat``, then rewrites the
    heading of every generated ``.rst`` file with ``clean_title``.

    Any exception raised while generating or post-processing is logged and
    swallowed, so a failure here does not propagate out of this function.

    Args:
        app: The Sphinx application object. Not used by this function.
    """
    from sphinx.ext.apidoc import main

    docs_dir = Path(__file__).parent
    project_root = docs_dir.parent.parent
    output_dir = str(docs_dir / "api")
    source_dir = str(project_root / "src" / "pipecat")

    # Clean existing files
    if Path(output_dir).exists():
        import shutil

        shutil.rmtree(output_dir)
        logger.info(f"Cleaned existing documentation in {output_dir}")

    logger.info("Generating API documentation...")
    logger.info(f"Output directory: {output_dir}")
    logger.info(f"Source directory: {source_dir}")

    # Paths/patterns passed to sphinx-apidoc as exclusions.
    excludes = [
        str(project_root / "src/pipecat/pipeline/to_be_updated"),
        str(project_root / "src/pipecat/processors/gstreamer"),
        str(project_root / "src/pipecat/services/to_be_updated"),
        str(project_root / "src/pipecat/vad"),  # deprecated
        "**/test_*.py",
        "**/tests/*.py",
    ]

    try:
        main(
            [
                "-f",  # Force overwriting
                "-e",  # Short form of --separate (also passed explicitly below)
                "-M",  # Short form of --module-first (also passed explicitly below)
                "--no-toc",  # Don't create a table of contents file
                "--separate",  # Put documentation for each module in its own page
                "--module-first",  # Module documentation before submodule documentation
                "--implicit-namespaces",  # Handle implicit namespace packages
                "-o",
                output_dir,
                source_dir,
            ]
            + excludes
        )
        logger.info("API documentation generated successfully!")

        # Process generated RST files to update titles
        for rst_file in Path(output_dir).glob("**/*.rst"):
            # apidoc writes UTF-8; do not depend on the locale encoding.
            content = rst_file.read_text(encoding="utf-8")
            lines = content.split("\n")
            # A heading needs a title line plus an underline. Skip anything
            # shorter instead of indexing past the end, which would abort the
            # processing of every remaining file.
            if len(lines) < 2 or "=" not in lines[1]:
                continue
            old_title = lines[0]
            new_title = clean_title(old_title)
            # Rewrite only the heading line, not other occurrences of the same
            # text. The underline is left as is: RST accepts an underline that
            # is longer than its title.
            lines[0] = new_title
            rst_file.write_text("\n".join(lines), encoding="utf-8")
            logger.info(f"Updated title: {old_title} -> {new_title}")
    except Exception as e:
        # Deliberate best-effort: report the failure and return normally.
        logger.error(f"Error generating API documentation: {e}", exc_info=True)
# Run module verification: try importing the pipecat modules listed in
# verify_modules() and log which ones are importable, mocked, or unavailable.
verify_modules()