import logging import sys from pathlib import Path # Configure logging logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logger = logging.getLogger("sphinx-build") # Add source directory to path docs_dir = Path(__file__).parent project_root = docs_dir.parent.parent sys.path.insert(0, str(project_root / "src")) # Project information project = "pipecat-ai" copyright = "2024, Daily" author = "Daily" # General configuration extensions = [ "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", ] # Napoleon settings napoleon_google_docstring = True napoleon_numpy_docstring = False napoleon_include_init_with_doc = False # AutoDoc settings autodoc_default_options = { "members": True, "member-order": "bysource", "undoc-members": True, "exclude-members": "__weakref__,__init__", "no-index": True, "show-inheritance": True, } # Mock imports for optional dependencies autodoc_mock_imports = [ "riva", "livekit", "pyht", # Base PlayHT package "pyht.async_client", # PlayHT specific imports "pyht.client", "pyht.protos", "pyht.protos.api_pb2", "pipecat_ai_playht", # PlayHT wrapper "aiortc", "aiortc.mediastreams", "cv2", "av", "pyneuphonic", "mem0", "mlx_whisper", "anthropic", "assemblyai", "boto3", "azure", "cartesia", "deepgram", "elevenlabs", "fal", "gladia", "google", "krisp", "langchain", "lmnt", "noisereduce", "openai", "openpipe", "simli", "soundfile", "pipecat_ai_krisp", "pyaudio", "_tkinter", "tkinter", "daily", "daily_python", "pydantic.BaseModel", "pydantic.Field", "pydantic._internal._model_construction", "pydantic._internal._fields", # Moondream dependencies "torch", "transformers", "intel_extension_for_pytorch", # Ultravox dependencies "huggingface_hub", "vllm", "vllm.engine.arg_utils", "transformers.AutoTokenizer", # Langchain dependencies "langchain_core", "langchain_core.messages", "langchain_core.runnables", "langchain_core.messages.AIMessageChunk", "langchain_core.runnables.Runnable", # LiveKit dependencies "livekit", "livekit.rtc", "livekit_api", "livekit_protocol", "tenacity", "tenacity.retry", "tenacity.stop_after_attempt", "tenacity.wait_exponential", "rtc", "rtc.Room", "rtc.RoomOptions", "rtc.AudioSource", "rtc.LocalAudioTrack", "rtc.TrackPublishOptions", "rtc.TrackSource", "rtc.AudioStream", "rtc.AudioFrameEvent", "rtc.AudioFrame", "rtc.Track", "rtc.TrackKind", "rtc.RemoteParticipant", "rtc.RemoteTrackPublication", "rtc.DataPacket", # Riva dependencies "riva", "riva.client", "riva.client.Auth", "riva.client.ASRService", "riva.client.StreamingRecognitionConfig", "riva.client.RecognitionConfig", "riva.client.AudioEncoding", "riva.client.proto.riva_tts_pb2", "riva.client.SpeechSynthesisService", # Local CoreML Smart Turn dependencies "coremltools", "coremltools.models", "coremltools.models.MLModel", "torch", "torch.nn", "torch.nn.functional", "transformers", "transformers.AutoFeatureExtractor", # Also add specific classes that are imported "AutoFeatureExtractor", ] # HTML output settings html_theme = "sphinx_rtd_theme" html_static_path = ["_static"] autodoc_typehints = "description" html_show_sphinx = False def verify_modules(): """Verify that required modules are available.""" required_modules = { "services": [ "assemblyai", "aws", "cartesia", "deepgram", "google", "lmnt", "riva", "simli", ], "serializers": ["livekit"], "vad": ["silero", "vad_analyzer"], "transports": { "services": ["daily", "livekit"], "local": ["audio", "tk"], "network": ["fastapi_websocket", "websocket_server"], }, } # Skip importing modules that are in autodoc_mock_imports skipped_modules = set(autodoc_mock_imports) missing = [] for category, modules in required_modules.items(): if isinstance(modules, dict): # Handle nested structure for subcategory, submodules in modules.items(): for module in submodules: # Check if module is in autodoc_mock_imports if ( f"pipecat.{category}.{subcategory}.{module}" in skipped_modules or module in skipped_modules ): logger.info( f"Skipping import of mocked module: pipecat.{category}.{subcategory}.{module}" ) continue try: __import__(f"pipecat.{category}.{subcategory}.{module}") logger.info( f"Successfully imported pipecat.{category}.{subcategory}.{module}" ) except (ImportError, TypeError, NameError) as e: missing.append(f"pipecat.{category}.{subcategory}.{module}") logger.warning( f"Optional module not available: pipecat.{category}.{subcategory}.{module} - {str(e)}" ) else: # Handle flat structure for module in modules: # Check if module is in autodoc_mock_imports if f"pipecat.{category}.{module}" in skipped_modules or module in skipped_modules: logger.info(f"Skipping import of mocked module: pipecat.{category}.{module}") continue try: __import__(f"pipecat.{category}.{module}") logger.info(f"Successfully imported pipecat.{category}.{module}") except (ImportError, TypeError, NameError) as e: missing.append(f"pipecat.{category}.{module}") logger.warning( f"Optional module not available: pipecat.{category}.{module} - {str(e)}" ) if missing: logger.warning(f"Some optional modules are not available: {missing}") def clean_title(title: str) -> str: """Automatically clean module titles.""" # Remove everything after space (like 'module', 'processor', etc.) title = title.split(" ")[0] # Get the last part of the dot-separated path parts = title.split(".") title = parts[-1] # Special cases for service names and common acronyms special_cases = { "ai": "AI", "aws": "AWS", "api": "API", "vad": "VAD", "assemblyai": "AssemblyAI", "deepgram": "Deepgram", "elevenlabs": "ElevenLabs", "openai": "OpenAI", "openpipe": "OpenPipe", "playht": "PlayHT", "xtts": "XTTS", "lmnt": "LMNT", } # Check if the entire title is a special case if title.lower() in special_cases: return special_cases[title.lower()] # Otherwise, capitalize each word words = title.split("_") cleaned_words = [] for word in words: if word.lower() in special_cases: cleaned_words.append(special_cases[word.lower()]) else: cleaned_words.append(word.capitalize()) return " ".join(cleaned_words) def setup(app): """Generate API documentation during Sphinx build.""" from sphinx.ext.apidoc import main docs_dir = Path(__file__).parent project_root = docs_dir.parent.parent output_dir = str(docs_dir / "api") source_dir = str(project_root / "src" / "pipecat") # Clean existing files if Path(output_dir).exists(): import shutil shutil.rmtree(output_dir) logger.info(f"Cleaned existing documentation in {output_dir}") logger.info(f"Generating API documentation...") logger.info(f"Output directory: {output_dir}") logger.info(f"Source directory: {source_dir}") excludes = [ str(project_root / "src/pipecat/pipeline/to_be_updated"), str(project_root / "src/pipecat/processors/gstreamer"), str(project_root / "src/pipecat/services/to_be_updated"), str(project_root / "src/pipecat/vad"), # deprecated "**/test_*.py", "**/tests/*.py", ] try: main( [ "-f", # Force overwriting "-e", # Don't generate empty files "-M", # Put module documentation before submodule documentation "--no-toc", # Don't create a table of contents file "--separate", # Put documentation for each module in its own page "--module-first", # Module documentation before submodule documentation "--implicit-namespaces", # Added: Handle implicit namespace packages "-o", output_dir, source_dir, ] + excludes ) logger.info("API documentation generated successfully!") # Process generated RST files to update titles for rst_file in Path(output_dir).glob("**/*.rst"): # Changed to recursive glob content = rst_file.read_text() lines = content.split("\n") # Find and clean up the title if lines and "=" in lines[1]: # Title is typically the first line old_title = lines[0] new_title = clean_title(old_title) content = content.replace(old_title, new_title) rst_file.write_text(content) logger.info(f"Updated title: {old_title} -> {new_title}") except Exception as e: logger.error(f"Error generating API documentation: {e}", exc_info=True) # Run module verification verify_modules()