Merge pull request #2738 from pipecat-ai/aleix/openai-cached-tokens-metrics

BaseOpenAILLMService: include cached tokens in metrics frame
2025-09-25 13:36:03 -07:00
parent 3797f41c8c 33447ad6f2
commit 7078fb53bd
2 changed files with 12 additions and 0 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to **Pipecat** will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [Unreleased]
+
+### Added
+
+- Include cached tokens reported by OpenAI-based LLM services in `MetricsFrame`.
+
 ## [0.0.86] - 2025-09-24

 ### Added
--- a/src/pipecat/services/openai/base_llm.py
+++ b/src/pipecat/services/openai/base_llm.py
@@ -337,10 +337,16 @@ class BaseOpenAILLMService(LLMService):

        async for chunk in chunk_stream:
            if chunk.usage:
+                cached_tokens = (
+                    chunk.usage.prompt_tokens_details.cached_tokens
+                    if chunk.usage.prompt_tokens_details
+                    else None
+                )
                tokens = LLMTokenUsage(
                    prompt_tokens=chunk.usage.prompt_tokens,
                    completion_tokens=chunk.usage.completion_tokens,
                    total_tokens=chunk.usage.total_tokens,
+                    cache_read_input_tokens=cached_tokens,
                )
                await self.start_llm_usage_metrics(tokens)