# Streaming Example

Learn how to use streaming responses with the FastGPT Python SDK.

## Why Use Streaming?

Streaming allows you to:

- **Display real-time responses** - Show text as it's generated
- **Reduce perceived latency** - Users see content immediately
- **Better user experience** - More interactive and engaging

## Basic Streaming

```python
import json

from fastgpt_client import ChatClient

with ChatClient(api_key="fastgpt-xxxxx") as client:
    response = client.create_chat_completion(
        messages=[{"role": "user", "content": "Tell me a short story"}],
        stream=True
    )

    print("Story: ", end="", flush=True)
    for line in response.iter_lines():
        if line.startswith("data:"):
            data = line[5:].strip()
            # Skip empty keep-alives and the "[DONE]" terminator
            if data and data != "[DONE]":
                chunk = json.loads(data)
                if "choices" in chunk and chunk["choices"]:
                    delta = chunk["choices"][0].get("delta", {})
                    content = delta.get("content", "")
                    if content:
                        print(content, end="", flush=True)
    print()
```

## Complete Streaming Example

```python
"""Streaming chat completion example."""

import json
import os

from dotenv import load_dotenv
from fastgpt_client import ChatClient

load_dotenv()

API_KEY = os.getenv("API_KEY")
BASE_URL = os.getenv("BASE_URL")


def stream_chat():
    """Simple streaming chat completion."""
    with ChatClient(api_key=API_KEY, base_url=BASE_URL) as client:
        response = client.create_chat_completion(
            messages=[{"role": "user", "content": "Tell me a short story about AI"}],
            stream=True
        )

        print("\n=== Streaming Response ===\n")
        for line in response.iter_lines():
            if line.startswith("data:"):
                data = line[5:].strip()
                if data and data != "[DONE]":
                    chunk = json.loads(data)
                    if "choices" in chunk and chunk["choices"]:
                        delta = chunk["choices"][0].get("delta", {})
                        content = delta.get("content", "")
                        if content:
                            print(content, end="", flush=True)
        print("\n")


def stream_with_progress():
    """Stream with progress indicator."""
    with ChatClient(api_key=API_KEY, base_url=BASE_URL) as client:
        response = client.create_chat_completion(
            messages=[{"role": "user", "content": "Explain quantum computing"}],
            stream=True
        )

        print("\n=== Streaming with Progress ===\n")
        # Count SSE chunks; a chunk may contain more than one token
        chunk_count = 0
        for line in response.iter_lines():
            if line.startswith("data:"):
                data = line[5:].strip()
                if data and data != "[DONE]":
                    chunk = json.loads(data)
                    if "choices" in chunk and chunk["choices"]:
                        delta = chunk["choices"][0].get("delta", {})
                        content = delta.get("content", "")
                        if content:
                            print(content, end="", flush=True)
                            chunk_count += 1
        print(f"\n\nTotal chunks received: {chunk_count}")


def stream_with_buffer():
    """Stream with word buffering (print complete words only)."""
    with ChatClient(api_key=API_KEY, base_url=BASE_URL) as client:
        response = client.create_chat_completion(
            messages=[{"role": "user", "content": "What is machine learning?"}],
            stream=True
        )

        print("\n=== Buffered Streaming ===\n")
        buffer = ""
        for line in response.iter_lines():
            if line.startswith("data:"):
                data = line[5:].strip()
                if data and data != "[DONE]":
                    chunk = json.loads(data)
                    if "choices" in chunk and chunk["choices"]:
                        delta = chunk["choices"][0].get("delta", {})
                        content = delta.get("content", "")
                        if content:
                            buffer += content
                            # Print complete words
                            if " " in buffer:
                                parts = buffer.split(" ", 1)
                                print(parts[0] + " ", end="", flush=True)
                                buffer = parts[1] if len(parts) > 1 else ""
        # Print remaining content
        if buffer:
            print(buffer)
        print()


if __name__ == "__main__":
    try:
        stream_chat()
    except Exception as e:
        print(f"Error: {e}")

    try:
        stream_with_progress()
    except Exception as e:
        print(f"Error: {e}")

    try:
        stream_with_buffer()
    except Exception as e:
        print(f"Error: {e}")
```
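
The SSE parsing loop above is repeated in every function. As a refactoring sketch (this helper is not part of the SDK), a small generator can factor it out, assuming the same `iter_lines()` response interface used throughout this page:

```python
import json


def iter_content(response):
    """Yield only the content deltas from an SSE streaming response (sketch)."""
    for line in response.iter_lines():
        if not line.startswith("data:"):
            continue
        data = line[5:].strip()
        if not data or data == "[DONE]":
            continue  # skip keep-alives and the terminator
        chunk = json.loads(data)
        choices = chunk.get("choices") or []
        if choices:
            content = choices[0].get("delta", {}).get("content", "")
            if content:
                yield content
```

Each loop body above then collapses to `for content in iter_content(response): print(content, end="", flush=True)`.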

## Async Streaming

```python
import asyncio
import json

from fastgpt_client import AsyncChatClient


async def stream_async():
    """Async streaming example."""
    async with AsyncChatClient(api_key="fastgpt-xxxxx") as client:
        response = await client.create_chat_completion(
            messages=[{"role": "user", "content": "Tell me about async/await"}],
            stream=True
        )

        async for line in response.aiter_lines():
            if line.startswith("data:"):
                data = line[5:].strip()
                if data and data != "[DONE]":
                    chunk = json.loads(data)
                    if "choices" in chunk and chunk["choices"]:
                        delta = chunk["choices"][0].get("delta", {})
                        content = delta.get("content", "")
                        if content:
                            print(content, end="", flush=True)


asyncio.run(stream_async())
```
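
If you need several completions at once, `asyncio.gather` can drive multiple streams concurrently. This is a sketch rather than documented SDK behavior: it assumes `AsyncChatClient` tolerates concurrent requests on one client (if not, open one client per task), and it collects each stream into a string so that the outputs do not interleave:

```python
import asyncio
import json

from fastgpt_client import AsyncChatClient


async def collect_stream(client, prompt):
    """Consume one streaming completion and return its full text."""
    response = await client.create_chat_completion(
        messages=[{"role": "user", "content": prompt}],
        stream=True
    )
    parts = []
    async for line in response.aiter_lines():
        if line.startswith("data:"):
            data = line[5:].strip()
            if data and data != "[DONE]":
                chunk = json.loads(data)
                if "choices" in chunk and chunk["choices"]:
                    content = chunk["choices"][0].get("delta", {}).get("content", "")
                    if content:
                        parts.append(content)
    return "".join(parts)


async def main():
    async with AsyncChatClient(api_key="fastgpt-xxxxx") as client:
        # Run both requests concurrently; results come back in order
        answers = await asyncio.gather(
            collect_stream(client, "What is SSE?"),
            collect_stream(client, "Name one use of streaming."),
        )
        for answer in answers:
            print(answer)
            print("---")


asyncio.run(main())
```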

## Streaming Event Types

FastGPT sends multiple Server-Sent Events (SSE) event types. The common ones are:

- `data` - Standard response chunks (OpenAI-compatible)
- `answer` - Main chat response content
- `fastAnswer` - Quick reply content
- `flowNodeStatus` - Workflow node status updates
- `interactive` - Interactive node prompts
- `error` - Error events

The sketch below shows one way to dispatch on these event types. For more details, see [Streaming Events](../advanced/streaming_events.md).
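
As a rough sketch of dispatching on these event types: an SSE event is an `event:` line followed by a `data:` line, and data arriving without an `event:` line defaults to the `message` type. The payload fields used for `flowNodeStatus` (`name`, `status`) are assumptions, not the documented schema; see the Streaming Events page for the actual fields.

```python
import json


def handle_events(response):
    """Dispatch on SSE event types from a raw line stream (sketch)."""
    event_type = "message"  # SSE default when no "event:" line precedes data
    for line in response.iter_lines():
        if line.startswith("event:"):
            event_type = line[6:].strip()
        elif line.startswith("data:"):
            data = line[5:].strip()
            if data and data != "[DONE]":
                payload = json.loads(data)
                if event_type in ("message", "answer", "fastAnswer"):
                    # Chat content in the OpenAI-compatible delta shape
                    choices = payload.get("choices") or []
                    if choices:
                        content = choices[0].get("delta", {}).get("content", "")
                        if content:
                            print(content, end="", flush=True)
                elif event_type == "flowNodeStatus":
                    # Field names here are assumptions; check the actual schema
                    print(f"\n[node] {payload.get('name')}: {payload.get('status')}")
                elif event_type == "error":
                    print(f"\n[error] {payload}")
            event_type = "message"  # each data line ends the current event
```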

## Best Practices

1. **Always flush output**: Use `flush=True` when printing
2. **Handle connection errors**: Streaming can fail mid-response
3. **Use context managers**: Ensures proper cleanup
4. **Buffer for better formatting**: Consider buffering for word boundaries

```python
import json

from fastgpt_client import ChatClient


def robust_stream():
    """Handle streaming errors gracefully."""
    try:
        # API_KEY as defined in the complete example above
        with ChatClient(api_key=API_KEY) as client:
            response = client.create_chat_completion(
                messages=[{"role": "user", "content": "Hello"}],
                stream=True
            )

            for line in response.iter_lines():
                if line:
                    try:
                        data = line[5:].strip() if line.startswith("data:") else line
                        if data and data != "[DONE]":
                            chunk = json.loads(data)
                            # Process chunk
                            if "choices" in chunk and chunk["choices"]:
                                delta = chunk["choices"][0].get("delta", {})
                                content = delta.get("content", "")
                                if content:
                                    print(content, end="", flush=True)
                    except json.JSONDecodeError:
                        # Skip non-JSON lines (e.g. "event:" markers)
                        continue

    except Exception as e:
        print(f"\nStreaming error: {e}")
```
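
Because a stream can fail mid-response (practice 2), wrapping the whole request in a retry loop with backoff is a reasonable complement. This is a sketch that reuses the `API_KEY` constant from the complete example; note that a retry restarts the response from the beginning rather than resuming it:

```python
import json
import time

from fastgpt_client import ChatClient


def stream_with_retries(max_retries=3):
    """Retry a dropped stream with exponential backoff (sketch)."""
    for attempt in range(1, max_retries + 1):
        try:
            with ChatClient(api_key=API_KEY) as client:
                response = client.create_chat_completion(
                    messages=[{"role": "user", "content": "Hello"}],
                    stream=True
                )
                for line in response.iter_lines():
                    if line.startswith("data:"):
                        data = line[5:].strip()
                        if data and data != "[DONE]":
                            chunk = json.loads(data)
                            if "choices" in chunk and chunk["choices"]:
                                content = chunk["choices"][0].get("delta", {}).get("content", "")
                                if content:
                                    print(content, end="", flush=True)
            return  # completed without error
        except Exception as e:
            print(f"\nAttempt {attempt} failed: {e}")
            if attempt < max_retries:
                time.sleep(2 ** attempt)  # back off: 2s, 4s, 8s, ...
```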

## See Also

- [Streaming Events](../advanced/streaming_events.md) - Advanced SSE event handling
- [Async Usage](async_usage.md) - Async streaming examples
- [Error Handling](../advanced/error_handling.md) - Robust error handling