Update agent flow and frontend chat experience

Xin Wang
2026-03-31 21:58:24 +08:00
parent e34e569de4
commit 0e2dafe440
13 changed files with 345 additions and 372 deletions

View File

@@ -1 +1,3 @@
-# GEMINI_API_KEY=
+# OPENAI_API_KEY=
+# OPENAI_BASE_URL=
+# TAVILY_API_KEY=
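
A minimal sketch of how these keys are consumed at startup, assuming python-dotenv (already a dependency here) and the fail-fast behavior of `require_env` in `graph.py`; `OPENAI_BASE_URL` stays optional and falls back to the library default:

```python
import os

from dotenv import load_dotenv

# Load OPENAI_API_KEY, OPENAI_BASE_URL, and TAVILY_API_KEY from .env.
load_dotenv()

# Fail fast on the required keys; OPENAI_BASE_URL may be unset.
for key in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
    if not os.getenv(key):
        raise ValueError(f"{key} is not set")
```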

View File

@@ -21,7 +21,7 @@ def main() -> None:
     )
     parser.add_argument(
         "--reasoning-model",
-        default="gemini-2.5-pro-preview-05-06",
+        default="openai/gpt-5.4",
         help="Model for the final answer",
     )
     args = parser.parse_args()

View File

@@ -11,13 +11,13 @@ requires-python = ">=3.11,<4.0"
 dependencies = [
     "langgraph>=0.2.6",
     "langchain>=0.3.19",
-    "langchain-google-genai",
+    "langchain-openai",
     "python-dotenv>=1.0.1",
     "langgraph-sdk>=0.1.57",
     "langgraph-cli",
     "langgraph-api",
     "fastapi",
-    "google-genai",
+    "tavily-python",
 ]

View File

@@ -9,26 +9,33 @@ class Configuration(BaseModel):
"""The configuration for the agent.""" """The configuration for the agent."""
query_generator_model: str = Field( query_generator_model: str = Field(
default="gemini-2.0-flash", default="openai/gpt-5.4",
metadata={ metadata={
"description": "The name of the language model to use for the agent's query generation." "description": "The name of the language model to use for the agent's query generation."
}, },
) )
reflection_model: str = Field( reflection_model: str = Field(
default="gemini-2.5-flash", default="openai/gpt-5.4",
metadata={ metadata={
"description": "The name of the language model to use for the agent's reflection." "description": "The name of the language model to use for the agent's reflection."
}, },
) )
answer_model: str = Field( answer_model: str = Field(
default="gemini-2.5-pro", default="openai/gpt-5.4",
metadata={ metadata={
"description": "The name of the language model to use for the agent's answer." "description": "The name of the language model to use for the agent's answer."
}, },
) )
reasoning_model: str = Field(
default="openai/gpt-5.4",
metadata={
"description": "Fallback model used when the client does not provide a supported reasoning model."
},
)
number_of_initial_queries: int = Field( number_of_initial_queries: int = Field(
default=3, default=3,
metadata={"description": "The number of initial search queries to generate."}, metadata={"description": "The number of initial search queries to generate."},

View File

@@ -1,91 +1,134 @@
 import os
-from agent.tools_and_schemas import SearchQueryList, Reflection
-from dotenv import load_dotenv
-from langchain_core.messages import AIMessage
-from langgraph.types import Send
-from langgraph.graph import StateGraph
-from langgraph.graph import START, END
-from langchain_core.runnables import RunnableConfig
-from google.genai import Client
+from typing import Any
+
+from agent.configuration import Configuration
+from agent.prompts import (
+    answer_instructions,
+    get_current_date,
+    query_writer_instructions,
+    reflection_instructions,
+)
 from agent.state import (
     OverallState,
     QueryGenerationState,
     ReflectionState,
     WebSearchState,
 )
-from agent.configuration import Configuration
-from agent.prompts import (
-    get_current_date,
-    query_writer_instructions,
-    web_searcher_instructions,
-    reflection_instructions,
-    answer_instructions,
-)
-from langchain_google_genai import ChatGoogleGenerativeAI
+from agent.tools_and_schemas import Reflection, SearchQueryList
 from agent.utils import (
-    get_citations,
+    format_sources_for_prompt,
     get_research_topic,
-    insert_citation_markers,
-    resolve_urls,
+    normalize_model_name,
+    normalize_tavily_sources,
+    shorten_search_query,
 )
+from dotenv import load_dotenv
+from langchain_core.messages import AIMessage
+from langchain_core.runnables import RunnableConfig
+from langchain_openai import ChatOpenAI
+from langgraph.graph import END, START, StateGraph
+from langgraph.types import Send
+from tavily import TavilyClient

 load_dotenv()

-if os.getenv("GEMINI_API_KEY") is None:
-    raise ValueError("GEMINI_API_KEY is not set")
-
-# Used for Google Search API
-genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))
+TAVILY_TOPIC = "general"
+TAVILY_SEARCH_DEPTH = "advanced"
+TAVILY_MAX_RESULTS = 5
+TAVILY_CHUNKS_PER_SOURCE = 3
+
+
+def require_env(name: str) -> str:
+    """Return an environment variable or raise a clear error."""
+    value = os.getenv(name)
+    if not value:
+        raise ValueError(f"{name} is not set")
+    return value
+
+
+def get_chat_model(model_name: str, temperature: float = 0) -> ChatOpenAI:
+    """Create the OpenAI chat model lazily for easier testing."""
+    base_url = os.getenv("OPENAI_BASE_URL")
+    return ChatOpenAI(
+        model=normalize_model_name(model_name),
+        api_key=require_env("OPENAI_API_KEY"),
+        base_url=base_url or None,
+        temperature=temperature,
+    )
+
+
+def get_tavily_client() -> TavilyClient:
+    """Create the Tavily client lazily for easier testing."""
+    return TavilyClient(api_key=require_env("TAVILY_API_KEY"))
+
+
+def create_structured_response(
+    prompt: str,
+    model_name: str,
+    schema_model: type[Any],
+    temperature: float = 1,
+) -> Any:
+    """Call the LangChain OpenAI chat model and parse structured output."""
+    llm = get_chat_model(model_name, temperature=temperature)
+    return llm.with_structured_output(
+        schema_model,
+        method="function_calling",
+    ).invoke(prompt)
+
+
+def extract_text_content(content: Any) -> str:
+    """Normalize LangChain message content into a string."""
+    if isinstance(content, str):
+        return content.strip()
+    if isinstance(content, list):
+        text_chunks = []
+        for chunk in content:
+            if isinstance(chunk, dict) and chunk.get("type") == "text":
+                text_chunks.append(str(chunk.get("text", "")))
+        return "\n".join(text_chunks).strip()
+    return str(content).strip()
+
+
+def create_text_response(prompt: str, model_name: str, temperature: float = 0) -> str:
+    """Call the LangChain OpenAI chat model and return markdown output."""
+    response = get_chat_model(model_name, temperature=temperature).invoke(prompt)
+    text_output = extract_text_content(response.content)
+    if not text_output:
+        raise ValueError("OpenAI response did not include text output")
+    return text_output


 # Nodes
 def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
-    """LangGraph node that generates search queries based on the User's question.
-
-    Uses Gemini 2.0 Flash to create an optimized search queries for web research based on
-    the User's question.
-
-    Args:
-        state: Current graph state containing the User's question
-        config: Configuration for the runnable, including LLM provider settings
-
-    Returns:
-        Dictionary with state update, including search_query key containing the generated queries
-    """
+    """Generate initial search queries from the user's request."""
     configurable = Configuration.from_runnable_config(config)
+    reasoning_model = normalize_model_name(
+        state.get("reasoning_model"),
+        configurable.reasoning_model or configurable.query_generator_model,
+    )

-    # check for custom initial search query count
     if state.get("initial_search_query_count") is None:
         state["initial_search_query_count"] = configurable.number_of_initial_queries

-    # init Gemini 2.0 Flash
-    llm = ChatGoogleGenerativeAI(
-        model=configurable.query_generator_model,
-        temperature=1.0,
-        max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
-    )
-    structured_llm = llm.with_structured_output(SearchQueryList)
-
-    # Format the prompt
-    current_date = get_current_date()
     formatted_prompt = query_writer_instructions.format(
-        current_date=current_date,
+        current_date=get_current_date(),
         research_topic=get_research_topic(state["messages"]),
         number_queries=state["initial_search_query_count"],
     )
-    # Generate the search queries
-    result = structured_llm.invoke(formatted_prompt)
+    result = create_structured_response(
+        formatted_prompt,
+        reasoning_model,
+        SearchQueryList,
+        temperature=1,
+    )
     return {"search_query": result.query}


-def continue_to_web_research(state: QueryGenerationState):
-    """LangGraph node that sends the search queries to the web research node.
-
-    This is used to spawn n number of web research nodes, one for each search query.
-    """
+def continue_to_web_research(state: QueryGenerationState) -> list[Send]:
+    """Fan out search queries into parallel web research nodes."""
     return [
         Send("web_research", {"search_query": search_query, "id": int(idx)})
         for idx, search_query in enumerate(state["search_query"])
@@ -93,83 +136,63 @@ def continue_to_web_research(state: QueryGenerationState):
 def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
-    """LangGraph node that performs web research using the native Google Search API tool.
-
-    Executes a web search using the native Google Search API tool in combination with Gemini 2.0 Flash.
-
-    Args:
-        state: Current graph state containing the search query and research loop count
-        config: Configuration for the runnable, including search API settings
-
-    Returns:
-        Dictionary with state update, including sources_gathered, research_loop_count, and web_research_results
-    """
-    # Configure
-    configurable = Configuration.from_runnable_config(config)
-    formatted_prompt = web_searcher_instructions.format(
-        current_date=get_current_date(),
-        research_topic=state["search_query"],
-    )
-
-    # Uses the google genai client as the langchain client doesn't return grounding metadata
-    response = genai_client.models.generate_content(
-        model=configurable.query_generator_model,
-        contents=formatted_prompt,
-        config={
-            "tools": [{"google_search": {}}],
-            "temperature": 0,
-        },
-    )
-    # resolve the urls to short urls for saving tokens and time
-    resolved_urls = resolve_urls(
-        response.candidates[0].grounding_metadata.grounding_chunks, state["id"]
-    )
-    # Gets the citations and adds them to the generated text
-    citations = get_citations(response, resolved_urls)
-    modified_text = insert_citation_markers(response.text, citations)
-    sources_gathered = [item for citation in citations for item in citation["segments"]]
+    """Execute Tavily search and return raw evidence for one query."""
+    search_query = shorten_search_query(state["search_query"])
+    tavily_response = get_tavily_client().search(
+        query=search_query,
+        topic=TAVILY_TOPIC,
+        search_depth=TAVILY_SEARCH_DEPTH,
+        chunks_per_source=TAVILY_CHUNKS_PER_SOURCE,
+        max_results=TAVILY_MAX_RESULTS,
+        include_answer=False,
+        include_raw_content=False,
+    )
+    sources = normalize_tavily_sources(tavily_response.get("results", []))
+    if not sources:
+        return {
+            "sources_gathered": [],
+            "search_query": [search_query],
+            "web_research_result": [
+                f'No Tavily results were returned for "{search_query}".'
+            ],
+        }
+
+    evidence = "\n\n".join(
+        [
+            f"Search Query: {search_query}",
+            "Source Evidence:",
+            format_sources_for_prompt(sources),
+        ]
+    )

     return {
-        "sources_gathered": sources_gathered,
-        "search_query": [state["search_query"]],
-        "web_research_result": [modified_text],
+        "sources_gathered": sources,
+        "search_query": [search_query],
+        "web_research_result": [evidence],
     }


 def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
-    """LangGraph node that identifies knowledge gaps and generates potential follow-up queries.
-
-    Analyzes the current summary to identify areas for further research and generates
-    potential follow-up queries. Uses structured output to extract
-    the follow-up query in JSON format.
-
-    Args:
-        state: Current graph state containing the running summary and research topic
-        config: Configuration for the runnable, including LLM provider settings
-
-    Returns:
-        Dictionary with state update, including search_query key containing the generated follow-up query
-    """
+    """Decide whether additional research is needed."""
     configurable = Configuration.from_runnable_config(config)
-    # Increment the research loop count and get the reasoning model
     state["research_loop_count"] = state.get("research_loop_count", 0) + 1
-    reasoning_model = state.get("reasoning_model", configurable.reflection_model)
+    reasoning_model = normalize_model_name(
+        state.get("reasoning_model"),
+        configurable.reasoning_model,
+    )

-    # Format the prompt
-    current_date = get_current_date()
     formatted_prompt = reflection_instructions.format(
-        current_date=current_date,
+        current_date=get_current_date(),
         research_topic=get_research_topic(state["messages"]),
         summaries="\n\n---\n\n".join(state["web_research_result"]),
     )
-    # init Reasoning Model
-    llm = ChatGoogleGenerativeAI(
-        model=reasoning_model,
-        temperature=1.0,
-        max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
-    )
-    result = llm.with_structured_output(Reflection).invoke(formatted_prompt)
+    result = create_structured_response(
+        formatted_prompt,
+        reasoning_model,
+        Reflection,
+        temperature=1,
+    )

     return {
         "is_sufficient": result.is_sufficient,
@@ -183,19 +206,8 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
 def evaluate_research(
     state: ReflectionState,
     config: RunnableConfig,
-) -> OverallState:
-    """LangGraph routing function that determines the next step in the research flow.
-
-    Controls the research loop by deciding whether to continue gathering information
-    or to finalize the summary based on the configured maximum number of research loops.
-
-    Args:
-        state: Current graph state containing the research loop count
-        config: Configuration for the runnable, including max_research_loops setting
-
-    Returns:
-        String literal indicating the next node to visit ("web_research" or "finalize_summary")
-    """
+) -> OverallState | list[Send]:
+    """Route either back into research or into final answer synthesis."""
     configurable = Configuration.from_runnable_config(config)
     max_research_loops = (
         state.get("max_research_loops")
@@ -204,90 +216,53 @@ def evaluate_research(
     )
     if state["is_sufficient"] or state["research_loop_count"] >= max_research_loops:
         return "finalize_answer"
-    else:
-        return [
-            Send(
-                "web_research",
-                {
-                    "search_query": follow_up_query,
-                    "id": state["number_of_ran_queries"] + int(idx),
-                },
-            )
-            for idx, follow_up_query in enumerate(state["follow_up_queries"])
-        ]
+    return [
+        Send(
+            "web_research",
+            {
+                "search_query": follow_up_query,
+                "id": state["number_of_ran_queries"] + int(idx),
+            },
+        )
+        for idx, follow_up_query in enumerate(state["follow_up_queries"])
+    ]


-def finalize_answer(state: OverallState, config: RunnableConfig):
-    """LangGraph node that finalizes the research summary.
-
-    Prepares the final output by deduplicating and formatting sources, then
-    combining them with the running summary to create a well-structured
-    research report with proper citations.
-
-    Args:
-        state: Current graph state containing the running summary and sources gathered
-
-    Returns:
-        Dictionary with state update, including running_summary key containing the formatted final summary with sources
-    """
+def finalize_answer(state: OverallState, config: RunnableConfig) -> OverallState:
+    """Generate the final cited answer from the accumulated research summaries."""
     configurable = Configuration.from_runnable_config(config)
-    reasoning_model = state.get("reasoning_model") or configurable.answer_model
+    reasoning_model = normalize_model_name(
+        state.get("reasoning_model"),
+        configurable.reasoning_model or configurable.answer_model,
+    )

-    # Format the prompt
-    current_date = get_current_date()
     formatted_prompt = answer_instructions.format(
-        current_date=current_date,
+        current_date=get_current_date(),
         research_topic=get_research_topic(state["messages"]),
         summaries="\n---\n\n".join(state["web_research_result"]),
     )
+    answer = create_text_response(formatted_prompt, reasoning_model, temperature=0)

-    # init Reasoning Model, default to Gemini 2.5 Flash
-    llm = ChatGoogleGenerativeAI(
-        model=reasoning_model,
-        temperature=0,
-        max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
-    )
-    result = llm.invoke(formatted_prompt)
-
-    # Replace the short urls with the original urls and add all used urls to the sources_gathered
-    unique_sources = []
-    for source in state["sources_gathered"]:
-        if source["short_url"] in result.content:
-            result.content = result.content.replace(
-                source["short_url"], source["value"]
-            )
-            unique_sources.append(source)

     return {
-        "messages": [AIMessage(content=result.content)],
-        "sources_gathered": unique_sources,
+        "messages": [AIMessage(content=answer)],
+        "sources_gathered": state.get("sources_gathered", []),
     }


-# Create our Agent Graph
 builder = StateGraph(OverallState, config_schema=Configuration)

-# Define the nodes we will cycle between
 builder.add_node("generate_query", generate_query)
 builder.add_node("web_research", web_research)
 builder.add_node("reflection", reflection)
 builder.add_node("finalize_answer", finalize_answer)

-# Set the entrypoint as `generate_query`
-# This means that this node is the first one called
 builder.add_edge(START, "generate_query")
-# Add conditional edge to continue with search queries in a parallel branch
 builder.add_conditional_edges(
     "generate_query", continue_to_web_research, ["web_research"]
 )
-# Reflect on the web research
 builder.add_edge("web_research", "reflection")
-# Evaluate the research
 builder.add_conditional_edges(
     "reflection", evaluate_research, ["web_research", "finalize_answer"]
 )
-# Finalize the answer
 builder.add_edge("finalize_answer", END)

 graph = builder.compile(name="pro-search-agent")
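
A sketch of a one-shot invocation of the compiled graph; the input keys mirror the state fields the nodes above read, and the question is illustrative:

```python
from langchain_core.messages import HumanMessage

from agent.graph import graph

final_state = graph.invoke(
    {
        "messages": [HumanMessage(content="How does LangGraph handle parallel fan-out?")],
        "initial_search_query_count": 3,      # read by generate_query
        "max_research_loops": 2,              # read by evaluate_research
        "reasoning_model": "openai/gpt-5.4",  # normalized via normalize_model_name
    }
)

# finalize_answer places the cited answer in the last AIMessage.
print(final_state["messages"][-1].content)
```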

View File

@@ -15,6 +15,7 @@ Instructions:
 - Queries should be diverse, if the topic is broad, generate more than 1 query.
 - Don't generate multiple similar queries, 1 is enough.
 - Query should ensure that the most current information is gathered. The current date is {current_date}.
+- Keep every individual query under 400 characters. Prefer concise search terms over full-sentence restatements.

 Format:
 - Format your response as a JSON object with ALL two of these exact keys:
@@ -34,26 +35,31 @@ Topic: What revenue grew more last year apple stock or the number of people buyi
 Context: {research_topic}"""

-web_searcher_instructions = """Conduct targeted Google Searches to gather the most recent, credible information on "{research_topic}" and synthesize it into a verifiable text artifact.
+web_searcher_instructions = """Review the provided Tavily search results for "{research_topic}" and synthesize them into a verifiable research note.

 Instructions:
 - Query should ensure that the most current information is gathered. The current date is {current_date}.
-- Conduct multiple, diverse searches to gather comprehensive information.
-- Consolidate key findings while meticulously tracking the source(s) for each specific piece of information.
-- The output should be a well-written summary or report based on your search findings.
-- Only include the information found in the search results, don't make up any information.
+- Use only the provided Tavily search results. Do not invent or infer facts that are not supported by those results.
+- Consolidate the key findings into a concise research note for this query.
+- Every factual paragraph or bullet must include at least one markdown citation using the exact title and URL from the provided sources, for example: [Reuters](https://www.reuters.com/example).
+- Preserve full URLs in citations. Do not use placeholders, IDs, or shortened URLs.
+- If the results are insufficient, explicitly say what is missing and cite the closest available sources.

 Research Topic:
 {research_topic}
+
+Search Results:
+{search_results}
 """

-reflection_instructions = """You are an expert research assistant analyzing summaries about "{research_topic}".
+reflection_instructions = """You are an expert research assistant analyzing collected research evidence about "{research_topic}".

 Instructions:
 - Identify knowledge gaps or areas that need deeper exploration and generate a follow-up query. (1 or multiple).
-- If provided summaries are sufficient to answer the user's question, don't generate a follow-up query.
+- If the provided evidence is sufficient to answer the user's question, don't generate a follow-up query.
 - If there is a knowledge gap, generate a follow-up query that would help expand your understanding.
 - Focus on technical details, implementation specifics, or emerging trends that weren't fully covered.
+- Keep every follow-up query under 400 characters. Prefer compact search phrases over long natural-language questions.

 Requirements:
 - Ensure the follow-up query is self-contained and includes necessary context for web search.
@@ -73,24 +79,26 @@ Example:
 }}
 ```

-Reflect carefully on the Summaries to identify knowledge gaps and produce a follow-up query. Then, produce your output following this JSON format:
+Reflect carefully on the Research Evidence to identify knowledge gaps and produce a follow-up query. Then, produce your output following this JSON format:

-Summaries:
+Research Evidence:
 {summaries}
 """

-answer_instructions = """Generate a high-quality answer to the user's question based on the provided summaries.
+answer_instructions = """Generate a high-quality answer to the user's question based on the provided research evidence.

 Instructions:
 - The current date is {current_date}.
 - You are the final step of a multi-step research process, don't mention that you are the final step.
 - You have access to all the information gathered from the previous steps.
 - You have access to the user's question.
-- Generate a high-quality answer to the user's question based on the provided summaries and the user's question.
-- Include the sources you used from the Summaries in the answer correctly, use markdown format (e.g. [apnews](https://vertexaisearch.cloud.google.com/id/1-0)). THIS IS A MUST.
+- Generate a high-quality answer to the user's question based on the provided research evidence and the user's question.
+- Use only the evidence present in the Research Evidence.
+- Preserve or reuse source citations from the Research Evidence as full markdown links. Every factual paragraph should include at least one citation.
+- Do not invent sources, placeholders, or shortened URLs.

 User Context:
 - {research_topic}

-Summaries:
+Research Evidence:
 {summaries}"""
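
These templates are plain `str.format` strings, so every placeholder must be supplied by the caller; a sketch of how `generate_query` fills the first one (the topic is illustrative):

```python
from agent.prompts import get_current_date, query_writer_instructions

prompt = query_writer_instructions.format(
    current_date=get_current_date(),
    research_topic="impact of the EU AI Act on open-source model releases",
    number_queries=3,
)
# Note: the new {search_results} placeholder in web_searcher_instructions means
# any caller of that template must also pass the formatted Tavily evidence.
```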

View File

@@ -29,13 +29,8 @@ class ReflectionState(TypedDict):
     number_of_ran_queries: int


-class Query(TypedDict):
-    query: str
-    rationale: str
-
-
 class QueryGenerationState(TypedDict):
-    search_query: list[Query]
+    search_query: list[str]


 class WebSearchState(TypedDict):

View File

@@ -1,166 +1,106 @@
-from typing import Any, Dict, List
-from langchain_core.messages import AnyMessage, AIMessage, HumanMessage
+from __future__ import annotations
+
+import re
+from typing import Any, List
+
+from langchain_core.messages import AIMessage, AnyMessage, HumanMessage
+
+DEFAULT_REASONING_MODEL = "openai/gpt-5.4"
+SUPPORTED_MODEL_PREFIXES = ("gpt-", "deepseek-", "glm-")
+MAX_TAVILY_QUERY_LENGTH = 380


 def get_research_topic(messages: List[AnyMessage]) -> str:
-    """
-    Get the research topic from the messages.
-    """
-    # check if request has a history and combine the messages into a single string
+    """Get the research topic from the messages."""
     if len(messages) == 1:
-        research_topic = messages[-1].content
-    else:
-        research_topic = ""
-        for message in messages:
-            if isinstance(message, HumanMessage):
-                research_topic += f"User: {message.content}\n"
-            elif isinstance(message, AIMessage):
-                research_topic += f"Assistant: {message.content}\n"
+        return str(messages[-1].content)
+
+    research_topic = ""
+    for message in messages:
+        if isinstance(message, HumanMessage):
+            research_topic += f"User: {message.content}\n"
+        elif isinstance(message, AIMessage):
+            research_topic += f"Assistant: {message.content}\n"
     return research_topic


-def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
-    """
-    Create a map of the vertex ai search urls (very long) to a short url with a unique id for each url.
-    Ensures each original URL gets a consistent shortened form while maintaining uniqueness.
-    """
-    prefix = f"https://vertexaisearch.cloud.google.com/id/"
-    urls = [site.web.uri for site in urls_to_resolve]
-
-    # Create a dictionary that maps each unique URL to its first occurrence index
-    resolved_map = {}
-    for idx, url in enumerate(urls):
-        if url not in resolved_map:
-            resolved_map[url] = f"{prefix}{id}-{idx}"
-
-    return resolved_map
+def normalize_model_name(
+    model_name: str | None,
+    fallback: str = DEFAULT_REASONING_MODEL,
+) -> str:
+    """Normalize stale or unsupported model names to a supported default."""
+    candidate = (model_name or "").strip()
+    if candidate.startswith(SUPPORTED_MODEL_PREFIXES):
+        return candidate
+
+    provider, _, model = candidate.partition("/")
+    if provider and model.startswith(SUPPORTED_MODEL_PREFIXES):
+        return candidate
+
+    return fallback
+
+
+def _normalize_source_title(source: dict[str, Any], index: int) -> str:
+    title = (source.get("title") or "").strip()
+    return title or f"Source {index}"


-def insert_citation_markers(text, citations_list):
-    """
-    Inserts citation markers into a text string based on start and end indices.
-
-    Args:
-        text (str): The original text string.
-        citations_list (list): A list of dictionaries, where each dictionary
-                               contains 'start_index', 'end_index', and
-                               'segment_string' (the marker to insert).
-                               Indices are assumed to be for the original text.
-
-    Returns:
-        str: The text with citation markers inserted.
-    """
-    # Sort citations by end_index in descending order.
-    # If end_index is the same, secondary sort by start_index descending.
-    # This ensures that insertions at the end of the string don't affect
-    # the indices of earlier parts of the string that still need to be processed.
-    sorted_citations = sorted(
-        citations_list, key=lambda c: (c["end_index"], c["start_index"]), reverse=True
-    )
-
-    modified_text = text
-    for citation_info in sorted_citations:
-        # These indices refer to positions in the *original* text,
-        # but since we iterate from the end, they remain valid for insertion
-        # relative to the parts of the string already processed.
-        end_idx = citation_info["end_index"]
-        marker_to_insert = ""
-        for segment in citation_info["segments"]:
-            marker_to_insert += f" [{segment['label']}]({segment['short_url']})"
-        # Insert the citation marker at the original end_idx position
-        modified_text = (
-            modified_text[:end_idx] + marker_to_insert + modified_text[end_idx:]
-        )
-
-    return modified_text
+def normalize_tavily_sources(results: list[dict[str, Any]]) -> list[dict[str, str]]:
+    """Normalize Tavily results into a stable source shape."""
+    normalized_sources: list[dict[str, str]] = []
+    seen_urls: set[str] = set()
+
+    for index, result in enumerate(results, start=1):
+        url = (result.get("url") or "").strip()
+        if not url or url in seen_urls:
+            continue
+
+        normalized_sources.append(
+            {
+                "label": f"S{len(normalized_sources) + 1}",
+                "title": _normalize_source_title(result, index),
+                "value": url,
+                "content": (result.get("content") or "").strip(),
+            }
+        )
+        seen_urls.add(url)
+
+    return normalized_sources
+
+
+def format_sources_for_prompt(sources: list[dict[str, str]]) -> str:
+    """Format normalized search results for prompt injection."""
+    if not sources:
+        return "No search results were returned."
+
+    formatted_sources = []
+    for source in sources:
+        snippet = source.get("content") or "No snippet available."
+        formatted_sources.append(
+            "\n".join(
+                [
+                    f"{source['label']}: {source['title']}",
+                    f"URL: {source['value']}",
+                    f"Snippet: {snippet}",
+                ]
+            )
+        )
+
+    return "\n\n".join(formatted_sources)


-def get_citations(response, resolved_urls_map):
-    """
-    Extracts and formats citation information from a Gemini model's response.
-
-    This function processes the grounding metadata provided in the response to
-    construct a list of citation objects. Each citation object includes the
-    start and end indices of the text segment it refers to, and a string
-    containing formatted markdown links to the supporting web chunks.
-
-    Args:
-        response: The response object from the Gemini model, expected to have
-                  a structure including `candidates[0].grounding_metadata`.
-                  It also relies on a `resolved_map` being available in its
-                  scope to map chunk URIs to resolved URLs.
-
-    Returns:
-        list: A list of dictionaries, where each dictionary represents a citation
-              and has the following keys:
-              - "start_index" (int): The starting character index of the cited
-                                     segment in the original text. Defaults to 0
-                                     if not specified.
-              - "end_index" (int): The character index immediately after the
-                                   end of the cited segment (exclusive).
-              - "segments" (list[str]): A list of individual markdown-formatted
-                                        links for each grounding chunk.
-              - "segment_string" (str): A concatenated string of all markdown-
-                                        formatted links for the citation.
-              Returns an empty list if no valid candidates or grounding supports
-              are found, or if essential data is missing.
-    """
-    citations = []
-
-    # Ensure response and necessary nested structures are present
-    if not response or not response.candidates:
-        return citations
-
-    candidate = response.candidates[0]
-    if (
-        not hasattr(candidate, "grounding_metadata")
-        or not candidate.grounding_metadata
-        or not hasattr(candidate.grounding_metadata, "grounding_supports")
-    ):
-        return citations
-
-    for support in candidate.grounding_metadata.grounding_supports:
-        citation = {}
-
-        # Ensure segment information is present
-        if not hasattr(support, "segment") or support.segment is None:
-            continue  # Skip this support if segment info is missing
-
-        start_index = (
-            support.segment.start_index
-            if support.segment.start_index is not None
-            else 0
-        )
-
-        # Ensure end_index is present to form a valid segment
-        if support.segment.end_index is None:
-            continue  # Skip if end_index is missing, as it's crucial
-
-        # Add 1 to end_index to make it an exclusive end for slicing/range purposes
-        # (assuming the API provides an inclusive end_index)
-        citation["start_index"] = start_index
-        citation["end_index"] = support.segment.end_index
-
-        citation["segments"] = []
-        if (
-            hasattr(support, "grounding_chunk_indices")
-            and support.grounding_chunk_indices
-        ):
-            for ind in support.grounding_chunk_indices:
-                try:
-                    chunk = candidate.grounding_metadata.grounding_chunks[ind]
-                    resolved_url = resolved_urls_map.get(chunk.web.uri, None)
-                    citation["segments"].append(
-                        {
-                            "label": chunk.web.title.split(".")[:-1][0],
-                            "short_url": resolved_url,
-                            "value": chunk.web.uri,
-                        }
-                    )
-                except (IndexError, AttributeError, NameError):
-                    # Handle cases where chunk, web, uri, or resolved_map might be problematic
-                    # For simplicity, we'll just skip adding this particular segment link
-                    # In a production system, you might want to log this.
-                    pass
-
-        citations.append(citation)
-
-    return citations
+def shorten_search_query(
+    query: str,
+    max_length: int = MAX_TAVILY_QUERY_LENGTH,
+) -> str:
+    """Normalize and trim search queries to fit Tavily's length limit."""
+    normalized_query = re.sub(r"\s+", " ", query).strip()
+    if len(normalized_query) <= max_length:
+        return normalized_query
+
+    truncated_query = normalized_query[:max_length]
+    last_space = truncated_query.rfind(" ")
+    if last_space > max_length // 2:
+        truncated_query = truncated_query[:last_space]
+
+    return truncated_query.rstrip(" ,;:-")
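
A quick behavioral sketch of the new helpers, using the constants defined above; the assertions follow directly from the code:

```python
from agent.utils import normalize_model_name, shorten_search_query

# Bare and provider-prefixed supported names pass through unchanged.
assert normalize_model_name("gpt-5.4") == "gpt-5.4"
assert normalize_model_name("deepseek-v3") == "deepseek-v3"
assert normalize_model_name("openai/gpt-5.4") == "openai/gpt-5.4"

# Stale Gemini names (and None) fall back to the default reasoning model.
assert normalize_model_name("gemini-2.5-pro") == "openai/gpt-5.4"
assert normalize_model_name(None) == "openai/gpt-5.4"

# Over-long queries are whitespace-collapsed and trimmed under 380 characters,
# preferring a word boundary.
long_query = "langgraph   fan-out " * 40
assert len(shorten_search_query(long_query)) <= 380
```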

View File

@@ -38,7 +38,9 @@ services:
       langgraph-postgres:
         condition: service_healthy
     environment:
-      GEMINI_API_KEY: ${GEMINI_API_KEY}
+      OPENAI_API_KEY: ${OPENAI_API_KEY}
+      OPENAI_BASE_URL: ${OPENAI_BASE_URL}
+      TAVILY_API_KEY: ${TAVILY_API_KEY}
       LANGSMITH_API_KEY: ${LANGSMITH_API_KEY}
       REDIS_URI: redis://langgraph-redis:6379
       POSTGRES_URI: postgres://postgres:postgres@langgraph-postgres:5432/postgres?sslmode=disable

View File

@@ -6,10 +6,27 @@ import { WelcomeScreen } from "@/components/WelcomeScreen";
 import { ChatMessagesView } from "@/components/ChatMessagesView";
 import { Button } from "@/components/ui/button";

+const getSourceDisplayName = (source: any): string | null => {
+  if (source?.title) {
+    return source.title;
+  }
+  if (source?.value) {
+    try {
+      return new URL(source.value).hostname.replace(/^www\./, "");
+    } catch {
+      return null;
+    }
+  }
+  return source?.label || null;
+};
+
 export default function App() {
   const [processedEventsTimeline, setProcessedEventsTimeline] = useState<
     ProcessedEvent[]
   >([]);
+  const [selectedModel, setSelectedModel] = useState("openai/gpt-5.4");
   const [historicalActivities, setHistoricalActivities] = useState<
     Record<string, ProcessedEvent[]>
   >({});
@@ -38,7 +55,7 @@ export default function App() {
       const sources = event.web_research.sources_gathered || [];
       const numSources = sources.length;
       const uniqueLabels = [
-        ...new Set(sources.map((s: any) => s.label).filter(Boolean)),
+        ...new Set(sources.map(getSourceDisplayName).filter(Boolean)),
       ];
       const exampleLabels = uniqueLabels.slice(0, 3).join(", ");
       processedEvent = {
@@ -157,6 +174,8 @@ export default function App() {
             handleSubmit={handleSubmit}
             isLoading={thread.isLoading}
             onCancel={handleCancel}
+            model={selectedModel}
+            onModelChange={setSelectedModel}
           />
         ) : error ? (
           <div className="flex flex-col items-center justify-center h-full">
@@ -181,6 +200,8 @@ export default function App() {
             onCancel={handleCancel}
             liveActivityEvents={processedEventsTimeline}
             historicalActivities={historicalActivities}
+            model={selectedModel}
+            onModelChange={setSelectedModel}
           />
         )}
       </main>

View File

@@ -230,6 +230,8 @@ interface ChatMessagesViewProps {
   onCancel: () => void;
   liveActivityEvents: ProcessedEvent[];
   historicalActivities: Record<string, ProcessedEvent[]>;
+  model: string;
+  onModelChange: (model: string) => void;
 }

 export function ChatMessagesView({
@@ -240,6 +242,8 @@ export function ChatMessagesView({
   onCancel,
   liveActivityEvents,
   historicalActivities,
+  model,
+  onModelChange,
 }: ChatMessagesViewProps) {
   const [copiedMessageId, setCopiedMessageId] = useState<string | null>(null);
@@ -316,6 +320,8 @@ export function ChatMessagesView({
           isLoading={isLoading}
           onCancel={onCancel}
           hasHistory={messages.length > 0}
+          model={model}
+          onModelChange={onModelChange}
         />
       </div>
   );

View File

@@ -16,6 +16,8 @@ interface InputFormProps {
   onCancel: () => void;
   isLoading: boolean;
   hasHistory: boolean;
+  model: string;
+  onModelChange: (model: string) => void;
 }

 export const InputForm: React.FC<InputFormProps> = ({
@@ -23,10 +25,11 @@ export const InputForm: React.FC<InputFormProps> = ({
   onCancel,
   isLoading,
   hasHistory,
+  model,
+  onModelChange,
 }) => {
   const [internalInputValue, setInternalInputValue] = useState("");
   const [effort, setEffort] = useState("medium");
-  const [model, setModel] = useState("gemini-2.5-flash-preview-04-17");

   const handleInternalSubmit = (e?: React.FormEvent) => {
     if (e) e.preventDefault();
@@ -130,33 +133,41 @@ export const InputForm: React.FC<InputFormProps> = ({
               <Cpu className="h-4 w-4 mr-2" />
               Model
             </div>
-            <Select value={model} onValueChange={setModel}>
+            <Select value={model} onValueChange={onModelChange}>
               <SelectTrigger className="w-[150px] bg-transparent border-none cursor-pointer">
                 <SelectValue placeholder="Model" />
               </SelectTrigger>
               <SelectContent className="bg-neutral-700 border-neutral-600 text-neutral-300 cursor-pointer">
                 <SelectItem
-                  value="gemini-2.0-flash"
+                  value="openai/gpt-5.4"
                   className="hover:bg-neutral-600 focus:bg-neutral-600 cursor-pointer"
                 >
                   <div className="flex items-center">
-                    <Zap className="h-4 w-4 mr-2 text-yellow-400" /> 2.0 Flash
+                    <Zap className="h-4 w-4 mr-2 text-emerald-400" /> openai/gpt-5.4
                   </div>
                 </SelectItem>
                 <SelectItem
-                  value="gemini-2.5-flash-preview-04-17"
+                  value="deepseek-v3"
                   className="hover:bg-neutral-600 focus:bg-neutral-600 cursor-pointer"
                 >
                   <div className="flex items-center">
-                    <Zap className="h-4 w-4 mr-2 text-orange-400" /> 2.5 Flash
+                    <Cpu className="h-4 w-4 mr-2 text-cyan-400" /> deepseek-v3
                   </div>
                 </SelectItem>
                 <SelectItem
-                  value="gemini-2.5-pro-preview-05-06"
+                  value="glm-4.5-air"
                   className="hover:bg-neutral-600 focus:bg-neutral-600 cursor-pointer"
                 >
                   <div className="flex items-center">
-                    <Cpu className="h-4 w-4 mr-2 text-purple-400" /> 2.5 Pro
+                    <Cpu className="h-4 w-4 mr-2 text-sky-400" /> glm-4.5-air
+                  </div>
+                </SelectItem>
+                <SelectItem
+                  value="glm-4.7"
+                  className="hover:bg-neutral-600 focus:bg-neutral-600 cursor-pointer"
+                >
+                  <div className="flex items-center">
+                    <Cpu className="h-4 w-4 mr-2 text-violet-400" /> glm-4.7
                   </div>
                 </SelectItem>
               </SelectContent>

View File

@@ -8,12 +8,16 @@ interface WelcomeScreenProps {
   ) => void;
   onCancel: () => void;
   isLoading: boolean;
+  model: string;
+  onModelChange: (model: string) => void;
 }

 export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({
   handleSubmit,
   onCancel,
   isLoading,
+  model,
+  onModelChange,
 }) => (
   <div className="h-full flex flex-col items-center justify-center text-center px-4 flex-1 w-full max-w-3xl mx-auto gap-4">
     <div>
@@ -30,6 +34,8 @@ export const WelcomeScreen: React.FC<WelcomeScreenProps> = ({
         isLoading={isLoading}
         onCancel={onCancel}
         hasHistory={false}
+        model={model}
+        onModelChange={onModelChange}
       />
     </div>
     <p className="text-xs text-neutral-500">