Add sub skills
This commit is contained in:
124
data-structure-fetcher/scripts/embedding_api_call.py
Normal file
124
data-structure-fetcher/scripts/embedding_api_call.py
Normal file
@@ -0,0 +1,124 @@
|
||||
import sys
|
||||
import requests
|
||||
import json
|
||||
import os
|
||||
|
||||
# =========================
# 1. Configuration
# =========================
# URL of the Flask service's search endpoint.
# When the service runs on this machine it is usually http://127.0.0.1:5001;
# for a remote server, use that server's IP address instead.
# Setting the EMBEDDING_API_URL environment variable to a non-empty value
# overrides the built-in default, so the script does not have to be edited
# for each deployment.
API_URL = os.environ.get("EMBEDDING_API_URL") or "http://127.0.0.1:5001/search"
|
||||
|
||||
def query_table_metadata(query_text, top_k=10, timeout=10):
    """Call the Flask search endpoint and return the result as Markdown.

    Args:
        query_text (str or list): Query content; a single string or a list
            of strings.
        top_k (int): Number of results to request.
        timeout (float): Seconds to wait for the HTTP request before giving
            up. Defaults to 10, the value that was previously hard-coded.

    Returns:
        str: The Markdown produced by ``generate_markdown`` on success.
        Request failures, non-200 responses, malformed responses and errors
        reported by the service are returned as a message starting with
        "❌" instead of being raised.
    """
    # 1. Build the request body and headers.
    payload = {
        "q": query_text,
        "top_k": top_k,
    }
    headers = {
        "Content-Type": "application/json",
    }

    print(f"🔍 正在查询: '{query_text}' ...")

    try:
        # 2. Send the POST request; the timeout keeps a hung service from
        #    blocking the caller forever.
        response = requests.post(
            API_URL, json=payload, headers=headers, timeout=timeout
        )

        # Anything other than HTTP 200 is reported with the raw body.
        if response.status_code != 200:
            return f"❌ 请求失败,状态码: {response.status_code}, 错误信息: {response.text}"

        # 3. Parse the JSON body. requests raises a ValueError subclass when
        #    the body is not valid JSON; report that explicitly instead of
        #    letting it surface as an "unknown error".
        try:
            res_json = response.json()
        except ValueError:
            return "❌ 响应解析失败:服务返回的内容不是合法的 JSON。"

        # The status/error/data lookups below require a JSON object.
        if not isinstance(res_json, dict):
            return "❌ 响应格式错误:期望 JSON 对象。"

        if res_json.get("status") != "success":
            return f"❌ 接口返回错误: {res_json.get('error')}"

        # 4. Render the returned data as Markdown.
        return generate_markdown(res_json.get("data", []))

    except requests.exceptions.ConnectionError:
        return "❌ 连接错误:无法连接到 Flask 服务,请检查 IP 地址和端口,或确认服务是否启动。"
    except requests.exceptions.Timeout:
        return "❌ 请求超时:服务器响应时间过长。"
    except Exception as e:
        # Last-resort boundary: keep returning a message rather than raising.
        return f"❌ 发生未知错误: {str(e)}"
|
||||
|
||||
def _sanitize_cell(value):
    """Return *value* as text that is safe inside one Markdown table cell."""
    text = "" if value is None else str(value)
    # Line breaks would end the table row and "|" would start a new column.
    for line_break in ("\r\n", "\n", "\r"):
        text = text.replace(line_break, " ")
    return text.replace("|", "/")


def generate_markdown(data_list):
    """Convert the data list returned by the search endpoint into Markdown.

    Each item of ``data_list`` is read as a dict with a ``query`` value and
    a ``results`` list; every result contributes one table row built from
    its ``id``, ``distance``, ``metadata`` and ``document`` entries.

    Args:
        data_list (list): Items as described above. An empty or ``None``
            list yields a "no data" notice.

    Returns:
        str: One titled Markdown table per query, each followed by a
        horizontal rule.
    """
    if not data_list:
        return "⚠️ 未查询到任何数据。"

    md_lines = []

    # One section per query (batch queries produce several items).
    for item in data_list:
        query = item.get("query")
        # A missing or null "results" entry is treated as "no rows".
        results = item.get("results") or []

        # Section title and result count.
        md_lines.append(f"### 🔎 查询结果:{query}")
        md_lines.append(f"共找到 {len(results)} 条相关表信息:\n")

        # Table header and alignment row.
        md_lines.append("| ID | 相似度 (Distance) | 元数据 (Metadata) | 文档内容 (Document) |")
        md_lines.append("| :--- | :---: | :--- | :--- |")

        for res in results:
            doc_id = _sanitize_cell(res.get("id", "N/A"))

            # Show the distance with four decimals; a missing or
            # non-numeric value is rendered as "N/A" instead of failing.
            try:
                dist_str = f"{float(res.get('distance')):.4f}"
            except (TypeError, ValueError):
                dist_str = "N/A"

            # Serialize metadata as JSON (keeping non-ASCII text readable)
            # and clean it like every other cell.
            metadata = res.get("metadata", {})
            meta_str = _sanitize_cell(json.dumps(metadata, ensure_ascii=False))

            # A null document becomes an empty cell.
            document = _sanitize_cell(res.get("document", ""))

            md_lines.append(f"| {doc_id} | {dist_str} | {meta_str} | {document} |")

        md_lines.append("\n---\n")  # Separator between query sections.

    return "\n".join(md_lines)
|
||||
|
||||
# =========================
# 5. Script entry point (for manual testing)
# =========================
if __name__ == "__main__":
    # Usage: python embedding_api_call.py "<query text>" [top_k]
    if len(sys.argv) > 1:
        query_input = sys.argv[1]
        if len(sys.argv) > 2:
            # Report a non-integer top_k clearly instead of a traceback.
            try:
                k_input = int(sys.argv[2])
            except ValueError:
                print(f"❌ top_k 必须是整数: '{sys.argv[2]}'", file=sys.stderr)
                sys.exit(2)
        else:
            # NOTE(review): this fallback is 5 while the no-argument default
            # below is 10 — confirm whether the difference is intended.
            k_input = 5
    else:
        # Defaults used when no arguments are given.
        query_input = "按日分投诉"
        k_input = 10

    result_md = query_table_metadata(query_input, top_k=k_input)
    print(result_md)
|
||||
121
data-structure-fetcher/scripts/wenben_api_call.py
Normal file
121
data-structure-fetcher/scripts/wenben_api_call.py
Normal file
@@ -0,0 +1,121 @@
|
||||
import sys
|
||||
import requests
|
||||
import json
|
||||
import os
|
||||
|
||||
# =========================
# 1. Configuration
# =========================
# URL of the Flask service's "like" (text match) endpoint.
# When the service runs on this machine it is usually http://127.0.0.1:5001;
# for a remote server, use that server's IP address instead.
# Setting the WENBEN_API_URL environment variable to a non-empty value
# overrides the built-in default, so the script does not have to be edited
# for each deployment.
API_URL = os.environ.get("WENBEN_API_URL") or "http://127.0.0.1:5001/like"
|
||||
|
||||
def query_table_metadata(query_text, top_k=5, timeout=10):
    """Call the Flask "like" endpoint and return the result as Markdown.

    Args:
        query_text (str or list): Query content; a single string or a list
            of strings.
        top_k (int): Number of results to request.
        timeout (float): Seconds to wait for the HTTP request before giving
            up. Defaults to 10, the value that was previously hard-coded.

    Returns:
        str: The Markdown produced by ``generate_markdown`` on success.
        Request failures, non-200 responses, malformed responses and errors
        reported by the service are returned as a message starting with
        "❌" instead of being raised.
    """
    # 1. Build the request body and headers.
    payload = {
        "q": query_text,
        "top_k": top_k,
    }
    headers = {
        "Content-Type": "application/json",
    }

    print(f"🔍 正在查询: '{query_text}' ...")

    try:
        # 2. Send the POST request; the timeout keeps a hung service from
        #    blocking the caller forever.
        response = requests.post(
            API_URL, json=payload, headers=headers, timeout=timeout
        )

        # Anything other than HTTP 200 is reported with the raw body.
        if response.status_code != 200:
            return f"❌ 请求失败,状态码: {response.status_code}, 错误信息: {response.text}"

        # 3. Parse the JSON body. requests raises a ValueError subclass when
        #    the body is not valid JSON; report that explicitly instead of
        #    letting it surface as an "unknown error".
        try:
            res_json = response.json()
        except ValueError:
            return "❌ 响应解析失败:服务返回的内容不是合法的 JSON。"

        # The status/error/data lookups below require a JSON object.
        if not isinstance(res_json, dict):
            return "❌ 响应格式错误:期望 JSON 对象。"

        if res_json.get("status") != "success":
            return f"❌ 接口返回错误: {res_json.get('error')}"

        # 4. Render the returned data as Markdown.
        return generate_markdown(res_json.get("data", []))

    except requests.exceptions.ConnectionError:
        return "❌ 连接错误:无法连接到 Flask 服务,请检查 IP 地址和端口,或确认服务是否启动。"
    except requests.exceptions.Timeout:
        return "❌ 请求超时:服务器响应时间过长。"
    except Exception as e:
        # Last-resort boundary: keep returning a message rather than raising.
        return f"❌ 发生未知错误: {str(e)}"
|
||||
|
||||
def _sanitize_cell(value):
    """Return *value* as text that is safe inside one Markdown table cell."""
    text = "" if value is None else str(value)
    # Line breaks would end the table row and "|" would start a new column.
    for line_break in ("\r\n", "\n", "\r"):
        text = text.replace(line_break, " ")
    return text.replace("|", "/")


def generate_markdown(data_list):
    """Convert the data list returned by the "like" endpoint into Markdown.

    Each item of ``data_list`` is read as a dict with a ``query`` value and
    a ``results`` list; every result contributes one table row built from
    its id, ``metadata`` and ``document`` entries.

    Args:
        data_list (list): Items as described above. An empty or ``None``
            list yields a "no data" notice.

    Returns:
        str: One titled Markdown table per query, each followed by a
        horizontal rule.
    """
    if not data_list:
        return "⚠️ 未查询到任何数据。"

    md_lines = []

    # One section per query (batch queries produce several items).
    for item in data_list:
        query = item.get("query")
        # A missing or null "results" entry is treated as "no rows".
        results = item.get("results") or []

        # Section title and result count.
        md_lines.append(f"### 🔎 查询结果:{query}")
        md_lines.append(f"共找到 {len(results)} 条相关表信息:\n")

        # Table header and alignment row.
        md_lines.append("| ID | 元数据 (Metadata) | 文档内容 (Document) |")
        md_lines.append("| :--- | :--- | :--- |")

        for res in results:
            # Prefer the plain "id" key and fall back to the BOM-prefixed
            # "\ufeffid" key that the original code read exclusively.
            # NOTE(review): the BOM-prefixed key presumably comes from a
            # source file saved with a UTF-8 BOM — confirm on the server.
            doc_id = _sanitize_cell(res.get("id", res.get("\ufeffid", "N/A")))

            # Serialize metadata as JSON (keeping non-ASCII text readable)
            # and clean it like every other cell.
            metadata = res.get("metadata", {})
            meta_str = _sanitize_cell(json.dumps(metadata, ensure_ascii=False))

            # A null document becomes an empty cell.
            document = _sanitize_cell(res.get("document", ""))

            md_lines.append(f"| {doc_id} | {meta_str} | {document} |")

        md_lines.append("\n---\n")  # Separator between query sections.

    return "\n".join(md_lines)
|
||||
|
||||
# =========================
# 5. Script entry point (for manual testing)
# =========================
if __name__ == "__main__":
    # Usage: python wenben_api_call.py "<query text>" [top_k]
    if len(sys.argv) > 1:
        query_input = sys.argv[1]
        if len(sys.argv) > 2:
            # Report a non-integer top_k clearly instead of a traceback.
            try:
                k_input = int(sys.argv[2])
            except ValueError:
                print(f"❌ top_k 必须是整数: '{sys.argv[2]}'", file=sys.stderr)
                sys.exit(2)
        else:
            k_input = 5
    else:
        # Defaults used when no arguments are given.
        query_input = "投诉"
        k_input = 5

    result_md = query_table_metadata(query_input, top_k=k_input)
    print(result_md)
|
||||
Reference in New Issue
Block a user