Add one-skill
This commit is contained in:
@@ -0,0 +1,147 @@
|
||||
-- =====================================================================
|
||||
-- @Name: DORIS-D-SQL-{表名}-INSERT
|
||||
-- @Version: 1.0
|
||||
-- @Desc: Apache Doris 数据插入模板
|
||||
-- @TargetDatabase: Apache Doris
|
||||
-- =====================================================================
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景1:INSERT INTO(追加写入)
|
||||
-- ============================================================================
|
||||
-- 适用:向 Doris 表追加数据,不会删除已有数据
|
||||
|
||||
-- Scenario 1: append one business day of rows from source_table to
-- target_table. This is a plain INSERT INTO, so rows already present in the
-- target are not removed.
-- The explicit target column list binds every SELECT expression to its
-- destination column by name instead of by position, so the load does not
-- silently misalign if the target table's column order ever changes.
-- '${day_id}' is a template placeholder that must be substituted with the
-- business date before the statement is executed.
INSERT INTO db_name.target_table (
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table
WHERE stat_date = '${day_id}';
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景2:INSERT OVERWRITE(覆盖写入)
|
||||
-- ============================================================================
|
||||
-- 适用:覆盖目标表(或指定分区)的全部数据
|
||||
-- 注意:Doris 2.0+ 支持;可覆盖整表,也可只覆盖指定分区(指定分区时目标表必须是分区表)
|
||||
|
||||
-- 覆盖整表
|
||||
-- Overwrite the whole table: the current contents of db_name.target_table
-- are replaced by the result of the SELECT below. The SELECT deliberately
-- has no WHERE clause because this is a full refresh.
-- Doris syntax requires the TABLE keyword after INSERT OVERWRITE; without it
-- the statement is rejected by the parser.
-- The explicit column list keeps the column binding independent of the
-- target table's physical column order.
INSERT OVERWRITE TABLE db_name.target_table (
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table;
|
||||
|
||||
-- 覆盖指定分区(推荐)
|
||||
-- Overwrite a single partition: only partition p202605 of
-- db_name.target_table is replaced; other partitions are left as they are.
-- Doris syntax requires the TABLE keyword after INSERT OVERWRITE.
-- Unlike Hive static partitions, PARTITION (...) in Doris only names the
-- destination partition and does not supply a value for the partition
-- column, so stat_date must still be part of the SELECT list.
-- NOTE(review): assumes p202605 covers 2026-05-01 (inclusive) to 2026-06-01
-- (exclusive), matching the half-open date filter below - confirm against
-- the table's partition definition.
INSERT OVERWRITE TABLE db_name.target_table
PARTITION (p202605) (
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table
WHERE stat_date >= '2026-05-01'
  AND stat_date < '2026-06-01';
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景3:从查询结果写入(ETL 场景)
|
||||
-- ============================================================================
|
||||
|
||||
-- 简单转换后写入
|
||||
-- Scenario 3: aggregate one day of orders per department and append the
-- result to the target table in a single statement.
-- Every column is qualified with its table alias so the query cannot become
-- ambiguous if both joined tables expose a column with the same name.
-- NOTE(review): column ownership is assumed - department is taken from
-- dim_department (the reason for the join), while order_date, user_id and
-- total_amount are taken from source_orders. Confirm against the real
-- schemas.
-- The LEFT JOIN keeps orders whose dept_id has no match in dim_department;
-- those rows are grouped under a NULL department.
-- NOTE(review): the six output columns are bound to the target table by
-- position. Add an explicit target column list once the target's column
-- names are known.
-- '${day_id}' is a template placeholder that must be substituted with the
-- business date before the statement is executed.
INSERT INTO db_name.target_table
SELECT
    o.order_date,
    d.department,
    COUNT(*) AS order_count,
    COUNT(DISTINCT o.user_id) AS unique_users,
    SUM(o.total_amount) AS total_amount,
    AVG(o.total_amount) AS avg_amount
FROM db_name.source_orders AS o
LEFT JOIN db_name.dim_department AS d
    ON o.dept_id = d.dept_id
WHERE o.order_date = '${day_id}'
GROUP BY
    o.order_date,
    d.department;
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景4:批量 VALUES 写入
|
||||
-- ============================================================================
|
||||
|
||||
-- Scenario 4: write a small, fixed set of rows with one multi-row VALUES
-- list. All three rows travel in a single statement rather than one INSERT
-- per row.
-- Each tuple supplies (stat_date, department, amount), in the order given by
-- the target column list.
INSERT INTO db_name.target_table
    (stat_date, department, amount)
VALUES
    ('2026-05-01', '市场部', 10000.00),
    ('2026-05-01', '技术部', 25000.00),
    ('2026-05-01', '运营部', 18000.00);
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景5:Stream Load(数据导入)
|
||||
-- ============================================================================
|
||||
-- 适用:大批量数据导入(百万级以上)
|
||||
-- 注意:Stream Load 通过 HTTP 协议提交,不是 SQL 语法
|
||||
|
||||
/*
|
||||
-- curl 命令示例
|
||||
curl --location-trusted -u user:password \
|
||||
-H "label:load_order_20260501" \
|
||||
-H "column_separator:," \
|
||||
-H "columns:order_id,order_date,user_id,total_amount" \
|
||||
-T data.csv \
|
||||
http://fe_host:8030/api/db_name/orders/_stream_load
|
||||
*/
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景6:Broker Load(外部数据源导入)
|
||||
-- ============================================================================
|
||||
|
||||
/*
|
||||
LOAD LABEL db_name.load_label_20260501
|
||||
(
|
||||
DATA INFILE('hdfs://namenode:8020/path/to/data/*')
|
||||
INTO TABLE target_table
|
||||
COLUMNS TERMINATED BY ','
|
||||
(stat_date, department, region, amount)
|
||||
SET (amount = amount * 1.0)
|
||||
)
|
||||
WITH BROKER 'broker_name'
|
||||
(
|
||||
'username' = 'hdfs_user',
|
||||
'password' = 'hdfs_password'
|
||||
)
|
||||
PROPERTIES
|
||||
(
|
||||
'timeout' = '3600',
|
||||
'max_filter_ratio' = '0.01'
|
||||
);
|
||||
*/
|
||||
|
||||
-- ============================================================================
|
||||
-- 关键规则说明
|
||||
-- ============================================================================
|
||||
/*
|
||||
1. INSERT INTO vs INSERT OVERWRITE
|
||||
- INSERT INTO:追加数据,不删除已有数据
|
||||
- INSERT OVERWRITE:覆盖数据(Doris 2.0+ 支持)
|
||||
- 日常增量推荐 INSERT INTO,全量刷新推荐 INSERT OVERWRITE
|
||||
|
||||
2. Doris 不使用临时表链式处理
|
||||
- 与 Spark 不同,Doris 通常用单条 SQL 或 CTE 完成 ETL
|
||||
- 直接 INSERT INTO ... SELECT ... 即可
|
||||
|
||||
3. 字段顺序
|
||||
- SELECT 字段顺序必须与目标表列定义顺序一致
|
||||
- 或显式指定列名:INSERT INTO table (col1, col2) SELECT ...
|
||||
|
||||
4. 数据导入方式选择
|
||||
- 少量数据:INSERT INTO ... SELECT ... 或 INSERT INTO ... VALUES ...
|
||||
- 大批量导入:Stream Load(HTTP PUT,最高性能)
|
||||
- HDFS 导入:Broker Load
|
||||
- Kafka 等消息队列的持续流式导入:Routine Load
|
||||
|
||||
5. 性能建议
|
||||
- 批量写入优于逐条写入
|
||||
- Stream Load 是最高性能的导入方式
|
||||
- 建议攒批后一次性写入,避免频繁小批量导入
|
||||
*/
|
||||
Reference in New Issue
Block a user