Files
smart-data-dev-skill/one-skill/smart-data-developer/references/sql/templates/doris/insert-template.sql
2026-05-13 11:03:00 +08:00

148 lines
4.8 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- =====================================================================
-- @Name: DORIS-D-SQL-{表名}-INSERT
-- @Version: 1.0
-- @Desc: Apache Doris 数据插入模板
-- @TargetDatabase: Apache Doris
-- =====================================================================
-- ============================================================================
-- Scenario 1: INSERT INTO (append write)
-- ============================================================================
-- Use when appending rows to a Doris table; existing rows are not deleted.
-- The target columns are listed explicitly so the statement does not depend
-- on the physical column order of the target table.
-- '${day_id}' is a template placeholder (presumably the day being loaded,
-- substituted by the scheduler before execution -- confirm with the caller).
INSERT INTO db_name.target_table (
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table
WHERE stat_date = '${day_id}';
-- ============================================================================
-- Scenario 2: INSERT OVERWRITE (overwrite write)
-- ============================================================================
-- Use when replacing all data of the target table (or of selected partitions).
-- Note: supported from Doris 2.0 onward.
-- NOTE(review): the original template stated that this only applies to
-- partitioned tables -- confirm against the docs of the Doris version in use.
-- Doris syntax requires the TABLE keyword after INSERT OVERWRITE.
--
-- Overwrite the whole table. There is deliberately no WHERE clause: the full
-- contents of the source table replace the current contents of the target.
INSERT OVERWRITE TABLE db_name.target_table (
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table;
-- Overwrite a single partition (recommended).
-- Doris syntax requires the TABLE keyword after INSERT OVERWRITE.
-- Unlike Hive static partitions, a Doris partition column is an ordinary
-- table column, so stat_date must be selected together with the other columns.
-- The half-open date range must stay inside the bounds of partition p202605
-- (presumably May 2026 -- confirm against the table's partition definition).
INSERT OVERWRITE TABLE db_name.target_table PARTITION (p202605) (
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table
WHERE stat_date >= '2026-05-01'
  AND stat_date < '2026-06-01';
-- ============================================================================
-- Scenario 3: write from a query result (ETL scenario)
-- ============================================================================
-- Aggregates one day of orders per (order_date, department) and appends the
-- result to the target table.
-- Every column is qualified with its table alias so the statement keeps
-- working if both joined tables ever expose a column with the same name.
-- NOTE(review): qualifiers assume department comes from dim_department and
-- all other columns from source_orders -- confirm against the real schemas.
-- NOTE(review): the target column names below are template placeholders in
-- the same order as the SELECT list -- adjust them to the real target table.
-- The LEFT JOIN keeps orders whose dept_id has no dimension row; those rows
-- are grouped under a NULL department.
INSERT INTO db_name.target_table (
    stat_date,
    department,
    order_count,
    unique_users,
    total_amount,
    avg_amount
)
SELECT
    o.order_date,
    d.department,
    COUNT(*) AS order_count,
    COUNT(DISTINCT o.user_id) AS unique_users,
    SUM(o.total_amount) AS total_amount,
    AVG(o.total_amount) AS avg_amount
FROM db_name.source_orders AS o
LEFT JOIN db_name.dim_department AS d
    ON o.dept_id = d.dept_id
WHERE o.order_date = '${day_id}'
GROUP BY
    o.order_date,
    d.department;
-- ============================================================================
-- Scenario 4: batch VALUES write
-- ============================================================================
-- Inserts three literal rows in a single statement; the target columns are
-- named explicitly, so the tuple order below is (stat_date, department, amount).
INSERT INTO db_name.target_table (
    stat_date,
    department,
    amount
)
VALUES
    ('2026-05-01', '市场部', 10000.00),
    ('2026-05-01', '技术部', 25000.00),
    ('2026-05-01', '运营部', 18000.00);
-- ============================================================================
-- 场景5:Stream Load(数据导入)
-- ============================================================================
-- 适用:大批量数据导入(百万级以上)
-- 注意:Stream Load 通过 HTTP 协议提交,不是 SQL 语法
/*
-- curl 命令示例
curl --location-trusted -u user:password \
-H "label:load_order_20260501" \
-H "column_separator:," \
-H "columns:order_id,order_date,user_id,total_amount" \
-T data.csv \
http://fe_host:8030/api/db_name/orders/_stream_load
*/
-- ============================================================================
-- 场景6:Broker Load(外部数据源导入)
-- ============================================================================
/*
LOAD LABEL db_name.load_label_20260501
(
DATA INFILE('hdfs://namenode:8020/path/to/data/*')
INTO TABLE target_table
COLUMNS TERMINATED BY ','
(stat_date, department, region, amount)
SET (amount = amount * 1.0)
)
WITH BROKER 'broker_name'
(
'username' = 'hdfs_user',
'password' = 'hdfs_password'
)
PROPERTIES
(
'timeout' = '3600',
'max_filter_ratio' = '0.01'
);
*/
-- ============================================================================
-- 关键规则说明
-- ============================================================================
/*
1. INSERT INTO vs INSERT OVERWRITE
- INSERT INTO:追加数据,不删除已有数据
- INSERT OVERWRITE TABLE:覆盖数据(Doris 2.0+ 支持)
- 日常增量推荐 INSERT INTO;全量刷新推荐 INSERT OVERWRITE
2. Doris 不使用临时表链式处理
- 与 Spark 不同,Doris 通常用单条 SQL 或 CTE 完成 ETL
- 直接 INSERT INTO ... SELECT ... 即可
3. 字段顺序
- SELECT 字段顺序必须与目标表列定义顺序一致
- 或显式指定列名:INSERT INTO table (col1, col2) SELECT ...
4. 数据导入方式选择
- 少量数据:INSERT INTO ... SELECT ... 或 INSERT INTO ... VALUES ...
- 大批量导入:Stream Load(HTTP PUT,最高性能)
- HDFS 导入:Broker Load
- 外部数据源:Routine Load(Kafka 等)
5. 性能建议
- 批量写入优于逐条写入
- Stream Load 是最高性能的导入方式
- 建议攒批后一次性写入,避免频繁小批量导入
*/