-- File info: 148 lines, 4.8 KiB, SQL
-- =====================================================================
|
||
-- @Name: DORIS-D-SQL-{表名}-INSERT
|
||
-- @Version: 1.0
|
||
-- @Desc: Apache Doris 数据插入模板
|
||
-- @TargetDatabase: Apache Doris
|
||
-- =====================================================================
|
||
|
||
-- ============================================================================
-- Scenario 1: INSERT INTO (append)
-- ============================================================================
-- Use when: appending rows to a Doris table; existing rows are not deleted.
--
-- The explicit column list binds every selected value to a target column by
-- name, so the statement keeps working if the target table's column order
-- changes. The names below mirror the source columns; adjust them to the real
-- target schema.
-- '${day_id}' is a template placeholder (presumably substituted by the job
-- runner before execution -- confirm).

INSERT INTO db_name.target_table
(
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    src.stat_date,
    src.department,
    src.region,
    src.order_count,
    src.total_amount
FROM db_name.source_table AS src
WHERE src.stat_date = '${day_id}';
|
||
|
||
-- ============================================================================
-- Scenario 2: INSERT OVERWRITE (replace existing data)
-- ============================================================================
-- Use when: all data in the target table (or in selected partitions) must be
-- replaced.
-- Note: supported since Doris 2.0.
-- NOTE(review): the original note limited this to partitioned tables; the
-- Doris manual describes overwriting a whole table or chosen partitions --
-- confirm against the Doris version in use.

-- Overwrite the whole table.
-- Doris grammar is INSERT OVERWRITE TABLE <name>: the TABLE keyword is
-- mandatory, without it the statement does not parse.
-- There is intentionally no WHERE clause: the full content of the source
-- table replaces the full content of the target table.
-- The column list binds values by name; adjust it to the real target schema.
INSERT OVERWRITE TABLE db_name.target_table
(
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    src.stat_date,
    src.department,
    src.region,
    src.order_count,
    src.total_amount
FROM db_name.source_table AS src;
|
||
|
||
-- Overwrite one named partition (recommended).
-- Doris grammar is INSERT OVERWRITE TABLE <name> PARTITION (...): the TABLE
-- keyword is mandatory.
-- stat_date is part of the select list so that each row supplies all five
-- target columns; without it the row shape is one column short.
-- The half-open date range selects May 2026, which is presumably the range
-- covered by partition p202605 -- confirm against the table's partition
-- definition.
INSERT OVERWRITE TABLE db_name.target_table
PARTITION (p202605)
(
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    src.stat_date,
    src.department,
    src.region,
    src.order_count,
    src.total_amount
FROM db_name.source_table AS src
WHERE src.stat_date >= '2026-05-01'
  AND src.stat_date <  '2026-06-01';
|
||
|
||
-- ============================================================================
-- Scenario 3: write a query result (ETL)
-- ============================================================================

-- Aggregate one day of orders per department and append the result.
-- Every column is qualified with its table alias so the statement cannot
-- become ambiguous if both joined tables expose a column of the same name.
-- NOTE(review): department is assumed to come from dim_department (the table
-- joined on dept_id) -- confirm.
-- The target column list is a placeholder derived from the select aliases;
-- align it with the real target schema.
-- Orders without a matching department row are kept by the LEFT JOIN and are
-- grouped under a NULL department.
INSERT INTO db_name.target_table
(
    stat_date,
    department,
    order_count,
    unique_users,
    total_amount,
    avg_amount
)
SELECT
    o.order_date              AS stat_date,
    d.department              AS department,
    COUNT(*)                  AS order_count,
    COUNT(DISTINCT o.user_id) AS unique_users,
    SUM(o.total_amount)       AS total_amount,
    AVG(o.total_amount)       AS avg_amount
FROM db_name.source_orders AS o
LEFT JOIN db_name.dim_department AS d
    ON o.dept_id = d.dept_id
WHERE o.order_date = '${day_id}'
GROUP BY
    o.order_date,
    d.department;
|
||
|
||
-- ============================================================================
-- Scenario 4: batch VALUES insert
-- ============================================================================

-- Appends three literal rows in a single statement.
-- NOTE(review): this example names the measure column "amount", while the
-- INSERT ... SELECT examples in this file use "total_amount" -- confirm which
-- name the real target table uses.
INSERT INTO db_name.target_table (
    stat_date,
    department,
    amount
)
VALUES
    ('2026-05-01', '市场部', 10000.00),
    ('2026-05-01', '技术部', 25000.00),
    ('2026-05-01', '运营部', 18000.00);
|
||
|
||
-- ============================================================================
|
||
-- 场景5:Stream Load(数据导入)
|
||
-- ============================================================================
|
||
-- 适用:大批量数据导入(百万级以上)
|
||
-- 注意:Stream Load 通过 HTTP 协议提交,不是 SQL 语法
|
||
|
||
/*
-- curl 命令示例
curl --location-trusted -u user:password \
    -H "Expect:100-continue" \
    -H "label:load_order_20260501" \
    -H "column_separator:," \
    -H "columns:order_id,order_date,user_id,total_amount" \
    -T data.csv \
    -XPUT http://fe_host:8030/api/db_name/orders/_stream_load
*/
|
||
|
||
-- ============================================================================
|
||
-- 场景6:Broker Load(外部数据源导入)
|
||
-- ============================================================================
|
||
|
||
-- Broker Load example. Written as line comments because the glob in the HDFS
-- path contains "/*", which parsers that support nested block comments read
-- as the start of a new comment.
--
-- LOAD LABEL db_name.load_label_20260501
-- (
--     DATA INFILE('hdfs://namenode:8020/path/to/data/*')
--     INTO TABLE target_table
--     COLUMNS TERMINATED BY ','
--     (stat_date, department, region, amount)
--     SET (amount = amount * 1.0)
-- )
-- WITH BROKER 'broker_name'
-- (
--     'username' = 'hdfs_user',
--     'password' = 'hdfs_password'
-- )
-- PROPERTIES
-- (
--     'timeout' = '3600',
--     'max_filter_ratio' = '0.01'
-- );
|
||
|
||
-- ============================================================================
|
||
-- 关键规则说明
|
||
-- ============================================================================
|
||
/*
|
||
1. INSERT INTO vs INSERT OVERWRITE
|
||
- INSERT INTO:追加数据,不删除已有数据
|
||
- INSERT OVERWRITE:覆盖数据(Doris 2.0+ 支持)
|
||
- 日常增量推荐 INSERT INTO,全量刷新推荐 INSERT OVERWRITE
|
||
|
||
2. Doris 不使用临时表链式处理
|
||
- 与 Spark 不同,Doris 通常用单条 SQL 或 CTE 完成 ETL
|
||
- 直接 INSERT INTO ... SELECT ... 即可
|
||
|
||
3. 字段顺序
|
||
- SELECT 字段顺序必须与目标表列定义顺序一致
|
||
- 或显式指定列名:INSERT INTO table (col1, col2) SELECT ...
|
||
|
||
4. 数据导入方式选择
|
||
- 少量数据:INSERT INTO ... SELECT ... 或 INSERT INTO ... VALUES ...
|
||
- 大批量导入:Stream Load(HTTP PUT,最高性能)
|
||
- HDFS 导入:Broker Load
|
||
- 外部数据源:Routine Load(Kafka 等)
|
||
|
||
5. 性能建议
|
||
- 批量写入优于逐条写入
|
||
- Stream Load 是最高性能的导入方式
|
||
- 建议攒批后一次性写入,避免频繁小批量导入
|
||
*/
|