Add one-skill

This commit is contained in:
Xin Wang
2026-05-13 11:03:00 +08:00
parent a4c8b29176
commit f9e36ef92d
34 changed files with 7656 additions and 0 deletions

View File

@@ -0,0 +1,147 @@
-- =====================================================================
-- @Name: DORIS-D-SQL-{表名}-INSERT
-- @Version: 1.0
-- @Desc: Apache Doris 数据插入模板
-- @TargetDatabase: Apache Doris
-- =====================================================================
-- ============================================================================
-- Scenario 1: INSERT INTO (append)
-- ============================================================================
-- Use when: appending rows to a Doris table; existing rows are not deleted.
-- The target column list is spelled out so the mapping does not depend on the
-- physical column order of the table. Column names are template placeholders;
-- adjust them to the real target schema.
-- NOTE(review): '${day_id}' is presumably substituted by the scheduler before
-- the statement is submitted -- confirm the expected date format.
INSERT INTO db_name.target_table (
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table
WHERE stat_date = '${day_id}';
-- ============================================================================
-- Scenario 2: INSERT OVERWRITE (overwrite)
-- ============================================================================
-- Use when: replacing all data in the target table (or in selected partitions).
-- Syntax: Doris spells this statement INSERT OVERWRITE TABLE <table>; the TABLE
-- keyword is part of the grammar and must not be omitted.
-- Note: this template states the feature requires Doris 2.0+.
-- NOTE(review): the template also claimed it applies to partitioned tables
-- only; the Doris manual describes overwriting a whole table or selected
-- partitions, so that restriction looks too strict -- confirm on the deployed
-- version.
-- Overwrite the whole table. There is deliberately no WHERE clause: every row
-- of the target is replaced by the full contents of the source.
INSERT OVERWRITE TABLE db_name.target_table (
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table;
-- Overwrite a specific partition (recommended).
-- Only the partition named in the PARTITION clause is replaced.
-- In Doris the partition column is an ordinary table column, so stat_date is
-- selected and inserted like every other column (there is no Hive-style
-- static partition value that would let it be omitted).
-- NOTE(review): assumes partition p202605 covers the half-open range
-- [2026-05-01, 2026-06-01) on stat_date -- confirm against the table DDL;
-- rows outside the partition's range would be rejected.
INSERT OVERWRITE TABLE db_name.target_table
PARTITION (p202605)
(
    stat_date,
    department,
    region,
    order_count,
    total_amount
)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table
WHERE stat_date >= '2026-05-01'
  AND stat_date < '2026-06-01';
-- ============================================================================
-- Scenario 3: write from a query result (ETL scenario)
-- ============================================================================
-- Write after a simple transformation: one row per order date and department
-- with order count, distinct users, total amount and average amount.
-- The target column list is explicit so the six SELECT expressions map to
-- named columns instead of relying on the table's physical column order;
-- the names are template placeholders -- adjust them to the real schema.
-- Every column reference is qualified with its table alias so the statement
-- cannot fail (or silently bind to the wrong table) if both joined tables
-- expose a column with the same name.
-- NOTE(review): user_id and total_amount are assumed to come from
-- source_orders and department from dim_department -- confirm.
-- LEFT JOIN keeps orders whose dept_id has no match in dim_department; such
-- orders are aggregated under a NULL department.
INSERT INTO db_name.target_table (
    stat_date,
    department,
    order_count,
    unique_users,
    total_amount,
    avg_amount
)
SELECT
    o.order_date                AS stat_date,
    d.department,
    COUNT(*)                    AS order_count,
    COUNT(DISTINCT o.user_id)   AS unique_users,
    SUM(o.total_amount)         AS total_amount,
    AVG(o.total_amount)         AS avg_amount
FROM db_name.source_orders AS o
LEFT JOIN db_name.dim_department AS d
    ON o.dept_id = d.dept_id
WHERE o.order_date = '${day_id}'
GROUP BY
    o.order_date,
    d.department;
-- ============================================================================
-- Scenario 4: batch VALUES insert
-- ============================================================================
-- Three literal rows are written by a single statement. Only the listed
-- columns are supplied; table and column names are template placeholders.
INSERT INTO db_name.target_table (
    stat_date,
    department,
    amount
)
VALUES
    ('2026-05-01', '市场部', 10000.00),
    ('2026-05-01', '技术部', 25000.00),
    ('2026-05-01', '运营部', 18000.00);
-- ============================================================================
-- 场景5:Stream Load(数据导入)
-- ============================================================================
-- 适用:大批量数据导入(百万级以上)
-- 注意:Stream Load 通过 HTTP 协议提交,不是 SQL 语法
/*
-- curl 命令示例
curl --location-trusted -u user:password \
-H "label:load_order_20260501" \
-H "column_separator:," \
-H "columns:order_id,order_date,user_id,total_amount" \
-T data.csv \
http://fe_host:8030/api/db_name/orders/_stream_load
*/
-- ============================================================================
-- 场景6:Broker Load(外部数据源导入)
-- ============================================================================
/*
LOAD LABEL db_name.load_label_20260501
(
DATA INFILE('hdfs://namenode:8020/path/to/data/*')
INTO TABLE target_table
COLUMNS TERMINATED BY ','
(stat_date, department, region, amount)
SET (amount = amount * 1.0)
)
WITH BROKER 'broker_name'
(
'username' = 'hdfs_user',
'password' = 'hdfs_password'
)
PROPERTIES
(
'timeout' = '3600',
'max_filter_ratio' = '0.01'
);
*/
-- ============================================================================
-- 关键规则说明
-- ============================================================================
/*
1. INSERT INTO vs INSERT OVERWRITE
- INSERT INTO:追加数据,不删除已有数据
- INSERT OVERWRITE:覆盖数据(Doris 2.0+ 支持)
- 日常增量推荐 INSERT INTO,全量刷新推荐 INSERT OVERWRITE
2. Doris 不使用临时表链式处理
- 与 Spark 不同,Doris 通常用单条 SQL 或 CTE 完成 ETL
- 直接 INSERT INTO ... SELECT ... 即可
3. 字段顺序
- SELECT 字段顺序必须与目标表列定义顺序一致
- 或显式指定列名:INSERT INTO table (col1, col2) SELECT ...
4. 数据导入方式选择
- 少量数据:INSERT INTO ... SELECT ... 或 INSERT INTO ... VALUES ...
- 大批量导入:Stream Load(HTTP PUT,最高性能)
- HDFS 导入:Broker Load
- 外部数据源:Routine Load(Kafka 等)
5. 性能建议
- 批量写入优于逐条写入
- Stream Load 是最高性能的导入方式
- 建议攒批后一次性写入,避免频繁小批量导入
*/