Add one-skill
This commit is contained in:
@@ -0,0 +1,131 @@
|
||||
-- =====================================================================
|
||||
-- @SparkSqlName: PAIMONA-D-SQL-{表名}-INSERT
|
||||
-- @Version: 1.0
|
||||
-- @Desc: 数据插入模板(INSERT OVERWRITE / INSERT INTO / MERGE INTO)
|
||||
-- @TargetTables: ${db_eda_env}.{目标表名}
|
||||
-- @SourceTables: {源表列表}
|
||||
-- @TargetDatabase: Paimon
|
||||
-- @SourceDatabase: Paimon
|
||||
-- =====================================================================
|
||||
|
||||
-- ============================================================================
|
||||
-- Scenario 1: overwrite one partition of a partitioned table
-- ============================================================================
-- Use for: daily / weekly / monthly incremental loads into a partitioned table.
-- The partition value is fixed in the PARTITION clause, so the SELECT list
-- carries only the non-partition columns.

INSERT OVERWRITE TABLE ${db_eda_env}.target_table
PARTITION (day_id = '${day_id}')
SELECT
    src.field1,
    src.field2,
    src.field3,
    current_timestamp() AS etl_time  -- load timestamp
FROM source_table AS src
WHERE src.day_id = '${day_id}';
|
||||
|
||||
-- ============================================================================
|
||||
-- Scenario 2: dynamic-partition write
-- ============================================================================
-- Use for: tables with several partition columns whose values come from the data.
-- SELECT columns are matched to the target table by position, and the dynamic
-- partition columns must be the LAST columns of the SELECT list, in the same
-- order as the PARTITION clause:
--     business columns -> technical columns -> partition columns
-- NOTE(review): whether this replaces only the partitions present in the result
-- or the whole table depends on the session's partition-overwrite mode
-- (spark.sql.sources.partitionOverwriteMode) -- confirm it is set to dynamic
-- before running.

INSERT OVERWRITE TABLE ${db_eda_env}.target_table
PARTITION (day_id, region)  -- dynamic partition columns
SELECT
    field1,
    field2,
    field3,
    current_timestamp() AS etl_time,  -- technical column; must precede the partition columns
    day_id,                           -- partition column 1 (taken from the data)
    region                            -- partition column 2 (taken from the data)
FROM source_table
WHERE day_id BETWEEN '${start_day}' AND '${end_day}';
|
||||
|
||||
-- ============================================================================
|
||||
-- Scenario 3: overwrite the whole table
-- ============================================================================
-- Use for: full refreshes and initial loads.
-- There is no PARTITION clause and no filter: every row of the source table
-- is read and written.

INSERT OVERWRITE TABLE ${db_eda_env}.target_table
SELECT
    src.field1,
    src.field2,
    src.field3,
    current_timestamp() AS etl_time  -- load timestamp
FROM source_table AS src;
|
||||
|
||||
-- ============================================================================
|
||||
-- Scenario 4: append (use with care)
-- ============================================================================
-- Use for: log tables and transaction-journal tables (unpartitioned, or where
-- duplicate rows are acceptable).
-- INSERT INTO appends; re-running the statement for the same ${day_id} adds
-- the same rows again.

INSERT INTO TABLE ${db_eda_env}.target_table
SELECT
    src.field1,
    src.field2,
    src.field3,
    current_timestamp() AS etl_time  -- load timestamp
FROM source_table AS src
WHERE src.day_id = '${day_id}';
|
||||
|
||||
-- ============================================================================
|
||||
-- Scenario 5: load the target table from a temporary table
-- ============================================================================
-- Use for: the final step of an ETL flow.
-- The temporary table is read without a filter; the partition being
-- overwritten is fixed by the PARTITION clause.

INSERT OVERWRITE TABLE ${db_eda_env}.target_table
PARTITION (day_id = '${day_id}')
SELECT
    -- Business columns (same order as the target table's columns)
    fin.user_id,
    fin.user_name,
    fin.order_count,
    fin.total_amount,

    -- Technical columns
    current_timestamp() AS etl_time,
    '${day_id}'         AS stat_date
FROM ${db_tmp_env}.tmp_xxx_final AS fin;
|
||||
|
||||
-- ============================================================================
|
||||
-- Scenario 6: MERGE INTO (upsert)
-- ============================================================================
-- Use for: incremental updates and corrections to historical data.
-- Rows are paired on (id, day_id): a matched target row has name, amount and
-- etl_time refreshed; an unmatched source row is inserted as a new row.

MERGE INTO ${db_eda_env}.target_table tgt
USING ${db_tmp_env}.tmp_xxx_source src
    ON  tgt.id = src.id
    AND tgt.day_id = src.day_id
WHEN MATCHED THEN
    UPDATE SET
        tgt.name     = src.name,
        tgt.amount   = src.amount,
        tgt.etl_time = current_timestamp()
WHEN NOT MATCHED THEN
    INSERT (id, day_id, name, amount, etl_time)
    VALUES (src.id, src.day_id, src.name, src.amount, current_timestamp());
|
||||
|
||||
-- ============================================================================
|
||||
-- 关键规则说明
|
||||
-- ============================================================================
|
||||
/*
|
||||
1. INSERT OVERWRITE vs INSERT INTO
|
||||
- INSERT OVERWRITE:覆盖写入(推荐)
|
||||
- INSERT INTO:追加写入(可能导致重复数据)
|
||||
|
||||
2. 分区表写入必须指定分区
|
||||
- 避免全表覆盖导致历史数据丢失
|
||||
- 格式:PARTITION (day_id = '${day_id}')
|
||||
|
||||
3. 字段顺序必须与目标表一致
|
||||
- 目标表字段顺序:业务字段 → 技术字段 → 分区字段
|
||||
- SELECT 字段顺序必须匹配
|
||||
|
||||
4. 技术字段补全
|
||||
- etl_time:数据写入时间
|
||||
- stat_date:统计日期(可选)
|
||||
- etl_remark:备注信息(可选)
|
||||
|
||||
5. MERGE INTO 注意事项
|
||||
- Spark 3.x+ 支持
|
||||
- 目标表必须支持事务(如 Paimon/Delta)
|
||||
- 关联字段在源表中必须唯一(避免同一目标行匹配多条源记录)
|
||||
*/
|
||||
Reference in New Issue
Block a user