-- =====================================================================
-- @SparkSqlName: PAIMONA-D-SQL-{表名}-INSERT
-- @Version: 1.0
-- @Desc: 数据插入模板(INSERT OVERWRITE)
-- @TargetTables: ${db_eda_env}.{目标表名}
-- @SourceTables: {源表列表}
-- @TargetDatabase: Paimon
-- @SourceDatabase: Paimon
-- =====================================================================

-- ============================================================================
-- Scenario 1: overwrite one partition of a partitioned table
-- ============================================================================
-- Use for: daily / weekly / monthly incremental loads into a partitioned table.
-- The static PARTITION clause pins the write to the ${day_id} partition, and
-- the source rows are restricted to that same day.

INSERT OVERWRITE TABLE ${db_eda_env}.target_table
    PARTITION (day_id = '${day_id}')
SELECT
    src.field1,
    src.field2,
    src.field3,
    current_timestamp() AS etl_time   -- load timestamp (technical column)
FROM source_table AS src
WHERE src.day_id = '${day_id}';

-- ============================================================================
-- Scenario 2: dynamic-partition overwrite
-- ============================================================================
-- Use for: tables with several partition columns whose values come from the
-- data itself.
--
-- Column order matters: with a dynamic PARTITION clause the partition values
-- are taken from the LAST columns of the SELECT list, in the order they are
-- named in PARTITION (...). Regular columns (business fields, then technical
-- fields such as etl_time) must therefore come first, and day_id / region
-- must close the list.
--
-- NOTE(review): which existing partitions get replaced depends on the
-- engine's partition-overwrite mode (static vs. dynamic). Confirm the job
-- runs with the intended setting before using this on a table that holds
-- history outside [${start_day}, ${end_day}].

INSERT OVERWRITE TABLE ${db_eda_env}.target_table
    PARTITION (day_id, region)          -- dynamic partition columns
SELECT
    src.field1,
    src.field2,
    src.field3,
    current_timestamp() AS etl_time,    -- technical column, before partitions
    src.day_id,                         -- partition column 1 (from the data)
    src.region                          -- partition column 2 (from the data)
FROM source_table AS src
WHERE src.day_id BETWEEN '${start_day}' AND '${end_day}';

-- ============================================================================
-- Scenario 3: overwrite the whole table
-- ============================================================================
-- Use for: full refreshes and initial data loads.
-- No PARTITION clause and no WHERE filter: every row of source_table is
-- selected and written over the target table.

INSERT OVERWRITE TABLE ${db_eda_env}.target_table
SELECT
    src.field1,
    src.field2,
    src.field3,
    current_timestamp() AS etl_time   -- load timestamp (technical column)
FROM source_table AS src;

-- ============================================================================
-- Scenario 4: append (use with care)
-- ============================================================================
-- Use for: log / transaction-style tables (unpartitioned, or where duplicate
-- rows are acceptable).
-- INSERT INTO adds rows without removing existing ones, so re-running the
-- statement for the same ${day_id} appends the same rows again.

INSERT INTO TABLE ${db_eda_env}.target_table
SELECT
    src.field1,
    src.field2,
    src.field3,
    current_timestamp() AS etl_time   -- load timestamp (technical column)
FROM source_table AS src
WHERE src.day_id = '${day_id}';

-- ============================================================================
-- Scenario 5: load the target table from a temporary table
-- ============================================================================
-- Use for: the final step of an ETL flow.
-- Every row of the temporary table is written into the ${day_id} partition
-- of the target table; the SELECT applies no filter of its own.

INSERT OVERWRITE TABLE ${db_eda_env}.target_table
    PARTITION (day_id = '${day_id}')
SELECT
    -- Business columns (same order as in the target table)
    tmp.user_id,
    tmp.user_name,
    tmp.order_count,
    tmp.total_amount,

    -- Technical columns
    current_timestamp() AS etl_time,
    '${day_id}'         AS stat_date
FROM ${db_tmp_env}.tmp_xxx_final AS tmp;

-- ============================================================================
-- Scenario 6: MERGE INTO (upsert)
-- ============================================================================
-- Use for: incremental updates and corrections to historical data.
-- Rows are matched on (id, day_id). A matched target row gets its name,
-- amount and etl_time refreshed; an unmatched source row is inserted.

MERGE INTO ${db_eda_env}.target_table AS tgt
USING ${db_tmp_env}.tmp_xxx_source AS src
    ON  tgt.id     = src.id
    AND tgt.day_id = src.day_id
WHEN MATCHED THEN
    UPDATE SET
        tgt.name     = src.name,
        tgt.amount   = src.amount,
        tgt.etl_time = current_timestamp()
WHEN NOT MATCHED THEN
    INSERT (id, day_id, name, amount, etl_time)
    VALUES (src.id, src.day_id, src.name, src.amount, current_timestamp());

-- ============================================================================
-- 关键规则说明
-- ============================================================================
/*
1. INSERT OVERWRITE vs INSERT INTO
   - INSERT OVERWRITE:覆盖写入(推荐)
   - INSERT INTO:追加写入(可能导致重复数据)

2. 分区表写入必须指定分区
   - 避免全表覆盖导致历史数据丢失
   - 格式:PARTITION (day_id = '${day_id}')

3. 字段顺序必须与目标表一致
   - 目标表字段顺序:业务字段 → 技术字段 → 分区字段
   - SELECT 字段顺序必须匹配

4. 技术字段补全
   - etl_time:数据写入时间
   - stat_date:统计日期(可选)
   - etl_remark:备注信息(可选)

5. MERGE INTO 注意事项
   - Spark 3.x+ 支持
   - 目标表必须支持事务(如 Paimon/Delta)
   - 关联字段必须唯一(避免多条匹配)
*/