Files
smart-data-dev-skill/one-skill/smart-data-developer/references/sql/templates/hive/insert-template.sql
2026-05-13 11:03:00 +08:00

142 lines
4.6 KiB
SQL
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- =====================================================================
-- @Name: HIVE-D-SQL-{表名}-INSERT
-- @Version: 1.0
-- @Desc: Hive 数据插入模板
-- @TargetDatabase: Hive
-- =====================================================================
-- ============================================================================
-- 场景1分区表覆盖写入最常用
-- ============================================================================
-- 适用:每日/每周/每月增量写入分区表
INSERT OVERWRITE TABLE db_name.target_table
PARTITION (day_id = '${day_id}')
SELECT
user_id,
user_name,
order_count,
total_amount,
current_timestamp() AS etl_time
FROM db_name.source_table
WHERE day_id = '${day_id}';
-- ============================================================================
-- 场景2动态分区写入
-- ============================================================================
-- 适用:数据中包含分区值,自动写入对应分区
-- 先启用动态分区
SET hive.exec.dynamic.partition = true;
SET hive.exec.dynamic.partition.mode = nonstrict;
INSERT OVERWRITE TABLE db_name.target_table
PARTITION (day_id, region) -- 动态分区字段
SELECT
user_id,
user_name,
order_count,
total_amount,
current_timestamp() AS etl_time,
day_id, -- 分区字段1数据中包含
region -- 分区字段2数据中包含
FROM db_name.source_table
WHERE day_id BETWEEN '${start_day}' AND '${end_day}';
-- ============================================================================
-- 场景3追加写入
-- ============================================================================
-- 适用:日志表、流水表(允许追加)
INSERT INTO TABLE db_name.target_table
SELECT
field1,
field2,
field3,
current_timestamp() AS etl_time
FROM db_name.source_table
WHERE day_id = '${day_id}';
-- ============================================================================
-- 场景4多分区插入Multi-Insert
-- ============================================================================
-- 适用:一次扫描,写入多个目标(提高效率)
FROM db_name.source_table
INSERT OVERWRITE TABLE db_name.target_summary
PARTITION (day_id = '${day_id}')
SELECT
department,
COUNT(*) AS record_count,
SUM(amount) AS total_amount
WHERE day_id = '${day_id}'
GROUP BY department
INSERT OVERWRITE TABLE db_name.target_detail
PARTITION (day_id = '${day_id}')
SELECT
id,
name,
amount,
department
WHERE day_id = '${day_id}'
AND amount > 1000;
-- ============================================================================
-- 场景5导出到文件
-- ============================================================================
INSERT OVERWRITE DIRECTORY '/output/data/export/'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
SELECT
id,
name,
amount,
day_id
FROM db_name.target_table
WHERE day_id = '${day_id}';
-- ============================================================================
-- 场景6CTASCreate Table As Select
-- ============================================================================
-- 从查询结果创建新表
CREATE TABLE db_name.new_table AS
SELECT
department,
COUNT(*) AS employee_count,
AVG(salary) AS avg_salary
FROM db_name.employees
WHERE day_id = '${day_id}'
GROUP BY department;
-- ============================================================================
-- 关键规则说明
-- ============================================================================
/*
1. INSERT OVERWRITE vs INSERT INTO
- INSERT OVERWRITE覆盖分区/表数据(推荐,幂等)
- INSERT INTO追加数据可能产生重复
2. 分区表写入必须指定分区
- 静态分区PARTITION (day_id = '${day_id}')
- 动态分区:需先 SET 配置PARTITION (day_id)
- 混合分区PARTITION (day_id = '2026-05-01', region)
3. 动态分区配置
SET hive.exec.dynamic.partition = true;
SET hive.exec.dynamic.partition.mode = nonstrict; -- 允许全动态
SET hive.exec.max.dynamic.partitions = 1000; -- 最大动态分区数
4. 字段顺序
- SELECT 字段顺序必须与目标表列定义一致
- 分区字段在 SELECT 最后(动态分区时)
5. 性能优化
- 多分区插入Multi-Insert一次扫描多次写入
- INSERT OVERWRITE 比 INSERT INTO 更安全(幂等性)
- 大数据量写入时注意 reducer 数量设置
*/