Add one-skill
This commit is contained in:
@@ -0,0 +1,160 @@
|
||||
-- =====================================================================
|
||||
-- @Name: KUDU-D-SQL-{表名}-INSERT
|
||||
-- @Version: 1.0
|
||||
-- @Desc: Kudu (via Impala) 数据插入模板
|
||||
-- @TargetDatabase: Apache Kudu (via Impala)
|
||||
-- @说明: Kudu 表不支持 INSERT OVERWRITE,支持 INSERT INTO 和 UPSERT
|
||||
-- =====================================================================
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景1:INSERT INTO(追加写入)
|
||||
-- ============================================================================
|
||||
-- 适用:向 Kudu 表追加新数据
|
||||
|
||||
INSERT INTO db_name.kudu_table
|
||||
SELECT
|
||||
id,
|
||||
stat_date,
|
||||
name,
|
||||
department,
|
||||
amount,
|
||||
current_timestamp() AS etl_time
|
||||
FROM db_name.source_table
|
||||
WHERE stat_date = '${day_id}';
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景2:UPSERT INTO(更新插入,Kudu 特有)
|
||||
-- ============================================================================
|
||||
-- 适用:如果主键存在则更新,不存在则插入
|
||||
-- 这是 Kudu 的核心优势,其他 Hive/Spark 表不支持
|
||||
|
||||
-- 基础 UPSERT
|
||||
UPSERT INTO db_name.kudu_table
|
||||
SELECT
|
||||
id,
|
||||
stat_date,
|
||||
name,
|
||||
department,
|
||||
amount,
|
||||
current_timestamp() AS etl_time
|
||||
FROM db_name.staging_table
|
||||
WHERE stat_date = '${day_id}';
|
||||
|
||||
-- 聚合后 UPSERT(增量更新指标表)
|
||||
UPSERT INTO db_name.kudu_metrics
|
||||
SELECT
|
||||
department,
|
||||
'${day_id}' AS stat_date,
|
||||
COUNT(*) AS order_count,
|
||||
SUM(amount) AS total_amount,
|
||||
current_timestamp() AS etl_time
|
||||
FROM db_name.incremental_orders
|
||||
WHERE stat_date = '${day_id}'
|
||||
GROUP BY department;
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景3:UPDATE(Kudu 表特有)
|
||||
-- ============================================================================
|
||||
-- 适用:修改已有数据
|
||||
-- 注意:主键列不能被 UPDATE
|
||||
|
||||
-- 单条更新
|
||||
UPDATE db_name.kudu_table
|
||||
SET status = 'processed',
|
||||
updated_at = current_timestamp()
|
||||
WHERE id = 12345;
|
||||
|
||||
-- 批量条件更新
|
||||
UPDATE db_name.kudu_table
|
||||
SET status = 'expired',
|
||||
updated_at = current_timestamp()
|
||||
WHERE stat_date < '2026-01-01'
|
||||
AND status = 'active';
|
||||
|
||||
-- 关联更新(用子查询)
|
||||
UPDATE db_name.kudu_table t
|
||||
SET t.department = d.new_dept_name
|
||||
FROM db_name.dept_mapping d
|
||||
WHERE t.department = d.old_dept_name;
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景4:DELETE(Kudu 表特有)
|
||||
-- ============================================================================
|
||||
-- 适用:删除数据
|
||||
-- 注意:Kudu 的 DELETE 比 Hive/Spark 方便得多
|
||||
|
||||
-- 条件删除
|
||||
DELETE FROM db_name.kudu_table
|
||||
WHERE stat_date < '2026-01-01';
|
||||
|
||||
-- 按主键删除
|
||||
DELETE FROM db_name.kudu_table
|
||||
WHERE id IN (1001, 1002, 1003);
|
||||
|
||||
-- 关联删除(用子查询)
|
||||
DELETE FROM db_name.kudu_table
|
||||
WHERE user_id IN (
|
||||
SELECT user_id FROM db_name.blacklist
|
||||
);
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景5:从查询结果写入
|
||||
-- ============================================================================
|
||||
|
||||
-- 简单 ETL:清洗后写入
|
||||
INSERT INTO db_name.kudu_target
|
||||
SELECT
|
||||
id,
|
||||
'${day_id}' AS stat_date,
|
||||
name,
|
||||
COALESCE(department, '未知') AS department,
|
||||
amount,
|
||||
current_timestamp() AS etl_time
|
||||
FROM db_name.raw_data
|
||||
WHERE stat_date = '${day_id}'
|
||||
AND id IS NOT NULL
|
||||
AND amount > 0;
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景6:批量 VALUES 写入
|
||||
-- ============================================================================
|
||||
|
||||
INSERT INTO db_name.kudu_table (id, stat_date, name, amount)
|
||||
VALUES
|
||||
(1, '2026-05-01', '测试1', 100.00),
|
||||
(2, '2026-05-01', '测试2', 200.00),
|
||||
(3, '2026-05-01', '测试3', 300.00);
|
||||
|
||||
-- ============================================================================
|
||||
-- 关键规则说明
|
||||
-- ============================================================================
|
||||
/*
|
||||
1. Kudu 表与 Hive/Spark 表的核心区别
|
||||
- 支持 INSERT INTO:✅
|
||||
- 支持 INSERT OVERWRITE:❌(不支持!)
|
||||
- 支持 UPSERT:✅(Kudu 独有,核心能力)
|
||||
- 支持 UPDATE:✅(Kudu 独有)
|
||||
- 支持 DELETE:✅(Kudu 独有)
|
||||
|
||||
2. UPSERT 是 Kudu 的核心优势
|
||||
- 主键存在 → 更新(整行替换)
|
||||
- 主键不存在 → 插入新行
|
||||
- 适用于:增量更新、数据修正、指标回填
|
||||
|
||||
3. INSERT INTO 注意事项
|
||||
- 如果主键冲突会报错(不会自动去重)
|
||||
- 需要确保写入数据的主键不重复,或使用 UPSERT
|
||||
|
||||
4. UPDATE 限制
|
||||
- 主键列不能被 UPDATE
|
||||
- WHERE 条件建议包含主键或分区列(性能)
|
||||
|
||||
5. DELETE 建议
|
||||
- 删除大量数据时按分区范围删除
|
||||
- 定期清理历史数据
|
||||
|
||||
6. 性能建议
|
||||
- 批量写入优于逐条写入
|
||||
- UPSERT 比 DELETE + INSERT 更高效
|
||||
- 利用主键做点查,避免全表扫描
|
||||
*/
|
||||
Reference in New Issue
Block a user