Add one-skill

This commit is contained in:
Xin Wang
2026-05-13 11:03:00 +08:00
parent a4c8b29176
commit f9e36ef92d
34 changed files with 7656 additions and 0 deletions

View File

@@ -0,0 +1,160 @@
-- =====================================================================
-- @Name: KUDU-D-SQL-{表名}-INSERT
-- @Version: 1.0
-- @Desc: Kudu (via Impala) 数据插入模板
-- @TargetDatabase: Apache Kudu (via Impala)
-- @说明: Kudu 表不支持 INSERT OVERWRITE支持 INSERT INTO 和 UPSERT
-- =====================================================================
-- ============================================================================
-- 场景1INSERT INTO追加写入
-- ============================================================================
-- 适用:向 Kudu 表追加新数据
INSERT INTO db_name.kudu_table
SELECT
id,
stat_date,
name,
department,
amount,
current_timestamp() AS etl_time
FROM db_name.source_table
WHERE stat_date = '${day_id}';
-- ============================================================================
-- 场景2UPSERT INTO更新插入Kudu 特有)
-- ============================================================================
-- 适用:如果主键存在则更新,不存在则插入
-- 这是 Kudu 的核心优势,其他 Hive/Spark 表不支持
-- 基础 UPSERT
UPSERT INTO db_name.kudu_table
SELECT
id,
stat_date,
name,
department,
amount,
current_timestamp() AS etl_time
FROM db_name.staging_table
WHERE stat_date = '${day_id}';
-- 聚合后 UPSERT增量更新指标表
UPSERT INTO db_name.kudu_metrics
SELECT
department,
'${day_id}' AS stat_date,
COUNT(*) AS order_count,
SUM(amount) AS total_amount,
current_timestamp() AS etl_time
FROM db_name.incremental_orders
WHERE stat_date = '${day_id}'
GROUP BY department;
-- ============================================================================
-- 场景3UPDATEKudu 表特有)
-- ============================================================================
-- 适用:修改已有数据
-- 注意:主键列不能被 UPDATE
-- 单条更新
UPDATE db_name.kudu_table
SET status = 'processed',
updated_at = current_timestamp()
WHERE id = 12345;
-- 批量条件更新
UPDATE db_name.kudu_table
SET status = 'expired',
updated_at = current_timestamp()
WHERE stat_date < '2026-01-01'
AND status = 'active';
-- 关联更新(用子查询)
UPDATE db_name.kudu_table t
SET t.department = d.new_dept_name
FROM db_name.dept_mapping d
WHERE t.department = d.old_dept_name;
-- ============================================================================
-- 场景4DELETEKudu 表特有)
-- ============================================================================
-- 适用:删除数据
-- 注意Kudu 的 DELETE 比 Hive/Spark 方便得多
-- 条件删除
DELETE FROM db_name.kudu_table
WHERE stat_date < '2026-01-01';
-- 按主键删除
DELETE FROM db_name.kudu_table
WHERE id IN (1001, 1002, 1003);
-- 关联删除(用子查询)
DELETE FROM db_name.kudu_table
WHERE user_id IN (
SELECT user_id FROM db_name.blacklist
);
-- ============================================================================
-- 场景5从查询结果写入
-- ============================================================================
-- 简单 ETL清洗后写入
INSERT INTO db_name.kudu_target
SELECT
id,
'${day_id}' AS stat_date,
name,
COALESCE(department, '未知') AS department,
amount,
current_timestamp() AS etl_time
FROM db_name.raw_data
WHERE stat_date = '${day_id}'
AND id IS NOT NULL
AND amount > 0;
-- ============================================================================
-- 场景6批量 VALUES 写入
-- ============================================================================
INSERT INTO db_name.kudu_table (id, stat_date, name, amount)
VALUES
(1, '2026-05-01', '测试1', 100.00),
(2, '2026-05-01', '测试2', 200.00),
(3, '2026-05-01', '测试3', 300.00);
-- ============================================================================
-- 关键规则说明
-- ============================================================================
/*
1. Kudu 表与 Hive/Spark 表的核心区别
- 支持 INSERT INTO
- 支持 INSERT OVERWRITE不支持
- 支持 UPSERTKudu 独有,核心能力)
- 支持 UPDATEKudu 独有)
- 支持 DELETEKudu 独有)
2. UPSERT 是 Kudu 的核心优势
- 主键存在 → 更新(整行替换)
- 主键不存在 → 插入新行
- 适用于:增量更新、数据修正、指标回填
3. INSERT INTO 注意事项
- 如果主键冲突会报错(不会自动去重)
- 需要确保写入数据的主键不重复,或使用 UPSERT
4. UPDATE 限制
- 主键列不能被 UPDATE
- WHERE 条件建议包含主键或分区列(性能)
5. DELETE 建议
- 删除大量数据时按分区范围删除
- 定期清理历史数据
6. 性能建议
- 批量写入优于逐条写入
- UPSERT 比 DELETE + INSERT 更高效
- 利用主键做点查,避免全表扫描
*/