-- =====================================================================
-- @Name: DORIS-D-SQL-{表名}-ETL
-- @Version: 2.0
-- @Desc: Apache Doris ETL data-processing template (chained temporary tables)
-- @TargetDatabase: Apache Doris
-- @Note: Coding standard: CTEs are forbidden; every step is materialized
--        as a temporary table, and each table is DROPped before it is
--        CREATEd. `{表名}` in @Name is the table-name placeholder.
-- =====================================================================

-- ============================================================================
-- Step01: Base cleansing and filtering
-- ============================================================================
-- Purpose: read one day of rows from the source table and apply every row
--          filter up front so later steps work on the reduced set.
-- Input:   {源表名}  (source-table placeholder; db_name.source_table here)
-- Output:  ${db_tmp_env}.tmp_xxx_01

DROP TABLE IF EXISTS ${db_tmp_env}.tmp_xxx_01;

CREATE TABLE ${db_tmp_env}.tmp_xxx_01 AS
SELECT
    order_id,
    user_id,
    dept_id,
    category_id,    -- join key required by Step02 (dim_category lookup)
    total_amount,
    status,
    order_date
FROM db_name.source_table
WHERE order_date = '${day_id}'
  AND status IN ('completed', 'shipped')    -- business filter
  AND total_amount > 0                      -- data-quality filter
  AND user_id IS NOT NULL;                  -- drop rows without a user

-- ============================================================================
-- Step02: Multi-table join and dimension enrichment
-- ============================================================================
-- Purpose: look up descriptive attributes from the dimension tables.
-- Input:   ${db_tmp_env}.tmp_xxx_01, dim_department, dim_category
-- Output:  ${db_tmp_env}.tmp_xxx_02
-- NOTE(review): the dim_category join reads category_id from tmp_xxx_01;
--               confirm Step01 projects that column.

DROP TABLE IF EXISTS ${db_tmp_env}.tmp_xxx_02;

CREATE TABLE ${db_tmp_env}.tmp_xxx_02 AS
SELECT
    ord.order_id,
    ord.user_id,
    ord.total_amount,
    ord.status,
    dept.dept_name,        -- enrichment: department name
    cat.category_name,     -- enrichment: category name
    ord.order_date
FROM ${db_tmp_env}.tmp_xxx_01 AS ord
-- LEFT JOINs keep every Step01 row; unmatched lookups yield NULL names.
LEFT JOIN db_name.dim_department AS dept
    ON dept.dept_id = ord.dept_id
LEFT JOIN db_name.dim_category AS cat
    ON cat.category_id = ord.category_id;

-- ============================================================================
-- Step03: Aggregation and metric generation
-- ============================================================================
-- Purpose: aggregate the enriched rows per (order_date, dept_name,
--          category_name) and compute the summary metrics.
-- Input:   ${db_tmp_env}.tmp_xxx_02
-- Output:  ${db_tmp_env}.tmp_xxx_03

DROP TABLE IF EXISTS ${db_tmp_env}.tmp_xxx_03;

CREATE TABLE ${db_tmp_env}.tmp_xxx_03 AS
SELECT
    enr.order_date,
    enr.dept_name,
    enr.category_name,
    COUNT(*)                    AS record_count,    -- number of rows in the group
    COUNT(DISTINCT enr.user_id) AS unique_users,    -- distinct users in the group
    SUM(enr.total_amount)       AS total_amount,    -- summed amount
    AVG(enr.total_amount)       AS avg_amount,      -- mean amount
    MAX(enr.total_amount)       AS max_amount       -- largest single amount
FROM ${db_tmp_env}.tmp_xxx_02 AS enr
GROUP BY
    enr.order_date,
    enr.dept_name,
    enr.category_name;

-- ============================================================================
-- Step04: Final output written to the target table
-- ============================================================================
-- Purpose: append the technical columns and load the aggregated result.
-- Input:   ${db_tmp_env}.tmp_xxx_03
-- Output:  ${db_eda_env}.target_table
--
-- The explicit column list makes the load independent of the target table's
-- physical column order. The names are taken from the SELECT aliases below.
-- NOTE(review): confirm the names against the target_table DDL.
-- NOTE(review): a plain INSERT INTO appends; re-running the same ${day_id}
--               may duplicate or double-count rows depending on the target's
--               key model. Confirm the re-run strategy.

INSERT INTO ${db_eda_env}.target_table (
    dept_name,
    category_name,
    record_count,
    unique_users,
    total_amount,
    avg_amount,
    max_amount,
    etl_time,
    stat_date
)
SELECT
    -- business columns
    dept_name,
    category_name,
    record_count,
    unique_users,
    total_amount,
    avg_amount,
    max_amount,

    -- technical columns
    NOW()       AS etl_time,     -- time this row was produced
    '${day_id}' AS stat_date     -- statistics date, taken from the run parameter
                                 -- (order_date from Step03 is not selected)
FROM ${db_tmp_env}.tmp_xxx_03;

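-- ============================================================================
-- Key rules
-- ============================================================================
/*
1. CTEs (WITH clauses) are forbidden
   - Every step must be materialized as a temporary table.
   - Why: easier debugging, runs can resume from any step, one uniform
     coding standard.

2. DROP before CREATE
   - Every temporary table must be preceded by DROP TABLE IF EXISTS.
   - Why: prevents a failure when the table already exists.

3. Writing to Doris
   - Use INSERT INTO by default.
   - Aggregate Key tables: rows with the same key are merged automatically.
   - Unique Key tables: rows are de-duplicated by key automatically,
     keeping the latest data.
   - Doris 2.0+ also supports INSERT OVERWRITE.

4. Filter early
   - Apply all filters at the earliest possible step.
   - This reduces the volume of intermediate data.

5. Temporary-table naming
   - Format:  tmp_{business_abbreviation}_{step_number}
   - Example: tmp_order_stats_01, tmp_order_stats_02

6. Doris-specific notes
   - LEFT SEMI JOIN / LEFT ANTI JOIN are treated as unsupported by this
     standard.
     NOTE(review): current Apache Doris documentation lists both as
     supported join types; confirm against the deployed version.
   - Date functions follow MySQL style: DATE_FORMAT,
     DATE_ADD(date, INTERVAL n unit).
   - collect_list / collect_set are treated as unsupported by this
     standard; use GROUP_CONCAT instead.
     NOTE(review): Apache Doris 1.2+ documents COLLECT_LIST and
     COLLECT_SET; confirm against the deployed version.
*/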