Add one-skill
This commit is contained in:
@@ -0,0 +1,209 @@
|
||||
-- =====================================================================
|
||||
-- @Name: DORIS-D-SQL-{表名}-CREATE
|
||||
-- @Version: 1.0
|
||||
-- @Desc: Apache Doris 建表模板(OLAP 多模型)
|
||||
-- @TargetDatabase: Apache Doris
|
||||
-- =====================================================================
|
||||
|
||||
-- ============================================================================
-- Scenario 1: Duplicate Key model (detail table)
-- ============================================================================
-- Use when : raw detail rows must be kept as loaded, with no pre-aggregation.
-- Behaviour: rows are stored sorted by the key columns; every column remains
--            available for querying and aggregation.

CREATE TABLE IF NOT EXISTS db_name.detail_table
(
    -- Key columns (sort order); listed first, in the same order as DUPLICATE KEY
    order_id      BIGINT          COMMENT '订单ID',
    order_date    DATE            COMMENT '订单日期',
    user_id       BIGINT          COMMENT '用户ID',

    -- Value columns
    user_name     VARCHAR(50)     COMMENT '用户姓名',
    product_id    BIGINT          COMMENT '商品ID',
    product_name  VARCHAR(200)    COMMENT '商品名称',
    quantity      INT             COMMENT '购买数量',
    unit_price    DECIMAL(18,2)   COMMENT '单价',
    total_amount  DECIMAL(18,2)   COMMENT '总金额',
    status        VARCHAR(20)     COMMENT '订单状态',
    create_time   DATETIME        COMMENT '创建时间'
)
DUPLICATE KEY(order_id, order_date, user_id)
COMMENT '订单明细表'
-- One range partition per month; each bound is an exclusive upper limit.
PARTITION BY RANGE(order_date)
(
    PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
    PARTITION p202602 VALUES LESS THAN ('2026-03-01'),
    PARTITION p202603 VALUES LESS THAN ('2026-04-01')
)
DISTRIBUTED BY HASH(order_id) BUCKETS 8
PROPERTIES
(
    'replication_num' = '3',
    'storage_format'  = 'V2'
);
|
||||
|
||||
-- ============================================================================
-- Scenario 2: Aggregate Key model (pre-aggregated table)
-- ============================================================================
-- Use when : data should be pre-aggregated; rows sharing the same key are
--            merged automatically.
-- Behaviour: every value column must declare an aggregation type
--            (SUM, REPLACE, MAX, MIN, HLL_UNION, BITMAP_UNION).

CREATE TABLE IF NOT EXISTS db_name.agg_table
(
    -- Key columns (aggregation dimensions)
    stat_date     DATE                    COMMENT '统计日期',
    department    VARCHAR(100)            COMMENT '部门名称',
    region        VARCHAR(100)            COMMENT '地区',

    -- Value columns: the keyword after the type is the aggregation applied
    -- when rows with an identical key are merged
    order_count   BIGINT         SUM      COMMENT '订单总数',
    total_amount  DECIMAL(18,2)  SUM      COMMENT '总金额',
    unique_users  BIGINT         REPLACE  COMMENT '去重用户数(预计算值)',
    max_amount    DECIMAL(18,2)  MAX      COMMENT '最大金额',
    last_update   DATETIME       REPLACE  COMMENT '最后更新时间'
)
AGGREGATE KEY(stat_date, department, region)
COMMENT '部门销售聚合表'
-- One range partition per month; each bound is an exclusive upper limit.
PARTITION BY RANGE(stat_date)
(
    PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
    PARTITION p202602 VALUES LESS THAN ('2026-03-01')
)
-- NOTE(review): department is presumably a low-cardinality column; confirm it
-- spreads rows evenly across the 8 buckets before reusing this template.
DISTRIBUTED BY HASH(department) BUCKETS 8
PROPERTIES
(
    'replication_num' = '3',
    'storage_format'  = 'V2'
);
|
||||
|
||||
-- ============================================================================
-- Scenario 3: Unique Key model (primary-key table)
-- ============================================================================
-- Use when : rows must be updated or de-duplicated by primary key.
-- Behaviour: among rows that share the same key, only the latest one is kept
--            (whole-row replacement).

CREATE TABLE IF NOT EXISTS db_name.unique_table (
    -- Key column (primary key; must be unique per logical row)
    user_id        BIGINT        COMMENT '用户ID',

    -- Value columns
    user_name      VARCHAR(50)   COMMENT '用户姓名',
    phone          VARCHAR(20)   COMMENT '手机号',
    email          VARCHAR(100)  COMMENT '邮箱',
    vip_level      INT           COMMENT 'VIP等级',
    register_date  DATE          COMMENT '注册日期',
    last_login     DATETIME      COMMENT '最后登录时间',
    status         VARCHAR(10)   COMMENT '状态'
)
UNIQUE KEY(user_id)
COMMENT '用户信息表(按主键更新)'
-- Not partitioned: the whole table is hash-distributed on the primary key.
DISTRIBUTED BY HASH(user_id) BUCKETS 16
PROPERTIES (
    'replication_num' = '3',
    -- Selects the merge-on-write implementation of the Unique Key model.
    -- The property name must be spelled exactly like this: Doris rejects
    -- CREATE TABLE statements that carry unrecognised table properties.
    'enable_unique_key_merge_on_write' = 'true'
);
|
||||
|
||||
-- ============================================================================
-- Scenario 4: table with dynamic-partition properties
-- ============================================================================
-- Use when : daily partitions should be created and managed automatically.

CREATE TABLE IF NOT EXISTS db_name.auto_partition_table
(
    -- Key columns (aggregation dimensions)
    stat_date     DATE                COMMENT '统计日期',
    department    VARCHAR(100)        COMMENT '部门',

    -- Value columns, summed when rows with the same key are merged
    metric_value  DECIMAL(18,2)  SUM  COMMENT '指标值',
    record_count  BIGINT         SUM  COMMENT '记录数'
)
AGGREGATE KEY(stat_date, department)
COMMENT '自动分区示例表'
-- No partitions are declared up front; the dynamic_partition.* properties
-- below drive partition creation.
PARTITION BY RANGE(stat_date) ()
DISTRIBUTED BY HASH(department) BUCKETS 8
PROPERTIES
(
    'replication_num'             = '3',
    'dynamic_partition.enable'    = 'true',
    'dynamic_partition.time_unit' = 'DAY',
    -- keep 30 days of history
    'dynamic_partition.start'     = '-30',
    -- pre-create partitions 3 days ahead
    'dynamic_partition.end'       = '3',
    'dynamic_partition.prefix'    = 'p',
    'dynamic_partition.buckets'   = '8'
);
|
||||
|
||||
-- ============================================================================
-- Scenario 5: multiple partitions + multiple buckets
-- ============================================================================
-- Range-partitioned by month on stat_date and hash-bucketed on user_id.

CREATE TABLE IF NOT EXISTS db_name.multi_partition_table
(
    -- Key columns (aggregation dimensions)
    stat_date  DATE                COMMENT '统计日期',
    region     VARCHAR(50)         COMMENT '地区',
    city       VARCHAR(50)         COMMENT '城市',
    user_id    BIGINT              COMMENT '用户ID',

    -- Value column, summed when rows with the same key are merged
    amount     DECIMAL(18,2)  SUM  COMMENT '金额'
)
AGGREGATE KEY(stat_date, region, city, user_id)
COMMENT '多维度分区示例'
-- One range partition per month; each bound is an exclusive upper limit.
PARTITION BY RANGE(stat_date)
(
    PARTITION p202601 VALUES LESS THAN ('2026-02-01'),
    PARTITION p202602 VALUES LESS THAN ('2026-03-01')
)
DISTRIBUTED BY HASH(user_id) BUCKETS 32
PROPERTIES
(
    'replication_num' = '3',
    -- NOTE(review): 'in_memory' appears to be deprecated in recent Doris
    -- releases; confirm against the target version before copying.
    'in_memory'       = 'false',
    'storage_format'  = 'V2',
    'compression'     = 'LZ4'
);
|
||||
|
||||
-- ============================================================================
|
||||
-- 字段类型速查
|
||||
-- ============================================================================
|
||||
/*
|
||||
| 类型 | 说明 | 适用场景 |
|
||||
|---------------|----------------|------------------------|
|
||||
| BOOLEAN | 布尔 | 状态标志 |
|
||||
| TINYINT | 1字节整数 | 小范围枚举 |
|
||||
| SMALLINT | 2字节整数 | 小范围数值 |
|
||||
| INT | 4字节整数 | 数量、等级 |
|
||||
| BIGINT | 8字节整数 | ID、计数、大数值 |
|
||||
| LARGEINT | 16字节整数 | 超大数值 |
|
||||
| FLOAT | 4字节浮点 | 近似计算 |
|
||||
| DOUBLE | 8字节浮点 | 科学计算 |
|
||||
| DECIMAL(p,s) | 定点数 | 金额、精确数值 |
|
||||
| DATE | 日期 | 日期字段(无时间) |
|
||||
| DATETIME | 日期时间 | 时间戳(精确到秒) |
|
||||
| CHAR(n) | 定长字符串 | 固定长度编码 |
|
||||
| VARCHAR(n) | 变长字符串 | 名称、描述 |
|
||||
| STRING | 变长字符串 | 大文本(无长度限制) |
|
||||
| BITMAP | 位图 | 精确去重(仅聚合模型) |
|
||||
| HLL | HyperLogLog | 近似去重(仅聚合模型) |
|
||||
| JSON | JSON | JSON数据存储 |
|
||||
*/
|
||||
|
||||
-- ============================================================================
|
||||
-- 建表规范说明
|
||||
-- ============================================================================
|
||||
/*
|
||||
1. 模型选择
|
||||
- Duplicate Key:保留原始明细,不做预聚合
|
||||
- Aggregate Key:预聚合,相同 Key 的 Value 自动合并
|
||||
- Unique Key:按主键去重,保留最新数据
|
||||
|
||||
2. 分区设计
|
||||
- 按时间字段 RANGE 分区(最常用)
|
||||
- 支持动态分区自动管理
|
||||
- 单分区数据量建议 1GB~10GB
|
||||
|
||||
3. 分桶设计
|
||||
- 使用高基数列做 HASH 分桶
|
||||
- 分桶数 = BE节点数 × CPU核数(参考值)
|
||||
- 单桶数据量建议 100MB~1GB
|
||||
|
||||
4. 副本数
|
||||
- 生产环境建议 3 副本
|
||||
- 测试环境可设 1 副本
|
||||
|
||||
5. Key 列选择
|
||||
- Duplicate Key:高频过滤/排序字段
|
||||
- Aggregate Key:聚合维度字段
|
||||
- Unique Key:业务主键
|
||||
|
||||
6. 注意事项
|
||||
- Key 列必须在 Value 列之前
|
||||
- 分区列必须是 Key 列
|
||||
- 分桶列:Aggregate / Unique 模型必须是 Key 列;Duplicate 模型可以是 Key 列或 Value 列
|
||||
- BITMAP/HLL 仅用于 Aggregate 模型的 Value 列
|
||||
*/
|
||||
Reference in New Issue
Block a user