Add one-skill
This commit is contained in:
@@ -0,0 +1,211 @@
|
||||
-- =====================================================================
|
||||
-- @Name: KUDU-D-SQL-{表名}-CREATE
|
||||
-- @Version: 1.0
|
||||
-- @Desc: Kudu (via Impala) 建表模板
|
||||
-- @TargetDatabase: Apache Kudu (via Impala)
|
||||
-- @说明: Kudu 通过 Impala 访问,使用 Impala DDL 操作 Kudu 表
|
||||
-- =====================================================================
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景1:基础表创建(Hash 分区)
|
||||
-- ============================================================================
|
||||
-- 适用:按主键 Hash 分布数据,写入和点查性能好
|
||||
|
||||
-- Basic Kudu table: rows are distributed into 8 partitions by a hash of the primary key.
CREATE TABLE IF NOT EXISTS db_name.kudu_basic (
    -- Primary key column: listed first and NOT NULL (every Kudu table needs a primary key).
    id          BIGINT         NOT NULL  COMMENT '主键ID',

    -- Business columns (nullable).
    name        STRING                   COMMENT '名称',
    category    STRING                   COMMENT '类别',
    amount      DECIMAL(18,2)            COMMENT '金额',
    status      STRING                   COMMENT '状态',
    created_at  TIMESTAMP                COMMENT '创建时间',
    updated_at  TIMESTAMP                COMMENT '更新时间',

    -- Impala only accepts the PRIMARY KEY clause inside the column list;
    -- placed after the closing parenthesis it is a syntax error.
    PRIMARY KEY (id)
)
PARTITION BY HASH (id) PARTITIONS 8
STORED AS KUDU
TBLPROPERTIES (
    'kudu.num_tablet_replicas' = '3'   -- three replicas per tablet
);
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景2:Hash + Range 组合分区
|
||||
-- ============================================================================
|
||||
-- 适用:按时间范围 + Hash 组合,兼顾范围查询和写入性能
|
||||
|
||||
-- Hash + range partitioned Kudu table: 4 hash buckets on id, combined with
-- monthly ranges on stat_date.
CREATE TABLE IF NOT EXISTS db_name.kudu_range_hash (
    -- Primary key columns (listed first; they must include every partition column).
    id            BIGINT         NOT NULL                  COMMENT '主键ID',
    stat_date     STRING         NOT NULL                  COMMENT '统计日期 yyyy-MM-dd',

    -- Business columns.
    -- Kudu compression is a per-column attribute, so LZ4 is requested on each
    -- value column here instead of through a table property.
    department    STRING                  COMPRESSION LZ4  COMMENT '部门',
    metric_name   STRING                  COMPRESSION LZ4  COMMENT '指标名称',
    metric_value  DECIMAL(18,2)           COMPRESSION LZ4  COMMENT '指标值',
    etl_time      TIMESTAMP               COMPRESSION LZ4  COMMENT '加工时间',

    -- Impala only accepts the PRIMARY KEY clause inside the column list;
    -- placed after the closing parenthesis it is a syntax error.
    PRIMARY KEY (id, stat_date)
)
PARTITION BY
    HASH (id) PARTITIONS 4,
    -- Half-open monthly ranges covering 2026-01 through 2026-05.
    -- stat_date is a 'yyyy-MM-dd' string, so the bounds compare lexicographically.
    RANGE (stat_date) (
        PARTITION '2026-01-01' <= VALUES < '2026-02-01',
        PARTITION '2026-02-01' <= VALUES < '2026-03-01',
        PARTITION '2026-03-01' <= VALUES < '2026-04-01',
        PARTITION '2026-04-01' <= VALUES < '2026-05-01',
        PARTITION '2026-05-01' <= VALUES < '2026-06-01'
    )
STORED AS KUDU
TBLPROPERTIES (
    -- 'kudu.compression' was removed: it is not a table property that Impala
    -- recognises for Kudu tables, so it had no effect on storage.
    'kudu.num_tablet_replicas' = '3'   -- three replicas per tablet
);
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景3:多列主键
|
||||
-- ============================================================================
|
||||
|
||||
-- Kudu table with a three-column composite primary key, hash partitioned on
-- user_id (8 buckets) and range partitioned by month on order_date.
CREATE TABLE IF NOT EXISTS db_name.kudu_composite_pk (
    -- Primary key columns (listed first, all NOT NULL).
    user_id       BIGINT         NOT NULL  COMMENT '用户ID',
    order_date    STRING         NOT NULL  COMMENT '订单日期',
    order_seq     INT            NOT NULL  COMMENT '当日订单序号',

    -- Business columns (nullable).
    user_name     STRING                   COMMENT '用户姓名',
    product_name  STRING                   COMMENT '商品名称',
    quantity      INT                      COMMENT '数量',
    total_amount  DECIMAL(18,2)            COMMENT '总金额',
    status        STRING                   COMMENT '状态',
    create_time   TIMESTAMP                COMMENT '创建时间',

    -- Impala only accepts the PRIMARY KEY clause inside the column list;
    -- placed after the closing parenthesis it is a syntax error.
    PRIMARY KEY (user_id, order_date, order_seq)
)
PARTITION BY
    HASH (user_id) PARTITIONS 8,
    -- Half-open monthly ranges covering 2026-01 through 2026-03.
    RANGE (order_date) (
        PARTITION '2026-01-01' <= VALUES < '2026-02-01',
        PARTITION '2026-02-01' <= VALUES < '2026-03-01',
        PARTITION '2026-03-01' <= VALUES < '2026-04-01'
    )
STORED AS KUDU
TBLPROPERTIES (
    'kudu.num_tablet_replicas' = '3'   -- three replicas per tablet
);
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景4:纯 Range 分区
|
||||
-- ============================================================================
|
||||
-- 适用:按时间顺序写入,范围查询多
|
||||
|
||||
-- Range-only partitioned Kudu table: one partition per calendar quarter of 2026.
CREATE TABLE IF NOT EXISTS db_name.kudu_range_only (
    -- Primary key columns (listed first, all NOT NULL).
    id            BIGINT         NOT NULL  COMMENT '主键ID',
    stat_date     STRING         NOT NULL  COMMENT '统计日期',

    -- Value columns (nullable).
    metric_value  DECIMAL(18,2)            COMMENT '指标值',
    dimension     STRING                   COMMENT '维度',
    etl_time      TIMESTAMP                COMMENT '加工时间',

    -- Impala only accepts the PRIMARY KEY clause inside the column list;
    -- placed after the closing parenthesis it is a syntax error.
    PRIMARY KEY (id, stat_date)
)
-- Half-open quarterly ranges on stat_date covering all of 2026.
PARTITION BY RANGE (stat_date) (
    PARTITION '2026-01-01' <= VALUES < '2026-04-01',
    PARTITION '2026-04-01' <= VALUES < '2026-07-01',
    PARTITION '2026-07-01' <= VALUES < '2026-10-01',
    PARTITION '2026-10-01' <= VALUES < '2027-01-01'
)
STORED AS KUDU
TBLPROPERTIES (
    'kudu.num_tablet_replicas' = '3'   -- three replicas per tablet
);
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景5:外部表映射已有 Kudu 表
|
||||
-- ============================================================================
|
||||
|
||||
-- External table that maps an already-existing Kudu table into Impala.
-- No column list is given in this statement.
CREATE EXTERNAL TABLE IF NOT EXISTS db_name.kudu_external
    STORED AS KUDU
    TBLPROPERTIES (
        -- Comma-separated host:port list of the Kudu masters.
        'kudu.master_addresses' = 'kudu-master-1:7051,kudu-master-2:7051,kudu-master-3:7051',
        -- Name of the underlying table as registered in Kudu.
        -- NOTE(review): tables created through Impala are usually named
        -- 'impala::db_name.table_name' in Kudu; confirm whether the
        -- 'impala.' prefix used here is intentional.
        'kudu.table_name' = 'impala.db_name.existing_table'
    );
|
||||
|
||||
-- ============================================================================
|
||||
-- 场景6:带压缩和副本配置
|
||||
-- ============================================================================
|
||||
|
||||
-- Kudu table demonstrating compression and replica settings: 8 hash buckets
-- on id, combined with monthly ranges on data_date.
CREATE TABLE IF NOT EXISTS db_name.kudu_with_props (
    -- Primary key columns (listed first, all NOT NULL).
    id         BIGINT  NOT NULL                  COMMENT '主键ID',
    data_date  STRING  NOT NULL                  COMMENT '数据日期',

    -- Value columns.
    -- Kudu compression is configured per column with the COMPRESSION attribute
    -- (LZ4, SNAPPY or ZLIB), not through TBLPROPERTIES.
    content    STRING           COMPRESSION LZ4  COMMENT '内容',
    value      DOUBLE           COMPRESSION LZ4  COMMENT '数值',

    -- Impala only accepts the PRIMARY KEY clause inside the column list;
    -- placed after the closing parenthesis it is a syntax error.
    PRIMARY KEY (id, data_date)
)
PARTITION BY
    HASH (id) PARTITIONS 8,
    -- Half-open monthly ranges covering 2026-01 and 2026-02.
    RANGE (data_date) (
        PARTITION '2026-01-01' <= VALUES < '2026-02-01',
        PARTITION '2026-02-01' <= VALUES < '2026-03-01'
    )
STORED AS KUDU
TBLPROPERTIES (
    -- 'kudu.compression' and 'kudu.encryption' were removed: neither is a
    -- table property that Impala recognises for Kudu tables, so they had no
    -- effect. Compression is now declared on the columns above.
    'kudu.num_tablet_replicas' = '3'   -- three replicas per tablet
);
|
||||
|
||||
-- ============================================================================
|
||||
-- 字段类型速查(Kudu 支持的类型)
|
||||
-- ============================================================================
|
||||
/*
|
||||
| 类型 | 说明 | 适用场景 |
|
||||
|---------------|----------------|------------------------|
|
||||
| BOOLEAN | 布尔 | 状态标志 |
|
||||
| TINYINT | 1字节整数 | 小范围枚举 |
|
||||
| SMALLINT | 2字节整数 | 小范围数值 |
|
||||
| INT | 4字节整数 | 数量、等级 |
|
||||
| BIGINT | 8字节整数 | ID、计数 |
|
||||
| FLOAT | 4字节浮点 | 近似计算 |
|
||||
| DOUBLE | 8字节浮点 | 科学计算 |
|
||||
| DECIMAL(p,s) | 定点数 | 金额、精确数值 |
|
||||
| STRING | 变长字符串 | 名称、描述 |
|
||||
| VARCHAR(n) | 变长字符串 | 限定长度字符串 |
|
||||
| CHAR(n)       | 定长字符串     | 注意:Impala 的 Kudu 表不支持 CHAR,请改用 STRING 或 VARCHAR(n) |
|
||||
| TIMESTAMP | 时间戳 | 时间字段(微秒精度) |
|
||||
| DATE | 日期 | 日期字段 |
|
||||
| BINARY | 二进制 | 二进制数据 |
|
||||
|
||||
注意:Kudu 不支持 ARRAY, MAP, STRUCT 等复杂类型
|
||||
*/
|
||||
|
||||
-- ============================================================================
|
||||
-- 建表规范说明
|
||||
-- ============================================================================
|
||||
/*
|
||||
1. 主键约束(Kudu 特有)
|
||||
- 每张 Kudu 表必须有 PRIMARY KEY
|
||||
- 主键列不能为 NULL(必须 NOT NULL)
|
||||
- 主键值不可 UPDATE(只能删除后重新插入)
|
||||
- 分区列必须是主键列的子集(即分区列必须包含在主键列中;主键列不必全部参与分区)
|
||||
|
||||
2. 分区策略
|
||||
- Hash 分区:均匀分布,适合写入和点查
|
||||
- Range 分区:按范围查询,适合时间序列
|
||||
- Hash + Range 组合:兼顾两者优势(推荐)
|
||||
- 分区数 = tablet 数量,影响并行度
|
||||
|
||||
3. 分区设计建议
|
||||
- Hash 分区数:建议 4 的倍数,参考数据量
|
||||
- Range 分区:按时间维度,定期添加新分区
|
||||
- 单个 tablet 建议 1GB~10GB
|
||||
|
||||
4. 副本数
|
||||
- 生产环境建议 3 副本(默认)
|
||||
- Raft 协议保证一致性
|
||||
|
||||
5. 压缩
|
||||
- 推荐 LZ4(速度和压缩比平衡);压缩通过列属性 COMPRESSION 按列指定(例如 content STRING COMPRESSION LZ4),而不是通过 TBLPROPERTIES
|
||||
- 可选:SNAPPY, ZLIB, LZ4
|
||||
|
||||
6. 与 Hive/Spark 表的区别
|
||||
- Kudu 表支持 UPDATE 和 DELETE
|
||||
- Kudu 表不支持 INSERT OVERWRITE
|
||||
- Kudu 表不支持复杂类型(ARRAY, MAP, STRUCT)
|
||||
- Kudu 表主键有约束,Hive/Spark 无约束
|
||||
*/
|
||||
Reference in New Issue
Block a user