Add one-skill

This commit is contained in:
Xin Wang
2026-05-13 11:03:00 +08:00
parent a4c8b29176
commit f9e36ef92d
34 changed files with 7656 additions and 0 deletions

View File

@@ -0,0 +1,235 @@
-- =====================================================================
-- @Name: HIVE-D-SQL-{表名}-QUERY
-- @Version: 1.0
-- @Desc: Hive 查询模板
-- @TargetDatabase: Hive
-- =====================================================================
-- ============================================================================
-- 1. 单表查询
-- ============================================================================
SELECT
id,
name,
amount,
created_at
FROM db_name.source_table
WHERE day_id = '${day_id}' -- 分区过滤(必须)
AND status = 'active'
ORDER BY created_at DESC
LIMIT 1000;
-- ============================================================================
-- 2. JOIN 查询
-- ============================================================================
-- 两表 JOIN
SELECT
a.id,
a.name,
a.amount,
b.category_name
FROM db_name.main_table a
JOIN db_name.dim_table b ON a.category_id = b.id
WHERE a.day_id = '${day_id}';
-- 多表 JOIN带别名
SELECT
o.order_id,
u.user_name,
p.product_name,
oi.quantity,
oi.unit_price
FROM db_name.orders o
JOIN db_name.users u ON o.user_id = u.id
JOIN db_name.order_items oi ON o.order_id = oi.order_id
JOIN db_name.products p ON oi.product_id = p.id
WHERE o.day_id = '${day_id}'
AND o.status IN ('completed', 'shipped');
-- ============================================================================
-- 3. 聚合查询
-- ============================================================================
-- GROUP BY + HAVING
SELECT
department,
COUNT(*) AS employee_count,
SUM(salary) AS total_salary,
AVG(salary) AS avg_salary,
MAX(salary) AS max_salary
FROM db_name.employees
WHERE day_id = '${day_id}'
GROUP BY department
HAVING COUNT(*) >= 5
ORDER BY total_salary DESC;
-- 多字段分组 + 去重计数
SELECT
date,
region,
COUNT(*) AS order_count,
COUNT(DISTINCT user_id) AS unique_users,
SUM(amount) AS total_amount
FROM db_name.orders
WHERE day_id = '${day_id}'
GROUP BY date, region;
-- ============================================================================
-- 4. 窗口函数
-- ============================================================================
-- ROW_NUMBER分组取Top N
SELECT *
FROM (
SELECT
department,
name,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn
FROM db_name.employees
WHERE day_id = '${day_id}'
) t
WHERE rn <= 3;
-- 累计聚合
SELECT
date,
amount,
SUM(amount) OVER (ORDER BY date) AS cumulative_amount,
AVG(amount) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7d
FROM daily_sales
WHERE day_id = '${day_id}';
-- LAG/LEAD环比
SELECT
date,
amount,
LAG(amount, 1) OVER (ORDER BY date) AS prev_amount,
amount - LAG(amount, 1) OVER (ORDER BY date) AS daily_change
FROM daily_sales
WHERE day_id = '${day_id}';
-- ============================================================================
-- 5. 子查询
-- ============================================================================
-- IN 子查询
SELECT *
FROM db_name.orders
WHERE user_id IN (
SELECT id FROM db_name.users WHERE vip_level >= 3
)
AND day_id = '${day_id}';
-- EXISTS 子查询
SELECT *
FROM db_name.products p
WHERE EXISTS (
SELECT 1 FROM db_name.inventory i
WHERE i.product_id = p.id
AND i.quantity > 0
)
AND p.day_id = '${day_id}';
-- ============================================================================
-- 6. 条件聚合CASE WHEN + 聚合)
-- ============================================================================
SELECT
date,
COUNT(*) AS total_orders,
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) AS completed_count,
SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled_count,
SUM(CASE WHEN amount > 1000 THEN amount ELSE 0 END) AS high_value_amount
FROM db_name.orders
WHERE day_id = '${day_id}'
GROUP BY date;
-- ============================================================================
-- 7. LATERAL VIEW + explodeHive 特有)
-- ============================================================================
-- 展开数组字段
SELECT
id,
name,
tag
FROM db_name.articles
LATERAL VIEW explode(tags) t AS tag
WHERE day_id = '${day_id}';
-- 展开数组并统计
SELECT
tag,
COUNT(*) AS article_count
FROM db_name.articles
LATERAL VIEW explode(tags) t AS tag
WHERE day_id = '${day_id}'
GROUP BY tag;
-- 展开 Map
SELECT
id,
map_key,
map_value
FROM db_name.data_table
LATERAL VIEW explode(props) m AS map_key, map_value
WHERE day_id = '${day_id}';
-- posexplode带索引展开
SELECT
id,
pos,
tag
FROM db_name.articles
LATERAL VIEW posexplode(tags) t AS pos, tag
WHERE day_id = '${day_id}';
-- ============================================================================
-- 8. 复杂类型查询
-- ============================================================================
-- ARRAY 操作
SELECT
id,
size(tags) AS tag_count, -- 数组长度
array_contains(tags, '大数据') AS has_tag, -- 包含判断
tags[0] AS first_tag -- 取第一个元素
FROM db_name.articles
WHERE day_id = '${day_id}';
-- MAP 操作
SELECT
id,
props['city'] AS city, -- 取值
map_keys(props) AS all_keys, -- 所有 key
map_values(props) AS all_values -- 所有 value
FROM db_name.user_table
WHERE day_id = '${day_id}';
-- STRUCT 操作
SELECT
id,
user_info.name AS user_name, -- 结构体字段访问
user_info.age AS user_age
FROM db_name.data_table
WHERE day_id = '${day_id}';
-- ============================================================================
-- 9. 集合聚合
-- ============================================================================
-- collect_list / collect_set
SELECT
department,
collect_list(name) AS all_names, -- 收集为数组(不去重)
collect_set(name) AS unique_names, -- 收集为数组(去重)
size(collect_set(name)) AS unique_count
FROM db_name.employees
WHERE day_id = '${day_id}'
GROUP BY department;