Add one-skill
This commit is contained in:
@@ -0,0 +1,235 @@
|
||||
-- =====================================================================
|
||||
-- @Name: HIVE-D-SQL-{表名}-QUERY
|
||||
-- @Version: 1.0
|
||||
-- @Desc: Hive 查询模板
|
||||
-- @TargetDatabase: Hive
|
||||
-- =====================================================================
|
||||
|
||||
-- ============================================================================
|
||||
-- 1. 单表查询
|
||||
-- ============================================================================
|
||||
|
||||
SELECT
|
||||
id,
|
||||
name,
|
||||
amount,
|
||||
created_at
|
||||
FROM db_name.source_table
|
||||
WHERE day_id = '${day_id}' -- 分区过滤(必须)
|
||||
AND status = 'active'
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 1000;
|
||||
|
||||
-- ============================================================================
|
||||
-- 2. JOIN 查询
|
||||
-- ============================================================================
|
||||
|
||||
-- 两表 JOIN
|
||||
SELECT
|
||||
a.id,
|
||||
a.name,
|
||||
a.amount,
|
||||
b.category_name
|
||||
FROM db_name.main_table a
|
||||
JOIN db_name.dim_table b ON a.category_id = b.id
|
||||
WHERE a.day_id = '${day_id}';
|
||||
|
||||
-- 多表 JOIN(带别名)
|
||||
SELECT
|
||||
o.order_id,
|
||||
u.user_name,
|
||||
p.product_name,
|
||||
oi.quantity,
|
||||
oi.unit_price
|
||||
FROM db_name.orders o
|
||||
JOIN db_name.users u ON o.user_id = u.id
|
||||
JOIN db_name.order_items oi ON o.order_id = oi.order_id
|
||||
JOIN db_name.products p ON oi.product_id = p.id
|
||||
WHERE o.day_id = '${day_id}'
|
||||
AND o.status IN ('completed', 'shipped');
|
||||
|
||||
-- ============================================================================
|
||||
-- 3. 聚合查询
|
||||
-- ============================================================================
|
||||
|
||||
-- GROUP BY + HAVING
|
||||
SELECT
|
||||
department,
|
||||
COUNT(*) AS employee_count,
|
||||
SUM(salary) AS total_salary,
|
||||
AVG(salary) AS avg_salary,
|
||||
MAX(salary) AS max_salary
|
||||
FROM db_name.employees
|
||||
WHERE day_id = '${day_id}'
|
||||
GROUP BY department
|
||||
HAVING COUNT(*) >= 5
|
||||
ORDER BY total_salary DESC;
|
||||
|
||||
-- 多字段分组 + 去重计数
|
||||
SELECT
|
||||
date,
|
||||
region,
|
||||
COUNT(*) AS order_count,
|
||||
COUNT(DISTINCT user_id) AS unique_users,
|
||||
SUM(amount) AS total_amount
|
||||
FROM db_name.orders
|
||||
WHERE day_id = '${day_id}'
|
||||
GROUP BY date, region;
|
||||
|
||||
-- ============================================================================
|
||||
-- 4. 窗口函数
|
||||
-- ============================================================================
|
||||
|
||||
-- ROW_NUMBER(分组取Top N)
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT
|
||||
department,
|
||||
name,
|
||||
salary,
|
||||
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn
|
||||
FROM db_name.employees
|
||||
WHERE day_id = '${day_id}'
|
||||
) t
|
||||
WHERE rn <= 3;
|
||||
|
||||
-- 累计聚合
|
||||
SELECT
|
||||
date,
|
||||
amount,
|
||||
SUM(amount) OVER (ORDER BY date) AS cumulative_amount,
|
||||
AVG(amount) OVER (
|
||||
ORDER BY date
|
||||
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
|
||||
) AS moving_avg_7d
|
||||
FROM daily_sales
|
||||
WHERE day_id = '${day_id}';
|
||||
|
||||
-- LAG/LEAD(环比)
|
||||
SELECT
|
||||
date,
|
||||
amount,
|
||||
LAG(amount, 1) OVER (ORDER BY date) AS prev_amount,
|
||||
amount - LAG(amount, 1) OVER (ORDER BY date) AS daily_change
|
||||
FROM daily_sales
|
||||
WHERE day_id = '${day_id}';
|
||||
|
||||
-- ============================================================================
|
||||
-- 5. 子查询
|
||||
-- ============================================================================
|
||||
|
||||
-- IN 子查询
|
||||
SELECT *
|
||||
FROM db_name.orders
|
||||
WHERE user_id IN (
|
||||
SELECT id FROM db_name.users WHERE vip_level >= 3
|
||||
)
|
||||
AND day_id = '${day_id}';
|
||||
|
||||
-- EXISTS 子查询
|
||||
SELECT *
|
||||
FROM db_name.products p
|
||||
WHERE EXISTS (
|
||||
SELECT 1 FROM db_name.inventory i
|
||||
WHERE i.product_id = p.id
|
||||
AND i.quantity > 0
|
||||
)
|
||||
AND p.day_id = '${day_id}';
|
||||
|
||||
-- ============================================================================
|
||||
-- 6. 条件聚合(CASE WHEN + 聚合)
|
||||
-- ============================================================================
|
||||
|
||||
SELECT
|
||||
date,
|
||||
COUNT(*) AS total_orders,
|
||||
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) AS completed_count,
|
||||
SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled_count,
|
||||
SUM(CASE WHEN amount > 1000 THEN amount ELSE 0 END) AS high_value_amount
|
||||
FROM db_name.orders
|
||||
WHERE day_id = '${day_id}'
|
||||
GROUP BY date;
|
||||
|
||||
-- ============================================================================
|
||||
-- 7. LATERAL VIEW + explode(Hive 特有)
|
||||
-- ============================================================================
|
||||
|
||||
-- 展开数组字段
|
||||
SELECT
|
||||
id,
|
||||
name,
|
||||
tag
|
||||
FROM db_name.articles
|
||||
LATERAL VIEW explode(tags) t AS tag
|
||||
WHERE day_id = '${day_id}';
|
||||
|
||||
-- 展开数组并统计
|
||||
SELECT
|
||||
tag,
|
||||
COUNT(*) AS article_count
|
||||
FROM db_name.articles
|
||||
LATERAL VIEW explode(tags) t AS tag
|
||||
WHERE day_id = '${day_id}'
|
||||
GROUP BY tag;
|
||||
|
||||
-- 展开 Map
|
||||
SELECT
|
||||
id,
|
||||
map_key,
|
||||
map_value
|
||||
FROM db_name.data_table
|
||||
LATERAL VIEW explode(props) m AS map_key, map_value
|
||||
WHERE day_id = '${day_id}';
|
||||
|
||||
-- posexplode(带索引展开)
|
||||
SELECT
|
||||
id,
|
||||
pos,
|
||||
tag
|
||||
FROM db_name.articles
|
||||
LATERAL VIEW posexplode(tags) t AS pos, tag
|
||||
WHERE day_id = '${day_id}';
|
||||
|
||||
-- ============================================================================
|
||||
-- 8. 复杂类型查询
|
||||
-- ============================================================================
|
||||
|
||||
-- ARRAY 操作
|
||||
SELECT
|
||||
id,
|
||||
size(tags) AS tag_count, -- 数组长度
|
||||
array_contains(tags, '大数据') AS has_tag, -- 包含判断
|
||||
tags[0] AS first_tag -- 取第一个元素
|
||||
FROM db_name.articles
|
||||
WHERE day_id = '${day_id}';
|
||||
|
||||
-- MAP 操作
|
||||
SELECT
|
||||
id,
|
||||
props['city'] AS city, -- 取值
|
||||
map_keys(props) AS all_keys, -- 所有 key
|
||||
map_values(props) AS all_values -- 所有 value
|
||||
FROM db_name.user_table
|
||||
WHERE day_id = '${day_id}';
|
||||
|
||||
-- STRUCT 操作
|
||||
SELECT
|
||||
id,
|
||||
user_info.name AS user_name, -- 结构体字段访问
|
||||
user_info.age AS user_age
|
||||
FROM db_name.data_table
|
||||
WHERE day_id = '${day_id}';
|
||||
|
||||
-- ============================================================================
|
||||
-- 9. 集合聚合
|
||||
-- ============================================================================
|
||||
|
||||
-- collect_list / collect_set
|
||||
SELECT
|
||||
department,
|
||||
collect_list(name) AS all_names, -- 收集为数组(不去重)
|
||||
collect_set(name) AS unique_names, -- 收集为数组(去重)
|
||||
size(collect_set(name)) AS unique_count
|
||||
FROM db_name.employees
|
||||
WHERE day_id = '${day_id}'
|
||||
GROUP BY department;
|
||||
Reference in New Issue
Block a user