Files
2026-05-13 11:03:00 +08:00

236 lines
6.3 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- =====================================================================
-- @Name: HIVE-D-SQL-{表名}-QUERY
-- @Version: 1.0
-- @Desc: Hive 查询模板
-- @TargetDatabase: Hive
-- =====================================================================
-- ============================================================================
-- 1. 单表查询
-- ============================================================================
SELECT
id,
name,
amount,
created_at
FROM db_name.source_table
WHERE day_id = '${day_id}' -- 分区过滤(必须)
AND status = 'active'
ORDER BY created_at DESC
LIMIT 1000;
-- ============================================================================
-- 2. JOIN 查询
-- ============================================================================
-- 两表 JOIN
SELECT
a.id,
a.name,
a.amount,
b.category_name
FROM db_name.main_table a
JOIN db_name.dim_table b ON a.category_id = b.id
WHERE a.day_id = '${day_id}';
-- 多表 JOIN带别名
SELECT
o.order_id,
u.user_name,
p.product_name,
oi.quantity,
oi.unit_price
FROM db_name.orders o
JOIN db_name.users u ON o.user_id = u.id
JOIN db_name.order_items oi ON o.order_id = oi.order_id
JOIN db_name.products p ON oi.product_id = p.id
WHERE o.day_id = '${day_id}'
AND o.status IN ('completed', 'shipped');
-- ============================================================================
-- 3. 聚合查询
-- ============================================================================
-- GROUP BY + HAVING
SELECT
department,
COUNT(*) AS employee_count,
SUM(salary) AS total_salary,
AVG(salary) AS avg_salary,
MAX(salary) AS max_salary
FROM db_name.employees
WHERE day_id = '${day_id}'
GROUP BY department
HAVING COUNT(*) >= 5
ORDER BY total_salary DESC;
-- 多字段分组 + 去重计数
SELECT
date,
region,
COUNT(*) AS order_count,
COUNT(DISTINCT user_id) AS unique_users,
SUM(amount) AS total_amount
FROM db_name.orders
WHERE day_id = '${day_id}'
GROUP BY date, region;
-- ============================================================================
-- 4. 窗口函数
-- ============================================================================
-- ROW_NUMBER分组取Top N
SELECT *
FROM (
SELECT
department,
name,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn
FROM db_name.employees
WHERE day_id = '${day_id}'
) t
WHERE rn <= 3;
-- 累计聚合
SELECT
date,
amount,
SUM(amount) OVER (ORDER BY date) AS cumulative_amount,
AVG(amount) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS moving_avg_7d
FROM daily_sales
WHERE day_id = '${day_id}';
-- LAG/LEAD环比
SELECT
date,
amount,
LAG(amount, 1) OVER (ORDER BY date) AS prev_amount,
amount - LAG(amount, 1) OVER (ORDER BY date) AS daily_change
FROM daily_sales
WHERE day_id = '${day_id}';
-- ============================================================================
-- 5. 子查询
-- ============================================================================
-- IN 子查询
SELECT *
FROM db_name.orders
WHERE user_id IN (
SELECT id FROM db_name.users WHERE vip_level >= 3
)
AND day_id = '${day_id}';
-- EXISTS 子查询
SELECT *
FROM db_name.products p
WHERE EXISTS (
SELECT 1 FROM db_name.inventory i
WHERE i.product_id = p.id
AND i.quantity > 0
)
AND p.day_id = '${day_id}';
-- ============================================================================
-- 6. 条件聚合CASE WHEN + 聚合)
-- ============================================================================
SELECT
date,
COUNT(*) AS total_orders,
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) AS completed_count,
SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled_count,
SUM(CASE WHEN amount > 1000 THEN amount ELSE 0 END) AS high_value_amount
FROM db_name.orders
WHERE day_id = '${day_id}'
GROUP BY date;
-- ============================================================================
-- 7. LATERAL VIEW + explodeHive 特有)
-- ============================================================================
-- 展开数组字段
SELECT
id,
name,
tag
FROM db_name.articles
LATERAL VIEW explode(tags) t AS tag
WHERE day_id = '${day_id}';
-- 展开数组并统计
SELECT
tag,
COUNT(*) AS article_count
FROM db_name.articles
LATERAL VIEW explode(tags) t AS tag
WHERE day_id = '${day_id}'
GROUP BY tag;
-- 展开 Map
SELECT
id,
map_key,
map_value
FROM db_name.data_table
LATERAL VIEW explode(props) m AS map_key, map_value
WHERE day_id = '${day_id}';
-- posexplode带索引展开
SELECT
id,
pos,
tag
FROM db_name.articles
LATERAL VIEW posexplode(tags) t AS pos, tag
WHERE day_id = '${day_id}';
-- ============================================================================
-- 8. 复杂类型查询
-- ============================================================================
-- ARRAY 操作
SELECT
id,
size(tags) AS tag_count, -- 数组长度
array_contains(tags, '大数据') AS has_tag, -- 包含判断
tags[0] AS first_tag -- 取第一个元素
FROM db_name.articles
WHERE day_id = '${day_id}';
-- MAP 操作
SELECT
id,
props['city'] AS city, -- 取值
map_keys(props) AS all_keys, -- 所有 key
map_values(props) AS all_values -- 所有 value
FROM db_name.user_table
WHERE day_id = '${day_id}';
-- STRUCT 操作
SELECT
id,
user_info.name AS user_name, -- 结构体字段访问
user_info.age AS user_age
FROM db_name.data_table
WHERE day_id = '${day_id}';
-- ============================================================================
-- 9. 集合聚合
-- ============================================================================
-- collect_list / collect_set
SELECT
department,
collect_list(name) AS all_names, -- 收集为数组(不去重)
collect_set(name) AS unique_names, -- 收集为数组(去重)
size(collect_set(name)) AS unique_count
FROM db_name.employees
WHERE day_id = '${day_id}'
GROUP BY department;