-- 236 lines, 6.3 KiB, SQL
-- =====================================================================
|
||
-- @Name: HIVE-D-SQL-{表名}-QUERY
|
||
-- @Version: 1.0
|
||
-- @Desc: Hive 查询模板
|
||
-- @TargetDatabase: Hive
|
||
-- =====================================================================
|
||
|
||
-- ============================================================================
|
||
-- 1. 单表查询
|
||
-- ============================================================================
|
||
|
||
-- Single-table read: rows of one day_id partition with status 'active',
-- newest first by created_at, capped at 1000 rows.
SELECT
    id,
    name,
    amount,
    created_at
FROM db_name.source_table
WHERE
    day_id = '${day_id}'      -- partition filter (required)
    AND status = 'active'
ORDER BY
    created_at DESC
LIMIT 1000;
|
||
|
||
-- ============================================================================
|
||
-- 2. JOIN 查询
|
||
-- ============================================================================
|
||
|
||
-- Two-table join: attach category_name from the dimension table to each
-- main_table row of the requested day_id partition.
SELECT
    a.id,
    a.name,
    a.amount,
    b.category_name
FROM db_name.main_table AS a
INNER JOIN db_name.dim_table AS b
    ON b.id = a.category_id
WHERE
    a.day_id = '${day_id}';   -- only the main table is filtered by partition
|
||
|
||
-- Multi-table join (with aliases): order lines enriched with the user and
-- product names, for orders in the requested day_id partition whose status
-- is 'completed' or 'shipped'.
SELECT
    o.order_id,
    u.user_name,
    p.product_name,
    oi.quantity,
    oi.unit_price
FROM db_name.orders AS o
INNER JOIN db_name.users AS u
    ON u.id = o.user_id
INNER JOIN db_name.order_items AS oi
    ON oi.order_id = o.order_id
INNER JOIN db_name.products AS p
    ON p.id = oi.product_id
WHERE
    o.day_id = '${day_id}'
    AND o.status IN ('completed', 'shipped');
|
||
|
||
-- ============================================================================
|
||
-- 3. 聚合查询
|
||
-- ============================================================================
|
||
|
||
-- GROUP BY + HAVING: per-department headcount and salary statistics for one
-- day_id partition, keeping only departments with at least 5 rows and
-- listing the largest total salary first.
SELECT
    department,
    COUNT(*)    AS employee_count,
    SUM(salary) AS total_salary,
    AVG(salary) AS avg_salary,
    MAX(salary) AS max_salary
FROM db_name.employees
WHERE
    day_id = '${day_id}'
GROUP BY
    department
HAVING
    COUNT(*) >= 5             -- group-level filter, applied after aggregation
ORDER BY
    total_salary DESC;
|
||
|
||
-- Multi-column grouping + distinct count: order count, distinct users and
-- total amount per (date, region) within one day_id partition.
-- `date` is a reserved keyword in Hive, so the column name must be quoted
-- with backticks or the statement fails to parse.
SELECT
    `date`,
    region,
    COUNT(*)                AS order_count,
    COUNT(DISTINCT user_id) AS unique_users,
    SUM(amount)             AS total_amount
FROM db_name.orders
WHERE
    day_id = '${day_id}'
GROUP BY
    `date`,
    region;
|
||
|
||
-- ============================================================================
|
||
-- 4. 窗口函数
|
||
-- ============================================================================
|
||
|
||
-- ROW_NUMBER (top N per group): the 3 highest-paid employees of each
-- department within one day_id partition.
-- The outer query lists its columns explicitly instead of SELECT *, so the
-- result shape does not change if the inner query gains columns.
SELECT
    ranked.department,
    ranked.name,
    ranked.salary,
    ranked.rn
FROM (
    SELECT
        department,
        name,
        salary,
        ROW_NUMBER() OVER (
            PARTITION BY department
            -- name is a tie-breaker: with salary alone, which of several
            -- equally paid employees lands inside the top 3 is arbitrary.
            ORDER BY salary DESC, name
        ) AS rn
    FROM db_name.employees
    WHERE day_id = '${day_id}'
) ranked
WHERE
    ranked.rn <= 3;
|
||
|
||
-- Cumulative aggregates: running total and 7-row moving average of amount,
-- ordered by date.
-- `date` is a reserved keyword in Hive and must be backtick-quoted.
-- The table is qualified with db_name like every other query in this file.
-- NOTE(review): if day_id identifies the same calendar day as `date`, this
-- partition filter leaves a single day and both windows collapse to it —
-- confirm whether a day_id range is intended here.
SELECT
    `date`,
    amount,
    -- Explicit ROWS frame: without one, Hive defaults to RANGE, which adds
    -- all rows sharing the current `date` at once instead of row by row.
    SUM(amount) OVER (
        ORDER BY `date`
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_amount,
    -- Current row plus the 6 preceding rows.
    AVG(amount) OVER (
        ORDER BY `date`
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS moving_avg_7d
FROM db_name.daily_sales
WHERE
    day_id = '${day_id}';
|
||
|
||
-- LAG (period-over-period): previous row's amount and the change against
-- it, ordered by date. Both derived columns are NULL on the first row,
-- because LAG has no preceding row there and no default is supplied.
-- `date` is a reserved keyword in Hive and must be backtick-quoted.
-- The table is qualified with db_name like every other query in this file.
SELECT
    `date`,
    amount,
    LAG(amount, 1) OVER (ORDER BY `date`)          AS prev_amount,
    amount - LAG(amount, 1) OVER (ORDER BY `date`) AS daily_change
FROM db_name.daily_sales
WHERE
    day_id = '${day_id}';
|
||
|
||
-- ============================================================================
|
||
-- 5. 子查询
|
||
-- ============================================================================
|
||
|
||
-- IN subquery: orders of the requested day_id partition placed by users
-- whose vip_level is 3 or higher.
SELECT *
FROM db_name.orders
WHERE
    day_id = '${day_id}'
    AND user_id IN (
        SELECT id
        FROM db_name.users
        WHERE vip_level >= 3
    );
|
||
|
||
-- EXISTS subquery: products of the requested day_id partition that have at
-- least one inventory row with a positive quantity.
SELECT *
FROM db_name.products p
WHERE
    p.day_id = '${day_id}'
    AND EXISTS (
        SELECT 1
        FROM db_name.inventory i
        WHERE
            i.product_id = p.id   -- correlation with the outer product row
            AND i.quantity > 0
    );
|
||
|
||
-- ============================================================================
|
||
-- 6. 条件聚合(CASE WHEN + 聚合)
|
||
-- ============================================================================
|
||
|
||
-- Conditional aggregation: per-date order totals split by status, plus the
-- summed amount of orders above 1000, within one day_id partition.
-- `date` is a reserved keyword in Hive and must be backtick-quoted.
SELECT
    `date`,
    COUNT(*) AS total_orders,
    SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END)  AS completed_count,
    SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END)  AS cancelled_count,
    SUM(CASE WHEN amount > 1000 THEN amount ELSE 0 END)    AS high_value_amount
FROM db_name.orders
WHERE
    day_id = '${day_id}'
GROUP BY
    `date`;
|
||
|
||
-- ============================================================================
|
||
-- 7. LATERAL VIEW + explode(Hive 特有)
|
||
-- ============================================================================
|
||
|
||
-- Expand an array column: one output row per element of tags, alongside the
-- article's id and name, for one day_id partition.
SELECT
    id,
    name,
    tag
FROM db_name.articles
    LATERAL VIEW explode(tags) tag_view AS tag
WHERE
    day_id = '${day_id}';
|
||
|
||
-- Expand an array and aggregate: number of exploded rows per tag value
-- within one day_id partition.
SELECT
    tag,
    COUNT(*) AS article_count
FROM db_name.articles
    LATERAL VIEW explode(tags) tag_view AS tag
WHERE
    day_id = '${day_id}'
GROUP BY
    tag;
|
||
|
||
-- Expand a map column: explode(props) yields two columns per entry, exposed
-- here as map_key and map_value, one output row per entry.
SELECT
    id,
    map_key,
    map_value
FROM db_name.data_table
    LATERAL VIEW explode(props) prop_view AS map_key, map_value
WHERE
    day_id = '${day_id}';
|
||
|
||
-- posexplode (expand with index): like explode, but also returns each
-- element's position in the array as pos.
SELECT
    id,
    pos,
    tag
FROM db_name.articles
    LATERAL VIEW posexplode(tags) tag_view AS pos, tag
WHERE
    day_id = '${day_id}';
|
||
|
||
-- ============================================================================
|
||
-- 8. 复杂类型查询
|
||
-- ============================================================================
|
||
|
||
-- ARRAY operations on the tags column, for one day_id partition.
SELECT
    id,
    size(tags)                      AS tag_count,   -- array length
    array_contains(tags, '大数据')   AS has_tag,     -- membership test
    tags[0]                         AS first_tag    -- first element (index 0)
FROM db_name.articles
WHERE
    day_id = '${day_id}';
|
||
|
||
-- MAP operations on the props column, for one day_id partition.
SELECT
    id,
    props['city']     AS city,         -- value lookup by key
    map_keys(props)   AS all_keys,     -- all keys
    map_values(props) AS all_values    -- all values
FROM db_name.user_table
WHERE
    day_id = '${day_id}';
|
||
|
||
-- STRUCT operations: fields of the user_info struct column are read with
-- dot notation, for one day_id partition.
SELECT
    id,
    user_info.name AS user_name,   -- struct field access
    user_info.age  AS user_age
FROM db_name.data_table
WHERE
    day_id = '${day_id}';
|
||
|
||
-- ============================================================================
|
||
-- 9. 集合聚合
|
||
-- ============================================================================
|
||
|
||
-- collect_list / collect_set: gather the names of each department into
-- arrays, for one day_id partition.
SELECT
    department,
    collect_list(name)      AS all_names,      -- array, duplicates kept
    collect_set(name)       AS unique_names,   -- array, duplicates removed
    size(collect_set(name)) AS unique_count    -- number of distinct names
FROM db_name.employees
WHERE
    day_id = '${day_id}'
GROUP BY
    department;
|