-- ===================================================================== -- @Name: HIVE-D-SQL-{表名}-QUERY -- @Version: 1.0 -- @Desc: Hive 查询模板 -- @TargetDatabase: Hive -- ===================================================================== -- ============================================================================ -- 1. 单表查询 -- ============================================================================ SELECT id, name, amount, created_at FROM db_name.source_table WHERE day_id = '${day_id}' -- 分区过滤(必须) AND status = 'active' ORDER BY created_at DESC LIMIT 1000; -- ============================================================================ -- 2. JOIN 查询 -- ============================================================================ -- 两表 JOIN SELECT a.id, a.name, a.amount, b.category_name FROM db_name.main_table a JOIN db_name.dim_table b ON a.category_id = b.id WHERE a.day_id = '${day_id}'; -- 多表 JOIN(带别名) SELECT o.order_id, u.user_name, p.product_name, oi.quantity, oi.unit_price FROM db_name.orders o JOIN db_name.users u ON o.user_id = u.id JOIN db_name.order_items oi ON o.order_id = oi.order_id JOIN db_name.products p ON oi.product_id = p.id WHERE o.day_id = '${day_id}' AND o.status IN ('completed', 'shipped'); -- ============================================================================ -- 3. 聚合查询 -- ============================================================================ -- GROUP BY + HAVING SELECT department, COUNT(*) AS employee_count, SUM(salary) AS total_salary, AVG(salary) AS avg_salary, MAX(salary) AS max_salary FROM db_name.employees WHERE day_id = '${day_id}' GROUP BY department HAVING COUNT(*) >= 5 ORDER BY total_salary DESC; -- 多字段分组 + 去重计数 SELECT date, region, COUNT(*) AS order_count, COUNT(DISTINCT user_id) AS unique_users, SUM(amount) AS total_amount FROM db_name.orders WHERE day_id = '${day_id}' GROUP BY date, region; -- ============================================================================ -- 4. 窗口函数 -- ============================================================================ -- ROW_NUMBER(分组取Top N) SELECT * FROM ( SELECT department, name, salary, ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn FROM db_name.employees WHERE day_id = '${day_id}' ) t WHERE rn <= 3; -- 累计聚合 SELECT date, amount, SUM(amount) OVER (ORDER BY date) AS cumulative_amount, AVG(amount) OVER ( ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW ) AS moving_avg_7d FROM daily_sales WHERE day_id = '${day_id}'; -- LAG/LEAD(环比) SELECT date, amount, LAG(amount, 1) OVER (ORDER BY date) AS prev_amount, amount - LAG(amount, 1) OVER (ORDER BY date) AS daily_change FROM daily_sales WHERE day_id = '${day_id}'; -- ============================================================================ -- 5. 子查询 -- ============================================================================ -- IN 子查询 SELECT * FROM db_name.orders WHERE user_id IN ( SELECT id FROM db_name.users WHERE vip_level >= 3 ) AND day_id = '${day_id}'; -- EXISTS 子查询 SELECT * FROM db_name.products p WHERE EXISTS ( SELECT 1 FROM db_name.inventory i WHERE i.product_id = p.id AND i.quantity > 0 ) AND p.day_id = '${day_id}'; -- ============================================================================ -- 6. 条件聚合(CASE WHEN + 聚合) -- ============================================================================ SELECT date, COUNT(*) AS total_orders, SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) AS completed_count, SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled_count, SUM(CASE WHEN amount > 1000 THEN amount ELSE 0 END) AS high_value_amount FROM db_name.orders WHERE day_id = '${day_id}' GROUP BY date; -- ============================================================================ -- 7. LATERAL VIEW + explode(Hive 特有) -- ============================================================================ -- 展开数组字段 SELECT id, name, tag FROM db_name.articles LATERAL VIEW explode(tags) t AS tag WHERE day_id = '${day_id}'; -- 展开数组并统计 SELECT tag, COUNT(*) AS article_count FROM db_name.articles LATERAL VIEW explode(tags) t AS tag WHERE day_id = '${day_id}' GROUP BY tag; -- 展开 Map SELECT id, map_key, map_value FROM db_name.data_table LATERAL VIEW explode(props) m AS map_key, map_value WHERE day_id = '${day_id}'; -- posexplode(带索引展开) SELECT id, pos, tag FROM db_name.articles LATERAL VIEW posexplode(tags) t AS pos, tag WHERE day_id = '${day_id}'; -- ============================================================================ -- 8. 复杂类型查询 -- ============================================================================ -- ARRAY 操作 SELECT id, size(tags) AS tag_count, -- 数组长度 array_contains(tags, '大数据') AS has_tag, -- 包含判断 tags[0] AS first_tag -- 取第一个元素 FROM db_name.articles WHERE day_id = '${day_id}'; -- MAP 操作 SELECT id, props['city'] AS city, -- 取值 map_keys(props) AS all_keys, -- 所有 key map_values(props) AS all_values -- 所有 value FROM db_name.user_table WHERE day_id = '${day_id}'; -- STRUCT 操作 SELECT id, user_info.name AS user_name, -- 结构体字段访问 user_info.age AS user_age FROM db_name.data_table WHERE day_id = '${day_id}'; -- ============================================================================ -- 9. 集合聚合 -- ============================================================================ -- collect_list / collect_set SELECT department, collect_list(name) AS all_names, -- 收集为数组(不去重) collect_set(name) AS unique_names, -- 收集为数组(去重) size(collect_set(name)) AS unique_count FROM db_name.employees WHERE day_id = '${day_id}' GROUP BY department;