Add one-skill
This commit is contained in:
@@ -0,0 +1,459 @@
|
||||
# 聚合模式速查
|
||||
|
||||
## 基本聚合
|
||||
|
||||
### 聚合函数列表
|
||||
|
||||
| 函数 | 说明 | 示例 |
|
||||
|------|------|------|
|
||||
| COUNT(*) | 计数(含NULL行) | 总行数 |
|
||||
| COUNT(col) | 计数(不含NULL) | 有效数据数 |
|
||||
| COUNT(DISTINCT col) | 去重计数 | 用户数 |
|
||||
| SUM(col) | 求和 | 总销售额 |
|
||||
| AVG(col) | 平均值 | 平均薪资 |
|
||||
| MIN(col) | 最小值 | 最小年龄 |
|
||||
| MAX(col) | 最大值 | 最高分 |
|
||||
|
||||
### 基本用法
|
||||
|
||||
```sql
|
||||
-- 单列聚合
|
||||
SELECT
|
||||
COUNT(*) AS total_rows,
|
||||
COUNT(DISTINCT user_id) AS unique_users,
|
||||
SUM(amount) AS total_amount,
|
||||
AVG(amount) AS avg_amount,
|
||||
MIN(amount) AS min_amount,
|
||||
MAX(amount) AS max_amount
|
||||
FROM orders
|
||||
|
||||
-- 分组聚合
|
||||
SELECT
|
||||
department,
|
||||
COUNT(*) AS employee_count,
|
||||
AVG(salary) AS avg_salary,
|
||||
MAX(salary) AS max_salary
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GROUP BY
|
||||
|
||||
```sql
|
||||
-- 单字段分组
|
||||
SELECT
|
||||
category,
|
||||
COUNT(*) AS count
|
||||
FROM products
|
||||
GROUP BY category
|
||||
|
||||
-- 多字段分组
|
||||
SELECT
|
||||
category,
|
||||
brand,
|
||||
COUNT(*) AS count,
|
||||
SUM(price) AS total_price
|
||||
FROM products
|
||||
GROUP BY category, brand
|
||||
|
||||
-- 分组 + 排序
|
||||
SELECT
|
||||
department,
|
||||
COUNT(*) AS count
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
ORDER BY count DESC
|
||||
|
||||
-- 分组 + LIMIT(取Top N组)
|
||||
SELECT
|
||||
category,
|
||||
COUNT(*) AS count
|
||||
FROM products
|
||||
GROUP BY category
|
||||
ORDER BY count DESC
|
||||
LIMIT 10
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## HAVING(分组过滤)
|
||||
|
||||
```sql
|
||||
-- HAVING vs WHERE
|
||||
-- WHERE:过滤原始行(GROUP BY 前)
|
||||
-- HAVING:过滤分组结果(GROUP BY 后)
|
||||
|
||||
-- 示例:筛选订单数大于10的用户
|
||||
SELECT
|
||||
user_id,
|
||||
COUNT(*) AS order_count,
|
||||
SUM(amount) AS total_amount
|
||||
FROM orders
|
||||
GROUP BY user_id
|
||||
HAVING COUNT(*) > 10
|
||||
|
||||
-- 多条件 HAVING
|
||||
SELECT
|
||||
department,
|
||||
AVG(salary) AS avg_salary
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
HAVING AVG(salary) > 5000
|
||||
AND COUNT(*) >= 5
|
||||
|
||||
-- HAVING + ORDER BY
|
||||
SELECT
|
||||
user_id,
|
||||
COUNT(*) AS order_count
|
||||
FROM orders
|
||||
GROUP BY user_id
|
||||
HAVING COUNT(*) >= 5
|
||||
ORDER BY order_count DESC
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 去重计数
|
||||
|
||||
```sql
|
||||
-- COUNT(DISTINCT)
|
||||
SELECT
|
||||
COUNT(DISTINCT user_id) AS unique_users
|
||||
FROM orders
|
||||
|
||||
-- 分组去重计数
|
||||
SELECT
|
||||
date,
|
||||
COUNT(DISTINCT user_id) AS unique_users,
|
||||
COUNT(*) AS total_orders
|
||||
FROM orders
|
||||
GROUP BY date
|
||||
|
||||
-- 多字段去重计数(Spark/Hive/MySQL 支持;任一字段为 NULL 的行不计入)
|
||||
SELECT
|
||||
COUNT(DISTINCT user_id, product_id) AS unique_user_product_pairs
|
||||
FROM order_items
|
||||
|
||||
-- 大数据量近似去重(性能优化)
|
||||
SELECT
|
||||
approx_count_distinct(user_id) AS approx_unique_users
|
||||
FROM orders
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 集合聚合(数组结果)
|
||||
|
||||
```sql
|
||||
-- collect_list:收集为数组(不去重)
|
||||
SELECT
|
||||
user_id,
|
||||
collect_list(product_id) AS products
|
||||
FROM orders
|
||||
GROUP BY user_id
|
||||
|
||||
-- collect_set:收集为数组(去重)
|
||||
SELECT
|
||||
user_id,
|
||||
collect_set(product_id) AS unique_products
|
||||
FROM orders
|
||||
GROUP BY user_id
|
||||
|
||||
-- 取数组大小
|
||||
SELECT
|
||||
user_id,
|
||||
size(collect_list(product_id)) AS product_count,
|
||||
size(collect_set(product_id)) AS unique_product_count
|
||||
FROM orders
|
||||
GROUP BY user_id
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 多级聚合(ROLLUP / CUBE / GROUPING SETS)
|
||||
|
||||
### ROLLUP(层级汇总)
|
||||
|
||||
```sql
|
||||
-- 从右到左递减分组级别
|
||||
SELECT
|
||||
COALESCE(year, '总计') AS year,
|
||||
COALESCE(month, '全年') AS month,
|
||||
COALESCE(region, '全国') AS region,
|
||||
SUM(sales) AS total_sales
|
||||
FROM sales_data
|
||||
GROUP BY ROLLUP (year, month, region)
|
||||
|
||||
-- 结果包含:
|
||||
-- 1. year + month + region 分组
|
||||
-- 2. year + month 汇总(region为NULL)
|
||||
-- 3. year 汇总(month,region为NULL)
|
||||
-- 4. 全表汇总(year,month,region为NULL)
|
||||
```
|
||||
|
||||
### CUBE(全维度组合)
|
||||
|
||||
```sql
|
||||
-- 所有分组组合
|
||||
SELECT
|
||||
COALESCE(year, '总计') AS year,
|
||||
COALESCE(month, '全月') AS month,
|
||||
COALESCE(region, '全国') AS region,
|
||||
SUM(sales) AS total_sales
|
||||
FROM sales_data
|
||||
GROUP BY CUBE (year, month, region)
|
||||
|
||||
-- 结果包含所有组合:
|
||||
-- year+month+region, year+month, year+region, month+region
|
||||
-- year, month, region, 全表汇总
|
||||
```
|
||||
|
||||
### GROUPING SETS(自定义组合)
|
||||
|
||||
```sql
|
||||
-- 指定分组组合
|
||||
SELECT
|
||||
year,
|
||||
month,
|
||||
region,
|
||||
SUM(sales) AS total_sales
|
||||
FROM sales_data
|
||||
GROUP BY GROUPING SETS (
|
||||
(year, month),
|
||||
(year, region),
|
||||
(region),
|
||||
()
|
||||
)
|
||||
|
||||
-- 等价于多个 GROUP BY 合并
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## GROUPING 函数(判断汇总级别)
|
||||
|
||||
```sql
|
||||
-- GROUPING(col):判断该列是否为汇总产生的NULL
|
||||
-- 0 = 真实值, 1 = 汇总NULL
|
||||
|
||||
SELECT
|
||||
year,
|
||||
month,
|
||||
SUM(sales) AS total_sales,
|
||||
GROUPING(year) AS is_year_total,
|
||||
GROUPING(month) AS is_month_total
|
||||
FROM sales_data
|
||||
GROUP BY ROLLUP (year, month)
|
||||
|
||||
-- 用 GROUPING 区分真实NULL和汇总NULL
|
||||
SELECT
|
||||
CASE WHEN GROUPING(region) = 1 THEN '全国汇总' ELSE region END AS region,
|
||||
SUM(sales) AS total_sales
|
||||
FROM sales_data
|
||||
GROUP BY ROLLUP (region)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 条件聚合(CASE WHEN + 聚合)
|
||||
|
||||
```sql
|
||||
-- 分条件统计
|
||||
SELECT
|
||||
date,
|
||||
COUNT(*) AS total_orders,
|
||||
SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) AS completed_count,
|
||||
SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled_count,
|
||||
SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) AS pending_count
|
||||
FROM orders
|
||||
GROUP BY date
|
||||
|
||||
-- 分条件求和
|
||||
SELECT
|
||||
department,
|
||||
SUM(salary) AS total_salary,
|
||||
SUM(CASE WHEN gender = 'M' THEN salary ELSE 0 END) AS male_salary,
|
||||
SUM(CASE WHEN gender = 'F' THEN salary ELSE 0 END) AS female_salary
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
|
||||
-- 条件平均值
|
||||
SELECT
|
||||
category,
|
||||
AVG(CASE WHEN price > 100 THEN price ELSE NULL END) AS high_price_avg
|
||||
FROM products
|
||||
GROUP BY category
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 聚合 + 窗口函数
|
||||
|
||||
```sql
|
||||
-- 分组内占比
|
||||
SELECT
|
||||
department,
|
||||
salary,
|
||||
ROUND(salary / SUM(salary) OVER (PARTITION BY department) * 100, 2) AS salary_pct
|
||||
FROM employees
|
||||
|
||||
-- 分组累计
|
||||
SELECT
|
||||
date,
|
||||
department,
|
||||
amount,
|
||||
SUM(amount) OVER (PARTITION BY department ORDER BY date) AS cumulative
|
||||
FROM sales
|
||||
|
||||
-- 分组排名
|
||||
SELECT
|
||||
*,
|
||||
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS salary_rank
|
||||
FROM employees
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 多表聚合
|
||||
|
||||
```sql
|
||||
-- JOIN 后聚合
|
||||
SELECT
|
||||
u.department,
|
||||
COUNT(o.id) AS order_count,
|
||||
SUM(o.amount) AS total_amount -- 无订单的部门结果为 NULL;需要 0 时改用 COALESCE(SUM(o.amount), 0)
|
||||
FROM users u
|
||||
LEFT JOIN orders o ON u.id = o.user_id
|
||||
GROUP BY u.department
|
||||
|
||||
-- 子查询聚合
|
||||
SELECT
|
||||
dept_stats.department,
|
||||
dept_stats.avg_salary,
|
||||
emp_count.employee_count
|
||||
FROM (
|
||||
SELECT department, AVG(salary) AS avg_salary
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
) dept_stats
|
||||
JOIN (
|
||||
SELECT department, COUNT(*) AS employee_count
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
) emp_count ON dept_stats.department = emp_count.department
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 常见问题
|
||||
|
||||
### 问题1:GROUP BY 字段遗漏
|
||||
|
||||
```sql
|
||||
-- 错误:SELECT 字段不在 GROUP BY 中
|
||||
SELECT
|
||||
department,
|
||||
name, -- 错误!name 未分组
|
||||
AVG(salary)
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
|
||||
-- 解决1:添加到 GROUP BY
|
||||
SELECT
|
||||
department,
|
||||
name,
|
||||
AVG(salary)
|
||||
FROM employees
|
||||
GROUP BY department, name
|
||||
|
||||
-- 解决2:使用聚合函数处理
|
||||
SELECT
|
||||
department,
|
||||
collect_list(name) AS names, -- 收集所有name
|
||||
AVG(salary)
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
```
|
||||
|
||||
### 问题2:NULL 影响聚合
|
||||
|
||||
```sql
|
||||
-- COUNT(*) 包含 NULL 行
|
||||
-- COUNT(col) 不包含 NULL
|
||||
|
||||
SELECT
|
||||
COUNT(*) AS total, -- 包含 NULL 行
|
||||
COUNT(amount) AS valid, -- 不包含 amount 为 NULL 的行
|
||||
COUNT(DISTINCT amount) AS unique_values -- 去重计数,同样不包含 NULL
|
||||
FROM orders
|
||||
|
||||
-- SUM/AVG/MIN/MAX 自动忽略 NULL
|
||||
SELECT AVG(price) FROM products -- NULL 自动排除
|
||||
```
|
||||
|
||||
### 问题3:聚合结果精度
|
||||
|
||||
```sql
|
||||
-- AVG 可能精度丢失
|
||||
SELECT
|
||||
AVG(amount) AS avg_amount, -- 可能精度不够
|
||||
AVG(CAST(amount AS DECIMAL(18,6))) AS precise_avg -- 高精度
|
||||
FROM orders
|
||||
|
||||
-- ROUND 控制精度
|
||||
SELECT
|
||||
ROUND(AVG(amount), 2) AS avg_amount
|
||||
FROM orders
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 聚合性能优化
|
||||
|
||||
```sql
|
||||
-- 1. 先过滤再聚合
|
||||
SELECT
|
||||
department,
|
||||
COUNT(*) AS count
|
||||
FROM employees
|
||||
WHERE hire_date >= '2024-01-01' -- 先过滤
|
||||
GROUP BY department
|
||||
|
||||
-- 2. 大数据量用近似聚合
|
||||
SELECT
|
||||
approx_count_distinct(user_id) AS users -- 比 COUNT(DISTINCT) 快
|
||||
FROM orders
|
||||
|
||||
-- 3. 减少分组字段数量
|
||||
SELECT
|
||||
category, -- 减少分组字段
|
||||
COUNT(*) AS count
|
||||
FROM products
|
||||
GROUP BY category -- 比 GROUP BY category, brand 快
|
||||
|
||||
-- 4. 避免复杂计算在 GROUP BY 前
|
||||
SELECT
|
||||
department,
|
||||
AVG(salary) * 1.1 AS adjusted_avg -- 先聚合再计算(结果等价于 AVG(salary * 1.1),但避免逐行乘法)
|
||||
FROM employees
|
||||
GROUP BY department
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 聚合模式选择指南
|
||||
|
||||
| 需求 | 推荐方式 |
|
||||
|------|----------|
|
||||
| 简单统计 | GROUP BY + 聚合函数 |
|
||||
| 分条件统计 | CASE WHEN + SUM/COUNT |
|
||||
| 去重计数 | COUNT(DISTINCT) |
|
||||
| 大数据去重 | approx_count_distinct |
|
||||
| 收集数组 | collect_list / collect_set |
|
||||
| 层级汇总 | ROLLUP |
|
||||
| 全维度汇总 | CUBE |
|
||||
| 自定义组合 | GROUPING SETS |
|
||||
| 分组内计算 | 窗口函数 |
|
||||
| 分组结果过滤 | HAVING |
|
||||
Reference in New Issue
Block a user