Add one-skill
This commit is contained in:
@@ -0,0 +1,306 @@
|
||||
# 窗口函数速查
|
||||
|
||||
## 基本语法
|
||||
|
||||
```sql
|
||||
函数名() OVER (
|
||||
PARTITION BY 分组字段 -- 可选:分组
|
||||
ORDER BY 排序字段 -- 可选:排序
|
||||
ROWS/RANGE 窗口范围 -- 可选:窗口范围
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 排序函数
|
||||
|
||||
| 函数 | 说明 | 特点 | 适用场景 |
|
||||
|------|------|------|----------|
|
||||
| ROW_NUMBER() | 连续排名 | 不跳号,相同值不同排名 | 每组取前N条、去重 |
|
||||
| RANK() | 排名 | 跳号,相同值相同排名 | 成绩排名 |
|
||||
| DENSE_RANK() | 紧密排名 | 不跳号,相同值相同排名 | 连续名次 |
|
||||
| NTILE(n) | 分桶 | 分成n组 | 数据分片、抽样 |
|
||||
|
||||
### ROW_NUMBER 示例
|
||||
|
||||
```sql
|
||||
-- 每个部门薪资最高的员工(取第一名)
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT
|
||||
name,
|
||||
department,
|
||||
salary,
|
||||
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn
|
||||
FROM employees
|
||||
) t
|
||||
WHERE rn = 1
|
||||
|
||||
-- 去重:每个用户取最新订单
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT
|
||||
*,
|
||||
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) AS rn
|
||||
FROM orders
|
||||
) t
|
||||
WHERE rn = 1
|
||||
```
|
||||
|
||||
### RANK 与 DENSE_RANK 示例
|
||||
|
||||
```sql
|
||||
-- 成绩排名(跳号)
|
||||
SELECT
|
||||
name,
|
||||
score,
|
||||
RANK() OVER (ORDER BY score DESC) AS rank, -- 1,2,2,4,5...
|
||||
DENSE_RANK() OVER (ORDER BY score DESC) AS dense -- 1,2,2,3,4...
|
||||
FROM students
|
||||
```
|
||||
|
||||
### NTILE 示例
|
||||
|
||||
```sql
|
||||
-- 将用户分成4组(业绩分位)
|
||||
SELECT
|
||||
name,
|
||||
sales,
|
||||
NTILE(4) OVER (ORDER BY sales DESC) AS quartile -- 1(最高)到4(最低)
|
||||
FROM sales_data
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 聚合函数(窗口内)
|
||||
|
||||
| 函数 | 说明 |
|
||||
|------|------|
|
||||
| SUM(col) OVER | 累计求和 |
|
||||
| AVG(col) OVER | 累计平均 |
|
||||
| COUNT(col) OVER | 窗口内计数 |
|
||||
| MAX(col) OVER | 窗口内最大值 |
|
||||
| MIN(col) OVER | 窗口内最小值 |
|
||||
|
||||
### 累计求和示例
|
||||
|
||||
```sql
|
||||
-- 累计销售额(从开始到当前)
|
||||
SELECT
|
||||
date,
|
||||
amount,
|
||||
SUM(amount) OVER (ORDER BY date) AS cumulative_amount
|
||||
FROM daily_sales
|
||||
|
||||
-- 分组累计(每个部门累计)
|
||||
SELECT
|
||||
date,
|
||||
department,
|
||||
amount,
|
||||
SUM(amount) OVER (PARTITION BY department ORDER BY date) AS dept_cumulative
|
||||
FROM sales_data
|
||||
```
|
||||
|
||||
### 移动平均示例
|
||||
|
||||
```sql
|
||||
-- 7天移动平均
|
||||
SELECT
|
||||
date,
|
||||
amount,
|
||||
AVG(amount) OVER (
|
||||
ORDER BY date
|
||||
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
|
||||
) AS moving_avg_7d
|
||||
FROM daily_sales
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 偏移函数
|
||||
|
||||
| 函数 | 说明 | 适用场景 |
|
||||
|------|------|----------|
|
||||
| LAG(col, n) | 取前n行的值 | 环比、查看历史值 |
|
||||
| LAG(col, n, default) | 取前n行,无值返回default | 防止NULL |
|
||||
| LEAD(col, n) | 取后n行的值 | 查看未来值 |
|
||||
| LEAD(col, n, default) | 取后n行,无值返回default | 防止NULL |
|
||||
| FIRST_VALUE(col) | 窗口第一个值 | 组内首个值 |
|
||||
| LAST_VALUE(col) | 窗口最后一个值 | 组内末尾值 |
|
||||
|
||||
### LAG 示例(环比计算)
|
||||
|
||||
```sql
|
||||
-- 日环比增长
|
||||
SELECT
|
||||
date,
|
||||
amount,
|
||||
LAG(amount, 1) OVER (ORDER BY date) AS prev_day,
|
||||
amount - LAG(amount, 1) OVER (ORDER BY date) AS daily_growth,
|
||||
ROUND((amount - LAG(amount, 1) OVER (ORDER BY date))
|
||||
/ LAG(amount, 1) OVER (ORDER BY date) * 100, 2) AS growth_rate_pct
|
||||
FROM daily_sales
|
||||
|
||||
-- 月度同比(取去年同期)
|
||||
SELECT
|
||||
month,
|
||||
revenue,
|
||||
LAG(revenue, 12) OVER (ORDER BY month) AS prev_year_revenue,
|
||||
ROUND((revenue - LAG(revenue, 12) OVER (ORDER BY month))
|
||||
/ LAG(revenue, 12) OVER (ORDER BY month) * 100, 2) AS yoy_growth_pct
|
||||
FROM monthly_revenue
|
||||
```
|
||||
|
||||
### LEAD 示例
|
||||
|
||||
```sql
|
||||
-- 查看下一行数据
|
||||
SELECT
|
||||
date,
|
||||
amount,
|
||||
LEAD(amount, 1) OVER (ORDER BY date) AS next_day_amount
|
||||
FROM daily_sales
|
||||
```
|
||||
|
||||
### FIRST_VALUE / LAST_VALUE 示例
|
||||
|
||||
```sql
|
||||
-- 每个部门薪资最高和最低的人
|
||||
SELECT
|
||||
name,
|
||||
department,
|
||||
salary,
|
||||
FIRST_VALUE(name) OVER (PARTITION BY department ORDER BY salary DESC) AS highest_paid,
|
||||
LAST_VALUE(name) OVER (
|
||||
PARTITION BY department
|
||||
ORDER BY salary DESC
|
||||
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
|
||||
) AS lowest_paid
|
||||
FROM employees
|
||||
```
|
||||
|
||||
**注意**:LAST_VALUE 默认窗口是 `ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`,需要显式指定全窗口才能取到最后值。
|
||||
|
||||
---
|
||||
|
||||
## 窗口范围定义
|
||||
|
||||
### ROWS(基于行数)
|
||||
|
||||
```sql
|
||||
ROWS BETWEEN 3 PRECEDING AND CURRENT ROW -- 前3行到当前行
|
||||
ROWS BETWEEN 6 PRECEDING AND 1 FOLLOWING -- 前6行到后1行
|
||||
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW -- 从开始到当前
|
||||
ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING -- 当前行到后3行
|
||||
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING -- 全窗口
|
||||
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING -- 当前行及前后各1行(共3行)
|
||||
```
|
||||
|
||||
### RANGE(基于值范围)
|
||||
|
||||
```sql
|
||||
RANGE BETWEEN 100 PRECEDING AND CURRENT ROW -- 值差100以内
|
||||
RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW -- 7天内
|
||||
```
|
||||
|
||||
**ROWS vs RANGE**:
|
||||
- ROWS:固定行数,不受值影响
|
||||
- RANGE:根据排序字段的值计算范围
|
||||
|
||||
---
|
||||
|
||||
## 实战案例
|
||||
|
||||
### 案例1:分组取Top N
|
||||
|
||||
```sql
|
||||
-- 每个部门薪资前3名
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT
|
||||
name,
|
||||
department,
|
||||
salary,
|
||||
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn
|
||||
FROM employees
|
||||
) t
|
||||
WHERE rn <= 3
|
||||
```
|
||||
|
||||
### 案例2:连续问题判断
|
||||
|
||||
```sql
|
||||
-- 判断是否连续增长(连续3天增长)
|
||||
SELECT
|
||||
date,
|
||||
amount,
|
||||
CASE
|
||||
WHEN amount > LAG(amount, 1) OVER (ORDER BY date)
|
||||
AND LAG(amount, 1) OVER (ORDER BY date) > LAG(amount, 2) OVER (ORDER BY date)
|
||||
THEN '连续增长'
|
||||
ELSE '非连续增长'
|
||||
END AS trend
|
||||
FROM daily_sales
|
||||
```
|
||||
|
||||
### 案例3:组内占比
|
||||
|
||||
```sql
|
||||
-- 每个部门各员工薪资占比
|
||||
SELECT
|
||||
name,
|
||||
department,
|
||||
salary,
|
||||
ROUND(salary / SUM(salary) OVER (PARTITION BY department) * 100, 2) AS salary_pct
|
||||
FROM employees
|
||||
```
|
||||
|
||||
### 案例4:累计百分比(帕累托分析)
|
||||
|
||||
```sql
|
||||
-- 80/20分析:哪些客户贡献了80%销售额
|
||||
SELECT
|
||||
customer_id,
|
||||
sales_amount,
|
||||
ROUND(SUM(sales_amount) OVER (ORDER BY sales_amount DESC)
|
||||
/ SUM(sales_amount) OVER () * 100, 2) AS cumulative_pct
|
||||
FROM customer_sales
|
||||
ORDER BY sales_amount DESC
|
||||
```
|
||||
|
||||
### 案例5:缺失值填充
|
||||
|
||||
```sql
|
||||
-- 用前一个有效值填充NULL
|
||||
SELECT
|
||||
date,
|
||||
amount,
|
||||
LAST_VALUE(amount IGNORE NULLS) OVER (ORDER BY date) AS filled_amount
|
||||
FROM sales_data
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 性能优化建议
|
||||
|
||||
1. **减少 PARTITION BY 分组数量**:分组越多,计算越慢
|
||||
2. **合理使用窗口范围**:避免全窗口扫描
|
||||
3. **先过滤再窗口**:WHERE 条件前置,减少数据量
|
||||
4. **避免嵌套窗口函数**:多次调用会重复计算
|
||||
|
||||
```sql
|
||||
-- 推荐:先过滤
|
||||
SELECT
|
||||
*,
|
||||
ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) AS rn
|
||||
FROM employees
|
||||
WHERE hire_date >= '2024-01-01' -- 先过滤,减少数据量
|
||||
|
||||
-- 不推荐:先窗口再过滤(窗口函数在全部数据上执行)
|
||||
SELECT *
|
||||
FROM (
|
||||
SELECT *, ROW_NUMBER() OVER (...) AS rn
|
||||
FROM employees
|
||||
) t
|
||||
WHERE hire_date >= '2024-01-01'
|
||||
```
|
||||
Reference in New Issue
Block a user