Files
smart-data-dev-skill/one-skill/smart-data-developer/references/sql/reference/window-functions.md
2026-05-13 11:03:00 +08:00

7.0 KiB
Raw Blame History

窗口函数速查

基本语法

函数名() OVER (
    PARTITION BY 分组字段      -- 可选:分组
    ORDER BY 排序字段          -- 可选:排序
    ROWS/RANGE 窗口范围        -- 可选:窗口范围
)

排序函数

函数 说明 特点 适用场景
ROW_NUMBER() 连续排名 不跳号,相同值不同排名 每组取前N条、去重
RANK() 排名 跳号,相同值相同排名 成绩排名
DENSE_RANK() 紧密排名 不跳号,相同值相同排名 连续名次
NTILE(n) 分桶 分成n组 数据分片、抽样

ROW_NUMBER 示例

-- 每个部门薪资最高的员工(取第一名)
SELECT *
FROM (
    SELECT 
        name,
        department,
        salary,
        ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn
    FROM employees
) t
WHERE rn = 1

-- 去重:每个用户取最新订单
SELECT *
FROM (
    SELECT 
        *,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) AS rn
    FROM orders
) t
WHERE rn = 1

RANK 与 DENSE_RANK 示例

-- 成绩排名(跳号)
SELECT 
    name,
    score,
    RANK() OVER (ORDER BY score DESC) AS rank,         -- 1,2,2,4,5...
    DENSE_RANK() OVER (ORDER BY score DESC) AS dense   -- 1,2,2,3,4...
FROM students

NTILE 示例

-- 将用户分成4组业绩分位
SELECT 
    name,
    sales,
    NTILE(4) OVER (ORDER BY sales DESC) AS quartile   -- 1(最高)到4(最低)
FROM sales_data

聚合函数(窗口内)

函数 说明
SUM(col) OVER 累计求和
AVG(col) OVER 累计平均
COUNT(col) OVER 窗口内计数
MAX(col) OVER 窗口内最大值
MIN(col) OVER 窗口内最小值

累计求和示例

-- 累计销售额(从开始到当前)
SELECT 
    date,
    amount,
    SUM(amount) OVER (ORDER BY date) AS cumulative_amount
FROM daily_sales

-- 分组累计(每个部门累计)
SELECT 
    date,
    department,
    amount,
    SUM(amount) OVER (PARTITION BY department ORDER BY date) AS dept_cumulative
FROM sales_data

移动平均示例

-- 7天移动平均
SELECT 
    date,
    amount,
    AVG(amount) OVER (
        ORDER BY date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS moving_avg_7d
FROM daily_sales

偏移函数

函数 说明 适用场景
LAG(col, n) 取前n行的值 环比、查看历史值
LAG(col, n, default) 取前n行无值返回default 防止NULL
LEAD(col, n) 取后n行的值 查看未来值
LEAD(col, n, default) 取后n行无值返回default 防止NULL
FIRST_VALUE(col) 窗口第一个值 组内首个值
LAST_VALUE(col) 窗口最后一个值 组内末尾值

LAG 示例(环比计算)

-- 日环比增长
SELECT 
    date,
    amount,
    LAG(amount, 1) OVER (ORDER BY date) AS prev_day,
    amount - LAG(amount, 1) OVER (ORDER BY date) AS daily_growth,
    ROUND((amount - LAG(amount, 1) OVER (ORDER BY date)) 
        / LAG(amount, 1) OVER (ORDER BY date) * 100, 2) AS growth_rate_pct
FROM daily_sales

-- 月度同比(取去年同期)
SELECT 
    month,
    revenue,
    LAG(revenue, 12) OVER (ORDER BY month) AS prev_year_revenue,
    ROUND((revenue - LAG(revenue, 12) OVER (ORDER BY month)) 
        / LAG(revenue, 12) OVER (ORDER BY month) * 100, 2) AS yoy_growth_pct
FROM monthly_revenue

LEAD 示例

-- 查看下一行数据
SELECT 
    date,
    amount,
    LEAD(amount, 1) OVER (ORDER BY date) AS next_day_amount
FROM daily_sales

FIRST_VALUE / LAST_VALUE 示例

-- 每个部门薪资最高和最低的人
SELECT 
    name,
    department,
    salary,
    FIRST_VALUE(name) OVER (PARTITION BY department ORDER BY salary DESC) AS highest_paid,
    LAST_VALUE(name) OVER (
        PARTITION BY department 
        ORDER BY salary DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS lowest_paid
FROM employees

注意LAST_VALUE 默认窗口是 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW,需要显式指定全窗口才能取到最后值。


窗口范围定义

ROWS基于行数

ROWS BETWEEN 3 PRECEDING AND CURRENT ROW       -- 前3行到当前行
ROWS BETWEEN 6 PRECEDING AND 1 FOLLOWING       -- 前6行到后1行
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW  -- 从开始到当前
ROWS BETWEEN CURRENT ROW AND 3 FOLLOWING       -- 当前行到后3行
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING  -- 全窗口
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING       -- 当前行及前后各1行共3行

RANGE基于值范围

RANGE BETWEEN 100 PRECEDING AND CURRENT ROW    -- 值差100以内
RANGE BETWEEN INTERVAL '7' DAY PRECEDING AND CURRENT ROW  -- 7天内

ROWS vs RANGE

  • ROWS固定行数不受值影响
  • RANGE根据排序字段的值计算范围

实战案例

案例1分组取Top N

-- 每个部门薪资前3名
SELECT *
FROM (
    SELECT 
        name,
        department,
        salary,
        ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn
    FROM employees
) t
WHERE rn <= 3

案例2连续问题判断

-- 判断是否连续增长连续3天增长
SELECT 
    date,
    amount,
    CASE 
        WHEN amount > LAG(amount, 1) OVER (ORDER BY date)
         AND LAG(amount, 1) OVER (ORDER BY date) > LAG(amount, 2) OVER (ORDER BY date)
        THEN '连续增长'
        ELSE '非连续增长'
    END AS trend
FROM daily_sales

案例3组内占比

-- 每个部门各员工薪资占比
SELECT 
    name,
    department,
    salary,
    ROUND(salary / SUM(salary) OVER (PARTITION BY department) * 100, 2) AS salary_pct
FROM employees

案例4累计百分比帕累托分析

-- 80/20分析哪些客户贡献了80%销售额
SELECT 
    customer_id,
    sales_amount,
    ROUND(SUM(sales_amount) OVER (ORDER BY sales_amount DESC) 
        / SUM(sales_amount) OVER () * 100, 2) AS cumulative_pct
FROM customer_sales
ORDER BY sales_amount DESC

案例5缺失值填充

-- 用前一个有效值填充NULL
SELECT 
    date,
    amount,
    LAST_VALUE(amount IGNORE NULLS) OVER (ORDER BY date) AS filled_amount
FROM sales_data

性能优化建议

  1. 减少 PARTITION BY 分组数量:分组越多,计算越慢
  2. 合理使用窗口范围:避免全窗口扫描
  3. 先过滤再窗口WHERE 条件前置,减少数据量
  4. 避免嵌套窗口函数:多次调用会重复计算
-- 推荐:先过滤
SELECT 
    *,
    ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC) AS rn
FROM employees
WHERE hire_date >= '2024-01-01'  -- 先过滤,减少数据量

-- 不推荐:先窗口再过滤(窗口函数在全部数据上执行)
SELECT *
FROM (
    SELECT *, ROW_NUMBER() OVER (...) AS rn
    FROM employees
) t
WHERE hire_date >= '2024-01-01'