-- ===================================================================== -- @SparkSqlName: PAIMONA-D-SQL-{表名}-QUERY -- @Version: 1.0 -- @Desc: 标准 SELECT 查询模板 -- @TargetTables: 无(查询输出) -- @SourceTables: {源表列表} -- @TargetDatabase: Paimon -- @SourceDatabase: Paimon -- ===================================================================== -- ============================================================================ -- 基础查询示例 -- ============================================================================ -- 1. 单表查询 SELECT id, name, amount, created_at FROM source_table WHERE day_id = '${day_id}' -- 分区过滤 AND status = 'active' -- 业务过滤 ORDER BY created_at DESC LIMIT 1000; -- ============================================================================ -- JOIN 查询示例 -- ============================================================================ -- 2. 两表 JOIN SELECT a.id, a.name, b.category_name FROM main_table a JOIN dim_table b ON a.category_id = b.id WHERE a.day_id = '${day_id}' AND b.is_active = true; -- 3. 多表 JOIN(带别名) SELECT o.order_id, u.user_name, p.product_name, oi.quantity, oi.unit_price FROM orders o JOIN users u ON o.user_id = u.id JOIN order_items oi ON o.order_id = oi.order_id JOIN products p ON oi.product_id = p.id WHERE o.day_id = '${day_id}' AND o.status IN ('completed', 'shipped'); -- ============================================================================ -- 聚合查询示例 -- ============================================================================ -- 4. GROUP BY 聚合 SELECT department, COUNT(*) AS employee_count, SUM(salary) AS total_salary, AVG(salary) AS avg_salary, MAX(salary) AS max_salary, MIN(salary) AS min_salary FROM employees WHERE day_id = '${day_id}' GROUP BY department HAVING COUNT(*) >= 5 ORDER BY total_salary DESC; -- 5. 多字段分组 + 去重计数 SELECT date, region, COUNT(*) AS order_count, COUNT(DISTINCT user_id) AS unique_users, SUM(amount) AS total_amount FROM orders WHERE day_id = '${day_id}' GROUP BY date, region; -- ============================================================================ -- 窗口函数示例 -- ============================================================================ -- 6. ROW_NUMBER(分组取Top N) SELECT * FROM ( SELECT department, name, salary, ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn FROM employees WHERE day_id = '${day_id}' ) t WHERE rn <= 3; -- 每个部门薪资前3名 -- 7. 累计聚合 SELECT date, amount, SUM(amount) OVER (ORDER BY date) AS cumulative_amount, AVG(amount) OVER ( ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW ) AS moving_avg_7d FROM daily_sales WHERE day_id = '${day_id}'; -- 8. LAG/LEAD(环比计算) SELECT date, amount, LAG(amount, 1) OVER (ORDER BY date) AS prev_amount, amount - LAG(amount, 1) OVER (ORDER BY date) AS daily_change, ROUND((amount - LAG(amount, 1) OVER (ORDER BY date)) / LAG(amount, 1) OVER (ORDER BY date) * 100, 2) AS growth_rate_pct FROM daily_sales WHERE day_id = '${day_id}'; -- ============================================================================ -- 子查询示例 -- ============================================================================ -- 9. IN 子查询 SELECT * FROM orders WHERE user_id IN ( SELECT id FROM users WHERE vip_level >= 3 ) AND day_id = '${day_id}'; -- 10. EXISTS 子查询 SELECT * FROM products p WHERE EXISTS ( SELECT 1 FROM inventory i WHERE i.product_id = p.id AND i.quantity > 0 ) AND p.day_id = '${day_id}'; -- ============================================================================ -- 条件聚合示例 -- ============================================================================ -- 11. CASE WHEN + 聚合 SELECT date, COUNT(*) AS total_orders, SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) AS completed_count, SUM(CASE WHEN status = 'cancelled' THEN 1 ELSE 0 END) AS cancelled_count, SUM(CASE WHEN status = 'pending' THEN 1 ELSE 0 END) AS pending_count, SUM(CASE WHEN amount > 1000 THEN amount ELSE 0 END) AS high_value_amount FROM orders WHERE day_id = '${day_id}' GROUP BY date;