-- =====================================================================
-- @Name: DORIS-D-SQL-{表名}-INSERT
-- @Version: 1.0
-- @Desc: Apache Doris data insert template
-- @TargetDatabase: Apache Doris
-- =====================================================================
-- Notes:
--   * The {表名} token in @Name is the table-name placeholder; the tag value
--     is kept verbatim in case tooling reads it.
--   * db_name, target_table, source_table etc. are placeholders to replace.
--   * ${day_id} is a template variable; presumably substituted by the
--     scheduler before execution -- confirm against your job runner.

-- ============================================================================
-- Scenario 1: INSERT INTO (append)
-- ============================================================================
-- Use when: appending rows to a Doris table; existing rows are not deleted.
INSERT INTO db_name.target_table
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table
WHERE stat_date = '${day_id}';

-- ============================================================================
-- Scenario 2: INSERT OVERWRITE (replace)
-- ============================================================================
-- Use when: replacing all data of the target table (or of the listed partitions).
-- Requires Doris 2.0+. The TABLE keyword is mandatory in Doris syntax.
-- NOTE(review): the original template stated this only applies to partitioned
-- tables; the Doris documentation describes overwriting either a whole table
-- or selected partitions -- confirm against your Doris version.

-- Overwrite the whole table
INSERT OVERWRITE TABLE db_name.target_table
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table;

-- Overwrite a specific partition (recommended)
-- Unlike Hive static partitions, a Doris partition column is an ordinary table
-- column, so stat_date must still be supplied by the SELECT list.
INSERT OVERWRITE TABLE db_name.target_table PARTITION (p202605)
SELECT
    stat_date,
    department,
    region,
    order_count,
    total_amount
FROM db_name.source_table
WHERE stat_date >= '2026-05-01'
  AND stat_date < '2026-06-01';

-- ============================================================================
-- Scenario 3: write from a query result (ETL)
-- ============================================================================
-- Light transformation, then insert.
-- Columns are qualified to avoid ambiguous references across the join.
-- Assumes department comes from dim_department and the remaining columns from
-- source_orders -- adjust the qualifiers to your schema.
-- Because this is a LEFT JOIN, orders without a matching department are kept
-- and grouped under a NULL department.
INSERT INTO db_name.target_table
SELECT
    o.order_date,
    d.department,
    COUNT(*)                  AS order_count,
    COUNT(DISTINCT o.user_id) AS unique_users,
    SUM(o.total_amount)       AS total_amount,
    AVG(o.total_amount)       AS avg_amount
FROM db_name.source_orders AS o
LEFT JOIN db_name.dim_department AS d
    ON o.dept_id = d.dept_id
WHERE o.order_date = '${day_id}'
GROUP BY
    o.order_date,
    d.department;

-- ============================================================================
-- Scenario 4: batch VALUES insert
-- ============================================================================
INSERT INTO db_name.target_table (stat_date, department, amount)
VALUES
    ('2026-05-01', '市场部', 10000.00),
    ('2026-05-01', '技术部', 25000.00),
    ('2026-05-01', '运营部', 18000.00);

-- ============================================================================
-- Scenario 5: Stream Load (data import)
-- ============================================================================
-- Use when: importing large batches (millions of rows or more).
-- Note: Stream Load is submitted over HTTP; it is not SQL syntax.
/*
curl command example:

curl --location-trusted -u user:password \
    -H "label:load_order_20260501" \
    -H "column_separator:," \
    -H "columns:order_id,order_date,user_id,total_amount" \
    -T data.csv \
    http://fe_host:8030/api/db_name/orders/_stream_load
*/

-- ============================================================================
-- Scenario 6: Broker Load (import from an external data source)
-- ============================================================================
-- Written as line comments on purpose: the HDFS glob below contains a
-- slash-asterisk sequence, which a lexer that supports nested block comments
-- would treat as a comment opener. Strip the leading "-- " to use the example.
--
-- LOAD LABEL db_name.load_label_20260501
-- (
--     DATA INFILE('hdfs://namenode:8020/path/to/data/*')
--     INTO TABLE target_table
--     COLUMNS TERMINATED BY ','
--     (stat_date, department, region, amount)
--     SET (amount = amount * 1.0)
-- )
-- WITH BROKER 'broker_name'
-- (
--     'username' = 'hdfs_user',
--     'password' = 'hdfs_password'
-- )
-- PROPERTIES
-- (
--     'timeout' = '3600',
--     'max_filter_ratio' = '0.01'
-- );

-- ============================================================================
-- Key rules
-- ============================================================================
/*
1. INSERT INTO vs INSERT OVERWRITE
   - INSERT INTO: appends data; existing data is not deleted.
   - INSERT OVERWRITE TABLE: replaces data (Doris 2.0+).
   - Prefer INSERT INTO for daily increments and INSERT OVERWRITE for full
     refreshes.

2. Doris does not chain temporary tables
   - Unlike Spark, ETL in Doris is usually done with a single SQL statement
     or with CTEs.
   - A direct INSERT INTO ... SELECT ... is enough.

3. Column order
   - The SELECT column order must match the column definition order of the
     target table,
   - or name the columns explicitly: INSERT INTO table (col1, col2) SELECT ...

4. Choosing an import method
   - Small volumes: INSERT INTO ... SELECT ... or INSERT INTO ... VALUES ...
   - Large batches: Stream Load (HTTP PUT, highest performance)
   - HDFS import: Broker Load
   - External data sources: Routine Load (Kafka, etc.)

5. Performance advice
   - Batch writes beat row-by-row writes.
   - Stream Load is the highest-performance import method.
   - Accumulate rows and write them in one batch; avoid frequent small imports.
*/