产品概述
基本概念
集群架构
产品优势
应用场景
-- 旧版本指定分桶个数的创建语法DISTRIBUTED BY HASH(site) BUCKETS 20-- 新版本使用自动分桶推算的创建语法DISTRIBUTED BY HASH(site) BUCKETS AUTO properties("estimate_partition_size" = "100G")
int N = 计算N值;int M = 计算M值;int y = BE节点个数;int x = min(M, N, 128);if (x < N && x < y) {return y;}return x;
case1:数据量 100 MB,10 台 BE 机器,2TB *3 块盘数据量 N = 1BE 磁盘 M = 10* (2TB/50GB) * 3 = 1230x = min(M, N, 128) = 1最终: 1case2:数据量 1GB, 3 台 BE 机器,500GB *2块盘数据量 N = 2BE 磁盘 M = 3* (500GB/50GB) * 2 = 60x = min(M, N, 128) = 2最终: 2case3:数据量100GB,3台BE机器,500GB *2块盘数据量N = 20BE磁盘M = 3* (500GB/50GB) * 2 = 60x = min(M, N, 128) = 20最终: 20case4:数据量500GB,3台BE机器,1TB *1块盘数据量N = 100BE磁盘M = 3* (1TB /50GB) * 1 = 60x = min(M, N, 128) = 63最终: 63case5:数据量500GB,10台BE机器,2TB *3块盘数据量 N = 100BE磁盘 M = 10* (2TB / 50GB) * 3 = 1230x = min(M, N, 128) = 100最终: 100case 6:数据量1TB,10台BE机器,2TB *3块盘数据量 N = 205BE磁盘M = 10* (2TB / 50GB) * 3 = 1230x = min(M, N, 128) = 128最终: 128case 7:数据量500GB,1台BE机器,100TB *1块盘数据量 N = 100BE磁盘M = 1* (100TB / 50GB) * 1 = 2048x = min(M, N, 128) = 100最终: 100case 8:数据量1TB, 200台BE机器,4TB *7块盘数据量 N = 205BE磁盘M = 200* (4TB / 50GB) * 7 = 114800x = min(M, N, 128) = 128最终: 200
S7 - S6 = delta1,S6 - S5 = delta2,...S2 - S1 = delta6
今天的estimate_partition_size = EMA(S1, ..., S7)
CREATE TABLE IF NOT EXISTS example_auto_bucket_tbl ( user_id VARCHAR(128) NOT NULL COMMENT "用户id", date DATE NOT NULL COMMENT "数据灌入日期时间", data varchar(512) NOT NULL ) ENGINE=OLAP DUPLICATE KEY(user_id) PARTITION BY RANGE(date)() DISTRIBUTED BY HASH(user_id) BUCKETS AUTO PROPERTIES ( "replication_num" = "1", # 开启动态分区 "dynamic_partition.enable" = "true", # 按月进行动态分区 "dynamic_partition.time_unit" = "MONTH", # 保留5个历史分区 "dynamic_partition.start" = "-5", # 提前创建10个未来分区 "dynamic_partition.end" = "3", # 分区命名前缀 "dynamic_partition.prefix" = "p_", # 开启创建历史分区 "dynamic_partition.create_history_partition" = "true", # 创建3个历史分区 "dynamic_partition.history_partition_num" = "3", # 预计分区的数据量 "estimate_partition_size" = "1G" );



ALTER TABLE example_auto_bucket_tbl SET(# 关闭动态分区"dynamic_partition.enable" = "false");
# 关闭动态分区后,修改表的分桶数量alter table example_auto_bucket_tbl modify DISTRIBUTION DISTRIBUTED BY HASH(`user_id`) BUCKETS 16;
文档反馈