VALUES LESS THAN (...) 仅指定上界,系统会将前一个分区的上界作为该分区的下界,生成一个左闭右开的区间。同时,也支持通过 VALUES [...) 指定上下界,生成一个左闭右开的区间。VALUES [...) 同时指定上下界比较容易理解。这里举例说明,当使用 VALUES LESS THAN (...) 语句进行分区的增删操作时,分区范围的变化情况。-- Range PartitionCREATE TABLE IF NOT EXISTS example_db.expamle_range_tbl(`user_id` LARGEINT NOT NULL COMMENT "用户id",`date` DATE NOT NULL COMMENT "数据灌入日期时间",`timestamp` DATETIME NOT NULL COMMENT "数据灌入的时间戳",`city` VARCHAR(20) COMMENT "用户所在城市",`age` SMALLINT COMMENT "用户年龄",`sex` TINYINT COMMENT "用户性别",`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",`cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费",`max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间",`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间")ENGINE=OLAPAGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)PARTITION BY RANGE(`date`)(PARTITION `p201701` VALUES LESS THAN ("2017-02-01"),PARTITION `p201702` VALUES LESS THAN ("2017-03-01"),PARTITION `p201703` VALUES LESS THAN ("2017-04-01"))DISTRIBUTED BY HASH(`user_id`) BUCKETS 16PROPERTIES("replication_num" = "3");
expamle_range_tbl 示例,当建表完成后,查看分区会发现自动生成了如下3个分区:show partitions from expamle_range_tbl;
p201701: [MIN_VALUE, 2017-02-01)p201702: [2017-02-01, 2017-03-01)p201703: [2017-03-01, 2017-04-01)
alter table expamle_range_tbl add partition p201705 VALUES LESS THAN ("2017-06-01");show partitions from expamle_range_tbl;
p201701: [MIN_VALUE, 2017-02-01)p201702: [2017-02-01, 2017-03-01)p201703: [2017-03-01, 2017-04-01)p201705: [2017-04-01, 2017-06-01)
alter table expamle_range_tbl drop partition p201703;show partitions from expamle_range_tbl;
p201701: [MIN_VALUE, 2017-02-01)p201702: [2017-02-01, 2017-03-01)p201705: [2017-04-01, 2017-06-01)
alter table expamle_range_tbl drop partition p201702;show partitions from expamle_range_tbl;
p201701: [MIN_VALUE, 2017-02-01)p201705: [2017-04-01, 2017-06-01)
alter table expamle_range_tbl add partition p201702new VALUES LESS THAN ("2017-03-01");show partitions from expamle_range_tbl;
p201701: [MIN_VALUE, 2017-02-01)p201702new: [2017-02-01, 2017-03-01)p201705: [2017-04-01, 2017-06-01)
alter table expamle_range_tbl drop partition p201701;alter table expamle_range_tbl add partition p201612 VALUES LESS THAN ("2017-01-01");show partitions from expamle_range_tbl;
p201612: [MIN_VALUE, 2017-01-01)p201702new: [2017-02-01, 2017-03-01)p201705: [2017-04-01, 2017-06-01)
VALUES LESS THAN 语句增加分区时,分区的下界紧接上一个分区的上界。-- Range PartitionCREATE TABLE IF NOT EXISTS example_db.expamle_range_multi_partiton_key_tbl(`user_id` LARGEINT NOT NULL COMMENT "用户id",`date` DATE NOT NULL COMMENT "数据灌入日期时间",`timestamp` DATETIME NOT NULL COMMENT "数据灌入的时间戳",`city` VARCHAR(20) COMMENT "用户所在城市",`age` SMALLINT COMMENT "用户年龄",`sex` TINYINT COMMENT "用户性别",`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",`cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费",`max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间",`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间")ENGINE=OLAPAGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)PARTITION BY RANGE(`date`, `user_id`)(PARTITION `p201701_1000` VALUES LESS THAN ("2017-02-01", "1000"),PARTITION `p201702_2000` VALUES LESS THAN ("2017-03-01", "2000"),PARTITION `p201703_all` VALUES LESS THAN ("2017-04-01"))DISTRIBUTED BY HASH(`user_id`) BUCKETS 16PROPERTIES("replication_num" = "3");
date(DATE 类型)和 user_id(INT 类型)作为分区列。以上示例最终得到的分区如下:show partitions from expamle_range_multi_partiton_key_tbl;
* p201701_1000: [(MIN_VALUE, MIN_VALUE), ("2017-02-01", "1000") )* p201702_2000: [("2017-02-01", "1000"), ("2017-03-01", "2000") )* p201703_all: [("2017-03-01", "2000"), ("2017-04-01", MIN_VALUE))
date 列的分区值,所以 user_id 列的分区值会默认填充 MIN_VALUE。当用户插入数据时,分区列值会按照顺序依次比较,最终得到对应的分区。* 数据 --> 分区* 2017-01-01, 200 --> p201701_1000* 2017-01-01, 2000 --> p201701_1000* 2017-02-01, 100 --> p201701_1000* 2017-02-01, 2000 --> p201702_2000* 2017-02-15, 5000 --> p201702_2000* 2017-03-01, 2000 --> p201703_all* 2017-03-10, 1 --> p201703_all* 2017-04-01, 1000 --> 无法导入* 2017-05-01, 1000 --> 无法导入
insert into expamle_range_multi_partiton_key_tbl values (200, '2017-01-01', '2017-01-01 12:00:05', 'Beijing', 25, 1, '2017-01-01 12:00:05', 100, 30, 10);insert into expamle_range_multi_partiton_key_tbl values (2000, '2017-01-01', '2017-01-01 16:10:05', 'Shanghai', 33, 1, '2017-01-01 16:10:05', 800, 50, 1);insert into expamle_range_multi_partiton_key_tbl values (200, '2017-02-01', '2017-01-01 16:10:05', 'Guangzhou', 22, 0, '2017-02-01 16:10:05', 80, 200, 1);show partitions from expamle_range_multi_partiton_key_tbl\\G
BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, LARGEINT, DATE, DATETIME, CHAR, VARCHAR 数据类型,分区值为枚举值。只有当数据为目标分区枚举值其中之一时,才可以命中分区。Partition 支持通过 VALUES IN (...) 来指定每个分区包含的枚举值。
下面通过示例说明,进行分区的增删操作时,分区的变化。-- List PartitionCREATE TABLE IF NOT EXISTS example_db.expamle_list_tbl(`user_id` LARGEINT NOT NULL COMMENT "用户id",`date` DATE NOT NULL COMMENT "数据灌入日期时间",`timestamp` DATETIME NOT NULL COMMENT "数据灌入的时间戳",`city` VARCHAR(20) NOT NULL COMMENT "用户所在城市",`age` SMALLINT COMMENT "用户年龄",`sex` TINYINT COMMENT "用户性别",`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",`cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费",`max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间",`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间")ENGINE=olapAGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)PARTITION BY LIST(`city`)(PARTITION `p_cn` VALUES IN ("Beijing", "Shanghai", "Hong Kong"),PARTITION `p_usa` VALUES IN ("New York", "San Francisco"),PARTITION `p_jp` VALUES IN ("Tokyo"))DISTRIBUTED BY HASH(`user_id`) BUCKETS 16PROPERTIES("replication_num" = "3");
example_list_tbl 示例,当建表完成后,会自动生成如下3个分区:show partitions from expamle_list_tbl;
p_cn: ("Beijing", "Shanghai", "Hong Kong")p_usa: ("New York", "San Francisco")p_jp: ("Tokyo")
alter table expamle_list_tbl add partition p_uk VALUES IN ("London");show partitions from expamle_list_tbl;
p_cn: ("Beijing", "Shanghai", "Hong Kong")p_usa: ("New York", "San Francisco")p_jp: ("Tokyo")p_uk: ("London")
alter table expamle_list_tbl drop partition p_jp;show partitions from expamle_list_tbl;
p_cn: ("Beijing", "Shanghai", "Hong Kong")p_usa: ("New York", "San Francisco")p_uk: ("London")
-- List PartitionCREATE TABLE IF NOT EXISTS example_db.expamle_list_multi_partiton_key_tbl(`user_id` LARGEINT NOT NULL COMMENT "用户id",`date` DATE NOT NULL COMMENT "数据灌入日期时间",`timestamp` DATETIME NOT NULL COMMENT "数据灌入的时间戳",`city` VARCHAR(20) NOT NULL COMMENT "用户所在城市",`age` SMALLINT COMMENT "用户年龄",`sex` TINYINT COMMENT "用户性别",`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",`cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费",`max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间",`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间")ENGINE=olapAGGREGATE KEY(`user_id`, `date`, `timestamp`, `city`, `age`, `sex`)PARTITION BY LIST(`user_id`, `city`)(PARTITION `p1_city` VALUES IN (("1", "Beijing"), ("1", "Shanghai")),PARTITION `p2_city` VALUES IN (("2", "Beijing"), ("2", "Shanghai")),PARTITION `p3_city` VALUES IN (("3", "Beijing"), ("3", "Shanghai")))DISTRIBUTED BY HASH(`user_id`) BUCKETS 16PROPERTIES("replication_num" = "3");
user_id(INT 类型) 和 city(VARCHAR 类型) 作为分区列。以上示例最终得到的分区如下:show partitions from expamle_list_multi_partiton_key_tbl;
* p1_city: [("1", "Beijing"), ("1", "Shanghai")]* p2_city: [("2", "Beijing"), ("2", "Shanghai")]* p3_city: [("3", "Beijing"), ("3", "Shanghai")]
* 数据 ---> 分区* 1, Beijing ---> p1_city* 1, Shanghai ---> p1_city* 2, Shanghai ---> p2_city* 3, Beijing ---> p3_city* 1, Tianjin ---> 无法导入* 4, Beijing ---> 无法导入
insert into expamle_list_multi_partiton_key_tbl values (1, '2017-01-01', '2017-01-01 12:00:05', 'Beijing', 25, 1, '2017-01-01 12:00:05', 100, 30, 10);show partitions from expamle_list_multi_partiton_key_tbl\\G
DISTRIBUTED ... 语句描述的是数据在各个分区内的划分规则。如果不使用 Partition,则描述的是对整个表的数据的划分规则。ADD PARTITION),可以单独指定新分区的 Bucket 数量。可以利用这个功能方便的应对数据缩小或膨胀。SHOW DATA 命令查看,结果除以副本数,即表的数据量。Failed to create partition [xxx] . TimeoutFailed to create partition,因为如前文所述,Doris 会为没有指定 Partition 的表创建一个不可更改的默认的 Partition。
当遇到这个错误时,通常是 BE 在创建数据分片时遇到了问题。可以参照以下步骤排查:Failed to create partition 日志。在该日志中,会出现一系列类似 {10001-10010} 字样的数字对。数字对的第一个数字表示 Backend ID,第二个数字表示 Tablet ID。如上这个数字对,表示 ID 为 10001 的 Backend 上,创建 ID 为 10010 的 Tablet 失败了。Too many open files。打开的文件句柄数超过了 Linux 系统限制。需修改 Linux 系统的句柄数限制。tablet_create_timeout_second=xxx 以及 max_create_table_timeout_second=xxx 来延长超时时间。其中 tablet_create_timeout_second 默认是1秒,max_create_table_timeout_second 默认是60秒,总体的超时时间为 min(tablet_create_timeout_second * replication_num, max_create_table_timeout_second),具体参数设置参见 FE 配置项 。max_dynamic_partition_num和max_multi_partition_num参数,分别对自动分区创建分区和批量创建分区的数量进行了限制。replication_allocation和buckets参数。dynamic_partition.replication_allocation和dynamic_partition.buckets参数。dynamic_partition.buckets,修改后新自动分区创建的bucket分桶数量创建分区,对于已创建的分区不生效。文档反馈