create database tpch;use tpch;drop table if exists customer;create tablecustomer(c_custkeybigint(20) not null,c_namevarchar(25) not null,c_addressvarchar(40) not null,c_nationkeybigint(20) not null,c_phonechar(15) not null,c_acctbaldecimal(15,2) not null,c_mktsegmentchar(10) not null,c_commentvarchar(117) not null,primary key (c_custkey)) engine=innodb default charset=utf8mb4 collate=utf8mb4_bin;drop table if exists lineitem;create tablelineitem(l_orderkeybigint(20) not null,l_partkeybigint(20) not null,l_suppkeybigint(20) not null,l_linenumberbigint(20) not null,l_quantitydecimal(15,2) not null,l_extendedpricedecimal(15,2) not null,l_discountdecimal(15,2) not null,l_taxdecimal(15,2) not null,l_returnflagchar(1) not null,l_linestatuschar(1) not null,l_shipdatedate not null,l_commitdatedate not null,l_receiptdatedate not null,l_shipinstructchar(25) not null,l_shipmodechar(10) not null,l_commentvarchar(44) not null,primary key (l_orderkey,l_linenumber)) engine=innodb default charset=utf8mb4 collate=utf8mb4_bin;drop table if exists nation;create tablenation(n_nationkeybigint(20) not null,n_namechar(25) not null,n_regionkeybigint(20) not null,n_commentvarchar(152) default null,primary key (n_nationkey)) engine=innodb default charset=utf8mb4 collate=utf8mb4_bin;drop table if exists orders;create tableorders(o_orderkeybigint(20) not null,o_custkeybigint(20) not null,o_orderstatuschar(1) not null,o_totalpricedecimal(15,2) not null,o_orderdatedate not null,o_orderprioritychar(15) not null,o_clerkchar(15) not null,o_shipprioritybigint(20) not null,o_commentvarchar(79) not null,primary key (o_orderkey)) engine=innodb default charset=utf8mb4 collate=utf8mb4_bin;drop table if exists part;create tablepart(p_partkeybigint(20) not null,p_namevarchar(55) not null,p_mfgrchar(25) not null,p_brandchar(10) not null,p_typevarchar(25) not null,p_sizebigint(20) not null,p_containerchar(10) not null,p_retailpricedecimal(15,2) not null,p_commentvarchar(23) not null,primary key (p_partkey)) engine=innodb default charset=utf8mb4 collate=utf8mb4_bin;drop table if exists partsupp;create tablepartsupp(ps_partkeybigint(20) not null,ps_suppkeybigint(20) not null,ps_availqtybigint(20) not null,ps_supplycostdecimal(15,2) not null,ps_commentvarchar(199) not null,primary key (ps_partkey,ps_suppkey)) engine=innodb default charset=utf8mb4 collate=utf8mb4_bin;drop table if exists region;create tableregion(r_regionkeybigint(20) not null,r_namechar(25) not null,r_commentvarchar(152) default null,primary key (r_regionkey)) engine=innodb default charset=utf8mb4 collate=utf8mb4_bin;drop table if exists supplier;create tablesupplier(s_suppkeybigint(20) not null,s_namechar(25) not null,s_addressvarchar(40) not null,s_nationkeybigint(20) not null,s_phonechar(15) not null,s_acctbaldecimal(15,2) not null,s_commentvarchar(101) not null,primary key (s_suppkey)) engine=innodb default charset=utf8mb4 collate=utf8mb4_bin;## Viewcreate view revenue0 (supplier_no, total_revenue) asselect l_suppkey, sum(l_extendedprice * (1 - l_discount))fromlineitemwherel_shipdate >= date '1995-02-01'and l_shipdate < date '1995-02-01' + interval '3' monthgroup byl_suppkey;
unzip TPC-H_Tools_v3.0.0.zipcd TPC-H_Tools_v3.0.0/dbgenmake
scale=100chunk=10for i inseq1$chunkdo./dbgen -s $scale -C $chunk -S $i -fdone
Table Name | Number of Records |
customer | 15000000 |
lineitem | 600037902 |
nation | 25 |
orders | 150000000 |
part | 20000000 |
partsupp | 80000000 |
region | 5 |
supplier | 1000000 |
ls *.tblcustomer.tbl lineitem.tbl nation.tbl orders.tbl partsupp.tbl part.tbl region.tbl supplier.tbl
cd /data/tpchdataHOST=172.16.0.22PORT=3306USER=rootPassword=xxxxxDATABASE=tpchls *.tbl* | while read filenamedotablename=echo$filename|awk-F'.tbl''{print $1}'mysql -u${USER} -h${HOST} -P${PORT} -p${PASSWORD} ${DATABASE} -e "LOAD DATA LOCAL INFILE '${filename}' INTO TABLE ${tablename} FIELDS TERMINATED BY '|' LINES TERMINATED BY '\\n';"done
mysql -uroot -p'' -h10.1.1.3 -P3306 -cshow replication tables;
ANALYZE TABLE customer;ANALYZE TABLE lineitem;ANALYZE TABLE nation;ANALYZE TABLE orders;ANALYZE TABLE part;ANALYZE TABLE partsupp;ANALYZE TABLE region;ANALYZE TABLE supplier;
# Q1select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from lineitem where l_shipdate <= date_sub('1998-12-01', interval 108 day) group by l_returnflag, l_linestatus order by l_returnflag, l_linestatus;# Q2select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) PX_JOIN_FILTER_ID(0[BLOOM_FILTER],2[BLOOM_FILTER]) */ s_acctbal, s_name, n_name, p_partkey, p_mfgr, s_address, s_phone, s_comment from part, supplier, partsupp, nation, region where p_partkey = ps_partkey and s_suppkey = ps_suppkey and p_size = 30 and p_type like '%STEEL'and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'ASIA'and ps_supplycost = (select min(ps_supplycost) from partsupp, supplier, nation, region where p_partkey = ps_partkey and s_suppkey = ps_suppkey and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'ASIA') order by s_acctbal desc, n_name, s_name, p_partkey limit 100;# Q3select l_orderkey, sum(l_extendedprice * (1 - l_discount)) as revenue, o_orderdate, o_shippriority from customer, orders, lineitem where c_mktsegment = 'AUTOMOBILE' and c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate < '1995-03-13' and l_shipdate > '1995-03-13' group by l_orderkey, o_orderdate, o_shippriority order by revenue desc, o_orderdate limit 10;# Q4select o_orderpriority, count(*) as order_count from orders where o_orderdate >= '1995-01-01' and o_orderdate < date_add('1995-01-01', interval '3' month) and exists ( select * from lineitem where l_orderkey = o_orderkey and l_commitdate < l_receiptdate ) group by o_orderpriority order by o_orderpriority;# Q5select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) PX_JOIN_FILTER_ID(3[BLOOM_FILTER],4[BLOOM_FILTER]) */ n_name, sum(l_extendedprice * (1 - l_discount)) as revenue from customer, orders, lineitem, supplier, nation, region where c_custkey = o_custkey and l_orderkey = o_orderkey and l_suppkey = s_suppkey and c_nationkey = s_nationkey and s_nationkey = n_nationkey and n_regionkey = r_regionkey and r_name = 'MIDDLE EAST' and o_orderdate >= '1994-01-01' and o_orderdate < date_add('1994-01-01', interval '1' year) group by n_name order by revenue desc;# Q6select sum(l_extendedprice * l_discount) as revenue from lineitem where l_shipdate >= '1994-01-01' and l_shipdate < date_add('1994-01-01', interval '1' year) and l_discount between 0.06 - 0.01 and 0.06 + 0.01 and l_quantity < 24;# Q7select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) PX_JOIN_FILTER_ID(1[BLOOM_FILTER], 3[BLOOM_FILTER]) */ supp_nation, cust_nation, l_year, sum(volume) as revenue from ( select /*+ leading((n1,supplier),lineitem,((n2,customer),orders)) */ n1.n_name as supp_nation, n2.n_name as cust_nation, extract(year from l_shipdate) as l_year, l_extendedprice * (1 - l_discount) as volume from supplier, lineitem, orders, customer, nation n1, nation n2 where s_suppkey = l_suppkey and o_orderkey = l_orderkey and c_custkey = o_custkey and s_nationkey = n1.n_nationkey and c_nationkey = n2.n_nationkey and ( (n1.n_name = 'JAPAN' and n2.n_name = 'INDIA') or (n1.n_name = 'INDIA' and n2.n_name = 'JAPAN') ) and l_shipdate between '1995-01-01' and '1996-12-31' ) as shipping group by supp_nation, cust_nation, l_year order by supp_nation, cust_nation, l_year;# Q8select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) PX_JOIN_FILTER_ID(1[BLOOM_FILTER], 2[BLOOM_FILTER],5[BLOOM_FILTER]) */ o_year, sum(case when nation = 'INDIA' then volume else 0 end) / sum(volume) as mkt_share from ( select /*+ leading(region,n1,customer,orders,(part,lineitem),(supplier,n2)) */ extract(year from o_orderdate) as o_year, l_extendedprice * (1 - l_discount) as volume, n2.n_name as nation from part, supplier, lineitem, orders, customer, nation n1, nation n2, region where p_partkey = l_partkey and s_suppkey = l_suppkey and l_orderkey = o_orderkey and o_custkey = c_custkey and c_nationkey = n1.n_nationkey and n1.n_regionkey = r_regionkey and r_name = 'ASIA' and s_nationkey = n2.n_nationkey and o_orderdate between '1995-01-01' and '1996-12-31' and p_type = 'SMALL PLATED COPPER' ) as all_nations group by o_year order by o_year;# Q9select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) PX_JOIN_FILTER_ID(4[BLOOM_FILTER]) JOIN_FILTER_NDV_AMP_RATIO(4:0.1) */ nation, o_year, sum(amount) as sum_profit from ( select /*+ hash_join_probe(orders) */n_name as nation, extract(year from o_orderdate) as o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount from part, supplier, lineitem, partsupp, orders, nation where s_suppkey = l_suppkey and ps_suppkey = l_suppkey and ps_partkey = l_partkey and p_partkey = l_partkey and o_orderkey = l_orderkey and s_nationkey = n_nationkey and p_name like '%dim%' ) as profit group by nation, o_year order by nation, o_year desc;# Q10select /*+ HASH_JOIN_PROBE((nation,customer)), leading(nation, customer,(orders, lineitem)) */ c_custkey, c_name, sum(l_extendedprice * (1 - l_discount)) as revenue, c_acctbal, n_name, c_address, c_phone, c_comment from customer, orders, lineitem, nation where c_custkey = o_custkey and l_orderkey = o_orderkey and o_orderdate >= '1993-08-01' and o_orderdate < date_add('1993-08-01', interval '3' month) and l_returnflag = 'R' and c_nationkey = n_nationkey group by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment order by revenue desc limit 20;# Q11select ps_partkey, sum(ps_supplycost * ps_availqty) as value from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'MOZAMBIQUE' group by ps_partkey having sum(ps_supplycost * ps_availqty) > ( select sum(ps_supplycost * ps_availqty) * 0.0001000000 from partsupp, supplier, nation where ps_suppkey = s_suppkey and s_nationkey = n_nationkey and n_name = 'MOZAMBIQUE' ) order by value desc;# Q12select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) PX_JOIN_FILTER_ID(0[BLOOM_FILTER]) */ l_shipmode, sum(case when o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH' then 1 else 0 end) as high_line_count, sum(case when o_orderpriority <> '1-URGENT' and o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count from orders, lineitem where o_orderkey = l_orderkey and l_shipmode in ('RAIL', 'FOB') and l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_receiptdate >= '1997-01-01' and l_receiptdate < date_add('1997-01-01', interval '1' year) group by l_shipmode order by l_shipmode;# Q13select c_count, count(*) as custdist from (select c_custkey, count(o_orderkey) as c_count from customer left outer join orders on c_custkey = o_custkey and o_comment not like '%pending%deposits%'group by c_custkey ) c_orders group by c_count order by custdist desc, c_count desc;# Q14select 100.00 * sum(case when p_type like 'PROMO%'then l_extendedprice * (1 - l_discount) else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue from lineitem, part where l_partkey = p_partkey and l_shipdate >= '1996-12-01'and l_shipdate < date_add('1996-12-01', interval '1' month);# Q15select s_suppkey, s_name, s_address, s_phone, total_revenue from supplier, revenue0 where s_suppkey = supplier_no and total_revenue = (select max(total_revenue) from revenue0 ) order by s_suppkey;# Q16select p_brand, p_type, p_size, count(distinct ps_suppkey) as supplier_cnt from partsupp, part where p_partkey = ps_partkey and p_brand <> 'Brand#34'and p_type not like 'LARGE BRUSHED%'and p_size in (48, 19, 12, 4, 41, 7, 21, 39) and ps_suppkey not in (select s_suppkey from supplier where s_comment like '%Customer%Complaints%') group by p_brand, p_type, p_size order by supplier_cnt desc, p_brand, p_type, p_size;# Q17select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) PX_JOIN_FILTER_ID(0[BLOOM_FILTER],1[BLOOM_FILTER]) */ sum(l_extendedprice) / 7.0 as avg_yearly from lineitem, part where p_partkey = l_partkey and p_brand = 'Brand#44'and p_container = 'WRAP PKG'and l_quantity < (select 0.2 * avg(l_quantity) from lineitem where l_partkey = p_partkey );# Q18select /*+ PX_JOIN_FILTER_ID(0,1,2) leading(customer, (orders, lineitem@sel_2)) hash_join_probe(customer) JOIN_FILTER_NDV_AMP_RATIO(0:0.00001) JOIN_FILTER_NDV_AMP_RATIO(2:0.00001) JOIN_FILTER_NDV_AMP_RATIO(1:0.0001) */ c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, sum(l_quantity) from customer, orders, lineitem where o_orderkey in (select l_orderkey from lineitem group by l_orderkey having sum(l_quantity) > 314 ) and c_custkey = o_custkey and o_orderkey = l_orderkey group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice order by o_totalprice desc, o_orderdate limit 100;# Q19select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) PX_JOIN_FILTER_ID(0[BLOOM_FILTER]) */ sum(l_extendedprice* (1 - l_discount)) as revenue from lineitem, part where (p_partkey = l_partkey and p_brand = 'Brand#52'and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') and l_quantity >= 4 and l_quantity <= 4 + 10 and p_size between 1 and 5 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON') or (p_partkey = l_partkey and p_brand = 'Brand#11'and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') and l_quantity >= 18 and l_quantity <= 18 + 10 and p_size between 1 and 10 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON') or (p_partkey = l_partkey and p_brand = 'Brand#51'and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') and l_quantity >= 29 and l_quantity <= 29 + 10 and p_size between 1 and 15 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON');# Q20select s_name, s_address from supplier, nation where s_suppkey in (select ps_suppkey from partsupp where ps_partkey in (select p_partkey from part where p_name like 'green%') and ps_availqty > (select 0.5 * sum(l_quantity) from lineitem where l_partkey = ps_partkey and l_suppkey = ps_suppkey and l_shipdate >= '1993-01-01'and l_shipdate < date_add('1993-01-01', interval '1' year) ) ) and s_nationkey = n_nationkey and n_name = 'ALGERIA'order by s_name;# Q21select /*+ SET_VAR(libra_enable_runtime_filter=1) SET_VAR(libra_enable_cost_based_runtime_filter=0) JOIN_FILTER_NDV_AMP_RATIO(0:0.5) JOIN_FILTER_NDV_AMP_RATIO(1:0.5) PX_JOIN_FILTER_ID(3[BLOOM_FILTER],1,0) */ s_name, count(*) as numwait from supplier, lineitem l1, orders, nation where s_suppkey = l1.l_suppkey and o_orderkey = l1.l_orderkey and o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists ( select * from lineitem l2 where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey <> l1.l_suppkey ) and not exists ( select * from lineitem l3 where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey <> l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate ) and s_nationkey = n_nationkey and n_name = 'EGYPT' group by s_name order by numwait desc, s_name limit 100;# Q22select cntrycode, count(*) as numcust, sum(c_acctbal) as totacctbal from (select substring(c_phone from 1 for 2) as cntrycode, c_acctbal from customer where substring(c_phone from 1 for 2) in ('20', '40', '22', '30', '39', '42', '21') and c_acctbal > (select avg(c_acctbal) from customer where c_acctbal > 0.00 and substring(c_phone from 1 for 2) in ('20', '40', '22', '30', '39', '42', '21') ) and not exists (select * from orders where o_custkey = c_custkey ) ) as custsale group by cntrycode order by cntrycode;
Feedback