SELECT 语句:从零个或多个表中检索数据行。[ WITH with_query [, ...] ]SELECT [ ALL | DISTINCT ] select_expression [, ...][ FROM from_item [, ...] ][ WHERE condition ][ GROUP BY [ ALL | DISTINCT ] grouping_element [, ...] ][ HAVING condition ][ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] select ][ ORDER BY expression [ ASC | DESC ] [ NULLS FIRST | NULLS LAST] [, ...] ][ LIMIT [ count | ALL ] ]
with_query 语法如下:subquery_table_name [ ( column_name [, ...] ) ] AS (subquery)
[ INNER ] JOINLEFT [ OUTER ] JOINRIGHT [ OUTER ] JOINFULL [ OUTER ] JOINCROSS JOINON join_condition,如果使用 join_condition,您可以为多个表中的联接键指定列名称;如果使用 join_column,则要求 join_column 在两个表中都存在。UNION、INTERSECT 和 EXCEPT 将多个结果组合在一起,UNION 将第一个查询生成的行与第二个查询生成的行组合在一起。为了消除重复,UNION 构建一个散列表,这会消耗内存。为了更好的性能,建议使用 UNION ALL。INTERSECT 仅返回第一个和第二个查询的结果中存在的行。EXCEPT 返回第一个查询结果中的行,不包括第二个查询找到的行。expression 对结果集进行排序。然后,第二个 expression 应用于具有第一个表达式中的匹配值的行,以此类推。WITH x AS (SELECT a, MAX(b) AS b FROM t GROUP BY a)SELECT a, b FROM x;
WITHt1 AS (SELECT a, MAX(b) AS b FROM x GROUP BY a),t2 AS (SELECT a, AVG(d) AS d FROM y GROUP BY a)SELECT t1.*, t2.*FROM t1JOIN t2 ON t1.a = t2.a;
SELECT count(*), nationkey FROM customer GROUP BY 2;
SELECT count(*), nationkey FROM customer GROUP BY nationkey;
SELECT count(*) FROM customer GROUP BY mktsegment;
SELECT origin_state, origin_zip, destination_state, sum(package_weight)FROM shippingGROUP BY GROUPING SETS ((origin_state),(origin_state, origin_zip),(destination_state));
SELECT origin_state, NULL, NULL, sum(package_weight)FROM shipping GROUP BY origin_stateUNION ALLSELECT origin_state, origin_zip, NULL, sum(package_weight)FROM shipping GROUP BY origin_state, origin_zipUNION ALLSELECT NULL, NULL, destination_state, sum(package_weight)FROM shipping GROUP BY destination_state;
SELECT count(*), mktsegment, nationkey,CAST(sum(acctbal) AS bigint) AS totalbalFROM customerGROUP BY mktsegment, nationkeyHAVING sum(acctbal) > 5700000ORDER BY totalbal DESC;
SELECT nameFROM nationWHERE regionkey IN (SELECT regionkey FROM region)
SELECT column_name(s)FROM table_nameWHERE EXISTS(SELECT column_name FROM table_name WHERE condition)
SELECT *FROM table_1JOIN table_2USING (key_A, key_B)
SELECT *FROM (VALUES(1, 3, 10),(2, 4, 20)) AS table_1 (key_A, key_B, y1)LEFT JOIN (VALUES(1, 3, 100),(2, 4, 200)) AS table_2 (key_A, key_B, y2)USING (key_A, key_B);
SELECT *FROM nationCROSS JOIN region
SELECT orderdate FROM orders LIMIT 5
语法:ORDER BY expression [ ASC | DESC ] [ NULLS { FIRST | LAST } ] [, ...]
SELECT name, age FROM person ORDER BY age
SELECT * FROM studentORDER BY student_id
SELECT * FROM studentORDER BY student_id,student_name
SELECT * FROM (VALUES 13, 42)EXCEPTSELECT 13
SELECT * FROM (VALUES 13, 42)INTERSECTSELECT 1
UNION ALL运算符。SELECT 13UNIONSELECT 42
SELECT id FROM aUNION ALLSELECT id FROM b;
BERNOULLI:选择每一行作为表样本,概率为样本百分比。使用 Bernoulli 方法对表进行采样时,将扫描表的所有物理块并跳过某些行(基于采样百分比与运行时计算的随机值之间的比较)。结果中包含一行的概率独立于任何其他行。这不会减少从磁盘读取采样表所需的时间。如果进一步处理采样输出,可能会影响总查询时间。SYSTEM:这种采样方法将表划分为逻辑数据段,并以此粒度对表进行采样。此采样方法要么从特定数据段中选择所有行,要么跳过它(基于采样百分比和运行时计算的随机值之间的比较)。在系统采样中选择的行将取决于所使用的连接器。例如,当与 Hive 一起使用时,它取决于数据在 HDFS 上的布局方式。这种方法不能保证独立的抽样概率。SELECT *FROM users TABLESAMPLE BERNOULLI (50);
SELECT *FROM users TABLESAMPLE SYSTEM (75);
SELECT * FROM personPIVOT (SUM(age) AS a, AVG(class) AS cFOR name IN ('John' AS john, 'Mike' AS mike));
LATERAL VIEW [ OUTER ] generator_function ( expression [ , ... ] ) [ table_alias ] AS column_alias [ , ... ]
Select 'dlc''test'
CREATE EXTERNAL TABLE IF NOT EXISTS `csv_test_2222` (`_c0` STRING,`_c1` INTEGER,`_c2` INTEGER,`_c3` INTEGER)ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' WITH SERDEPROPERTIES ('separatorChar' = '''','quoteChar' = '''')STORED AS `textfile`LOCATION 'cosn://dlc-nj-1258469122/csv/100M/
文档反馈