ASCII(<str> string)
> SELECT ascii('222');50
-- SparkSQLBASE64(<str> string|binary)-- PrestoBASE64(<str> binary)
> SELECT base64('tencent');dGVuY2VudA==
BIT_LENGTH(<expr> string|binary)
> select bit_length('tencent');56> select bit_length(binary('tencent'));56
CHAR(<expr> integer)
> SELECT char(65);A
CHR(<expr> integer)
> SELECT chr(65);A
CHAR_LENGTH(<expr> string|binary)
> select char_length(binary('tencent'));7> select char_length('tencent');7
CHARACTER_LENGTH(<expr> string|binary)
> select character_length(binary('tencent')); 7 > select character_length('tencent'); 7
CONCAT_WS(<sep> string[, <s> string|array<string>}]+)
> SELECT concat_ws(' ', 'tencent', 'dlc');tencent dlc
DECODE(<expr> binary|string, <charset>string)
> SELECT decode(encode('abc', 'utf-8'), 'utf-8');abc
ELT(<n> integer, <s1> string, <s2> string, ...)
> SELECT elt(1, 'scala', 'java');scala
ENCODE(<expr> binary|string, <charset> string)
> SELECT encode('abc', 'utf-8');abc> SELECT encode(x'616263', 'utf-8');abc
FIND_IN_SET(<str> string, <str_array> string)
> SELECT find_in_set('ab','abc,b,ab,c,def');3
FORMAT_NUMBER(<expr1> integer|double|decimal, <expr2> string|integer)
> SELECT format_number(12332.123456, 4);12,332.1235> SELECT format_number(12332.123456, '##################.###');12332.123
FORMAT_STRING(<str> string, obj <T>, ...)
> SELECT format_string("Hello World %d %s", 100, "days");Hello World 100 days
INITCAP(<str> string)
> SELECT initcap('sPark sql');Spark Sql
INSTR(<str> string, <substr> string)
> SELECT instr('SparkSQL', 'SQL');6
LCASE(<str> string)
> SELECT lcase('SparkSQL');sparksql
LENGTH(<expr> string|binary)
> SELECT length('Spark SQL ');10
LEVENSHTEIN(<s1> string, <s2> string)
> SELECT levenshtein('kitten', 'sitting');3
LIKE(<s1> str, <s2> pattern)<str> like <pattern>[ ESCAPE <escape>]
> SELECT like('Spark', '_park');true> SET spark.sql.parser.escapedStringLiterals=true;spark.sql.parser.escapedStringLiterals true> SELECT '%SystemDrive%\\Users\\John' like '\\%SystemDrive\\%\\\\Users%';true> SET spark.sql.parser.escapedStringLiterals=false;spark.sql.parser.escapedStringLiterals false> SELECT '%SystemDrive%\\\\Users\\\\John' like '\\%SystemDrive\\%\\\\\\\\Users%';false> SELECT '%SystemDrive%/Users/John' like '/%SystemDrive/%//Users%' ESCAPE '/';true
LOWER(<str> string)
> SELECT lower('TENCENT');tencent
LOCATE(<substr> string, <str> string[, <pos> integer])
> SELECT locate('bar', 'foobarbar');4> SELECT locate('bar', 'foobarbar', 5);7
OCTET_LENGTH(<expr> string|binary)
> SELECT octet_length('Spark SQL');9
LPAD(<str> string, <len> integer[, <pad> string])
> SELECT lpad('hi', 5, '??');???hi> SELECT lpad('hi', 1, '??');h> SELECT lpad('hi', 5);hi
LTRIM(<str> string)
> SELECT ltrim(' SparkSQL ');SparkSQL
PARSE_URL(<url> string, <path> string[, <key> string])
> SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST');spark.apache.org> SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY');query=1> SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY', 'query');1
POSITION(<substr> string, <str> string[, <pos> integer])
> SELECT position('bar', 'foobarbar');4> SELECT position('bar', 'foobarbar', 5);7> SELECT POSITION('bar' IN 'foobarbar');4
PRINTF(<str> string, obj <T>, ...)
> SELECT printf("Hello World %d %s", 100, "days");Hello World 100 days
REPEAT(<str> string, <n> integer)
> SELECT repeat('123', 2);123123
REPLACE(<str> string, <search> string[, <replace> string])
> SELECT replace('ABCabc', 'abc', 'DEF');ABCDEF
OVERLAY(<input> string, <replace> string, <pos> integer[, <len> integer])
> SELECT overlay('Spark SQL' PLACING '_' FROM 6);Spark_SQL> SELECT overlay('Spark SQL' PLACING 'CORE' FROM 7);Spark CORE> SELECT overlay('Spark SQL' PLACING 'ANSI ' FROM 7 FOR 0);Spark ANSI SQL> SELECT overlay('Spark SQL' PLACING 'tructured' FROM 2 FOR 4);Structured SQL> SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('_', 'utf-8') FROM 6);Spark_SQL> SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('CORE', 'utf-8') FROM 7);Spark CORE> SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('ANSI ', 'utf-8') FROM 7 FOR 0);Spark ANSI SQL> SELECT overlay(encode('Spark SQL', 'utf-8') PLACING encode('tructured', 'utf-8') FROM 2 FOR 4);Structured SQL
RPAD(<str> string, <len> integer[, <pad> string])
> SELECT rpad('hi', 5, '??');hi???> SELECT rpad('hi', 1, '??');h> SELECT rpad('hi', 5);hi
RTRIM(<str> string)
> SELECT rtrim(' SparkSQL ');SparkSQL
SENTENCES(<str> string[, <lang> string, <country> string])
> SELECT sentences('Hi there! Good morning.');[["Hi","there"],["Good","morning"]]
SOUNDEX(<str> string)
> SELECT soundex('Miller');M460
SPACE(<n> integer)
> SELECT concat(space(2), '1');1
SPLIT(<str> string, <regex> string, <limit> integer)
> SELECT split('oneAtwoBthreeC', '[ABC]');["one","two","three",""]> SELECT split('oneAtwoBthreeC', '[ABC]', -1);["one","two","three",""]> SELECT split('oneAtwoBthreeC', '[ABC]', 2);["one","twoBthreeC"]
SUBSTRING(<str> string, <pos> integer[, <len> integer])SUBSTRING(<str> FROM <pos>[ FOR <len>])
> SELECT substring('Spark SQL', 5);k SQL> SELECT substring('Spark SQL', -3);SQL> SELECT substring('Spark SQL', 5, 1);k> SELECT substring('Spark SQL' FROM 5);k SQL> SELECT substring('Spark SQL' FROM -3);SQL> SELECT substring('Spark SQL' FROM 5 FOR 1);k
SUBSTR(<str> string, <pos> integer[, <len> integer])SUBSTR(<str> FROM <pos>[ FOR <len>])
> SELECT substr('Spark SQL', 5);k SQL> SELECT substr('Spark SQL', -3);SQL> SELECT substr('Spark SQL', 5, 1);k> SELECT substr('Spark SQL' FROM 5);k SQL> SELECT substr('Spark SQL' FROM -3);SQL> SELECT substr('Spark SQL' FROM 5 FOR 1);k
LEFT(<str> string, <len> integer)
> SELECT left('tencent', 3);ten
RIGHT(<str> string, <len> integer)
> SELECT left('tencent', 3);ten
SUBSTRING_INDEX(<str> string, <delim> string, <count> integer)
> SELECT substring_index('cloud.tencent.com', '.', 2);cloud.tencent
TRANSLATE(<input> string, <from> string, <to> string)
> SELECT translate('AaBbCc', 'abc', '123');A1B2C3
TRIM(<str> string)trim(BOTH FROM str)trim(LEADING FROM str)trim(TRAILING FROM str)trim(trimStr FROM str)trim(BOTH trimStr FROM str)trim(LEADING trimStr FROM str)trim(TRAILING trimStr FROM str)
> SELECT trim(' SparkSQL ');SparkSQL> SELECT trim(BOTH FROM ' SparkSQL ');SparkSQL> SELECT trim(LEADING FROM ' SparkSQL ');SparkSQL> SELECT trim(TRAILING FROM ' SparkSQL ');SparkSQL> SELECT trim('SL' FROM 'SSparkSQLS');parkSQ> SELECT trim(BOTH 'SL' FROM 'SSparkSQLS');parkSQ> SELECT trim(LEADING 'SL' FROM 'SSparkSQLS');parkSQLS> SELECT trim(TRAILING 'SL' FROM 'SSparkSQLS');SSparkSQ
BTRIM(<str> string[, <trimStr> string])
> SELECT btrim(' SparkSQL ');SparkSQL> SELECT btrim(encode(' SparkSQL ', 'utf-8'));SparkSQL> SELECT btrim('SSparkSQLS', 'SL');parkSQ> SELECT btrim(encode('SSparkSQLS', 'utf-8'), encode('SL', 'utf-8'));parkSQ
UCASE(<str> string)
> SELECT ucase('SparkSQL');SPARKSQL
UNBASE64(<str> string)
> SELECT unbase64('U3BhcmsgU1FM');Spark SQL
UNHEX(<str> string)
> select unhex('74656E63656E74');tencent
UPPER(<str> string)
> SELECT upper('tencent');TENCENT
UUID()
> SELECT uuid();46707d92-02f4-4817-8116-a4c3b23e6266
XPATH(<xml> string, <xpath> string)
> SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()');["b1","b2","b3"]
XPATH_BOOLEAN(<xml> string, <xpath> string)
> SELECT xpath_boolean('<a><b>1</b></a>','a/b');true
XPATH_DOUBLE(<xml> string, <xpath> string)
> SELECT xpath_double('<a><b>1</b><b>2</b></a>', 'sum(a/b)');3.0
XPATH_NUMBER(<xml> string, <xpath> string)
> SELECT xpath_number('<a><b>1</b><b>2</b></a>', 'sum(a/b)');3.0
XPATH_FLOAT(<xml> string, <xpath> string)
> SELECT xpath_float('<a><b>1</b><b>2</b></a>', 'sum(a/b)');3.0
XPATH_INT(<xml> string, <xpath> string)
> SELECT xpath_int('<a><b>1</b><b>2</b></a>', 'sum(a/b)');3
XPATH_LONG(<xml> string, <xpath> string)
> SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)');3
XPATH_SHORT(<xml> string, <xpath> string)
> SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)');3
XPATH_STRING(<xml> string, <xpath> string)
> SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c');cc
REGEXP_EXTRACT(<str> string, <regexp> string[, <idx> integer])
> SELECT regexp_extract('100-200', '(\\\\d+)-(\\\\d+)', 1);100
REGEXP_EXTRACT_ALL(<str> string, <regexp> string[, <idx> integer])
> SELECT regexp_extract_all('100-200, 300-400', '(\\\\d+)-(\\\\d+)', 1);["100","300"]
REGEXP_REPLACE(<str> string, <regexp> string, <rep> string[, <position> integer])
> SELECT regexp_replace('100-200', '(\\\\d+)', 'num');num-num
REGEXP_LIKE(<str> string, <regexp> string)
> SELECT regexp_like('%SystemDrive%\\Users\\John', '%SystemDrive%\\Users.*');true
REGEXP(<str> string, <regexp> string)
> SELECT regexp('%SystemDrive%\\Users\\John', '%SystemDrive%\\Users.*');true
CONCAT(<s1> string, <s2> string, ...)
> SELECT concat('Spark', 'SQL');SparkSQL
str_to_map(<text> string[, <pairDelim> string[, <keyValueDelim> string]])
> SELECT str_to_map('a:1,b:2,c:3', ',', ':');{"a":"1","b":"2","c":"3"}> SELECT str_to_map('a');{"a":null}
REVERSE(<str> string)
> SELECT reverse('Spark SQL');LQS krapS
RLIKE(<str> string, <regexp> string)
> SELECT rlike('%SystemDrive%\\Users\\John', '%SystemDrive%\\Users.*');true
FROM_CSV(<cvsStr> string, <schema> string, <options> map<string, string>)
> SELECT from_csv('1, 0.8', 'a INT, b DOUBLE');{"a":1,"b":0.8}> SELECT from_csv('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));{"time":2015-08-26 00:00:00}
SCHEMA_OF_CSV(<csvStr> string[, options map<string, string>])
> SELECT schema_of_csv('1,abc');STRUCT<`_c0`: INT, `_c1`: STRING>
TO_CSV(<expr> struct[, <options> map<string, string>])
> SELECT to_csv(named_struct('a', 1, 'b', 2));1,2> SELECT to_csv(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy'));26/08/2015
NGRAMS(<a> array<array<string>>, <N> integer, <K> integer, <pf> integer)
CONTEXT_NGRAMS((array <array <string>>, array <string>, int, int))
文档反馈