[LanguageManual]
DDL
LanguageManual DDL 对 Hive Data Definition Language 操作进行了详细的说明,本文只选择比较常见的操作来介绍。
库操作
创建库
1
2
3
4
| CREATE (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
|
删除库
1
| DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE];
|
修改库
1
2
3
4
5
| ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...); -- (Note: SCHEMA added in Hive 0.14.0)
ALTER (DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role; -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)
ALTER (DATABASE|SCHEMA) database_name SET LOCATION hdfs_path; -- (Note: Hive 2.2.1, 2.4.0 and later)
|
使用库
1
2
3
4
| USE database_name;
-- 查看当前使用的库
SELECT current_database();
|
表操作
参考了 Hive:Alter Table/Partition/Column ,翻译的还不错。
创建表
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
| CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later)
[(col_name data_type [column_constraint_specification] [COMMENT col_comment], ... [constraint_specification])]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
[SKEWED BY (col_name, col_name, ...) -- (Note: Available in Hive 0.10.0 and later)]
ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
[STORED AS DIRECTORIES]
[
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -- (Note: Available in Hive 0.6.0 and later)
]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)] -- (Note: Available in Hive 0.6.0 and later)
[AS select_statement]; -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)
|
- data_type:
- primitive_type
- array_type
- map_type
- struct_type
- union_type – (Note: Available in Hive 0.7.0 and later)
- primitive_type:
- TINYINT
- SMALLINT
- INT
- BIGINT
- BOOLEAN
- FLOAT
- DOUBLE
- DOUBLE PRECISION – (Note: Available in Hive 2.2.0 and later)
- STRING
- BINARY – (Note: Available in Hive 0.8.0 and later)
- TIMESTAMP – (Note: Available in Hive 0.8.0 and later)
- DECIMAL – (Note: Available in Hive 0.11.0 and later)
- DECIMAL(precision, scale) – (Note: Available in Hive 0.13.0 and later)
- DATE – (Note: Available in Hive 0.12.0 and later)
- VARCHAR – (Note: Available in Hive 0.12.0 and later)
- CHAR – (Note: Available in Hive 0.13.0 and later)
- array_type
- map_type
- MAP < primitive_type, data_type >
- struct_type
- STRUCT < col_name : data_type [COMMENT col_comment], …>
- union_type
- UNIONTYPE < data_type, data_type, … > – (Note: Available in Hive 0.7.0 and later)
- row_format
- DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char] [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char] [NULL DEFINED AS char] – (Note: Available in Hive 0.13 and later)
- SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, …)]
- file_format:
- SEQUENCEFILE
- TEXTFILE – (Default, depending on hive.default.fileformat configuration)
- RCFILE – (Note: Available in Hive 0.6.0 and later)
- ORC – (Note: Available in Hive 0.11.0 and later)
- PARQUET – (Note: Available in Hive 0.13.0 and later)
- AVRO – (Note: Available in Hive 0.14.0 and later)
- JSONFILE – (Note: Available in Hive 4.0.0 and later)
- INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
- column_constraint_specification:
- [ PRIMARY KEY|UNIQUE|NOT NULL|DEFAULT [default_value]|CHECK [check_expression] ENABLE|DISABLE NOVALIDATE RELY/NORELY ]
- default_value:
- [ LITERAL|CURRENT_USER()|CURRENT_DATE()|CURRENT_TIMESTAMP()|NULL ]
- constraint_specification:
- [, PRIMARY KEY (col_name, …) DISABLE NOVALIDATE RELY/NORELY ] [, PRIMARY KEY (col_name, …) DISABLE NOVALIDATE RELY/NORELY ] [, CONSTRAINT constraint_name FOREIGN KEY (col_name, …) REFERENCES table_name(col_name, …) DISABLE NOVALIDATE [, CONSTRAINT constraint_name UNIQUE (col_name, …) DISABLE NOVALIDATE RELY/NORELY ] [, CONSTRAINT constraint_name CHECK [check_expression] ENABLE|DISABLE NOVALIDATE RELY/NORELY ]
使用 CTAS 创建表
1
2
3
4
5
6
7
| CREATE TABLE new_key_value_store
ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"
STORED AS RCFile
AS
SELECT (key % 1024) new_key, concat(key, value) key_value_pair
FROM key_value_store
SORT BY new_key, key_value_pair;
|
复制表结构
1
2
3
| CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
|
删除表
1
| DROP TABLE [IF EXISTS] table_name [PURGE]; -- (Note: PURGE available in Hive 0.14.0 and later)
|
截断表
1
| TRUNCATE [TABLE] table_name [PARTITION partition_spec];
|
- partition_spec:
- (partition_column = partition_col_value, partition_column = partition_col_value, …)
修改表
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
| -- 重命名表名
ALTER TABLE table_name RENAME TO new_table_name;
-- 修改表属性
ALTER TABLE table_name SET TBLPROPERTIES table_properties;
-- 修改表注释
ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment);
-- 修改表序列化和反序列化属性
ALTER TABLE table_name [PARTITION partition_spec] SET SERDE serde_class_name [WITH SERDEPROPERTIES serde_properties];
ALTER TABLE table_name [PARTITION partition_spec] SET SERDEPROPERTIES serde_properties;
-- 修改表存储属性
ALTER TABLE table_name CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name, ...)]
INTO num_buckets BUCKETS;
-- 修改表倾斜属性
ALTER TABLE table_name SKEWED BY (col_name1, col_name2, ...)
ON ([(col_name1_value, col_name2_value, ...) [, (col_name1_value, col_name2_value), ...]
[STORED AS DIRECTORIES];
ALTER TABLE table_name SET SKEWED LOCATION (col_name1="location1" [, col_name2="location2", ...] );
ALTER TABLE table_name NOT SKEWED;
ALTER TABLE table_name NOT STORED AS DIRECTORIES;
-- 修改表的约束
ALTER TABLE table_name ADD CONSTRAINT constraint_name PRIMARY KEY (column, ...) DISABLE NOVALIDATE;
ALTER TABLE table_name ADD CONSTRAINT constraint_name FOREIGN KEY (column, ...) REFERENCES table_name(column, ...) DISABLE NOVALIDATE RELY;
ALTER TABLE table_name ADD CONSTRAINT constraint_name UNIQUE (column, ...) DISABLE NOVALIDATE;
ALTER TABLE table_name CHANGE COLUMN column_name column_name data_type CONSTRAINT constraint_name NOT NULL ENABLE;
ALTER TABLE table_name CHANGE COLUMN column_name column_name data_type CONSTRAINT constraint_name DEFAULT default_value ENABLE;
ALTER TABLE table_name CHANGE COLUMN column_name column_name data_type CONSTRAINT constraint_name CHECK check_expression ENABLE;
ALTER TABLE table_name DROP CONSTRAINT constraint_name;
|
- table_properties:
- (property_name = property_value, property_name = property_value, … )
- serde_properties:
- (property_name = property_value, property_name = property_value, … )
修改分区
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
| -- 增加分区
ALTER TABLE table_name ADD [IF NOT EXISTS] PARTITION partition_spec [LOCATION 'location'][, PARTITION partition_spec [LOCATION 'location'], ...];
-- 修改分区名称
ALTER TABLE table_name PARTITION partition_spec RENAME TO PARTITION partition_spec;
-- 转换分区
-- Move partition from table_name_1 to table_name_2
ALTER TABLE table_name_2 EXCHANGE PARTITION (partition_spec) WITH TABLE table_name_1;
-- multiple partitions
ALTER TABLE table_name_2 EXCHANGE PARTITION (partition_spec, partition_spec2, ...) WITH TABLE table_name_1;
-- 恢复分区
MSCK [REPAIR] TABLE table_name [ADD/DROP/SYNC PARTITIONS];
-- 删除分区
ALTER TABLE table_name DROP [IF EXISTS] PARTITION partition_spec[, PARTITION partition_spec, ...]
[IGNORE PROTECTION] [PURGE]; -- (Note: PURGE available in Hive 1.2.0 and later, IGNORE PROTECTION not available 2.0.0 and later)
-- (解)存档分区
ALTER TABLE table_name ARCHIVE PARTITION partition_spec;
ALTER TABLE table_name UNARCHIVE PARTITION partition_spec;
|
- partition_spec:
- (partition_column = partition_col_value, partition_column = partition_col_value, …)
修改表或者分区
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
| -- 修改文件格式类型
ALTER TABLE table_name [PARTITION partition_spec] SET FILEFORMAT file_format;
-- 修改数据位置
ALTER TABLE table_name [PARTITION partition_spec] SET LOCATION "new location";
-- 修改 TOUCH
ALTER TABLE table_name TOUCH [PARTITION partition_spec];
-- 修改保护机制
ALTER TABLE table_name [PARTITION partition_spec] ENABLE|DISABLE NO_DROP [CASCADE];
ALTER TABLE table_name [PARTITION partition_spec] ENABLE|DISABLE OFFLINE;
-- 修改事务压缩
ALTER TABLE table_name [PARTITION (partition_key = 'partition_value' [, ...])]
COMPACT 'compaction_type'[AND WAIT]
[WITH OVERWRITE TBLPROPERTIES ("property"="value" [, ...])];
-- 修改连接
ALTER TABLE table_name [PARTITION (partition_key = 'partition_value' [, ...])] CONCATENATE;
-- 修改更新列
ALTER TABLE table_name [PARTITION (partition_key = 'partition_value' [, ...])] UPDATE COLUMNS;
|
修改列
1
2
3
4
5
6
7
8
9
| -- 修改列名/类型/位置/注释
ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
-- 增加/替换列
ALTER TABLE table_name
[PARTITION partition_spec] -- (Note: Hive 0.14.0 and later)
ADD|REPLACE COLUMNS (col_name data_type [COMMENT col_comment], ...)
[CASCADE|RESTRICT] -- (Note: Hive 1.1.0 and later)
|
视图操作
创建视图
1
2
3
4
| CREATE VIEW [IF NOT EXISTS] [db_name.]view_name [(column_name [COMMENT column_comment], ...) ]
[COMMENT view_comment]
[TBLPROPERTIES (property_name = property_value, ...)]
AS SELECT ...;
|
删除视图
1
| DROP VIEW [IF EXISTS] [db_name.]view_name;
|
修改视图
1
2
3
4
5
| -- 修改视图属性
ALTER VIEW [db_name.]view_name SET TBLPROPERTIES table_properties;
-- 修改视图结构
ALTER VIEW [db_name.]view_name AS select_statement;
|
- table_properties:
- (property_name = property_value, property_name = property_value, …)
物化视图操作
创建物化视图
1
2
3
4
5
6
7
8
9
10
11
12
13
| CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db_name.]materialized_view_name
[DISABLE REWRITE]
[COMMENT materialized_view_comment]
[PARTITIONED ON (col_name, ...)]
[CLUSTERED ON (col_name, ...) | DISTRIBUTED ON (col_name, ...) SORTED ON (col_name, ...)]
[
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]
]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)]
AS SELECT ...;
|
删除物化视图
1
| DROP MATERIALIZED VIEW [db_name.]materialized_view_name;
|
修改物化视图
1
| ALTER MATERIALIZED VIEW [db_name.]materialized_view_name ENABLE|DISABLE REWRITE;
|
索引操作
创建索引
1
2
3
4
5
6
7
8
9
10
11
12
13
| CREATE INDEX index_name
ON TABLE base_table_name (col_name, ...)
AS index_type
[WITH DEFERRED REBUILD]
[IDXPROPERTIES (property_name=property_value, ...)]
[IN TABLE index_table_name]
[
[ ROW FORMAT ...] STORED AS ...
| STORED BY ...
]
[LOCATION hdfs_path]
[TBLPROPERTIES (...)]
[COMMENT "index comment"];
|
删除索引
1
| DROP INDEX [IF EXISTS] index_name ON table_name;
|
修改索引
1
| ALTER INDEX index_name ON table_name [PARTITION partition_spec] REBUILD;
|
宏操作
创建宏
1
2
3
4
5
6
| CREATE TEMPORARY MACRO macro_name([col_name col_type, ...]) expression;
-- demo
CREATE TEMPORARY MACRO fixed_number() 42;
CREATE TEMPORARY MACRO string_len_plus_two(x string) length(x) + 2;
CREATE TEMPORARY MACRO simple_add (x int, y int) x + y;
|
删除宏
1
| DROP TEMPORARY MACRO [IF EXISTS] macro_name;
|
函数操作
创建临时函数
1
| CREATE TEMPORARY FUNCTION function_name AS class_name;
|
删除临时函数
1
| DROP TEMPORARY FUNCTION [IF EXISTS] function_name;
|
创建永久函数
1
2
| CREATE FUNCTION [db_name.]function_name AS class_name
[USING JAR|FILE|ARCHIVE 'file_uri' [, JAR|FILE|ARCHIVE 'file_uri'] ];
|
删除永久函数
1
| DROP FUNCTION [IF EXISTS] function_name;
|
重新加载函数
1
| RELOAD (FUNCTIONS|FUNCTION);
|
角色和权限操作
创建角色
删除角色
角色授权
1
2
3
4
5
6
7
8
9
| GRANT
priv_type [, priv_type ] ...
ON table_or_view_name
TO principal_specification [, principal_specification] ...
[WITH GRANT OPTION];
GRANT role_name [, role_name] ...
TO principal_specification [, principal_specification] ...
[ WITH ADMIN OPTION ];
|
角色回收权限
1
2
3
4
5
6
7
| REVOKE [GRANT OPTION FOR]
priv_type [, priv_type ] ...
ON table_or_view_name
FROM principal_specification [, principal_specification] ... ;
REVOKE [ADMIN OPTION FOR] role_name [, role_name] ...
FROM principal_specification [, principal_specification] ... ;
|
- principal_specification
- priv_type
- INSERT
- SELECT
- UPDATE
- DELETE
- ALL
SHOW
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
| -- 库
SHOW (DATABASES|SCHEMAS) [LIKE 'identifier_with_wildcards'];
-- 表
SHOW CREATE TABLE ([db_name.]table_name|view_name);
SHOW TABLES [IN database_name] ['identifier_with_wildcards'];
SHOW TABLE EXTENDED [IN|FROM database_name] LIKE 'identifier_with_wildcards' [PARTITION(partition_spec)];
-- 视图
SHOW VIEWS [IN/FROM database_name] [LIKE 'pattern_with_wildcards'];
-- 物化视图
SHOW MATERIALIZED VIEWS [IN/FROM database_name] [LIKE 'pattern_with_wildcards’];
-- 索引
SHOW [FORMATTED] (INDEX|INDEXES) ON table_with_index [(FROM|IN) db_name];
-- 分区
SHOW PARTITIONS table_name;
SHOW PARTITIONS [db_name.]table_name [PARTITION(partition_spec)]; -- (Note: Hive 0.13.0 and later)
SHOW PARTITIONS [db_name.]table_name [PARTITION(partition_spec)] [WHERE where_condition] [ORDER BY col_list] [LIMIT rows]; -- (Note: Hive 4.0.0 and later)
-- 列
SHOW COLUMNS (FROM|IN) table_name [(FROM|IN) db_name];
-- 函数
SHOW FUNCTIONS [LIKE "<pattern>"];
-- 属性
SHOW TBLPROPERTIES tblname;
SHOW TBLPROPERTIES tblname("foo");
-- 锁
SHOW LOCKS <table_name>;
SHOW LOCKS <table_name> EXTENDED;
SHOW LOCKS <table_name> PARTITION (<partition_spec>);
SHOW LOCKS <table_name> PARTITION (<partition_spec>) EXTENDED;
SHOW LOCKS (DATABASE|SCHEMA) database_name; -- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)
-- 配置
SHOW CONF <configuration_name>;
-- 事务
SHOW TRANSACTIONS;
-- 事务压缩
SHOW COMPACTIONS;
-- 角色权限
SHOW ROLE GRANT (USER|ROLE) principal_name;
SHOW GRANT [principal_specification] ON (ALL | [TABLE] table_or_view_name);
SHOW ROLES;
SHOW CURRENT ROLES;
SHOW PRINCIPALS role_name;
|
DESCRIBE
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
| -- 库
DESCRIBE DATABASE [EXTENDED] db_name;
DESCRIBE SCHEMA [EXTENDED] db_name; -- (Note: Hive 1.1.0 and later)
-- 表/视图/物化视图/列
DESCRIBE [EXTENDED|FORMATTED]
table_name[.col_name ( [.field_name] | [.'$elem$'] | [.'$key$'] | [.'$value$'] )* ];
-- (Note: Hive 1.x.x and 0.x.x only. See "Hive 2.0+: New Syntax" below)
DESCRIBE [EXTENDED|FORMATTED]
[db_name.]table_name[ col_name ( [.field_name] | [.'$elem$'] | [.'$key$'] | [.'$value$'] )* ];
-- (Note: Hive 1.x.x and 0.x.x only. See "Hive 2.0+: New Syntax" below)
-- 列
DESCRIBE FORMATTED [db_name.]table_name column_name; -- (Note: Hive 0.14.0 and later)
DESCRIBE FORMATTED [db_name.]table_name column_name PARTITION (partition_spec); -- (Note: Hive 0.14.0 to 1.x.x)
-- (see "Hive 2.0+: New Syntax" below)
-- 分区
DESCRIBE [EXTENDED|FORMATTED] table_name[.column_name] PARTITION partition_spec;
-- (Note: Hive 1.x.x and 0.x.x only. See "Hive 2.0+: New Syntax" below)
DESCRIBE [EXTENDED|FORMATTED] [db_name.]table_name [column_name] PARTITION partition_spec;
-- (Note: Hive 1.x.x and 0.x.x only. See "Hive 2.0+: New Syntax" below)
-- Hive 2.0+: 语法更新
DESCRIBE [EXTENDED | FORMATTED]
[db_name.]table_name [PARTITION partition_spec] [col_name ( [.field_name] | [.'$elem$'] | [.'$key$'] | [.'$value$'] )* ];
-- example
DESCRIBE FORMATTED default.src_table PARTITION (part_col = 100) columnA;
DESCRIBE default.src_thrift lintString.$elem$.myint;
|
ABORT
1
2
| -- 终止事务
ABORT TRANSACTIONS transactionID [ transactionID ...];
|
DML
LanguageManual DML 对 Hive Data Manipulation Language 操作进行了详细的说明,本文只选择比较常见的操作来介绍。