hive partition
create hive partition table
先建立 Hive partition table :
create table danieldb.students_partition(id string,name string,tel string,job string) partitioned by (year string,month string) row format delimited fields terminated by ',' lines terminated by '\n' stored as textfile;
再建立 partition :
alter table students_partition add partition(year=2017,month=12)
再把檔案 students.txt put 到剛建立的 partition 目錄:
hdfs dfs -put students.txt /tmp/daniel/hivedb/students_partition/year=2018/month=01
查詢時就會看到 year 和 month 會是 2018 及 01.
建 partition 時也可直接指到某個目錄,代表裡面檔案都是屬於該 partition 的.
alter table students_partition add partition(year=2018,month=02) location '/tmp/daniel/hivedb/temp/partition'
再把檔案 put 到 /tmp/daniel/hivedb/temp/partition
hdfs dfs -put students.txt /tmp/daniel/hivedb/temp/partition
查詢時就會看到 year 和 month 會是 2018 及 02.
hive.mapred.mode 參數
如果設為 strict 時,partition table 在 select 時沒有指定 partition 則會出錯 :
hive> set hive.mapred.mode=strict;
hive> select * from students_partition;
FAILED: SemanticException [Error 10041]: No partition predicate found for Alias "students_partition" Table "students_partition"
有給 partition 時才可以查詢 :
hive> select * from students_partition where year=2018;
OK
1 Daniel 29612736 PG 2018 01
2 Sam 283747612 SD 2018 01
3 Andy 39827162 PM 2018 01
...
如果設為 nonstrict 則 select 時不給 partition 也可以 :
hive> set hive.mapred.mode=nonstrict;
hive> select * from students_partition;
OK
1 Daniel 29612736 PG 2017 12
2 Sam 283747612 SD 2017 12
3 Andy 39827162 PM 2017 12
...
總結
查看 table 的 partitions :
show partitions students_partition;
查看某個 partition 底下的 partition :
hive> show partitions students_partition partition(year=2018);
OK
year=2018/month=01
year=2018/month=02
Time taken: 0.253 seconds, Fetched: 2 row(s)
執行下列指令可以看到 year=2018,month=02 這 partition 的目錄放在 location:hdfs://quickstart.cloudera:8020/tmp/daniel/hivedb/temp/partition 裡 :
hive> describe extended students_partition partition(year=2018,month=02);
OK
id string
name string
tel string
job string
year string
month string
# Partition Information
# col_name data_type comment
year string
month string
Detailed Partition Information Partition(values:[2018, 02], dbName:danieldb, tableName:students_partition, createTime:1517219747, lastAccessTime:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:string, comment:null), FieldSchema(name:name, type:string, comment:null), FieldSchema(name:tel, type:string, comment:null), FieldSchema(name:job, type:string, comment:null), FieldSchema(name:year, type:string, comment:null), FieldSchema(name:month, type:string, comment:null)], location:hdfs://quickstart.cloudera:8020/tmp/daniel/hivedb/temp/partition, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{line.delim=
, field.delim=,, serialization.format=,}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), parameters:{totalSize=283, numRows=-1, rawDataSize=-1, COLUMN_STATS_ACCURATE=false, numFiles=1, transient_lastDdlTime=1517219747})
Time taken: 0.157 seconds, Fetched: 15 row(s)