-- Creates the page_view table with a table-level comment and a column-level
-- comment on ip. The table is partitioned by two columns, dt and country,
-- which appear only in the partitioned by clause and not in the column list.
-- The delimiters are octal escapes: \001 (Ctrl-A) separates fields, \002
-- separates collection items and \003 separates map keys from their values.
create table page_view (
    viewtime     int,
    userid       bigint,
    page_url     string,
    referrer_url string,
    ip           string comment 'ip address of the user'
)
comment 'this is the page view table'
partitioned by (dt string, country string)
row format delimited
    fields terminated by '\001'
    collection items terminated by '\002'
    map keys terminated by '\003'
stored as textfile;
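[row format delimited] 關鍵字用來設定資料列的格式,也就是這張表在載入資料時所使用的分隔符,例如欄位分隔符(fields terminated by)、集合元素分隔符(collection items terminated by)以及 map 鍵分隔符(map keys terminated by)。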
[stored as file_format] 關鍵字用來設定資料檔案的儲存格式,預設是 textfile。如果資料檔案是純文字,就使用 [stored as textfile],把檔案從本地直接拷貝到 hdfs 上,hive 就可以直接識別資料。
-- Creates an external table. A table declared with the external keyword is an
-- external table, and one declared without it is an internal (managed) table.
-- Dropping an internal table also deletes its data from HDFS, while dropping
-- an external table leaves the data in place.
-- Fields are split on \054, the octal escape for a comma, and the table reads
-- its data from the directory named in the location clause.
-- NOTE(review): an earlier statement in this file already creates a table named
-- page_view, so running both in the same database would fail on this one. The
-- table comment suggests a staging name was intended (TODO confirm before
-- renaming).
create external table page_view (
    viewtime     int,
    userid       bigint,
    page_url     string,
    referrer_url string,
    ip           string comment 'ip address of the user',
    country      string comment 'country of origination'
)
comment 'this is the staging page view table'
row format delimited
    fields terminated by '\054'
stored as textfile
location '/user/hadoop/warehouse/page_view';
-- Repoints an existing partition at a data directory. With this approach the
-- partition has to be created first, before set location is applied to it.
-- The partition column date is quoted with backticks because date is a
-- reserved keyword in Hive.
use test;
alter table fct_path_list_off_5levels
    partition (`date` = '2017-09-14')
    set location 'hdfs://nameservice1/user/hive/warehouse/test.db/fct_path_list_off_5levels/date=2017-09-14';
-- Creates the partition and specifies the location of its data files in the
-- same statement.
-- The partition column date is quoted with backticks because date is a
-- reserved keyword in Hive.
use test;
alter table fct_path_list_off_5levels
    add partition (`date` = '2017-09-14')
    location 'hdfs://nameservice1/user/hive/warehouse/test.db/fct_path_list_off_5levels/date=2017-09-14';
