-- 1. Simple wordcount
-- Split the sentence column of table t2 on spaces and count how many times
-- each word occurs.
select
    word,
    count(1)
from (
    select explode(split(sentence, ' ')) as word
    from t2
) t
group by word;
-- Same word count, with the words sorted by their count in descending order.
-- Per the original note: order by runs with only one reducer.
select
    word,
    count(1) as n
from (
    select explode(split(sentence, ' ')) as word
    from t2
) t
group by word
order by n desc;
-- Viewing the partitions of a table.
-- Create the managed (internal) table t3, partitioned by the dt column.
-- Each line of the input is read as one sentence value, because the field
-- delimiter is set to the newline character.
create table t3 (
    sentence string
)
partitioned by (dt string)
row format delimited fields terminated by '\n';

-- Load data from the local filesystem into the managed table.
-- NOTE(review): t3 is partitioned but no partition(...) clause is given here;
-- some Hive versions reject this -- confirm and name the target partition if needed.
load data local inpath '本地路徑' into table t3;
-- Create the external table t2 on top of the data stored under the HDFS
-- directory /file, one sentence per line, kept as plain text files.
create external table t2 (
    sentence string
)
row format delimited fields terminated by '\n'
stored as textfile
location '/file';
-- List the partitions of a table; tablename is a placeholder for the real table name.
show partitions tablename;
-- Filtering data by partition.
-- Overwrite partition dt='201911' of t3 with 100 rows taken from t2.
insert overwrite table t3 partition (dt = '201911')
select sentence
from t2
limit 100;
-- Read back only the rows of t3 whose dt partition value lies in the given range.
select *
from t3
where dt between '201911' and '201912';

-- Table bucketing: create a table with 4 buckets.
set hive.enforce.bucketing = true;

create table t1 (
    user_id int,
    item_id string,
    rating string
)
clustered by (user_id)
into 4 buckets;

-- Bucket sampling: read bucket 1 out of the 4 buckets on user_id (1/4 sample).
select *
from t1 tablesample (bucket 1 out of 4 on user_id);

-- Store the same sample as a new table.
-- NOTE(review): a table named t2 is also created earlier in this file as an
-- external table; if both run in the same database this statement will
-- collide with it -- confirm the intended target name.
create table t2 as
select *
from t1 tablesample (bucket 1 out of 4 on user_id);
