The steps below were done on Hadoop 3.1.2 and Hive 3.1.2; the fsimage fields differ in other versions.
1. Export the fsimage and convert it to a tab-delimited text file with the HDFS commands:
hdfs dfsadmin -fetchImage myfile
hdfs oiv -i myfile -o fsimage.csv -p Delimited
2. Create an external table over the file in Hive and run the statistics below.
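The external table below expects the dump under hdfs://qingfeng_cluster:8020/tmp/prod_hdfs, so the exported file still has to be uploaded there; a minimal sketch using the same paths as the example:
hdfs dfs -mkdir -p /tmp/prod_hdfs
hdfs dfs -put -f fsimage.csv /tmp/prod_hdfs/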
create external table default.hdfs_info(
    path string,
    replication string,
    modificationtime string,
    accesstime string,
    preferredblocksize string,
    blockscount string,
    filesize string,
    nsquota string,
    dsquota string,
    permission string,
    username string,
    groupname string)
row format delimited fields terminated by '\t'
location 'hdfs://qingfeng_cluster:8020/tmp/prod_hdfs';
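Note that, depending on the Hadoop version, the Delimited processor prefixes its output with a header row (Path, Replication, ModificationTime, ...), which would then surface as a bogus record in every query. If that is the case, the standard Hive table property skips it (assuming exactly one header line):
alter table default.hdfs_info set tblproperties ('skip.header.line.count'='1');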
Compute first-level directory sizes (in GB):
select joinedpath, sumsize
from
    (select joinedpath, round(sum(filesize)/1024/1024/1024, 2) as sumsize
     from
        (select concat('/', split(path, '\/')[1]) as joinedpath, accesstime, filesize, username
         from default.hdfs_info) t
     group by joinedpath) h
order by sumsize desc;
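Every column of hdfs_info is declared string, so Hive casts filesize to a number implicitly inside sum(). That works, but an explicit cast is safer and self-documenting; a hypothetical variant of the first-level query:
select joinedpath, round(sum(cast(filesize as bigint))/1024/1024/1024, 2) as sumsize
from (select concat('/', split(path, '\/')[1]) as joinedpath, filesize
      from default.hdfs_info) t
group by joinedpath
order by sumsize desc;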
Compute second-level directory sizes (in GB):
select joinedpath, sumsize
from
    (select joinedpath, round(sum(filesize)/1024/1024/1024, 2) as sumsize
     from
        (select concat('/', split(path, '\/')[1], '/', split(path, '\/')[2]) as joinedpath,
                accesstime, filesize, username
         from default.hdfs_info) t
     group by joinedpath) h
order by sumsize desc;
Count files smaller than 100 KB per directory (here grouped at the third level; add or remove split components for other levels):
select concat('/', split(path, '\/')[1], '/', split(path, '\/')[2], '/', split(path, '\/')[3]) as path,
       count(*) as small_file_num
from
    (select relative_size, path
     from
        (select (case filesize < 100*1024 when true then 'small' else 'large' end) as relative_size,
                path
         from default.hdfs_info) tmp
     where relative_size = 'small') tmp2
group by concat('/', split(path, '\/')[1], '/', split(path, '\/')[2], '/', split(path, '\/')[3])
order by small_file_num desc;
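One caveat, assuming the usual Delimited output layout: directory entries appear with a file size of 0 and a block count of 0, so the query above counts them as "small" files too. A sketch that excludes them by requiring at least one block, here as a cluster-wide total:
select count(*) as small_file_num
from default.hdfs_info
where cast(blockscount as bigint) > 0
  and cast(filesize as bigint) < 100*1024;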
Per-directory size plus oldest access time and latest modification time, here for fifth-level paths under /user/hive/warehouse/default.db:
select joinedpath,
       from_unixtime(ceil(accesstime/1000), 'yyyy-MM-dd HH:mm:ss') as accesstime,
       from_unixtime(ceil(modificationtime/1000), 'yyyy-MM-dd HH:mm:ss') as modificationtime,
       sumsize
from
    (select joinedpath,
            min(accesstime) as accesstime,
            max(modificationtime) as modificationtime,
            round(sum(filesize)/1024/1024/1024, 2) as sumsize
     from
        (select concat('/', split(path, '\/')[1], '/', split(path, '\/')[2], '/',
                       split(path, '\/')[3], '/', split(path, '\/')[4], '/',
                       split(path, '\/')[5]) as joinedpath,
                accesstime,
                modificationtime,
                filesize,
                username
         from default.hdfs_info
         where concat('/', split(path, '\/')[1], '/', split(path, '\/')[2], '/',
                      split(path, '\/')[3], '/', split(path, '\/')[4]) = '/user/hive/warehouse/default.db') t
     where joinedpath != 'null'
     group by joinedpath) h
order by sumsize desc;
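The same aggregation shape also flags cold data. A minimal sketch, assuming accesstime is in milliseconds (as in the fsimage dump) and a hypothetical 90-day threshold: second-level directories where nothing has been accessed recently:
select joinedpath, round(sum(filesize)/1024/1024/1024, 2) as sumsize
from (select concat('/', split(path, '\/')[1], '/', split(path, '\/')[2]) as joinedpath,
             accesstime, filesize
      from default.hdfs_info) t
group by joinedpath
having max(cast(accesstime as bigint)) < (unix_timestamp() - 90*24*60*60) * 1000
order by sumsize desc;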