On the local filesystem, create a working directory under /usr: mkdir /usr/wang
Then create an upload directory in HDFS and make it group-writable:
hadoop fs -mkdir /upload
hadoop fs -chmod g+w /upload
Upload the data files:
[root@master wang]# hadoop fs -put emp.csv /upload
[root@master wang]# hadoop fs -put dept.csv /upload
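A quick check that both files landed in HDFS and that the fields really are comma-separated, which the CREATE TABLE statements below rely on:
hadoop fs -ls /upload
hadoop fs -cat /upload/emp.csv | head -n 3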
Server side: start HiveServer2 in the background: hiveserver2 &
Client side:
Log in as anonymous:
beeline -u jdbc:hive2://master:10000/default
Important: log in as root instead; we connect as the root user, otherwise the session has no write permission:
beeline -u jdbc:hive2://master:10000/default -n root
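Once connected, a quick check from the beeline prompt confirms the session and lists the available databases (output depends on your metastore); !quit ends the session when done:
show databases;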
Create the employee table. The CREATE TABLE statement declares the field delimiter of the input file (here a comma); the data is then loaded into the table with LOAD DATA.
create table emp001(empno int,ename string,job string,mgr int,hiredate string,sal int,comm int,deptno int) row format delimited fields terminated by ',';
Create the department table:
create table dept001(deptno int,dname string,loc string) row format delimited fields terminated by ',';
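Before loading, describe can be used to confirm that the column layout matches the CSV files:
describe emp001;
describe dept001;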
Load the data into the tables:
load data inpath '/upload/emp.csv' into table emp001;
load data inpath '/upload/dept.csv' into table dept001;
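Note that LOAD DATA INPATH (without LOCAL) moves the files from /upload into the Hive warehouse directory, so the source path is empty afterwards. A quick sanity check that the loads worked:
select * from emp001 limit 5;
select count(*) from dept001;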
Create a table partitioned by the employee's department number:
create table emp_part001(empno int,ename string,job string,mgr int,hiredate string,sal int,comm int) partitioned by (deptno int) row format delimited fields terminated by ',';
Insert data into the partitioned table, specifying the target partition for each load (the data comes from a subquery):
insert into table emp_part001 partition(deptno=10) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=10;
insert into table emp_part001 partition(deptno=20) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=20;
insert into table emp_part001 partition(deptno=30) select empno,ename,job,mgr,hiredate,sal,comm from emp001 where deptno=30;
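To verify, the created partitions can be listed. As an alternative to the three manual inserts, a dynamic-partition insert can fill every department partition in one statement; the two set commands are standard Hive properties, though whether they need to be set depends on your version's defaults:
show partitions emp_part001;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
-- the partition column (deptno) must come last in the select list
insert into table emp_part001 partition(deptno) select empno,ename,job,mgr,hiredate,sal,comm,deptno from emp001;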
Create a bucketed table, bucketed on the employee's job title (job):
create table emp_bucket001 (empno int,ename string,job string,mgr int,hiredate string,sal int,comm int,deptno int) clustered by (job) into 4 buckets row format delimited fields terminated by ',';
Insert data via a subquery:
insert into emp_bucket001 select * from emp001;
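On Hive 1.x the insert only hashes rows into 4 bucket files if bucketing is enforced first; on Hive 2.x and later that property was removed because bucketing is always enforced. The warehouse path below is the default and may differ in your installation:
set hive.enforce.bucketing=true;   -- Hive 1.x only
insert into emp_bucket001 select * from emp001;
-- afterwards, four bucket files should be visible from a shell:
hadoop fs -ls /user/hive/warehouse/emp_bucket001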
Query employee information: employee number, name, salary
select empno,ename,sal from emp001;
Multi-table join query
select dept001.dname,emp001.ename from emp001,dept001 where emp001.deptno=dept001.deptno;
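The same query written with explicit JOIN ... ON syntax, which is generally preferred in Hive:
select d.dname, e.ename from emp001 e join dept001 d on e.deptno = d.deptno;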
Total number of employees
select count(empno) as emp_num from emp001;
Remove duplicate values
select distinct deptno from emp001;
How many distinct job titles the company has
select count(distinct job) from emp001;
Count the employees hired in 1981
select count(hiredate) as result from emp001 where hiredate like '%1981%';
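If hiredate uses a year-first format such as 1981/02/20, matching on the year prefix is a little more precise than a LIKE over the whole string:
select count(*) as result from emp001 where substr(hiredate,1,4) = '1981';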
Total salary per department
select deptno,sum(sal) from emp001 group by deptno;
Number of employees per job title
select job, count(*) as emp_num from emp001 group by job order by emp_num asc;
Find the employee(s) with the earliest hire date
select ename,hiredate from emp001
join (select min(hiredate) as min_hiredate from emp001) t1
where hiredate=t1.min_hiredate;
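An alternative without the join, assuming the hiredate strings sort chronologically (e.g. a zero-padded yyyy/mm/dd format); note that it returns a single row even when several employees share the earliest date, whereas the join above keeps ties:
select ename, hiredate from emp001 order by hiredate limit 1;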
Classify salary levels
select ename,empno,sal,
case when sal < 2000 then 'low'
     when sal >= 2000 and sal < 3000 then 'middle'
     else 'high'
end as level
from emp001
order by sal desc;
Give employees a raise based on their job title, showing the salary before and after the raise
select empno,ename,job,sal,
case job when 'president' then sal+1000
when 'manager' then sal+800
else sal+400
end from emp001;
Find the location (loc) with the most employees hired in the first half of the year.
(cast converts between data types; substr extracts part of a string)
select t1.loc, count(*) as emp_count
from
(select dept001.loc,emp001.ename,
cast(substr(emp001.hiredate,6,2) as int) as hire_month
from dept001 join emp001
on dept001.deptno=emp001.deptno) t1
where t1.hire_month<=6
group by t1.loc
order by emp_count desc
limit 1;