1.從檔案匯入資料
1)mysql (mysql需要先建立表).
create table tmp_table(user varchar(32), age int, is_male boolean);
create table tmp_table_2(age int, options varchar(50)); -- 用於join
load data local infile '/tmp/data_file_1' into table tmp_table;
load data local infile '/tmp/data_file_2' into table tmp_table_2;
2)pig
tmp_table = load '/tmp/data_file_1' using PigStorage('\t') as (user:chararray, age:int, is_male:int);
tmp_table_2 = load '/tmp/data_file_2' using PigStorage('\t') as (age:int, options:chararray);
2.查詢整張表
1)mysql
select * from tmp_table;
2)pig
dump tmp_table;
3.查詢前50行
1)mysql
select * from tmp_table limit 50;
2)pig
tmp_table_limit = limit tmp_table 50;
dump tmp_table_limit;
4.查詢某些列
1)mysql
select user from tmp_table;
2)pig
tmp_table_user = foreach tmp_table generate user;
dump tmp_table_user;
5.給列取別名
1)mysql
select user as user_name, age as user_age from tmp_table;
2)pig
tmp_table_column_alias = foreach tmp_table generate user as user_name, age as user_age;
dump tmp_table_column_alias;
6.排序
1)mysql
select * from tmp_table order by age;
2)pig
tmp_table_order = order tmp_table by age asc;
dump tmp_table_order;
7.條件查詢
1)mysql
select * from tmp_table where age>20;
2) pig
tmp_table_where = filter tmp_table by age> 20;
dump tmp_table_where;
8.內連線inner join(相等連線,自然連線)
1)mysql
select * from tmp_table a join tmp_table_2 b on a.age=b.age;
2)pig
tmp_table_inner_join = join tmp_table by age, tmp_table_2 by age;
dump tmp_table_inner_join;
9.左連線left join(返回左表所有行,如果沒有匹配的行則值為空)
1)mysql
select * from tmp_table a left join tmp_table_2 b on a.age=b.age;
2)pig
tmp_table_left_join = join tmp_table by age left outer,tmp_table_2 by age;
dump tmp_table_left_join;
10.右連線right join(保留右邊表所有行,沒有匹配的值則為空)
1)mysql
select * from tmp_table a right join tmp_table_2 b on a.age=b.age;
2)pig
tmp_table_right_join= join tmp_table by age right outer,tmp_table_2 by age;
dump tmp_table_right_join;
11.全連線full join(完整外部連線返回左表和右表中的所有行)
1)mysql
select * from tmp_table a join tmp_table_2 b on a.age=b.age
union select * from tmp_table a left join tmp_table_2 b on a.age=b.age
union select * from tmp_table a right join tmp_table_2 b on a.age=b.age;
2)pig
tmp_table_full_join = join tmp_table by age full outer, tmp_table_2 by age;
dump tmp_table_full_join;
12.同時對多張表交叉查詢(笛卡爾積)
1)mysql
select * from tmp_table,tmp_table_2;
2)pig
tmp_table_cross = cross tmp_table, tmp_table_2;
dump tmp_table_cross;
13.分組group by
1)mysql
select * from tmp_table group by is_male;
2)pig
tmp_table_group = group tmp_table by is_male;
dump tmp_table_group;
14.分組並統計
1)mysql
select is_male,count(*) from tmp_table group by is_male;
2)pig
tmp_table_group_count = group tmp_table by is_male;
tmp_table_group_count = foreach tmp_table_group_count generate group, COUNT($1); -- 注意這是統計第1列($1)屬性。第0列屬性是group名字。
dump tmp_table_group_count;
這裡要注意:
15.查詢去重distinct
1)mysql
select distinct is_male from tmp_table;
2)pig
tmp_table_distinct = foreach tmp_table generate is_male;
tmp_table_distinct = distinct tmp_table_distinct;
dump tmp_table_distinct;
pig mysql pig對應sql的基本命令
1.從檔案匯入資料 1 mysql mysql需要先建立表 create table tmp table user varchar 32 ageint,is male boolean create table tmp table 2 age int,optionsvarchar 50 用於join ...
SQL 模糊查詢
between...and... 在資料庫內部是做了特殊優化的,執行效率比 >= and <= 等這種方式快。between a and b 相當於 欄位 >= a and 欄位 <= b,例如 select * from dbo.mystudent where s_age between 20 and 30。between and 還可以...
SQL優化策略
1.對查詢進行優化,要盡量避免全表掃描,首先應考慮在進行條件判斷的欄位上建立索引。 2.應盡量避免在where子句中對欄位進行null值判斷,否則將導致引擎放棄使用索引而進行全表掃描,例如 select * from scott.emp where job is null。 3.應盡量避免在where子句中使用 ...