pig mysql pig對應sql的基本命令

2021-10-18 12:48:57 字數 3636 閱讀 1286

1.從檔案匯入資料

1)mysql (mysql需要先建立表).

create table tmp_table(user varchar(32), age int, is_male boolean);

create table tmp_table_2(age int, options varchar(50)); -- 用於join

load data local infile '/tmp/data_file_1' into table tmp_table;

load data local infile '/tmp/data_file_2' into table tmp_table_2;

2)pig

tmp_table = load '/tmp/data_file_1' using PigStorage('\t') as (user:chararray, age:int, is_male:int);

tmp_table_2 = load '/tmp/data_file_2' using PigStorage('\t') as (age:int, options:chararray);

2.查詢整張表

1)mysql

select * from tmp_table;

2)pig

dump tmp_table;

3.查詢前50行

1)mysql

select * from tmp_table limit 50;

2)pig

tmp_table_limit = limit tmp_table 50;

dump tmp_table_limit;

4.查詢某些列

1)mysql

select user from tmp_table;

2)pig

tmp_table_user = foreach tmp_table generate user;

dump tmp_table_user;

5.給列取別名

1)mysql

select user as user_name, age as user_age from tmp_table;

2)pig

tmp_table_column_alias = foreach tmp_table generate user as user_name, age as user_age;

dump tmp_table_column_alias;

6.排序

1)mysql

select * from tmp_table order by age;

2)pig

tmp_table_order = order tmp_table by age asc;

dump tmp_table_order;

7.條件查詢

1)mysql

select * from tmp_table where age>20;

2) pig

tmp_table_where = filter tmp_table by age> 20;

dump tmp_table_where;

8.內連線inner join(相等連線,自然連線)

1)mysql

select * from tmp_table a join tmp_table_2 b on a.age=b.age;

2)pig

tmp_table_inner_join = join tmp_table by age, tmp_table_2 by age;

dump tmp_table_inner_join;

9.左連線left join(返回左表所有行,如果沒有匹配的行則值為空)

1)mysql

select * from tmp_table a left join tmp_table_2 b on a.age=b.age;

2)pig

tmp_table_left_join = join tmp_table by age left outer,tmp_table_2 by age;

dump tmp_table_left_join;

10.右連線right join(保留右邊表所有行,沒有匹配的值則為空)

1)mysql

select * from tmp_table a right join tmp_table_2 b on a.age=b.age;

2)pig

tmp_table_right_join= join tmp_table by age right outer,tmp_table_2 by age;

dump tmp_table_right_join;

11.全連線full join(完整外部連線返回左表和右表中的所有行)

1)mysql

select * from tmp_table a join tmp_table_2 b on a.age=b.age

union select * from tmp_table a left join tmp_table_2 b on a.age=b.age

union select * from tmp_table a right join tmp_table_2 b on a.age=b.age;

2)pig

tmp_table_full_join = join tmp_table by age full outer, tmp_table_2 by age;

dump tmp_table_full_join;

12.同時對多張表交叉查詢(笛卡爾積)

1)mysql

select * from tmp_table,tmp_table_2;

2)pig

tmp_table_cross = cross tmp_table, tmp_table_2;

dump tmp_table_cross;

13.分組group by

1)mysql

select * from tmp_table group by is_male;

2)pig

tmp_table_group = group tmp_table by is_male;

dump tmp_table_group;

14.分組並統計

1)mysql

select is_male,count(*) from tmp_table group by is_male;

2)pig

tmp_table_group_count = group tmp_table by is_male;

tmp_table_group_count = foreach tmp_table_group_count generate group, COUNT($1); -- 注意:這是統計第1列屬性;第0列屬性是group名字。Pig的內建函式名稱區分大小寫,必須寫成大寫的COUNT。

dump tmp_table_group_count;

這裡要注意:

15.查詢去重distinct

1)mysql

select distinct is_male from tmp_table;

2)pig

tmp_table_distinct = foreach tmp_table generate is_male;

tmp_table_distinct = distinct tmp_table_distinct;

dump tmp_table_distinct;

pig mysql pig對應sql的基本命令

1.從檔案匯入資料 1 mysql mysql需要先建立表 create table tmp table user varchar 32 ageint,is male boolean create table tmp table 2 age int,optionsvarchar 50 用於join ...

SQL 模糊查詢

between.and.在資料庫內部是做作特殊優化的,執行效率比 and 等這種方式快 between a and b 相當於 字段 a and欄位 b 例如 select from dbo.mystudent where s age between 20 and 30 between and還可以...

SQL優化策略

1.對查詢進行優化,要盡量避免全表掃瞄,首先應考慮在進行條件判斷的字段上建立索引 2.應盡量避免在where子句中對字段進行null值判斷,否則將導致引擎放棄使用索引而進行全表掃瞄 select from scott emp where job is null 3.應盡量避免在where字句中使用 ...