1、獲取使用者連續登入天數
-- Consolidate the source login log so that each user has exactly one row per login day.
drop table if exists xxnisj1112_uid_basic;

create table xxnisj1112_uid_basic as
select distinct
    uid,
    pt_day
from oss_bi_all_user_login_log
where pt_day >= '2018-10-01'
  and pt_day <= '2018-11-11';
-- Length of every consecutive-login run for a single user (uid 206538).
-- Each login day is numbered per user in ascending date order; within a run of
-- consecutive days, (login_day - sequence number) is the same date, so grouping on
-- that difference produces one row per run and count(*) is the run length.
-- Assumes xxnisj1112_uid_basic holds one row per uid per day.
select
    ranked.uid,
    date_sub(ranked.login_day, ranked.login_seq),
    count(*) as continuous_login_days
from (
    select
        uid,
        pt_day as login_day,
        row_number() over (partition by uid order by pt_day asc) as login_seq
    from xxnisj1112_uid_basic
    where uid = 206538
) ranked
group by
    ranked.uid,
    date_sub(ranked.login_day, ranked.login_seq);
說明:這種方法構思巧妙,能夠有效獲取使用者每一段連續登入的天數;但上述寫法的輸出並未包含每段連續登入是從哪一天開始、到哪一天結束的詳情(如有需要,可在同一分組中加上 min(login_day) 與 max(login_day) 取得起訖日期)。
2、採用倉庫拉鍊演算法實現
倉庫拉鍊演算法在hive中的實現程式碼如下:
-- ODS-layer user snapshot table, partitioned by dt.
-- The column list is closed before the table-level COMMENT clause; the table name is
-- back-quoted because USER is a reserved keyword in Hive 1.2.0 and later.
-- NOTE(review): ROW FORMAT DELIMITED is kept from the original, but the delimiters are
-- presumably not used by ORC storage — confirm whether the clause can be dropped.
create external table ods.`user` (
    user_num string comment '使用者編號',
    mobile   string comment '手機號碼',
    reg_date string comment '註冊日期'
)
comment '使用者資料表'
partitioned by (dt string)
row format delimited fields terminated by '\t' lines terminated by '\n'
stored as orc
location '/ods/user';
-- Daily user update table:
-- Daily user update table, partitioned by dt.
-- The column list is closed before the table-level COMMENT clause so the statement parses.
-- NOTE(review): ROW FORMAT DELIMITED is kept from the original, but the delimiters are
-- presumably not used by ORC storage — confirm whether the clause can be dropped.
create external table ods.user_update (
    user_num string comment '使用者編號',
    mobile   string comment '手機號碼',
    reg_date string comment '註冊日期'
)
comment '每日使用者資料更新表'
partitioned by (dt string)
row format delimited fields terminated by '\t' lines terminated by '\n'
stored as orc
location '/ods/user_update';
-- Zipper (history) table:
-- Zipper (history) table: one row per version of a user record, with the validity
-- range of that version in t_start_date / t_end_date.
-- Fixes relative to the original draft: the column list is closed before the
-- table-level COMMENT clause, the two validity columns are given a type, and the
-- reg_date column comment no longer duplicates the user_num comment.
-- NOTE(review): ROW FORMAT DELIMITED is kept from the original, but the delimiters are
-- presumably not used by ORC storage — confirm whether the clause can be dropped.
create external table dws.user_his (
    user_num     string comment '使用者編號',
    mobile       string comment '手機號碼',
    reg_date     string comment '註冊日期',
    t_start_date string,  -- first day this version is valid; string, like the date literals the loader writes
    t_end_date   string   -- last day this version is valid; the loader uses '9999-12-31' for the open version
)
comment '使用者資料拉鍊表'
row format delimited fields terminated by '\t' lines terminated by '\n'
stored as orc
location '/dws/user_his';
-- Implementation SQL statement:
-- Daily zipper-table refresh: rebuild dws.user_his from its current contents plus
-- one day's batch of update rows.
--   1. Every existing history row is kept; a row that is still open
--      (t_end_date = '9999-12-31') and whose user appears in the batch is closed
--      at '2017-01-01'.
--   2. Every row of the batch is appended as a new open version starting '2017-01-02'.
-- Column names follow the dws.user_his DDL (t_start_date / t_end_date).
-- Only the batch partition of ods.user_update is read, so earlier batches are not
-- re-applied on every run.
-- NOTE(review): assumes the batch being applied is partition dt = '2017-01-02' and that
-- dt uses the 'yyyy-MM-dd' format — confirm against the job that loads ods.user_update.
insert overwrite table dws.user_his
select
    t.user_num,
    t.mobile,
    t.reg_date,
    t.t_start_date,
    t.t_end_date
from (
    select
        his.user_num,
        his.mobile,
        his.reg_date,
        his.t_start_date,
        case
            when his.t_end_date = '9999-12-31' and upd.user_num is not null then '2017-01-01'
            else his.t_end_date
        end as t_end_date
    from dws.user_his as his
    left join (
        -- One row per updated user, so the join cannot duplicate history rows.
        select distinct user_num
        from ods.user_update
        where dt = '2017-01-02'
    ) as upd
        on his.user_num = upd.user_num
    union all
    select
        new_ver.user_num,
        new_ver.mobile,
        new_ver.reg_date,
        '2017-01-02' as t_start_date,
        '9999-12-31' as t_end_date
    from ods.user_update as new_ver
    where new_ver.dt = '2017-01-02'
) as t;
說明:能夠獲知使用者的連續登入天數,並能準確獲知連續登入的起始日期;缺點是,資料不能一次性算出,需要每日資料更新計算。
3、進行的其他嘗試
曾經嘗試著用一些其他更簡便高效的方法,進行中...
-- Running consecutive-login count per user and login day, using lag().
--   login_with_prev : each login day with the user's previous login day; the first
--                     login defaults to "the day before", so its gap is 1.
--   login_with_gap  : day gap between a login and the previous one.
--   login_in_streak : streak_id increases by 1 whenever the gap is not exactly one
--                     day, so all days of one unbroken run share a streak_id.
-- continuous_login_days is the position of the day inside its streak, so it restarts
-- at 1 after every gap (the original running sum kept accumulating across streaks).
-- Output columns are unchanged. LIMIT without ORDER BY returns an arbitrary sample.
with login_with_prev as (
    select
        uid,
        pt_day as login_day,
        lag(pt_day, 1, date_sub(pt_day, 1)) over (partition by uid order by pt_day asc) as last_login_day
    from xxnisj1112_uid_basic
),
login_with_gap as (
    select
        uid,
        login_day,
        last_login_day,
        datediff(login_day, last_login_day) as login_diff
    from login_with_prev
),
login_in_streak as (
    select
        uid,
        login_day,
        last_login_day,
        login_diff,
        sum(case when login_diff = 1 then 0 else 1 end) over (
            partition by uid
            order by login_day asc
            rows between unbounded preceding and current row
        ) as streak_id
    from login_with_gap
)
select
    uid,
    login_day,
    last_login_day,
    login_diff,
    row_number() over (partition by uid, streak_id order by login_day asc) as continuous_login_days,
    row_number() over (partition by uid order by login_day asc) as rn
from login_in_streak
limit 1000;
-- Same lag()-based running consecutive-login count, restricted to uid 206538.
--   login_with_prev : each login day with the user's previous login day; the first
--                     login defaults to "the day before", so its gap is 1.
--   login_with_gap  : day gap between a login and the previous one.
--   login_in_streak : streak_id increases by 1 whenever the gap is not exactly one
--                     day, so all days of one unbroken run share a streak_id.
-- continuous_login_days is the position of the day inside its streak, so it restarts
-- at 1 after every gap (the original running sum kept accumulating across streaks).
-- Output columns are unchanged. LIMIT without ORDER BY returns an arbitrary sample.
with login_with_prev as (
    select
        uid,
        pt_day as login_day,
        lag(pt_day, 1, date_sub(pt_day, 1)) over (partition by uid order by pt_day asc) as last_login_day
    from xxnisj1112_uid_basic
    where uid = 206538
),
login_with_gap as (
    select
        uid,
        login_day,
        last_login_day,
        datediff(login_day, last_login_day) as login_diff
    from login_with_prev
),
login_in_streak as (
    select
        uid,
        login_day,
        last_login_day,
        login_diff,
        sum(case when login_diff = 1 then 0 else 1 end) over (
            partition by uid
            order by login_day asc
            rows between unbounded preceding and current row
        ) as streak_id
    from login_with_gap
)
select
    uid,
    login_day,
    last_login_day,
    login_diff,
    row_number() over (partition by uid, streak_id order by login_day asc) as continuous_login_days,
    row_number() over (partition by uid order by login_day asc) as rn
from login_in_streak
limit 1000;
hive實現連續N天登入使用者計算
三 計算連續7天的使用者列表 四 按照連續登入天數分組檢視使用者分布總結無 無具體hive是什麼,能幹啥,本文不做過多闡述,具體可以檢視 Apache Hive 的官方使用者文件。假設有表 tmp user login 分割槽表記錄使用者每天的登入資訊,有欄位user id,last day,分割...
hive上連續登入天數的查詢
1 資料測試表及測試資料 測試表表結構 hive desc data room okroomid string pt month string pt day string partition information col name data type comment pt month string...
SPARK SQL連續三天登入的使用者
sid,dt,money shop1,2019 01 18,500 shop1,2019 02 10,500 shop1,2019 02 10,200 shop1,2019 02 11,600 shop1,2019 02 12,400 shop1,2019 02 13,200 shop1,2019 ...