通過關聯訂購關係這個操作做了一個關於join操作的試驗。
以前採用上下行表直接關聯,2個表資料量大約是2200w左右和1400w左右,並且2個表都是屬於寬表,字段內容多,占用空間大,但join的時候用到的字段很少(2個左右),因此很多記憶體都耗在了儲存不必要的字段值。每次關聯操作耗時在2個小時以上。
通過細化join相關表後,首先減少了單表的資料元數目,並且在細化表中只儲存了join操作必須的字段。因此對訂購關係這張表來說,資料量減少了70%以上。
由於sga得到了充分利用,因此join的效率也得到了數十倍的提升。
下面是採用細化分表後在join的詳細實施過程:
減少join表的資料量後的詳細操作記錄:
#elapsed: (order)00:02:25.63 elapsed: 00:00:48.45 13916602
-- Build a narrow copy of mqq.t_wx_user_info2 that keeps only the columns
-- listed below (subchannel_code is renamed to sub_channel_code).
-- Rows kept: every row with enable_status = 'y', plus rows with
-- enable_status = 'n' whose last_unreg_time matches '2007-02-05 %'.
-- Fix: the original text had the keywords fused as "asselect", which does not
-- parse, and the statement had no terminator.
-- NOTE(review): the LIKE pattern implies last_unreg_time is stored as (or
-- implicitly converted to) a 'yyyy-mm-dd ...' string -- confirm the column type.
create table test.tmp_user_info
as
select
    user_num,
    service_id,
    subchannel_code as sub_channel_code,
    channel_code,
    reg_mode,
    unreg_mode,
    enable_status,
    first_reg_time,
    last_reg_time,
    last_unreg_time
from mqq.t_wx_user_info2
where (last_unreg_time like '2007-02-05 %' and enable_status = 'n')
   or enable_status = 'y';
#elapsed: 00:06:38.53 22674664
-- Copy one partition (p_mt_sms_20070206) of mqqflow.t_mt_sms_200702 into a
-- standalone table in tablespace tbs_sub, keeping only the columns listed.
-- No filtering is applied: every row of the partition is copied.
-- Fix: the original text had the keywords fused as "asselect", which does not
-- parse, and the statement had no terminator.
create table test.tmp_oicall_info_20070206
tablespace tbs_sub
as
select
    t1.misc_msg_id,
    t1.msg_id,
    t1.msg_type,
    t1.fee_type,
    t1.fee_value,
    t1.send_address,
    t1.recv_address,
    t1.fee_address,
    t1.down_station,
    t1.outter_id,
    t1.service_id,
    t1.oss_recv_code,
    t1.mt_type,
    t1.mt_content,
    t1.mt_time,
    t1.carry_msg,
    t1.carry_id,
    t1.rpt_flag,
    t1.rpt_state,
    t1.gateway_rpt_state,
    t1.link_id,
    t1.err_detail
from mqqflow.t_mt_sms_200702 partition (p_mt_sms_20070206) t1;
#elapsed: 00:05:07.04
#elapsed: 00:04:29.64 elapsed: 00:04:34.68 (說明前面create table的時候,不需要order by)
-- Materialise the user-info rows that have at least one matching message row:
-- t1 (user info) is inner-joined to t2 (message info) on service_id and on
-- user_num = recv_address. Only user-info columns are projected, so a user
-- row appears once per matching message row.
-- Fixes: the original text had the keywords fused as "asselect" (does not
-- parse) and no statement terminator; the implicit comma join is rewritten as
-- an equivalent explicit inner join.
-- NOTE(review): this reads test.tmp_user_info_2 and test.tmp_oicall_info,
-- whose definitions are not shown here -- confirm they exist and carry the
-- unqualified columns below (sub_channel_code, reg_mode, ...).
create table test.tmp_sms_mt_info
as
select
    t1.user_num,
    t1.service_id,
    sub_channel_code,
    t1.channel_code as channel_code,
    reg_mode,
    unreg_mode,
    enable_status,
    first_reg_time,
    last_reg_time,
    last_unreg_time
from test.tmp_user_info_2 t1
inner join test.tmp_oicall_info t2
    on t1.service_id = t2.service_id
   and t1.user_num = t2.recv_address;
-- Composite index on the two join-key columns of test.tmp_sms_mt_info
-- (user_num, service_id). Built with nologging in tablespace tbs_sub; the
-- remaining clauses spell out the physical and storage attributes explicitly.
create index idx_tmp_sms_mt_info
    on test.tmp_sms_mt_info (user_num, service_id)
    nologging
    tablespace tbs_sub
    pctfree 10
    initrans 2
    maxtrans 255
    storage (
        initial 64k
        minextents 1
        maxextents 2147483645
        pctincrease 0
        buffer_pool default
    )
    noparallel;
///select count(*) from mqqflow.t_sso_order_2007 partition(p_sso_order_200702) where stat_date=to_date('2007-02-07', 'yyyy-mm-dd') 2044 行
-- Create tmp_mt_sms (no schema prefix, tablespace tbs_stat) from partition
-- p_mt_sms_20070207 of mqqflow.t_mt_sms_200702, left-joined to
-- test.tmp_sms_mt_info on recv_address = user_num and service_id.
-- Because it is a left join, every message row of the partition is kept; the
-- t2.* columns are NULL for messages with no matching user-info row.
-- Fixes: the original text had the keywords fused as "asselect" (does not
-- parse) and no statement terminator.
-- NOTE(review): in the original text the "create table" line carried a "///"
-- prefix, which is not SQL. It is removed here so the statement parses; if
-- the prefix was meant to disable this variant, drop the whole statement.
create table tmp_mt_sms
tablespace tbs_stat
pctused 0
pctfree 10
initrans 1
maxtrans 255
storage (
    initial 64k
    minextents 1
    maxextents 2147483645
    pctincrease 0
    buffer_pool default
)
logging
nocompress
nocache
noparallel
monitoring
as
select
    t1.misc_msg_id,
    t1.msg_id,
    t1.msg_type,
    t1.fee_type,
    t1.fee_value,
    t1.send_address,
    t1.recv_address,
    t1.fee_address,
    t1.down_station,
    t1.outter_id,
    t1.service_id,
    t1.oss_recv_code,
    t1.mt_type,
    t1.mt_content,
    t1.mt_time,
    t1.carry_msg,
    t1.carry_id,
    t2.channel_code,
    t2.sub_channel_code,
    t2.user_num,
    t2.reg_mode,
    t2.unreg_mode,
    t2.enable_status,
    t2.first_reg_time,
    t2.last_reg_time,
    t2.last_unreg_time,
    t1.rpt_flag,
    t1.rpt_state,
    t1.gateway_rpt_state,
    t1.link_id,
    t1.err_detail
from mqqflow.t_mt_sms_200702 partition (p_mt_sms_20070207) t1
left join test.tmp_sms_mt_info t2
    on t2.user_num = t1.recv_address
   and t1.service_id = t2.service_id;
# elapsed: 00:13:05.74
-- Create test.tmp_mt_sms (tablespace tbs_sub) from the narrowed message table
-- test.tmp_oicall_info_20070206, enriched by two left joins:
--   t3 = test.tmp_sso_mt rows for stat_date 2007-02-06 with the same link_id
--   t2 = test.tmp_user_info rows with the same user_num / service_id
-- Both joins are left joins, so every message row is kept even without a
-- match. The stat_date filter sits in the ON clause so that it restricts t3
-- without discarding unmatched message rows.
-- Fixes: the original text had the keywords fused as "asselect", the alias
-- and keyword fused as "t3on", a stray space in "test. tmp_mt_sms", and no
-- statement terminator.
create table test.tmp_mt_sms
tablespace tbs_sub
as
select
    t1.misc_msg_id,
    t1.msg_id,
    t1.msg_type,
    t1.fee_type,
    t1.fee_value,
    t1.send_address,
    t1.recv_address,
    t1.fee_address,
    t1.down_station,
    t1.outter_id,
    t1.service_id,
    t1.oss_recv_code,
    t1.mt_type,
    t1.mt_content,
    t1.mt_time,
    t1.carry_msg,
    t1.carry_id,
    -- 'web' when a tmp_sso_mt row matched (t3.link_id is not null); otherwise
    -- fall back to the user-info channel_code. decode is kept as written so
    -- the datatype of the created column is unchanged.
    decode(t3.link_id, null, t2.channel_code, 'web') as channel_code,
    t2.sub_channel_code,
    t2.reg_mode,
    t2.unreg_mode,
    t2.enable_status,
    t2.first_reg_time,
    t2.last_reg_time,
    t2.last_unreg_time,
    t1.rpt_flag,
    t1.rpt_state,
    t1.gateway_rpt_state,
    t1.link_id,
    t1.err_detail
from test.tmp_oicall_info_20070206 t1
left join test.tmp_sso_mt t3
    on t3.stat_date = to_date('2007-02-06', 'yyyy-mm-dd')
   and t3.link_id = t1.link_id
left join test.tmp_user_info t2
    on t2.user_num = t1.recv_address
   and t1.service_id = t2.service_id;
oracle中大資料量更新的測試
在試驗中嘗試了2種更新資料的方法 1.update table set where 2.先根據更新條件建立臨時表,再刪掉未更新之前的表,最後把臨時表更名為原始表名 通過試驗很明顯的可以認識到update的效率是非常之低的。通過在網上跟其他oracle使用者的討論,也都一致的認為,大資料量更新應該採用...
Oracle大資料量遷移
prompt 生成歷史表,使用nologging create table his test nologging as select from test prompt 檢驗新舊表的資料量是否一致 select count 1 from test select count 1 from his tes...
大資料量演算法
給40億個不重複的unsigned int的整數,沒排過序的,然後再給一個數,如何快速判斷這個數是否在那40億個數當中 位圖思想解法 include stdio.h include stdlib.h include memory.h define max num 4294967295 int mai...