Oracle 大資料量去重實驗

[sql]view plain

copy

-- 環境：64位11.2g

-- Part 1: create the test table and generate 20 million test rows,
-- 2 million of which are duplicates.
create table test_t (
    id   number(8)    not null primary key,
    name varchar2(32)
);

-- Insert 18,000,000 rows with ids 1 .. 18000000; each name is produced by
-- sys_guid(). The single commit after the loop makes the whole load one
-- transaction.
begin
    for i in 1 .. 18000000 loop
        insert into test_t (id, name)
        values (i, sys_guid());
    end loop;

    commit;
end;
/

-- Re-insert the name of every row whose id is a multiple of 9 under a new id
-- (18000000 + id / 9), which creates the duplicated names.
insert into test_t (id, name)
select
    18000000 + (id / 9),
    name
from test_t
where mod(id, 9) = 0;

commit;

-- Index the name column.
-- NOTE(review): "anme" in the index name looks like a typo for "name"; the
-- identifier is left unchanged in case anything else refers to it.
create index idx_test_anme on test_t (name);

-- Compute statistics for the table, all of its indexes and all indexed columns.
-- NOTE(review): Oracle's documentation recommends DBMS_STATS rather than
-- ANALYZE for optimizer statistics; ANALYZE is kept to match the original
-- experiment.
analyze table test_t compute statistics
    for table
    for all indexes
    for all indexed columns;

-- Part 2: compare the execution time of two de-duplication approaches.

-- Approach 1: build an intermediate table, create its supporting objects,
-- drop the original table and rename the new one.
-- For every name, only the row with the smallest rowid is copied.
-- NOTE(review): name is nullable, and a NULL name never satisfies
-- a.name = b.name, so rows with a NULL name would not be copied at all.
create table temp as
select
    a.id,
    a.name
from test_t a
where a.rowid = (
    select min(b.rowid)
    from test_t b
    where a.name = b.name
);
-- Elapsed: 08:14.79

-- Index the name column of the de-duplicated table.
create index idx on temp (name);
-- Elapsed: 02:45.73

-- Add the primary key on id to the de-duplicated table.
alter table temp add constraint temp_pk primary key (id);
-- Elapsed: 01:15.79

-- Total elapsed for approach 1 (create table, index, primary key): 12 min 16 s

-- Approach 2: delete the duplicate rows directly from the original table.
-- A row is deleted when its rowid is greater than the smallest rowid among
-- the rows that share its name.
delete from test_t dup
where dup.rowid > (
    select min(first_copy.rowid)
    from test_t first_copy
    where dup.name = first_copy.name
);
-- Elapsed: no result after 2 hours; the run was interrupted.

資料庫大資料量去重實現方式

前提：表 a 的字段為 id、a1、a2（可以有多個字段，在此為方便只列出 3 個字段），其中 id 是主鍵，因此在一個表中是唯一的。方式一：delete from a where a.id not in (select max(id) from a group by a1, a2)。說明：資料量很大時候，執行非常慢。大資...

Oracle大資料量遷移

prompt 生成歷史表，使用 nologging；create table his_test nologging as select * from test; prompt 檢驗新舊表的資料量是否一致；select count(1) from test; select count(1) from his_tes...

大資料量演算法

給 40 億個不重複的 unsigned int 的整數，沒排過序的，然後再給一個數，如何快速判斷這個數是否在那 40 億個數當中？位圖思想解法：#include <stdio.h> #include <stdlib.h> #include <memory.h> #define max_num 4294967295 int mai...

Oracle 大資料量去重實驗

資料庫大資料量去重實現方式

Oracle大資料量遷移

大資料量演算法

相關推薦