全文檢索初步學習筆記

本文是學習使用 Oracle Text 對伺服器本地檔案進行全文檢索的筆記，主要參考《oracle全文檢索》這本電子書和網上的一些例子。

首先將dba許可權賦予你的登入賬號

-- Create a preference of type file_datastore named my_datastore_prefs and
-- set its 'path' attribute to the directory d:\file.
begin
    ctx_ddl.create_preference('my_datastore_prefs', 'file_datastore');
    ctx_ddl.set_attribute('my_datastore_prefs', 'path', 'd:\file');
end;
/
-- The lone slash above submits the PL/SQL block in SQL*Plus-style clients;
-- without it the block is never run and later lines are read into its buffer.

--將伺服器(windows)d盤資料夾file設定成oracle讀取檔案的根目錄

-- Create a preference of type url_datastore named url_pref and set its
-- 'timeout' attribute to 300.
begin
    ctx_ddl.create_preference('url_pref', 'url_datastore');
    ctx_ddl.set_attribute('url_pref', 'timeout', '300');
end;
/
-- The lone slash above submits the PL/SQL block in SQL*Plus-style clients;
-- without it the block is never run and later lines are read into its buffer.

--設定url儲存型別(順便記錄下)

-- Create a preference of type charset_filter named cs_filter and set its
-- 'charset' attribute to utf8.
begin
    ctx_ddl.create_preference('cs_filter', 'charset_filter');
    ctx_ddl.set_attribute('cs_filter', 'charset', 'utf8');
end;
/
-- The lone slash above submits the PL/SQL block in SQL*Plus-style clients;
-- without it the block is never run and later lines are read into its buffer.

--設定過濾器的字符集為utf-8(oracle預設為utf-8)，txt檔案也要以utf-8編碼儲存

-- Create a lexer preference named my_lexer of type chinese_lexer.
begin
    ctx_ddl.create_preference('my_lexer', 'chinese_lexer');
end;
/
-- The lone slash above submits the PL/SQL block in SQL*Plus-style clients;
-- without it the block is never run and later lines are read into its buffer.

--中文分析器,據說比chinese_vgram_lexer 效能好，但是它是按片語查詢，單字不一定能匹配出來

-- Create a lexer preference named my_chinese_lexer of type chinese_vgram_lexer.
begin
    ctx_ddl.create_preference('my_chinese_lexer', 'chinese_vgram_lexer');
end;
/
-- The lone slash above submits the PL/SQL block in SQL*Plus-style clients;
-- without it the block is never run and later lines are read into its buffer.

--同樣是中文分析器，單字可以查詢

-- Test table: a numeric key, a title, and the thefile column that the
-- text index in these notes is created on.
create table mydocs (
    id      number        primary key,
    title   varchar2(255),
    thefile varchar2(255)
);

--建立測試表

-- Sample rows whose thefile value is a bare file name with no directory part.
insert into mydocs (id, title, thefile)
values (1, 'document1', 'test1.txt');

insert into mydocs (id, title, thefile)
values (2, 'document2', 'test2.txt');

--插入測試資料,檔案不用帶路徑名,放在d:/file目錄下

-- Rows intended as URL test data.
-- NOTE(review): thefile is an empty string in both rows (Oracle stores '' as
-- NULL); the URLs were presumably lost from these notes -- confirm and fill in.
insert into mydocs (id, title, thefile)
values (5, '163', '');

insert into mydocs (id, title, thefile)
values (6, 'qq', '');

-- Commit the pending inserts.
commit;

-- Create a ctxsys.context index on mydocs.thefile, naming the datastore,
-- filter and lexer preferences to use in the parameters string.
create index mydocs_text_index
    on mydocs (thefile)
    indextype is ctxsys.context
    parameters ('datastore my_datastore_prefs filter cs_filter lexer my_lexer');

--對儲存檔案路徑的字段建立索引

--這時資料庫會自動建立一組索引表，其中$i表存放讀取檔案內容並分析片語後建立的索引

--強制刪除索引(有時刪不掉)

-- Drop the text index using the FORCE option.
-- The terminating semicolon keeps this statement from running into the
-- query that follows it.
drop index mydocs_text_index force;

-- Return the rows of mydocs for which contains() on thefile gives a
-- score greater than 0 for the search expression.
select
    m.id,
    m.title,
    m.thefile
from mydocs m
where contains(m.thefile, '查詢條件') > 0;

--查詢條件為 txt檔案中出現的字元

--模擬真實環境測試時，考慮到大資料量，將測試表建立成分割槽表，每個表空間對應一個物理硬碟

--建立oracle text索引需要消耗ctxsys使用者預設的臨時表空間。如果空間不夠的話將導致 ora-01652 錯誤。此時應擴充套件ctxsys的臨時表空間，而不是發出命令的使用者的預設臨時表空間。

--更改全文索引建立的表空間

begin

ctx_ddl.create_preference('mystore', 'basic_storage'); --建立storage

ctx_ddl.set_attribute('mystore', --設定引數

'i_table_clause',

'tablespace test_space03 storage (initial 1k)');