C# 抓取網頁類（獲取網頁中所有資訊）

using system;

using system.data;

using system.configuration;

using system.net;

using system.io;

using system.text;

using system.collections.generic;

using system.text.regularexpressions;

using system.threading;

using system.web;

using system.web.ui.mobilecontrols;

///

/// 網頁類

///

public class webpage

}///

/// 通過此屬性可獲得本網頁的標題，唯讀

///

public string title

return m_title;}}

public string m_html

return m_html;}}

///

/// 此屬性獲得本網頁的所有鏈結資訊，唯讀

///

public listlinks

}///

/// 此屬性返回本網頁的全部純文字資訊，唯讀

///

public string context

}///

/// 此屬性獲得本網頁的大小

///

public int pagesize

}///

/// 此屬性獲得本網頁的所有站內鏈結

///

public listinsitelinks

}///

/// 此屬性表示本網頁是否可用

///

public bool isgood

}///

/// 此屬性表示網頁的所在的**

///

public string host

}#endregion

///

/// 從html**中分析出鏈結資訊

///

/// list

private listgetlinks()

catch (exception ex) ;

match = match.nextmatch();}}

}return m_links;

}///

/// 此私有方法從一段html文字中提取出一定字數的純文字

///

/// html**

/// 提取從頭數多少個字

/// 是否要鏈結裡面的字

/// 純文字

private string getfirstnchar(string instr, int firstn, bool withlink)

return m_outstr.length > firstn ? m_outstr.substring(0, firstn) : m_outstr;

}#region 公有方法

///

/// 此公有方法提取網頁中一定字數的純文字，包括鏈結文字

///

/// 字數

///

public string getcontext(int firstn)

///

/// 此公有方法從本網頁的鏈結中提取一定數量的鏈結，該鏈結的url滿足某正則式

///

/// 正則式

/// 返回的鏈結的個數

/// list

public listgetspeciallinksbyurl(string pattern, int count)

}return speciallinks;

}///

/// 此公有方法從本網頁的鏈結中提取一定數量的鏈結，該鏈結的文字滿足某正則式

///

/// 正則式

/// 返回的鏈結的個數

/// list

public listgetspeciallinksbytext(string pattern, int count)

}return speciallinks;

}///

/// 這公有方法提取本網頁的純文字中滿足某正則式的文字

///

/// 正則式

/// 返回文字

public string getspecialwords(string pattern)

#endregion

#region 建構函式

//該處視情況而定有的需要解碼

}#endregion}呼叫

webpage webinfo = new webpage("**");

webinfo.context;//不包含html標籤的所有內容

webinfo.m_html;//包含html標籤的內容

C# 抓取網頁類（獲取網頁中所有資訊）

using system using system.data using system.configuration using system.net using system.io using system.text using system.collections.generic using sy...

php 抓取網頁資訊

最近要抓取網頁資料，就用php試了下，發現了一個不錯的php抓取資料的整合類 html dom.php，而根據網頁的特點大致分為兩類，一類是網頁dom結構相同，url類似只是改了部分引數，這個直接用 html file get html url 然後根據網頁dom用 html find plain...

動態抓取網頁資訊

前幾天在做資料庫實驗時，總是手動地向資料庫中新增少量的固定資料，於是就想如何向資料庫中匯入大量的動態資料？在網上了解了網路爬蟲，它可以幫助我們完成這項工作。關於網路爬蟲的原理和基礎知識，網上有大量的相關介紹，本人不想再贅述，個人覺得下面的文章寫得非常好：網路爬蟲基本原理（一）、網路爬蟲基本原理（二） ...

C# 抓取網頁類（獲取網頁中所有資訊）

C# 抓取網頁類（獲取網頁中所有資訊）

php 抓取網頁資訊

動態抓取網頁資訊

相關推薦