# Time: 2023-03-04 19:16:06
# Function: scrape any novel from Biquge.
from urllib import request
from bs4 import BeautifulSoup
# This function fetches the body text of one chapter and saves it to disk.
def secondopenurl(url, ch_name):
    # Request the chapter detail page
    data = request.urlopen(url).read().decode('gbk')
    # The attrs filter was stripped from the original post; Biquge chapter
    # pages typically carry the text in <div id="content">, so that is
    # assumed here.
    soup = BeautifulSoup(data, 'html.parser').find('div', attrs={'id': 'content'})
    # Clean the body text by removing extra markup. The exact strings the
    # original stripped were also lost; <br/> tags and non-breaking spaces
    # are the usual noise on these pages.
    novel = str(soup).replace('<br/>', '\n').replace('&nbsp;', '').replace('\xa0', '')
    # Change this to your own save location; an absolute path is safest.
    filepath = '../day02/novel_劍來/劍來/%s' % (ch_name)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(novel)
    print('%s -> cached' % (ch_name))
## Replace this with the index page URL of the book you want. The site's
## domain was elided in the original post, so it is left as a placeholder.
BASE_URL = ''  # fill in the Biquge domain here
url = BASE_URL + '/3_3109/'
# The page may decode as utf-8 instead of gbk; if so, switch the decode below.
page = request.urlopen(url).read().decode('gbk')
soup = BeautifulSoup(page, 'html.parser')
chapter_1 = soup.find_all('dd')
# The first few <dd> entries are usually the "latest chapters" block, so skip them.
chapter_2 = chapter_1[9:]
for ch in chapter_2:
    # Each <dd> wraps an <a> whose href is the chapter's relative URL.
    ch_url = BASE_URL + BeautifulSoup(str(ch), 'html.parser').a['href']
    chapter_name = str(ch.string)
    print(ch_url, chapter_name)
    secondopenurl(ch_url, chapter_name)
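
The decode note above deserves emphasis: some Biquge mirrors serve GBK and others UTF-8. A minimal sketch of a more tolerant fetch helper, trying GBK first and falling back to UTF-8 (the name fetch_html is mine, not part of the original script):

def fetch_html(url):
    # Try GBK first, fall back to UTF-8; still raises if neither fits.
    raw = request.urlopen(url).read()
    try:
        return raw.decode('gbk')
    except UnicodeDecodeError:
        return raw.decode('utf-8')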
Written with BeautifulSoup, so it is fairly simple. It is not written especially well; please bear with me. This was written out of personal interest and is for learning and exchange only. Please support the official release!
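
One caveat if you reuse this script: chapter titles go straight into the file path, and scraped titles can contain characters that are illegal in filenames (for example ? * : " on Windows), which makes open() fail. A hedged sketch of a sanitizer to run ch_name through first (safe_name is a name I made up, not part of the original):

import re

def safe_name(ch_name):
    # Replace characters that Windows and most shells reject in filenames.
    return re.sub(r'[\\/:*?"<>|]', '_', ch_name).strip()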