學如逆水行舟,不進則退
今天想看**..找了半天,沒有資源..
只能自己爬了
想了半天,忘記了這個古老的技能
撿了一下
# NOTE(review): this paste was garbled by extraction — the import line was
# fused, and the site URL plus the cookie/header values were stripped.
# Reconstructed as valid Python with placeholders; fill in real values
# before running.
import requests
from bs4 import BeautifulSoup

cookies = {}  # TODO: paste session cookies captured from the browser, if needed
headers = {}  # TODO: at minimum set a 'User-Agent' header

# Fetch the novel's table-of-contents page (URL stripped in the paste).
response = requests.get('', headers=headers, cookies=cookies)
# print(response.text)
class downloder(object):
    """Download a serialized web novel chapter by chapter.

    Scrapes the table-of-contents page for chapter titles/links, then
    fetches each chapter body and appends it to a text file.

    NOTE(review): the original paste stripped every URL literal;
    ``server`` and ``target`` below are placeholders — restore the real
    site URLs (target looked like ``<server>/1_1094/``) before running.
    """

    def __init__(self):
        self.server = ''  # site root, prepended to the relative chapter hrefs
        self.target = ''  # table-of-contents page URL
        self.names = []   # 存放章節名字 — chapter titles
        self.urls = []    # 存放章節鏈結 — chapter links
        self.nums = 0     # 章節數量 — chapter count

    def get_download_url(self):
        """Scrape the TOC page and populate self.names/self.urls/self.nums."""
        req = requests.get(self.target, headers=headers, cookies=cookies)
        html = req.text
        # The chapter list lives in <div id="list"> on the TOC page.
        div_bf = BeautifulSoup(html, 'html.parser')
        div = div_bf.find_all('div', id='list')
        a_bf = BeautifulSoup(str(div[0]), 'html.parser')
        a = a_bf.find_all('a')
        for each in a:
            self.names.append(each.string)
            # hrefs on the TOC page are site-relative; prepend the server root.
            self.urls.append(self.server + each.get('href'))
        self.nums = len(a)

    def writer(self, name, path, text):
        """Append one chapter (title line + body + blank separator) to *path*.

        :param name: chapter title, written on its own line
        :param path: output text file, opened in append mode (utf-8)
        :param text: chapter body string
        """
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            f.writelines(text)
            f.writelines('\n\n')

    def get_contents(self, target):
        """Fetch one chapter page at *target* and return its body as a string."""
        req = requests.get(url=target)
        html = req.content
        bf = BeautifulSoup(html, 'html.parser')
        # The chapter body lives in <div id="content">.
        texts = bf.find_all('div', id='content')
        # NOTE(review): the search pattern was stripped in the paste —
        # presumably it converted <br/> tags (or &nbsp; runs) to newlines.
        # TODO confirm against the real page markup.
        texts = str(texts[0]).replace('<br/>', '\n')
        return texts
if __name__ == '__main__':
    # Download every chapter into one text file.
    # NOTE(review): the progress-message text was stripped in the paste;
    # the empty-string prints below are kept as placeholders.
    dl = downloder()
    dl.get_download_url()
    # print(dl.urls)
    print(dl.nums)
    print('')
    for i in range(dl.nums):
        # Fetch chapter i and append it to the output file.
        dl.writer(dl.names[i], '用點.txt', dl.get_contents(dl.urls[i]))
        print('第' + str(i) + '')  # "chapter N" progress line (suffix stripped)
    print("")
不是什麼難的東西....
筆趣閣小說 python3爬蟲例項
import urllib.request import re from bs4 import beautifulsoup as bs def urlopen url req urllib.request.request url html urllib.request.urlopen req htm...
Python爬蟲 筆趣閣小說爬取
import requests from lxml import etree以 我有百萬技能點 為例,在筆趣閣搜尋進入目錄頁,複製目錄頁url 對目錄頁的每個章節的url進行爬取,分析網頁利用xpath定位每個章節的url然後進行爬取,然後重新構造url。目錄每一章節的url href html e...
初級爬蟲爬取筆趣閣小說
import requests from pyquery import pyquery as pq def get content a response requests.get a response.encoding gbk doc pq response.text text doc conten...