import requests

# 1. specify the target url (the address itself is elided in the source)
url = ''
# 2. send the GET request
response = requests.get(url=url)
# 3. take the response body as text
page_text = response.text
# 4. persist the page to disk
with open('./sogou.html', 'w', encoding='utf-8') as fp:
    fp.write(page_text)
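The later examples all pass a headers dict as well; a minimal sketch of the same GET with a User-Agent attached (the target url is guessed from the sogou.html filename, and the UA string is only an example):

import requests

# a User-Agent header makes the request look like a normal browser;
# the exact string below is illustrative
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}
response = requests.get(url='https://www.sogou.com/', headers=headers)
print(response.status_code)  # 200 on success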
Capture the request in a packet-capture tool to find the parameters it carries:

import requests

# search endpoint; the domain is elided in the source, only the /web path survives
url = ''
wd = input("Enter a search keyword: ")
# the dict body is elided in the source; it maps the endpoint's keyword field to wd
param = {}
response = requests.get(url=url, params=param).text
filename = wd + '.html'
with open(filename, 'w', encoding='utf8') as f1:
    f1.write(response)
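The param body above is elided; as a sketch of what requests does with it, assuming the endpoint names its keyword field query (an assumption, not the original dict), the library url-encodes the dict into the query string:

import requests

# assumption: the search endpoint names its keyword field 'query'
param = {'query': 'python'}
# httpbin echoes the request back, which makes the encoding visible
response = requests.get(url='https://httpbin.org/get', params=param)
print(response.url)  # https://httpbin.org/get?query=python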
Save the ids collected from each list page, build new_url from them, and fetch the detail information:
import requests

# list-page endpoint; the url is elided in the source
url = ''
# headers dict elided in the source (typically a User-Agent)
headers = {}
# request parameters elided in the source (page number, page size, ...)
param = {}
id_list = []
json_object = requests.post(url=url, headers=headers, params=param).json()
print(json_object['list'])
for i in json_object['list']:
    id_list.append(i['id'])

# detail endpoint; the url is elided in the source
new_url = ''
filename = 'yaojians.text'
f = open(filename, 'w', encoding='utf8')
for id in id_list:
    # per-id parameters elided in the source
    param = {}
    content_json = requests.post(url=new_url, params=param, headers=headers).json()
    f.write(str(content_json) + '\n')
f.close()
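Note that str(content_json) stores the dict's Python repr (single quotes), which is not valid JSON; a sketch of the alternative with the standard json module, if the file needs to be machine-readable later:

import json

records = [{'id': 1, 'name': 'example'}]  # stand-in for the dicts fetched above
with open('yaojians.json', 'w', encoding='utf8') as f:
    for content_json in records:
        # json.dumps keeps each line valid JSON; ensure_ascii=False keeps CJK text readable
        f.write(json.dumps(content_json, ensure_ascii=False) + '\n')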
pip install bs4
pip install lxml
How the parsing works:
1. Load the source code to be parsed into a bs object.
2. Call the bs object's methods and attributes to locate the relevant tags in the source.
3. Extract the text or attribute values sitting inside the located tags.
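A minimal sketch of these three steps against an inline HTML snippet (the markup is invented for illustration):

from bs4 import BeautifulSoup

html = '<div class="book"><ul><li><a href="/ch1">Chapter 1</a></li></ul></div>'

# 1. load the source into a bs object
soup = BeautifulSoup(html, 'lxml')
# 2. locate the tag of interest (here via a CSS selector)
a_tag = soup.select('.book > ul > li > a')[0]
# 3. extract its text and attribute values
print(a_tag.string)   # Chapter 1
print(a_tag['href'])  # /ch1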
import requests
from bs4 import BeautifulSoup

# book catalogue page; the url is elided in the source
url = ''
# headers dict elided in the source
headers = {}
res = requests.get(url=url, headers=headers).text
soup = BeautifulSoup(res, 'lxml')
a_tags_list = soup.select('.book-mulu > ul > li > a')
filename = 'snaguo.text'
fp = open(filename, 'w', encoding='utf-8')
for a_tag in a_tags_list:
    title = a_tag.string
    # the site's domain is elided in the source; href carries the chapter path
    detail_url = "" + a_tag["href"]
    detail_content = requests.get(url=detail_url, headers=headers).text
    soup = BeautifulSoup(detail_content, "lxml")
    detail_text = soup.find('div', class_="chapter_content").text
    fp.write(title + '\n' + detail_text)
    print(title, '')
print('over')
fp.close()
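The catalogue lookup above uses a CSS selector; the same lookup written with find/find_all, shown against a made-up snippet of the assumed markup:

from bs4 import BeautifulSoup

html = '<div class="book-mulu"><ul><li><a href="/c1">One</a></li></ul></div>'
soup = BeautifulSoup(html, 'lxml')

# CSS-selector form, as in the scraper above
by_select = soup.select('.book-mulu > ul > li > a')
# equivalent find/find_all form
by_find = soup.find('div', class_='book-mulu').find_all('a')
assert [a.string for a in by_select] == [a.string for a in by_find]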
import requests

# page-template url elided in the source; it carries a %d slot for the page number
url = ''
start_page = int(input("Enter the start page: "))
end_page = int(input("Enter the end page: "))
# headers dict elided in the source
headers = {}
for page in range(start_page, end_page + 1):
    # the url of each individual page
    new_url = format(url % page)
    response = requests.get(url=new_url, headers=headers).text
    # (the lines that pull the image urls out of the page are elided in the source)
    # fetch the current image as a binary stream
    content = requests.get(url=detail_url, headers=headers).content
    # split the path and keep its last segment as the file name
    image_name = image_url.split('/')[-1]
    with open('./qiutu/' + image_name, 'wb') as f1:
        f1.write(content)
print('over')
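The elided middle of that loop is where the image urls are extracted from the page. A hypothetical reconstruction with re.findall, assuming the images sit in plain <img src="..."> tags and the site serves protocol-relative urls (both are assumptions, not the original code):

import re

# hypothetical: pull the src attribute out of every <img> tag on the page
image_url_list = re.findall(r'<img src="(.*?)"', response)
for image_url in image_url_list:
    # assumption: src values look like //host/path.jpg, so prepend the scheme;
    # the fetch-and-save lines in the loop above then take over
    detail_url = 'https:' + image_url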