Python爬蟲基礎學習案例

以下案例僅供個人學習使用，無侵權意圖

爬取搜狗指定詞條對應的搜尋結果頁面（簡易網頁採集器）：

#!/usr/bin/env python
# encoding: utf-8
"""Fetch one page with requests, print it, and save it to ./sogou.html.

@file: 獲取搜狗首頁.py
@time: 2020/2/17 9:14
"""
import requests

if __name__ == '__main__':
    # Step 1: specify the target URL.
    # NOTE(review): the URL literal is empty in this copy of the script (it
    # appears to have been stripped). The file name suggests the Sogou home
    # page -- fill in the real URL before running; requests rejects an empty
    # URL.
    url = ""
    # Step 2: send the request; get() returns a response object.
    response = requests.get(url=url)
    # Step 3: read the response body as text.
    page_text = response.text
    print(page_text)
    # Step 4: persist the page to disk.
    with open('./sogou.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
    print('爬取資料結束！！！')

#!/usr/bin/env python
# encoding: utf-8
"""POST a user-supplied word to a JSON endpoint and print the decoded reply.

@time: 2020/2/17 12:51
"""
import json

import requests


def post_bai_du():
    """Prompt for a word, send a POST request, and print the JSON response."""
    # 1. Specify the URL.
    # NOTE(review): the URL literal is empty in this copy of the script --
    # restore the real endpoint before running.
    post_url = ''
    # 2. User-Agent spoofing.
    # NOTE(review): the header dict was lost in this copy; an empty dict is a
    # placeholder. Restore the original User-Agent header.
    headers = {}
    # 3. Prepare the POST parameters (handled the same way as for GET).
    word = input('enter a word：')
    # NOTE(review): the form-data dict was lost in this copy; presumably it
    # carried `word` under some field name -- confirm and restore.
    data = {}
    # 4. Send the request.
    response = requests.post(url=post_url, data=data, headers=headers)
    # 5. Read the response. .json() returns a Python object; only use it when
    #    the response body is known to be JSON.
    dic_obj = response.json()
    print(dic_obj)
    # Persisting the result (not needed for instant translation):
    # filename = word + '.json'
    # with open(filename, 'w', encoding='utf-8') as fp:
    #     json.dump(dic_obj, fp=fp, ensure_ascii=False)
    print('over!')


if __name__ == '__main__':
    post_bai_du()

爬取豆瓣電影分類排行榜中的電影詳情資料：

#!/usr/bin/env python
# encoding: utf-8
"""Request a JSON chart listing with GET and print the decoded result.

@file: 爬取豆瓣電影分類排行榜.py
@time: 2020/2/17 13:21
"""
import requests
import json


def get_dou_ban():
    """Send a GET request with query parameters and print the JSON response."""
    # NOTE(review): only the path survives in this copy -- the scheme and host
    # are missing, and requests rejects a URL without a scheme. Restore the
    # full URL before running.
    url = 'j/chart/top_list'
    # NOTE(review): the query-parameter dict was lost in this copy; an empty
    # dict is a placeholder. Restore the original parameters.
    param = {}
    # NOTE(review): the header dict was lost in this copy; restore the
    # original headers.
    headers = {}
    response = requests.get(url=url, params=param, headers=headers)
    list_data = response.json()
    # Persisting the result:
    # with open('./douban.json', 'w', encoding='utf-8') as fp:
    #     json.dump(list_data, fp=fp, ensure_ascii=False)
    print(list_data)
    print('over!')


if __name__ == '__main__':
    get_dou_ban()

爬取肯德基餐廳查詢中指定地點的餐廳資料：

#!/usr/bin/env python
# encoding: utf-8
"""Prompt for a city, POST a query, and print the decoded JSON response.

@file: 根據城市查詢肯德基餐廳資訊.py
@time: 2020/2/17 20:56
"""
import requests


def post_kfc():
    """Read a city name from stdin, send a POST request, and print the JSON."""
    # NOTE(review): the URL literal is empty in this copy of the script --
    # restore the real endpoint before running.
    url = ""
    word = input('請輸入城市：')
    # NOTE(review): the form-data dict was lost in this copy; presumably it
    # carried `word` (the city) under some field name -- confirm and restore.
    data = {}
    # NOTE(review): the header dict was lost in this copy; restore the
    # original headers.
    headers = {}
    response = requests.post(url=url, data=data, headers=headers)
    list_data = response.json()
    print(list_data)
    print('over！')


if __name__ == '__main__':
    post_kfc()

#!/usr/bin/env python
# encoding: utf-8
"""POST one query per page for a user-chosen number of pages and print each reply.

@file: 國家藥監局化妝品許可證.py
@time: 2020/2/17 21:58
"""
import requests


def post_gy():
    """Read a page count from stdin, then request and print pages 1..count."""
    # NOTE(review): the URL literal is empty in this copy of the script --
    # restore the real endpoint before running.
    url = ""
    word = int(input('頁碼：'))
    # NOTE(review): indentation was lost in this copy; the request and the
    # print are assumed to belong to the loop body, since the loop would
    # otherwise have no effect per page -- confirm against the original.
    for page in range(1, word + 1):
        # NOTE(review): the form-data dict was lost in this copy; presumably
        # it carried `page` under some field name -- confirm and restore.
        data = {}
        # NOTE(review): the header dict was lost in this copy; restore the
        # original headers.
        headers = {}
        response = requests.post(url=url, data=data, headers=headers)
        list_data = response.json()
        print(list_data)
    print('over！')


if __name__ == '__main__':
    post_gy()

希望通過這些學習案例，讓自己對爬蟲思想有一個更深的體會，也希望自己更快更好地掌握爬蟲技術

python爬蟲案例 Python爬蟲案例集合

在python2.x裡面有urllib和urllib2；在python3.x裡面就把urllib和urllib2合成一個urllib；urllib3是在python3.x裡面新增的第三方擴充套件。import urllib.request 向指定的url位址傳送請求，並返回伺服器響應的類檔案物件 ...

python案例 Python爬蟲案例集合

urllib2 在python2.x裡面有urllib和urllib2；在python3.x裡面就把urllib和urllib2合成一個urllib；urllib3是在python3.x裡面新增的第三方擴充套件。urllib2 官方文件 urllib2 原始碼 urllib2 在 python3....

python爬蟲案例講解 Python爬蟲案例集合

伺服器返回的類檔案物件支援python檔案物件的操作方法 read 方法就是讀取檔案裡的全部內容，返回字串 html response.read 列印響應內容我們需要稍微偽裝下，要不然第一步就會被反爬蟲發現 usr bin env python coding utf 8 import urllib...

Python爬蟲基礎學習案例

python爬蟲案例 Python爬蟲案例集合

python案例 Python爬蟲案例集合

python爬蟲案例講解 Python爬蟲案例集合

相關推薦