目錄
# requests庫
## requests.get(url) 模擬瀏覽器開啟網頁
# re庫
import requests
import re
# Script 1: fetch a page, extract content strings and title strings with
# regexes, and print one line per (title, content) pair.
# NOTE(review): in this copy the URL is empty, the patterns are reduced to
# '(.*?)', the f-string has no fields, and loop bodies have lost their
# indentation -- restore these from the original before running.
response = requests.get('') # simulate a browser opening the page
# print(response.status_code) # 200 success, 301, 404 page not found
# print(response.encoding) # utf-8
data = response.text # response body as text
# print(data)
# . matches any character; * means zero or more of the preceding character
content_res = re.findall('(.*?)
', data)
title_res = re.findall('(.*?)', data)
# Debug lookups of two specific titles; presumably used to find the slice
# bounds used below -- confirm.
# print(title_res.index('活得糊塗的人,容易幸福'))
# print(title_res.index('購買銀行理財產品虧損後如何起訴'))
# Keep only the titles at indices 10..59.
title_res= title_res[10:60]
# print(title_res)
# Map each kept title to the content string at the same position.
# NOTE(review): content_res is not sliced, so the title originally at index
# 10+i is paired with content_res[i] -- confirm the offsets line up.
title_content_dic = {}
for i in range(len(title_res)):
title_content_dic[title_res[i]] = content_res[i]
# print(title_content_dic)
# print(title_content_dic)
# Print one line per (title, content) item.
for i in title_content_dic.items():
# print(str(i)+'\n')
print(f' | ')
import requests
import re
# Script 2: fetch a page, cut it into fragments with one regex, then pull a
# content, a title and a desc string out of each fragment and store them as
# title -> (content, desc).
# NOTE(review): in this copy the URL is empty, the patterns are reduced to
# '(.*?)', the f-string has no fields, and loop bodies have lost their
# indentation -- restore these from the original before running.
response = requests.get('') # simulate a browser opening the page
data = response.text
res = re.findall('(.*?)',data)
title_content_desc_dic = {}
for i in res:
# Take the first match of each pattern; [0] raises IndexError when a
# pattern finds nothing in the fragment.
content = re.findall('(.*?)
',i)[0]
title = re.findall('(.*?)',i)[0]
# The desc pattern starts at the literal text '04月' (month 04).
desc = re.findall('(04月.*?)
',i)[0]
title_content_desc_dic[title] = (content,desc)
# Print one line per collected item.
for i in title_content_desc_dic.items():
print(f' | ')
import requests
import re
# Script 3: fetch a page and save every image referenced by a data-src
# attribute into the current working directory.
# NOTE: the page URL is empty in this copy; supply the target URL before
# running.
response = requests.get('')
data = response.text
img_url_res = re.findall('data-src="(.*?)"', data)
for i in img_url_res:
    img_response = requests.get(i)
    img_data = img_response.content
    # Use the last path segment of the image URL as the local file name.
    img_name = i.split('/')[-1]
    # The with-block closes (and therefore flushes) the file after each
    # image, so handles are not leaked across iterations.
    with open(img_name, 'wb') as f:
        f.write(img_data)
import requests
import re
# Script 4: fetch a listing page, follow each matched '...htm' link, and
# save a video from the linked page as 'test.mp4'.
# NOTE(review): in this copy the URL, the listing pattern and the link
# prefix are empty strings, and loop bodies have lost their indentation --
# restore these from the original before running.
response = requests.get('')
# response.encoding = 'utf8'
data = response.text
# print(data)
# mp4_res1 = re.findall('',data)
# for i in mp4_res1:
# print(i)
mp4_res2 = re.findall('', data)
for i in mp4_res2: # type:str
# Take the first substring ending in 'htm' from the match.
res = re.findall('(.*?htm)', i)[0]
# The prefix is an empty string here; presumably a base URL belongs in it --
# confirm.
res = '' + res
# Request the linked page; this rebinds response and data.
response = requests.get(res)
data = response.text
# NOTE(review): url_res is never assigned -- its only assignment is the
# commented-out line below, so the requests.get(url_res) call raises NameError.
# url_res = re.findall('//video (.*?.mp4)',data)[0]
mp4_response = requests.get(url_res)
mp4_data = mp4_response.content
# The file name is fixed and the mode is 'wb', so each write replaces the
# previous contents of test.mp4; the handle is never closed.
f = open('test.mp4','wb')
f.write(mp4_data)
# break
''''''
python爬蟲簡單 python爬蟲 簡單版
學過python的帥哥都知道,爬蟲是python的非常好玩的東西,而且python自帶urllib urllib2 requests等的庫,為爬蟲的開發提供大大的方便。這次我要用urllib2,爬一堆風景。先上重點 1 response urllib2.urlopen url read 2 soup...
簡單python爬蟲
一段簡單的 python 爬蟲程式,用來練習挺不錯的。讀出一個url下的a標籤裡href位址為.html的所有位址 一段簡單的 python 爬蟲程式,用來練習挺不錯的。讀出一個url下的a標籤裡href位址為.html的所有位址 usr bin python filename test.py im...
Python簡單爬蟲
一.獲取整個頁面的資料 coding utf 8 import urllib defgethtml url page urllib.urlopen url html page.read return html html gethtml print html 二.篩選需要的資料 利用正規表示式來獲取想...