import time
# Thread-pool and process-pool executors:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
# Multithreading:
from threading import Thread
# Multiprocessing:
from multiprocessing import Process
# Process pool:
from multiprocessing import Pool
# cpu_count reports the number of CPU cores on this machine:
from multiprocessing import cpu_count

import requests
from bs4 import BeautifulSoup
def task(url, url_template="", attrs=None, timeout=10):
    """Download one page and return the ``src`` of each ``<img>`` in its container ``<div>``.

    NOTE(review): in the original source both the URL pattern
    (``"".format(url)``) and the ``attrs`` filter (``attrs=``) were blank.
    They are exposed as optional parameters here instead of being guessed;
    confirm the intended values and pass them in (or set them as defaults).

    Args:
        url: Value substituted into ``url_template``. The driver script in
            this file passes page numbers.
        url_template: ``str.format`` pattern for the page address, for
            example ``"https://example.com/list/{}.html"``.
        attrs: Attribute filter handed to ``BeautifulSoup.find`` when
            locating the container ``<div>``. ``None`` applies no filter, so
            the first ``<div>`` in the document is used.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        A list with the ``src`` attribute of every ``<img>`` inside the
        container, or an empty list when no matching ``<div>`` exists.

    Raises:
        ValueError: If the formatted URL is empty (no ``url_template`` given).
        requests.RequestException: If the request itself fails (connection
            error, timeout, malformed URL).
    """
    page_url = url_template.format(url)
    if not page_url:
        # Fail with an actionable message instead of requests' generic
        # "Invalid URL ''" error.
        raise ValueError(
            "url_template is empty; pass the page URL pattern, "
            "e.g. 'https://example.com/list/{}.html'"
        )

    response = requests.get(page_url, timeout=timeout)
    # The original forces GBK decoding rather than trusting the detected
    # encoding (presumably the target site serves GBK -- confirm).
    response.encoding = "gbk"

    soup = BeautifulSoup(response.text, "html.parser")
    container = soup.find(name="div", attrs={} if attrs is None else attrs)
    if container is None:
        # Page layout differs or the filter matched nothing: no images.
        return []

    # Return plain strings (not Tag objects) so results stay cheap to send
    # back from a worker process.
    return [
        img["src"]
        for img in container.find_all(name="img")
        if img.has_attr("src")
    ]


# Rule of thumb: size a process pool to the number of CPU cores, and a thread
# pool to 2-5x the number of CPU cores.
def main():
    """Run ``task`` for pages 1-109 in a process pool and print the elapsed time."""
    start = time.time()
    # One worker process per CPU core.
    with ProcessPoolExecutor(max_workers=cpu_count()) as pool:
        futures = [pool.submit(task, page) for page in range(1, 110)]
    # Leaving the ``with`` block waits for all submitted tasks, exactly as the
    # explicit shutdown() call did.

    # Exceptions raised inside workers are stored on the futures; report them
    # so a run in which every page failed does not look like a success.
    failed = sum(1 for future in futures if future.exception() is not None)
    if failed:
        print("%d of %d tasks failed" % (failed, len(futures)))
    print("耗時:%s" % (time.time() - start))


# Worker processes may re-import this module (spawn start method, e.g. on
# Windows); without this guard each import would try to start a new pool.
if __name__ == "__main__":
    main()
爬取汽車之家
爬汽車之家新聞 爬取汽車之家新聞 import requests 向汽車之家傳送get請求,獲取到頁面 ret requests.get print ret.text 用bs4解析 from bs4 import beautifulsoup 例項化得到物件,傳入要解析的文字,解析器 html.par...
PYTHON爬取汽車之家資料
使用知識 使用說明 源 usr bin env python coding utf 8 time 2020 1 16 15 34 author wsx site file cars.py software pycharm import json from multiprocessing import...
Python練習 scrapy 爬取汽車之家文章
autohome.py spider檔案 coding utf 8 import scrapy from autohome.items import autohomeitem class autohomespider scrapy.spider name autohome allowed domai...