import os
import threading
import time
from queue import Queue

import requests
import wget
from lxml import etree
# Queue of page URLs shared between main() and the worker threads.
urlqueue = Queue()
# Number of worker threads to start.
threads_num = 100
# Handles of the started worker threads; main() iterates over this to join them.
threads = []
# List that geturl() fills with the listing-page URLs.
urlist = []
# Not read by the code visible in this file (the functions that use the name
# `skin` assign their own local); presumably meant to be a list -- TODO confirm.
skin = []
def geturl(urllist, base_url=""):
    """Append the listing-page URLs for pages 1 through 42 to *urllist*.

    Each URL is ``base_url + <page number> + ".shtml"``.

    Args:
        urllist: List that receives the URLs; it is modified in place.
        base_url: Prefix placed before the page number.  It defaults to the
            empty string because that is the literal this file contains;
            pass the real site prefix to obtain fetchable URLs.

    Returns:
        The same *urllist* object, now holding the 42 additional URLs.
    """
    urllist.extend(base_url + str(page) + ".shtml" for page in range(1, 43))
    return urllist
def download(url):
    """Fetch *url* and return the response body as text.

    Prints the final URL of the response on success.

    Returns:
        The response text, or None (after printing "抓取失敗") when the
        request fails or the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url)
        # raise_for_status has to be *called*; a bare attribute access never
        # raises, so HTTP error pages would be parsed as if they were valid.
        r.raise_for_status()
    except requests.RequestException:
        print("抓取失敗")
        return None
    print(r.url)
    return r.text


def tree_parser(html):
    """Extract the image URLs from a listing page.

    Args:
        html: Page markup as returned by download().

    Returns:
        The ``src`` values of the images matched by the XPath below, or an
        empty list (after printing "分析失敗") when the markup cannot be
        parsed.  An empty list keeps the caller's loop safe.
    """
    try:
        tre = etree.HTML(html)
        return tre.xpath('//li[@class="boxshadow"]/a/img/@src')
    except (etree.LxmlError, ValueError, TypeError, AttributeError):
        # AttributeError covers etree.HTML() yielding None for empty markup.
        print("分析失敗")
        return []


def downloads():
    """Worker loop: take page URLs from ``urlqueue`` and save their images.

    Runs until a None sentinel is taken from the queue.  For every page URL
    the page is fetched, its image URLs are extracted, and each image is
    saved under ``d:\\skin``.  ``task_done()`` is always called for a page
    URL, even if processing it raises, so that ``urlqueue.join()`` cannot
    block forever.
    """
    path = "d:\\skin"
    while True:
        url = urlqueue.get()  # take one URL
        if url is None:
            break
        try:
            html = download(url)
            # Skip parsing when the fetch already failed and was reported.
            image_urls = tree_parser(html) if html is not None else []
            for i in image_urls:
                try:
                    # Name the file after the last path segment of the URL.
                    wget.download(i, os.path.join(path, i.rsplit("/", 1)[-1]))
                except Exception:
                    # Best effort: report the failed image and carry on.
                    print("儲存失敗", i)
        finally:
            urlqueue.task_done()
            print("剩餘:", urlqueue.qsize())
def main():
    """Queue every listing page, run the worker threads, and wait for them.

    Steps:
      1. Build the page URLs with geturl() and put them on ``urlqueue``.
      2. Start ``threads_num`` threads running downloads() and record each
         one in ``threads``.
      3. Wait until every queued URL has been marked done.
      4. Put one None sentinel per worker on the queue so each worker loop
         exits, then join all workers.
    """
    # Get all page links.
    for page_url in geturl(urlist):
        urlqueue.put(page_url)
    # Start the threads and keep them so they can be joined below.
    for _ in range(threads_num):
        # Pass the function itself; calling it here would run the worker
        # loop in the main thread and never return.
        worker = threading.Thread(target=downloads)
        worker.start()
        threads.append(worker)
    urlqueue.join()
    # Send n None sentinels to the queue, one per worker.
    for _ in range(threads_num):
        urlqueue.put(None)
    for worker in threads:
        worker.join()
    print("結束程式")


if __name__ == "__main__":
    # Time the whole run and print the elapsed seconds.
    starttime = time.time()
    main()
    times = time.time() - starttime
    print(times)
Python 爬取英雄聯盟(LoL)全英雄面板
1.匯入所需模組 import requests import os 建立lol資料夾 os.mkdir lol 2.讀取js檔案,獲取英雄id hero id url response requests.get url,headers headers json list response.json...
Python 爬蟲:爬取英雄聯盟全英雄面板
import requests import re 1 分析目標網頁,確定爬取的url路徑,headers引數 base url headers 2 傳送請求 response requests.get base url,headers headers base data response.json...
LOL全英雄面板爬蟲
coding utf 8 import requests import re import os class lolspider def init self 定義乙個user agent,偽裝成瀏覽器 self.headers defgetresponse self,url 傳送請求,獲取響應 ur...