selenium例項 唯品會爬取

2021-08-19 13:27:59 字數 2306 閱讀 6841

import json
import urllib.parse
from time import sleep

from bs4 import BeautifulSoup
from selenium import webdriver

class weispider(object):
    """Scrape product listings from search-result pages and save them as JSON.

    For every page in ``[start_page, end_page]`` the spider builds a URL,
    renders it in Chrome via Selenium, parses the product cards with
    BeautifulSoup and finally writes all collected items to ``vip.json``.
    """

    # (output key, CSS selector) pairs whose text content is extracted from
    # each product card, in output order.
    # NOTE(review): '.good-title-pms' lacks the 's' the other selectors have;
    # it is kept exactly as published -- confirm against the live markup.
    _TEXT_FIELDS = (
        ("title", ".goods-title-info"),
        ("nowprice", ".goods-sells-price"),
        ("zhekou", ".good-title-pms"),
        ("oldprice", ".goods-market-price"),
        ("discount", ".goods-price-info"),
    )

    def __init__(self, url, shang, start_page, end_page):
        """Store the crawl configuration.

        Args:
            url: Base search URL that the encoded query string is appended to.
            shang: Product keyword to search for.
            start_page: First page number to crawl (inclusive).
            end_page: Last page number to crawl (inclusive).
        """
        self.url = url
        self.shang = shang
        self.start = start_page
        self.end = end_page

    def handle_url(self, page):
        """Return the full URL for result page ``page``.

        The query string is URL-encoded and appended to ``self.url``.
        """
        # NOTE(review): the original dict literal was lost from the published
        # listing; the parameter names below are an assumption -- confirm them
        # against the site's real search URL.
        data = {
            "keyword": self.shang,
            "page": page,
        }
        return self.url + urllib.parse.urlencode(data)

    # page
    def handle_page(self, url, scrolls=10, pause=2):
        """Render ``url`` in Chrome and return the resulting page source.

        The page is scrolled down step by step, presumably so that lazily
        loaded product cards are present in the returned HTML.

        Args:
            url: Address to open.
            scrolls: How many 1000px scroll steps to perform.
            pause: Seconds to wait after each scroll step.

        Returns:
            The HTML of the page after scrolling.
        """
        driver = webdriver.Chrome()
        try:
            driver.get(url)
            for _ in range(scrolls):
                # scrollBy works regardless of which element is the scroller.
                driver.execute_script("window.scrollBy(0, 1000)")
                sleep(pause)
            return driver.page_source
        finally:
            # Always release the browser, even when navigation fails.
            driver.quit()

    @staticmethod
    def _first_text(node, selector):
        """Return the text of the first match of ``selector``, or None."""
        match = node.select_one(selector)
        return match.get_text() if match is not None else None

    def download(self, res):
        """Parse page HTML into a list of product dicts.

        Args:
            res: HTML source of one result page.

        Returns:
            One dict per ``.goods-inner`` element with the keys ``title``,
            ``nowprice``, ``zhekou``, ``oldprice``, ``discount`` and ``img``.
            A field that is missing from a card is stored as ``None``.
        """
        soup = BeautifulSoup(res, 'lxml')
        items = []
        for wei in soup.select('.goods-inner'):
            item = {
                key: self._first_text(wei, selector)
                for key, selector in self._TEXT_FIELDS
            }
            img = wei.select_one('.goods-image-link img')
            item['img'] = img.get('src') if img is not None else None
            items.append(item)
        return items

    # crawl
    def weipinspider(self):
        """Crawl every configured page and write the items to ``vip.json``."""
        infos = []
        for page in range(self.start, self.end + 1):
            url = self.handle_url(page)
            res = self.handle_page(url)
            infos.extend(self.download(res))

        # ensure_ascii=False keeps non-ASCII product text readable in the file.
        with open('vip.json', 'w', encoding='utf-8') as fp:
            json.dump(infos, fp, ensure_ascii=False)

def main():
    """Prompt for the keyword and page range, then run the spider."""
    # NOTE(review): the base URL is empty in the published listing; it must be
    # set to the site's search endpoint before the crawl can load any page.
    url = ""
    shang = input("請輸入你要爬取的商品名")
    start_page = int(input("爬取的起始頁"))
    end_page = int(input("爬取的結束頁"))

    spider = weispider(
        url=url,
        shang=shang,
        start_page=start_page,
        end_page=end_page,
    )
    spider.weipinspider()

# Run the crawler only when this file is executed as a script.
if __name__ == "__main__":
    main()
chrome是谷歌瀏覽器驅動【有介面的瀏覽器驅動】選擇chrome時注意是否與本機的谷歌瀏覽器匹配

phantomjs是一個無介面的瀏覽器【執行較快】但是該產品已經不再更新

爬取唯品會首頁商品分類及鏈結

很久之前做的東西。一直沒時間分享,今天有空正好分享出來。想做個爬取唯品會首頁的商品分類和鏈結的指令碼。第一反應是用beautifulsoup。但是在瀏覽器裡除錯了很久沒有發現鏈結,無奈只能放棄了使用beautifulsoup。嘗試了抓包,我們看看下面是抓到的包 我們可以很清楚的看到,兩個介面除了id...

2013 3 28 唯品會筆試題

唯品會筆試題 1.const關鍵字作用是什麼?static 關鍵字作用是什麼?2.先進先出 後進先出的典型資料結構分別是什麼?用你熟悉的語言編寫其中一個資料結構。3.任選一題 1 協議狀態碼 200 302 404 500分別代表什麼?2 tcp協議和 udp協議的區別是什麼?各舉一個典型的應用場景...

selenium 爬取拉勾

用到的知識點 用selenium爬取拉勾 from lxml import etree from selenium import webdriver from selenium.webdriver.support import expected_conditions as ec from selen...