import codecs
import random
import time

import pymysql
import pymysql.cursors
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Rows to scrape: one dict per distinct car, filled from the database below.
cars = []

# Connection parameters are masked placeholders in this file.
conn = pymysql.connect(
    host='*******',
    charset='utf8',
    user='*******',
    passwd='*****',
    db='mysql',
    cursorclass=pymysql.cursors.DictCursor,
)
try:
    # The cursor is a context manager, so it is closed even if a query fails.
    with conn.cursor() as cur:
        cur.execute("use data_etl")
        cur.execute("select distinct(car_id),car_name from user_car_port")
        # Keep the rows instead of only counting them: the scraping loop
        # further down iterates `cars` and writes the prices onto each row.
        cars = list(cur.fetchall())
    print(len(cars))
finally:
    conn.close()

# NOTE(review): passing the driver path positionally is the Selenium 3 call
# style; newer Selenium releases expect a Service object - confirm against
# the installed version.
driver = webdriver.Chrome('chromedriver.exe')
def getcarpriceoffsale(innerhtml):
    """Parse the price range from the price markup of a discontinued model.

    The price is read from the first ``<strong>`` inside the first ``<span>``
    of the fragment. Its text is split on ``"-"`` and the unit characters
    "萬" and "元" are stripped before converting to float.

    Args:
        innerhtml: HTML fragment (str) containing the price element.

    Returns:
        A ``(button, top)`` tuple of floats holding the first and second
        value of the dash-separated price text. When the text holds a single
        value, ``top`` equals ``button``. A value that could not be located
        or parsed stays at ``0.0``.
    """
    button = 0.0
    top = 0.0
    print("此車型已經停售!")
    # Name the parser explicitly so the parse result does not depend on which
    # optional parser libraries happen to be installed.
    bsobj = BeautifulSoup(innerhtml, "html.parser")

    # find() returns None when nothing matches, which makes the "missing
    # element" diagnostics below reachable (indexing a result list raised
    # IndexError instead and skipped them).
    # NOTE(review): the selector may originally have carried attribute
    # filters that were lost - confirm against the target page.
    spanprice = bsobj.find("span")
    if spanprice is None:
        print("**span為空")
        return button, top

    strongprice = spanprice.find("strong")
    if strongprice is None:
        print("**strong為空")
        return button, top

    text = strongprice.text
    # .text is always a string, so "empty" has to be tested on its content.
    if not text.strip():
        print("**欄位為空")
        return button, top

    prices = [part.replace("萬", "").replace("元", "") for part in text.split("-")]
    try:
        button = float(prices[0])
        top = float(prices[1]) if len(prices) == 2 else button
    except ValueError:
        # Non-numeric price text: keep whatever was parsed so far.
        print("程式出錯!停售車型")
    return button, top
def getcarpriceonsale(innerhtml):
    """Parse the price range from the price markup of a model still on sale.

    The price is read from the first ``<a>`` inside the first ``<dd>`` of the
    fragment. Its text is split on ``"-"`` and the unit characters "萬" and
    "元" are stripped before converting to float.

    Args:
        innerhtml: HTML fragment (str) containing the price element.

    Returns:
        A ``(button, top)`` tuple of floats holding the first and second
        value of the dash-separated price text. When the text holds a single
        value, ``top`` equals ``button``. A value that could not be located
        or parsed stays at ``0.0``.
    """
    button = 0.0
    top = 0.0
    print("此車型在售")
    # Name the parser explicitly so the parse result does not depend on which
    # optional parser libraries happen to be installed.
    bsobj = BeautifulSoup(innerhtml, "html.parser")

    # find() returns None when nothing matches, so a missing element is
    # reported as "cannot be queried" instead of surfacing as an IndexError.
    ddprice = bsobj.find("dd")
    a = ddprice.find("a") if ddprice is not None else None
    if a is None:
        # NOTE(review): the flattened source does not show which of the two
        # checks this message belonged to; it is emitted for both.
        print("此車型暫時無法查詢**")
        return button, top

    prices = [part.replace("萬", "").replace("元", "") for part in a.text.split("-")]
    try:
        button = float(prices[0])
        top = float(prices[1]) if len(prices) == 2 else button
    except ValueError:
        # Non-numeric price text: keep whatever was parsed so far.
        print("程式出錯!在售車型")
    return button, top
def getcarprice(carid):
    """Open a car's page in the shared browser and scrape its price range.

    The page is first treated as an on-sale model: wait up to 5 seconds for
    an element with class ``information-summary``, then parse the element
    with class ``information-price``. If that wait (or the page load) times
    out, wait up to 5 more seconds for an element with class ``car_price``
    and parse it as a discontinued model.

    Args:
        carid: Identifier appended (via ``str``) to the module-level ``url``.
            NOTE(review): ``url`` is not defined in the visible part of this
            file; it must be provided at module level.

    Returns:
        The ``(button, top)`` tuple produced by the matching parser, or
        ``(0.0, 0.0)`` when neither page layout appeared in time.
    """
    try:
        driver.get(url + str(carid))
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CLASS_NAME, "information-summary"))
        )
    except TimeoutException:
        pass  # Not an on-sale page; fall through to the discontinued layout.
    else:
        # find_element(By...) replaces the find_element_by_* helpers, which
        # newer Selenium releases no longer provide.
        ele = driver.find_element(By.CLASS_NAME, "information-price")
        # The DOM property name is case-sensitive: "innerHTML".
        return getcarpriceonsale(ele.get_attribute('innerHTML'))

    try:
        # until() returns the located element, so no second lookup is needed.
        ele = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.CLASS_NAME, "car_price"))
        )
    except TimeoutException:
        print("此車型有問題:" + str(carid))
        return 0.0, 0.0
    return getcarpriceoffsale(ele.get_attribute('innerHTML'))
# Scrape every loaded car, pausing a random 1-5 seconds between page loads,
# and store the resulting price range back on the row dict.
for car in cars:
    # Named car_id rather than `id` so the builtin is not shadowed.
    car_id = car["car_id"]
    time.sleep(random.randint(1, 5))
    button, top = getcarprice(car_id)
    if button == 0.0 and top == 0.0:
        # (0.0, 0.0) means no price was parsed; record 9999 as the marker.
        car["button"] = 9999
        car["top"] = 9999
    else:
        car["button"] = button
        car["top"] = top
Python實戰爬蟲 爬取段子
不管三七二十一我們先導入模組 段子所在的 import re import requests 如果沒這模組執行cmd pip install requests領域 web開發,爬蟲,資料分析,資料探勘,人工智慧 零基礎到專案實戰,7天學習上手做專案 獲取 的內容 段子所在的 import re im...
Python爬蟲實戰 爬取豆瓣影評資料
爬取豆瓣影評資料步驟:1 獲取網頁請求;2 解析獲取的網頁;3 提取資料;4 儲存檔案。1 匯入需要的庫:import urllib.request、from bs4 import BeautifulSoup;隨機數的庫 import random;時間庫 import time;庫 import ...
Python爬蟲實戰之爬取鏈家廣州房價 03儲存
系列目錄 python爬蟲實戰之爬取鏈家廣州房價 01簡單的單頁爬蟲 python爬蟲實戰之爬取鏈家廣州房價 02把小爬蟲變大 這一小節主要講一下前面一直沒有實現的儲存,儲存主要分兩大類 檔案和資料庫。結合這次爬蟲的資料量及後期分析的需要,這次主要介紹sqlite。通過對sqlite資料庫的封裝,處...