感覺最近做的東西好菜~~隨便了。
# Output CSV shared by the whole script: opened once at import time so that
# get_info() can append one row per listing through the module-level `writer`.
# newline='' is what the csv module requires to avoid blank lines on Windows.
# NOTE(review): the `fp = open` prefix was missing from the extracted source and
# is reconstructed here from the later `csv.writer(fp)` call.
fp = open('d://鏈家房價資料.csv', 'wt', newline='', encoding='utf8')
writer = csv.writer(fp)

# Header row; the column order must match the row built in get_info().
# NOTE(review): the fifth header is literally '**' in the source (it lines up
# with the average-price column) — confirm the intended label.
writer.writerow(('樓盤名', '位址', '房間格式', '房間面積', '**', '起價', '優點'))
def get_html(url):
    """Download *url* and return the page body decoded as UTF-8.

    Uses the module-level ``headers`` dict for the request.

    Returns:
        The decoded HTML text when the server answers with status 200,
        otherwise ``None`` (a short marker is printed: '1' for a non-200
        status, '2' for a request/decoding failure).
    """
    # NOTE(review): the call name in front of ('1') / ('2') was lost in the
    # extracted source; `print` is assumed.
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            return response.content.decode('utf8')
        print('1')
        return None
    except (requests.RequestException, UnicodeDecodeError):
        # Best-effort fetch: network errors and undecodable bodies are
        # reported and skipped, but KeyboardInterrupt and programming errors
        # are no longer swallowed as they were by the bare ``except``.
        print('2')
        return None
def get_info(html, item_xpath="//div[@class='resblock-desc-wrapper']"):
    """Parse one listing page and append a CSV row per listing.

    For every node matched by *item_xpath* the name, location, room layout,
    area, average price, starting price and tags are extracted, printed, and
    written through the module-level ``writer``.

    Args:
        html: Page source as returned by ``get_html``. ``None`` or an empty
            string (a failed download) is ignored.
        item_xpath: XPath selecting the per-listing container nodes.
            NOTE(review): the original expression was lost in the extracted
            source (``selector.xpath()`` had no argument). The default is
            inferred from the relative child paths used below — confirm
            against the live page.

    Returns:
        None.
    """
    if not html:
        # get_html() returns None on failure; there is nothing to parse.
        return

    selector = etree.HTML(html)
    for li in selector.xpath(item_xpath):
        try:
            name = li.xpath(
                "div[@class='resblock-name']/a[@class='name ']/text()")[0]
            adress = '/'.join((
                li.xpath("div[@class='resblock-location']/span[1]/text()")[0],
                li.xpath("div[@class='resblock-location']/span[2]/text()")[0],
                li.xpath("div[@class='resblock-location']/a/text()")[0],
            ))
            how_many_1 = li.xpath(
                "a[@class='resblock-room']/span[1]/text()")[0]
            # The second room span is optional; append it only when present.
            how_many_2 = li.xpath("a[@class='resblock-room']/span[2]/text()")
            if how_many_2:
                how_many_1 = how_many_1 + '/' + how_many_2[0]
            minaji = li.xpath("div[@class='resblock-area']/span/text()")[0]
            price = li.xpath(
                "div[@class='resblock-price']/div[@class='main-price']"
                "/span[@class='number']/text()")[0]
            price += '元/平(均價)'
            qijia = li.xpath(
                "div[@class='resblock-price']/div[@class='second']/text()")[0]
        except IndexError:
            # A mandatory field is missing from this listing; skip it, as the
            # original best-effort loop did, without hiding other errors.
            continue

        # Tag text nodes include whitespace-only fragments; keep the rest.
        advantge = li.xpath("div[@class='resblock-tag']//text()")
        real_advantge = ','.join(
            stripped for stripped in (text.strip() for text in advantge)
            if stripped
        )

        x = [name, adress, how_many_1, minaji, price, qijia, real_advantge]
        # NOTE(review): the call name in front of (x) was lost in the
        # extracted source; `print` is assumed.
        print(x)
        writer.writerow(x)
if __name__ == '__main__':
    # Crawl result pages 1..18.
    # NOTE(review): the URL template is an empty string in the extracted
    # source, so every formatted URL is ''. Restore the real template (with
    # a `{}` placeholder for the page number) before running.
    urls = [''.format(i) for i in range(1, 19)]
    try:
        for url in urls:
            html = get_html(url)
            # get_html() returns None on failure; skip those pages instead
            # of handing None to the parser.
            if html is not None:
                get_info(html)
    finally:
        # Flush and release the module-level CSV file handle.
        fp.close()
結果
Python爬取鏈家房價資訊
房子問題近些年來越來越受到大家的關注,要了解近些年的房價,首先就要獲取網上的房價資訊。我們以鏈家網上的房價資訊為例,將資料爬取下來並儲存起來。這次資訊的爬取我們依然採取 requests + Beautiful Soup 的線路來爬取鏈家網上的房源資訊。需要安裝好 Anaconda,並保證系統中已經有re...
Python爬蟲實戰之爬取鏈家廣州房價 03儲存
系列目錄:《Python爬蟲實戰之爬取鏈家廣州房價 01 簡單的單頁爬蟲》、《Python爬蟲實戰之爬取鏈家廣州房價 02 把小爬蟲變大》。這一小節主要講一下前面一直沒有實現的儲存。儲存主要分兩大類:檔案和資料庫。結合這次爬蟲的資料量及後期分析的需要,這次主要介紹 SQLite。通過對 SQLite 資料庫的封裝,處...
python爬蟲 爬取Q房網房價
from lxml import etree 從lxml中匯入etree import requests import csv import time defwritecsv item 定義寫入函式 with open qfang.csv a encoding utf 8 as f writer c...