# Scraping store information from 【京客隆超市】 (Jingkelong Supermarket)

1. Import the required libraries

```python
import requests
import pandas as pd
from lxml import etree
```

2. Scrape the links for each district

```python
url = ''  # URL of the shop-list page (the link was stripped when this post was archived)
headers = {'User-Agent': 'Mozilla/5.0'}  # placeholder; the original header values were stripped

response = requests.get(url, headers=headers).text
html = etree.HTML(response)

# District names and relative links from the shop-list sidebar
city_name = html.xpath('//div[@class="infolis"]//a/text()')
city_name = [i.strip() for i in city_name]
city_url = html.xpath('//div[@class="infolis"]//a/@href')
city_url = ['' + i for i in city_url]  # the base-URL prefix was stripped from the post
```

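The hrefs collected above are relative paths, so they must be joined to the site's base URL (the literal prefix was stripped when this post was archived). A minimal sketch of a more robust way to build the absolute links with `urllib.parse.urljoin`, assuming a hypothetical `base_url`:

```python
from urllib.parse import urljoin

base_url = 'http://www.example.com/cn/'  # hypothetical; substitute the site's real base address
city_url = [urljoin(base_url, href) for href in city_url]
```

Note that if you join the links this way, the `if i == 'shoplis.aspx?id=865'` check in the next step must compare against the absolute URL instead of the bare relative path.
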
3. Only one district's shop list spans several pages and needs to be paged through; every other district fits on a single page

```python
for i in city_url:
    if i == 'shoplis.aspx?id=865':
        # This district's list spans several pages, so page through it via POST
        for a in range(1, 4):
            data = {'page': a}  # hypothetical pagination payload; the original form data was stripped
            response3 = requests.post(url=i, data=data, headers=headers).text
            html2 = etree.HTML(response3)
            city_shop_name = html2.xpath('//span[@class="con01"]/text()')
            city_shop_dis = html2.xpath('//span[@class="con02"]/text()')
            city_shop_phone = html2.xpath('//span[@class="con03"]/text()')
            city_shop_time = html2.xpath('//span[@class="con04"]/text()')
            shop_name = [d.strip() for d in city_shop_name]
            print(shop_name)
            print('*' * 30)
            # The DataFrame arguments were stripped from the post; the four scraped lists are assumed
            df = pd.DataFrame([shop_name, city_shop_dis, city_shop_phone, city_shop_time]).T
            df.to_csv("e:/爬取【京客隆超市】店鋪資訊.csv", index=False, header=False,
                      mode="a", encoding="gbk")  # 'gbk' covers the ANSI code page on Chinese Windows
    else:
        # Single-page districts need only one request
        response1 = requests.post(url=i, headers=headers).text
        html1 = etree.HTML(response1)
        city_shop_name1 = html1.xpath('//span[@class="con01"]/text()')
        city_shop_dis1 = html1.xpath('//span[@class="con02"]/text()')
        city_shop_phone1 = html1.xpath('//span[@class="con03"]/text()')
        city_shop_time1 = html1.xpath('//span[@class="con04"]/text()')
        shop_name1 = [c.strip() for c in city_shop_name1]
        print(shop_name1)
        # Save the data
        df = pd.DataFrame([shop_name1, city_shop_dis1, city_shop_phone1, city_shop_time1]).T
        df.to_csv("e:/爬取【京客隆超市】店鋪資訊.csv", index=False, header=False,
                  mode="a", encoding="gbk")
```

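Because every chunk is appended with `mode="a"` and `header=False`, the CSV never gets a header row. A small sketch that writes one header line before the loop runs; the column labels here are my own, not from the original post:

```python
import pandas as pd

csv_path = "e:/爬取【京客隆超市】店鋪資訊.csv"           # same output file as above
columns = ["shop_name", "address", "phone", "hours"]  # hypothetical column labels
pd.DataFrame(columns=columns).to_csv(csv_path, index=False, encoding="gbk")  # header row only
```
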
Complete code:

```python
# Scrape store information from 【京客隆超市】 (Jingkelong Supermarket)
import requests
import pandas as pd
from lxml import etree
url = ''  # URL of the shop-list page (the link was stripped when this post was archived)
headers = {'User-Agent': 'Mozilla/5.0'}  # placeholder; the original header values were stripped

response = requests.get(url, headers=headers).text
html = etree.HTML(response)

city_name = html.xpath('//div[@class="infolis"]//a/text()')
city_name = [i.strip() for i in city_name]
city_url = html.xpath('//div[@class="infolis"]//a/@href')
city_url = ['' + i for i in city_url]  # the base-URL prefix was stripped from the post

for i in city_url:
    if i == 'shoplis.aspx?id=865':
        # This district's shop list spans more than one page; page through it via POST
        for a in range(1, 4):
            data = {'page': a}  # hypothetical pagination payload; the original form data was stripped
            response3 = requests.post(url=i, data=data, headers=headers).text
            html2 = etree.HTML(response3)
            city_shop_name = html2.xpath('//span[@class="con01"]/text()')
            city_shop_dis = html2.xpath('//span[@class="con02"]/text()')
            city_shop_phone = html2.xpath('//span[@class="con03"]/text()')
            city_shop_time = html2.xpath('//span[@class="con04"]/text()')
            shop_name = [d.strip() for d in city_shop_name]
            print(shop_name)
            print('*' * 30)
            df = pd.DataFrame([shop_name, city_shop_dis, city_shop_phone, city_shop_time]).T
            df.to_csv("e:/爬取【京客隆超市】店鋪資訊.csv", index=False, header=False,
                      mode="a", encoding="gbk")
    else:
        # Districts whose shops fit on a single page need only one request
        response1 = requests.post(url=i, headers=headers).text
        html1 = etree.HTML(response1)
        city_shop_name1 = html1.xpath('//span[@class="con01"]/text()')
        city_shop_dis1 = html1.xpath('//span[@class="con02"]/text()')
        city_shop_phone1 = html1.xpath('//span[@class="con03"]/text()')
        city_shop_time1 = html1.xpath('//span[@class="con04"]/text()')
        shop_name1 = [c.strip() for c in city_shop_name1]
        print(shop_name1)
        # Save the data
        df = pd.DataFrame([shop_name1, city_shop_dis1, city_shop_phone1, city_shop_time1]).T
        df.to_csv("e:/爬取【京客隆超市】店鋪資訊.csv", index=False, header=False,
                  mode="a", encoding="gbk")
```

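Once the run finishes, it is worth reading the file back to confirm the append-mode writes lined up. A quick check, again with hypothetical column labels:

```python
import pandas as pd

df = pd.read_csv("e:/爬取【京客隆超市】店鋪資訊.csv", encoding="gbk", header=None,
                 names=["shop_name", "address", "phone", "hours"])  # hypothetical labels
print(df.head())
print(f"{len(df)} shop records in total")
```
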