Crawling ** : spider file section. Crawl target: obtain a region's weather forecast for the next seven days and save it to a spreadsheet (the pipeline below actually writes a CSV file, which Excel can open).
import scrapy
from ..items import TianqiyubaoItem

class TianqiSpider(scrapy.Spider):
    name = 'tianqi'
    allowed_domains = ['weather.com.cn']
    start_urls = ['']  # the start URL is left blank in the original

    def parse(self, response):
        # each XPath returns one entry per <li>, i.e. one per forecast day
        date = response.xpath("/html/body/div[@class='con today clearfix']/div[@class='left fl']/div[@class='left-div'][1]/div[@id='7d']/ul[@class='t clearfix']/li/h1/text()").extract()
        weather = response.xpath("/html/body/div[@class='con today clearfix']/div[@class='left fl']/div[@class='left-div'][1]/div[@id='7d']/ul[@class='t clearfix']/li/p[@class='wea']/text()").extract()
        max_wendu = response.xpath("/html/body/div[@class='con today clearfix']/div[@class='left fl']/div[@class='left-div'][1]/div[@id='7d']/ul[@class='t clearfix']/li/p[@class='tem']/span/text()").extract()
        min_wendu = response.xpath("/html/body/div[@class='con today clearfix']/div[@class='left fl']/div[@class='left-div'][1]/div[@id='7d']/ul[@class='t clearfix']/li/p[@class='tem']/i/text()").extract()
        wind = response.xpath("/html/body/div[@class='con today clearfix']/div[@class='left fl']/div[@class='left-div'][1]/div[@id='7d']/ul[@class='t clearfix']/li/p[@class='win']/i/text()").extract()
        for i in range(len(date)):
            # create a fresh item for every day; reusing one mutable item
            # across yields would leave every yielded reference pointing
            # at the same object
            item = TianqiyubaoItem()
            item['日期'] = date[i]
            item['天氣'] = weather[i]
            item['最高溫'] = max_wendu[i]
            item['最低溫'] = min_wendu[i]
            item['風級'] = wind[i]
            yield item
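The absolute XPaths above break as soon as any ancestor div changes. A shorter variant (a sketch, not from the original post, assuming the same page structure the original XPaths target) selects each forecast day's <li> once and reads the fields relative to it, which also removes the parallel-lists indexing:

    def parse(self, response):
        # hypothetical rewrite: one selector per forecast day,
        # fields read relative to that <li> node
        for day in response.xpath("//div[@id='7d']/ul[@class='t clearfix']/li"):
            item = TianqiyubaoItem()
            item['日期'] = day.xpath("./h1/text()").get()
            item['天氣'] = day.xpath("./p[@class='wea']/text()").get()
            item['最高溫'] = day.xpath("./p[@class='tem']/span/text()").get()
            item['最低溫'] = day.xpath("./p[@class='tem']/i/text()").get()
            item['風級'] = day.xpath("./p[@class='win']/i/text()").get()
            yield item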
Items section
import scrapy

class TianqiyubaoItem(scrapy.Item):
    # define the fields for your item here, one scrapy.Field per column
    日期 = scrapy.Field()   # date
    天氣 = scrapy.Field()   # weather description
    最高溫 = scrapy.Field()  # max temperature
    最低溫 = scrapy.Field()  # min temperature
    風級 = scrapy.Field()   # wind level
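Fields declared this way behave like dict keys on the item; a quick illustration (the value here is made up):

    item = TianqiyubaoItem()
    item['日期'] = '18日(今天)'  # hypothetical value
    print(dict(item))           # {'日期': '18日(今天)'}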
Settings section
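The original leaves this section empty. For the pipeline below to run at all, it must be registered in settings.py; a minimal sketch, assuming the project module is named tianqiyubao (inferred from the class names, not stated in the original):

    # enable the custom pipeline; the module path 'tianqiyubao' is an assumption
    ITEM_PIPELINES = {
        'tianqiyubao.pipelines.TianqiyubaoPipeline': 300,
    }
    # the site's robots.txt may block the default-obedient crawler (assumption)
    ROBOTSTXT_OBEY = False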
Pipelines section
class TianqiyubaoPipeline:
    def process_item(self, item, spider):
        # append one CSV row per item; utf-8 keeps the Chinese text readable
        # (the original opened the file without an explicit encoding, and
        # also contained no-op lines like item['日期'] = item.get('日期'),
        # dropped here)
        with open('天氣.csv', 'a', encoding='utf-8') as f:
            txt = '{},{},{},{},{}\n'.format(
                item['日期'], item['天氣'], item['最高溫'],
                item['最低溫'], item['風級'])
            f.write(txt)
        return item
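With the pipeline registered, the spider is run from the project root. As a usage note, Scrapy's built-in feed export can also produce the CSV without any custom pipeline code:

    scrapy crawl tianqi
    # or skip the custom pipeline entirely and let the feed exporter write the file:
    scrapy crawl tianqi -o 天氣.csv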
Crawl result