然後修改 items.py 裡面的內容:

import scrapy
import json
import codecs
import re
class commentspider(scrapy.Spider):
    """Fetch a JSON comment feed and print a sentiment label per comment.

    ``parse`` decodes the feed and issues one follow-up request for every
    comment that has text; ``requestcallback`` compares the two
    probabilities in each reply and prints which one is larger.
    """

    name = "comment"

    # NOTE(review): the feed URLs are missing from this listing. With an
    # empty list the spider schedules no requests, so fill this in first.
    start_urls = [
    ]

    # NOTE(review): the sentiment endpoint URL is missing from this listing.
    # The original inline note says the obtained access token is appended to
    # the end of this URL. An empty URL is rejected by scrapy.http.Request.
    sentiment_url = ''

    def parse(self, response):
        """Yield one sentiment request per non-empty comment in the feed.

        :param response: response object whose text is the JSON comment feed
        """
        result = json.loads(response.text)
        # `data` can be absent or null on a failed call; fall back to an
        # empty dict so the nested lookup cannot raise AttributeError.
        comment_list = (result.get('data') or {}).get('data')
        if not result.get('ok') or not comment_list:
            return
        for comment in comment_list:
            text = comment.get('text')
            if not text:
                continue
            # NOTE(review): the pattern is empty in this listing, which makes
            # the substitution a no-op. The intended expression was probably
            # lost and should be restored here.
            comment_text = re.sub(r'', '', text)
            # NOTE(review): the payload literal is missing from this listing.
            # A single 'text' field is assumed because the callback reads
            # 'text' back from the reply - confirm against the API docs.
            body = {'text': comment_text}
            # No method is passed, so Scrapy sends this as a GET; add
            # method='POST' if the endpoint expects the JSON body in a POST.
            yield scrapy.http.Request(
                self.sentiment_url,
                callback=self.requestcallback,
                body=json.dumps(body),
            )

    def requestcallback(self, response):
        """Print which sentiment probability is larger for one reply.

        :param response: response object whose text is the JSON reply
        """
        result = json.loads(response.text)
        items = result.get('items')
        if not (result.get('text') and items):
            # Surface unexpected replies instead of dropping them silently.
            self.logger.warning('unexpected sentiment response: %s', result)
            return
        first = items[0]
        # The printed labels mean "positive sentiment" / "negative sentiment".
        if first.get('positive_prob') > first.get('negative_prob'):
            print('積極情感')
        else:
            print('消極情感')
四、執行

在 PyCharm 終端輸入 scrapy crawl comment

import scrapy
class weibospideritem(scrapy.Item):
    """Item declaring the two fields kept for one scraped record."""

    # define the fields for your item here like:
    # presumably the identifier of the Weibo post - confirm against the caller
    weibo_id = scrapy.Field()
    # presumably the text of the post or comment - confirm against the caller
    text = scrapy.Field()
中途用 Postman 除錯的截圖如下:
