繼微信大更新之後,爬取微信文章 評論

2021-09-23 07:53:57 字數 4307 閱讀 6494

import json
import os
import re
import time
from datetime import datetime

import requests
from lxml import etree
# fix: the importable class is PyQuery (the original `from pyquery import pyquery`
# raises ImportError on current pyquery releases)
from pyquery import PyQuery as pq
from selenium import webdriver

# Global running count of articles written out (incremented in wxmps.write_in).
i = 0

class wxmps(object):
    """Crawl articles and their elected comments from a WeChat Official
    Account through the mp.weixin.qq.com profile_ext / appmsg_comment APIs,
    appending the results to per-account JSON files.

    NOTE(review): this file was recovered from a scraped blog post; the
    `__init__` header, both request-URL strings, and parts of the article
    loop were lost in extraction and have been reconstructed below. Verify
    the endpoints and parameter names against a captured request before
    trusting exact behavior.
    """

    def __init__(self, _biz, _pass_ticket, _msg_token, _offset=0,
                 _account='', start_time=None, _headers=None):
        # NOTE(review): parameter list reconstructed — the original `def`
        # line was lost; attribute names below match the surviving body.
        self.biz = _biz                  # __biz id of the official account
        self.offset = _offset            # paging offset for the article-list API
        self.pass_ticket = _pass_ticket  # 票據(非固定) — session-bound ticket
        self.msg_token = _msg_token      # appmsg_token from a logged-in session
        self.account = _account          # account name, used for output file names
        self.startime = start_time       # crawl start timestamp (name kept as-is)
        # Headers must carry a valid WeChat client Cookie / User-Agent;
        # the original literal was lost in extraction.
        self.headers = _headers if _headers is not None else {}

    def start(self):
        """Page through the article list; for every article fetch its
        comments and hand both to write_in(). Stops when the API refuses
        (expired ticket/token) or returns a non-ok status."""
        offset = self.offset
        while True:
            # Article-list API. NOTE(review): host and most query parameters
            # were lost in extraction; reconstructed in the standard
            # profile_ext/getmsg form — confirm against a working capture.
            api = 'https://mp.weixin.qq.com/mp/profile_ext?action=getmsg' \
                  '&__biz={}&f=json&offset={}&count=10&is_ok=1&scene=124' \
                  '&uin=777&key=777&pass_ticket={}&wxtoken=&appmsg_token={}' \
                  '&x5=1&f=json'.format(self.biz, offset, self.pass_ticket,
                                        self.msg_token)
            print(api)
            resp = requests.get(api, headers=self.headers).json()
            print(resp)
            ret, status = resp.get('ret'), resp.get('errmsg')  # 狀態資訊 (status info)
            if ret == 0 or status == 'ok':
                print('crawl article: ' + api)
                time.sleep(2)  # throttle so the account is not blocked
                offset = resp['next_offset']  # 下一次請求偏移量 (next-page offset)
                general_msg_list = resp['general_msg_list']
                msg_list = json.loads(general_msg_list)['list']  # 獲取文章列表
                for msg in msg_list:
                    info = msg.get('app_msg_ext_info')
                    if not info:
                        continue
                    # 本次推送的首條文章 + 本次推送的其餘文章:
                    # first article of the push, then the rest of the batch.
                    articles = [info] + info.get('multi_app_msg_item_list', [])
                    for main_article in articles:
                        content_url = (main_article.get('content_url') or '').replace('amp;', '')
                        if not content_url:
                            continue
                        print('crawl article comments: ' + content_url)
                        comment_list = []
                        # 缺一不可 — every query parameter below is required
                        # by the comment API; ids are parsed from the
                        # article URL's query string.
                        params = dict(p.split('=', 1) for p in
                                      content_url.split('?', 1)[-1].split('&') if '=' in p)
                        # NOTE(review): comment-API URL reconstructed around
                        # the one surviving fragment (wxtoken/devicetype/
                        # clientversion) — verify parameter names.
                        api = 'https://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment' \
                              '&scene=0&__biz={}&appmsgid={}&idx={}&comment_id={}' \
                              '&offset=0&limit=100&uin=777&key=777' \
                              '&pass_ticket=&wxtoken=777&devicetype=android-26&clientversion=26060739' \
                              '&appmsg_token={}&x5=1&f=json'.format(
                                  self.biz, params.get('mid', ''), params.get('idx', ''),
                                  params.get('comment_id', ''), self.msg_token)
                        try:
                            resp = requests.get(api, headers=self.headers).json()
                            ret, status = resp['base_resp']['ret'], resp['base_resp']['errmsg']
                            if ret == 0 or status == 'ok':
                                time.sleep(2)
                                elected_comment = resp['elected_comment']
                                for comment in elected_comment:
                                    content = comment.get('content')    # 評論內容 (comment text)
                                    like_num = comment.get('like_num')  # 點讚數 (like count)
                                    comment_dict = {}
                                    comment_dict["comment_content"] = content
                                    comment_dict["like_num"] = like_num
                                    comment_list.append(comment_dict)
                            self.write_in(main_article, self.account, comment_list)
                        except Exception as e:  # fix: `exception` is not a name
                            print(e, 'error3')
            else:
                # Ticket/token rejected or no more pages — stop instead of
                # spinning forever (the original `while true:` had no exit).
                break

    def write_in(self, main_url, account, comment_list):
        """Persist one article.

        Writes a light index record (title/url/account) to '<account>.json'
        and the full record (title/digest/content/comments) to
        '<account>-articles.json'. Records are appended as pretty-printed
        JSON objects separated by ',\\n' — the files are NOT valid JSON
        arrays without post-processing.
        """
        print("------json")
        data1 = {}
        data1['title'] = main_url.get('title')
        data1['url'] = main_url.get('content_url')
        data1['account'] = account
        content_url = main_url.get('content_url')
        content = self.get_content_requests(content_url)  # 處理內容 (fetch body text)
        data2 = {}
        data2['title'] = main_url.get('title')
        data2['digest'] = main_url.get('digest')
        data2['content'] = content
        data2['comments'] = comment_list
        global i  # module-level running article counter
        i += 1
        print(i)
        # 備份 — index/backup file (the `with open` was glued into a comment
        # by the scrape; restored, and `false` fixed to `False`).
        with open(r'{}.json'.format(account), 'a+', encoding='utf-8') as f:
            json.dump(data1, f, ensure_ascii=False, indent=4)
            f.write(',')
            f.write('\n')
        # 文章 — full-article file.
        with open(r'{}-articles.json'.format(account), 'a+', encoding='utf-8') as f:
            json.dump(data2, f, ensure_ascii=False, indent=4)
            f.write(',')
            f.write('\n')

    def get_content_requests(self, content_url):
        """Download the article page and return the text of every <p> under
        the '#js_content' body, joined with '.'. Returns '' when the page
        has no matching paragraphs."""
        response = requests.get(content_url).text
        html = etree.HTML(response)  # fix: the lxml parser is etree.HTML
        content = html.xpath('//*[@id="js_content"]//p')
        content_string = ""
        for temp in content:
            if temp is not None and temp != '\n':  # fix: `none` -> `is not None`
                content_string = content_string + temp.xpath('string(.)') + '.'
        return content_string

if __name__ == '__main__':
    # Session credentials must be captured from a logged-in WeChat client
    # (e.g. via a proxy such as Fiddler/Charles); both expire periodically.
    pass_ticket = ''  # pass_ticket
    cookie = ''       # cookie, goes into the request headers
    start_time = datetime.now()
    # NOTE(review): the lines that built the wxmps instance and launched the
    # crawl were lost when this post was scraped; restore them, e.g.:
    #   spider = wxmps(biz, pass_ticket, msg_token,
    #                  _account='some-account', start_time=start_time,
    #                  _headers={'Cookie': cookie})
    #   spider.start()

繼微信之後 微博上線新表情裂開了

程式設計客棧 11月26日 訊息 今日,有網友發現,就在微信上線裂開了表情之後,微博也上線了裂開了的表情。據悉,我裂開了 一詞起源於cs go主播冬瓜強,後被pdd廣泛傳播。該詞的大概意思為我心態崩了或者我炸了。11月18日,微信宣布新增六個新表情包。它們分別是翻白...

爬取微信好友資訊

在寫程式之前,需要安裝pillow itchat pandas三個模組。此三個模組可以直接使用pip進行安裝,將生成資訊寫進csv檔案中,或將資訊生成字典 有 或將每條資訊寫進自己的資料庫都皆可用於分析好友男女比例與獲取好友備註 暱稱 所在城市以及個性簽名等資訊。import itchat from...

爬取天氣,並發給微信好友

爬蟲之前就接觸過,只是沒有深入。今天爬了個天氣資訊,然後發給好友或群聊.coding utf 8 created on sat dec 22 18 26 31 2018 author shaowu import re import requests from requests import exce...