import json
import re
from datetime import datetime
import time
import os
from lxml import etree
from selenium import webdriver
from pyquery import pyquery as pq
# Global article counter, incremented by wxmps.write_in for progress logging.
i = 0
import requests
class wxmps(object):
    """Crawler for a WeChat Official Account (公眾號).

    Pages through the account's article list via the ``profile_ext`` API,
    fetches each article's body text and its elected comments, and appends
    the results to per-account JSON files.

    NOTE(review): the original source was truncated/garbled — the
    ``__init__`` signature and both API URL templates were lost. They are
    reconstructed here with placeholders and must be confirmed: fill in the
    real session values (biz, appmsg_token, pass_ticket, cookie) captured
    from a logged-in WeChat client before use.
    """

    def __init__(self, _biz='', _pass_ticket='', _app_msg_token='',
                 _cookie='', _account='', _offset=0, start_time=None):
        self.biz = _biz                  # __biz id of the official account
        self.msg_token = _app_msg_token  # appmsg_token (expires periodically)
        self.offset = _offset            # paging offset into the article list
        self.pass_ticket = _pass_ticket  # ticket (not fixed)
        self.account = _account          # account name, used in output file names
        self.startime = start_time       # crawl start timestamp
        # TODO(review): the original Cookie / User-Agent values were lost;
        # restore them from a captured WeChat session.
        self.headers = {
            'Cookie': _cookie,
            'User-Agent': 'Mozilla/5.0 (Linux; Android 8.0.0) '
                          'AppleWebKit/537.36 MicroMessenger/6.7.3',
        }

    def start(self):
        """Walk the article list page by page until the API stops replying ok.

        Side effects: prints progress, sleeps between requests, and delegates
        persistence to :meth:`write_in`.
        """
        offset = self.offset
        while True:
            # profile_ext getmsg API — the URL template was truncated in the
            # original source (only the trailing '=&x5=1&f=json' survived);
            # reconstructed from the four .format() arguments. TODO confirm.
            api = ('https://mp.weixin.qq.com/mp/profile_ext?action=getmsg'
                   '&__biz={}&offset={}&count=10&is_ok=1&scene=124'
                   '&pass_ticket={}&appmsg_token={}&x5=1&f=json'
                   .format(self.biz, offset, self.pass_ticket, self.msg_token))
            print(api)
            resp = requests.get(api, headers=self.headers).json()
            print(resp)
            ret, status = resp.get('ret'), resp.get('errmsg')  # status info
            if ret != 0 and status != 'ok':
                # Bugfix: the original `while true:` had no exit path — stop
                # instead of spinning forever once the session expires or the
                # list is exhausted.
                break
            print('crawl article: ' + api)
            time.sleep(2)  # throttle to avoid being blocked
            offset = resp['next_offset']  # offset for the next request
            # general_msg_list is a JSON string embedded in the JSON response
            msg_list = json.loads(resp['general_msg_list'])['list']
            for msg in msg_list:
                # First article of this push; pushes lacking one are skipped
                # (original comment: "缺一不可" — both fields are required).
                main_article = msg.get('app_msg_ext_info')
                if not main_article:
                    continue
                content_url = main_article.get('content_url')
                if not content_url:
                    continue
                print('crawl article comments: ' + content_url)
                comment_list = []
                # appmsg_comment API — template also truncated; the surviving
                # fragment was '&pass_ticket=&wxtoken=777&devicetype=android-26
                # &clientversion=26060739'. Reconstructed; TODO confirm params.
                comment_api = (
                    'https://mp.weixin.qq.com/mp/appmsg_comment?action=getcomment'
                    '&__biz={}&offset=0&limit=100'
                    '&pass_ticket={}&wxtoken=777&devicetype=android-26'
                    '&clientversion=26060739'
                    .format(self.biz, self.pass_ticket))
                try:
                    resp = requests.get(comment_api, headers=self.headers).json()
                    ret, status = resp['base_resp']['ret'], resp['base_resp']['errmsg']
                    if ret == 0 or status == 'ok':
                        time.sleep(2)
                        for comment in resp['elected_comment']:
                            comment_list.append({
                                'comment_content': comment.get('content'),
                                'like_num': comment.get('like_num'),  # likes
                            })
                    self.write_in(main_article, self.account, comment_list)
                except Exception as e:  # bugfix: was lowercase `exception`
                    print(e, 'error3')

    def write_in(self, main_url, account, comment_list):
        """Persist one article.

        Appends a small backup record to ``{account}.json`` and the full
        article (body + comments) to ``{account}-articles.json``. Both files
        accumulate comma-separated JSON objects, one per call.
        """
        print("------json")
        data1 = {
            'title': main_url.get('title'),
            'url': main_url.get('content_url'),
            'account': account,
        }
        content_url = main_url.get('content_url')
        content = self.get_content_requests(content_url)
        data2 = {
            'title': main_url.get('title'),
            'digest': main_url.get('digest'),
            'content': content,
            'comments': comment_list,
        }
        global i
        i += 1
        print(i)  # progress: number of articles written so far
        # Backup record (bugfix: the `with open` statements were fused into
        # comments in the original, leaving `f` undefined).
        with open(r'{}.json'.format(account), 'a+', encoding='utf-8') as f:
            json.dump(data1, f, ensure_ascii=False, indent=4)
            f.write(',')
            f.write('\n')
        # Full article record.
        with open(r'{}-articles.json'.format(account), 'a+', encoding='utf-8') as f:
            json.dump(data2, f, ensure_ascii=False, indent=4)
            f.write(',')
            f.write('\n')

    def get_content_requests(self, content_url):
        """Fetch an article page and return the text of all ``<p>`` nodes
        inside the ``#js_content`` body, joined with '.' separators."""
        response = requests.get(content_url).text
        html = etree.HTML(response)  # bugfix: etree.HTML, not etree.html
        content = html.xpath('//*[@id="js_content"]//p')
        content_string = ""
        for temp in content:
            if temp is not None and temp != '\n':  # bugfix: `none` -> None
                content_string = content_string + temp.xpath('string(.)') + '.'
        return content_string
if __name__ == '__main__':
    # Session credentials captured from a logged-in WeChat client.
    pass_ticket = ''  # pass_ticket
    cookie = ''  # cookie
    # global file_path  # needs configuring (kept from original: 需修改)
    start_time = datetime.now()
    # NOTE(review): the original entry point was truncated after this line;
    # it presumably constructed wxmps(...) with the values above and called
    # .start() — restore the lost lines before running.
繼微信之後 微博上線新表情裂開了
程式設計客棧 11月26日 訊息 今日,有網友發現,就在微信上線「裂開了」表情之後,微博也上線了「裂開了」的表情。據悉,「我裂開了」一詞起源於CS:GO主播冬瓜強,後被PDD廣泛傳播。該詞的大概意思為「我心態崩了」或者「我炸了」。11月18日,微信宣布新增六個新表情包。它們分別是翻白...
爬取微信好友資訊
在寫程式之前,需要安裝pillow itchat pandas三個模組。此三個模組可以直接使用pip進行安裝。將生成資訊寫進csv檔案中,或將資訊生成字典,或將每條資訊寫進自己的資料庫皆可,可用於分析好友男女比例,與獲取好友備註、暱稱、所在城市以及個性簽名等資訊。import itchat from...
爬取天氣,並發給微信好友
爬蟲之前就接觸過,只是沒有深入。今天爬了個天氣資訊,然後發給好友或群聊.coding utf 8 created on sat dec 22 18 26 31 2018 author shaowu import re import requests from requests import exce...