利用python爬取微博熱搜並進行資料分析

爬取微博熱搜

import os
import re
import time
from datetime import datetime
from typing import Optional

import pandas as pd
import requests
import schedule
from bs4 import BeautifulSoup

# NOTE(review): the URL literal is empty in the published listing (it looks
# like it was stripped during publishing). Set it to the Weibo hot-search
# page before running; main() refuses to run while it is empty.
url = ""
# Template dict copied for every scraped row.
get_info_dict = {}
# Number of snapshots appended to the CSV by this process.
count = 0

CSV_PATH = 'datas.csv'
CSV_ENCODING = 'gbk'
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection blocks the scheduler forever


def _parse_hot_count(text: Optional[str]) -> Optional[int]:
    """Return the first run of digits in *text* as an int, or None if absent.

    Replaces the original ``eval(...)`` on scraped text: evaluating content
    fetched from a web page executes arbitrary expressions and also raises
    on anything that is not a bare number.
    """
    match = re.search(r'\d+', text or '')
    return int(match.group()) if match else None


def main() -> None:
    """Scrape one snapshot of the hot-search table and append it to the CSV.

    Every ``<tr class="">`` row is read; the title comes from the ``<a>``
    inside the ``td-02`` cell and the count from the ``<span>`` in the same
    cell (``None`` when there is no span). Rows 1..15 of the result are kept
    (the first row is skipped) and appended to ``datas.csv``.

    Raises:
        ValueError: if the module-level ``url`` has not been filled in.
        requests.RequestException: if the HTTP request fails or times out.
    """
    global count

    if not url:
        raise ValueError("url is empty; set it to the hot-search page before scraping")

    print("正在爬取資料~~~")
    html = requests.get(url, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(html, 'lxml')

    # One timestamp per snapshot so all of its rows carry the same value.
    scraped_at = datetime.now().strftime("%Y/%m/%d %H:%M")

    get_info_list = []
    for tr in soup.find_all(name='tr', class_=''):
        cell = tr.find(class_='td-02')
        link = cell.find(name='a') if cell is not None else None
        if link is None:
            # Not a hot-search row; the original crashed here with AttributeError.
            continue
        span = cell.find(name='span')
        get_info = get_info_dict.copy()
        get_info['title'] = link.text
        get_info['num'] = _parse_hot_count(span.text) if span is not None else None
        get_info['time'] = scraped_at
        get_info_list.append(get_info)

    # Skip the first row and keep the following 15.
    get_info_list = get_info_list[1:16]
    if not get_info_list:
        print("no hot-search rows found; nothing written")
        return

    df = pd.DataFrame(get_info_list)
    # Write the header only when the file is new or empty. Keying this on the
    # in-process counter (as before) re-emitted the header in the middle of
    # the file every time the script was restarted.
    write_header = not os.path.isfile(CSV_PATH) or os.path.getsize(CSV_PATH) == 0
    df.to_csv(CSV_PATH, mode='a', index=False, header=write_header,
              encoding=CSV_ENCODING)
    count += 1


if __name__ == "__main__":
    # Scheduled scraping: run main() once a minute.
    schedule.every(1).minutes.do(main)
    while True:
        schedule.run_pending()
        time.sleep(1)  # avoid spinning a CPU core between runs

pyecharts資料分析

import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline, Grid
from pyecharts.globals import ThemeType, CurrentConfig

# Each snapshot in the CSV is a consecutive group of this many rows.
ROWS_PER_SNAPSHOT = 15

df = pd.read_csv('datas.csv', encoding='gbk')
print(df)

# Timeline with a custom theme; one frame is added per snapshot.
t = Timeline(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))

for i in range(len(df) // ROWS_PER_SNAPSHOT):
    start = i * ROWS_PER_SNAPSHOT
    snapshot = df.iloc[start:start + ROWS_PER_SNAPSHOT]
    # Reverse the row order, as the original slices did with [::-1].
    # .tolist() converts numpy scalars to plain Python values, which is what
    # pyecharts expects when it serialises the chart options.
    titles = snapshot['title'].iloc[::-1].tolist()
    nums = snapshot['num'].iloc[::-1].tolist()
    # NOTE(review): the listing shows `title=f""` with its placeholder lost;
    # using the snapshot's 'time' column is an assumption - confirm.
    frame_title = str(snapshot['time'].iloc[0]) if 'time' in snapshot.columns else ""

    bar = (
        Bar()
        .add_xaxis(titles)        # x-axis data
        .add_yaxis('num', nums)   # y-axis data
        .reversal_axis()          # flip the axes (horizontal bars)
        .set_global_opts(         # global options
            title_opts=opts.TitleOpts(  # title options
                title=frame_title,
                pos_right="5%",
                pos_bottom="15%",
                title_textstyle_opts=opts.TextStyleOpts(
                    font_family='kaiti', font_size=24, color='#ff1493'
                ),
            ),
            xaxis_opts=opts.AxisOpts(   # x-axis options
                splitline_opts=opts.SplitLineOpts(is_show=True),
            ),
            yaxis_opts=opts.AxisOpts(   # y-axis options
                splitline_opts=opts.SplitLineOpts(is_show=True),
                axislabel_opts=opts.LabelOpts(color='#dc143c'),
            ),
        )
        .set_series_opts(         # series options
            label_opts=opts.LabelOpts(  # label options
                position="right", color='#9400d3'
            ),
        )
    )
    # Wrap the bar chart in a grid with a left offset of 24%.
    grid = Grid().add(bar, grid_opts=opts.GridOpts(pos_left="24%"))
    t.add(grid, "")

t.add_schema(
    play_interval=1000,       # playback interval
    is_timeline_show=False,   # whether to show the timeline component
    is_auto_play=True,
)
t.render('時間輪播圖.html')

python爬取微博熱搜

1 import requests 2 import re 3 import bs4 4 import os 5 import datetime 6 7 url 8 headers 9 try 10 r requests.get url,headers headers 11 except 12 print 出現了不可...

爬取新浪微博熱搜榜

一主題式網路爬蟲設計方案 15分 3.主題式網路爬蟲設計方案概述包括實現思路與技術難點本案例使用requests庫獲取網頁資料，使用beautifulsoup庫解析頁面內容，再使用pandas庫把爬取的資料輸出，並對資料視覺化，最後進行小結技術難點爬取有用的資料，將有礙分析的資料剔除，回歸...

Python爬取微博熱搜榜，將資料存入資料庫

這裡是用來爬取微博熱搜榜的資料，網頁位址為開啟網頁並按下F12進入開發者模式，找到.裡的內容，如圖所示 href後面的內容即為對應的中文編碼的原始碼，其中很多25應該是干擾字元，後面刪掉解析就可以發現是微博熱搜的標題。我數了下，一共有27個，剛好第一個標題為比伯願為賽琳娜捐腎九個字，一個漢字佔三...

利用python爬取微博熱搜並進行資料分析

python爬取微博熱搜

爬取新浪微博熱搜榜

Python爬取微博熱搜榜，將資料存入資料庫

相關推薦