#
#更改後面網址以對應欲爬取內容
import requests
from lxml import etree
class spider(object):
    """Interactive scraper: list albums on a page, pick one, download its tracks.

    Typical flow (see the ``__main__`` section of this file):
    ``getreponce`` -> ``main`` -> ``getreponce`` -> ``getjson`` -> ``download``.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests.get`` may block forever on a stalled connection.
    timeout = 30

    def __init__(self, headers=None):
        """Store the HTTP headers sent with every request.

        Args:
            headers: Optional dict of request headers. When omitted, a
                browser-like ``User-Agent`` is used.
        """
        # NOTE(review): the original header dict is missing from this file;
        # the default below is a generic browser User-Agent -- adjust if the
        # target site requires something else.
        if headers is None:
            headers = {
                "User-Agent": (
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/120.0 Safari/537.36"
                )
            }
        self.headers = headers

    def getreponce(self, url):
        """GET ``url`` with the stored headers and return the response object."""
        reponce = requests.get(url, headers=self.headers, timeout=self.timeout)
        return reponce

    def main(self, reponce, id):
        """List the albums found in ``reponce`` and ask the user to pick one.

        Args:
            reponce: Response whose ``.text`` is the HTML of an album listing.
            id: Number displayed before the first album minus one (the
                caller in this file passes ``0`` so numbering starts at 1).

        Returns:
            The second-to-last ``/``-separated piece of the chosen album's
            ``href`` (presumably the album id -- this assumes hrefs end with
            a trailing slash), or ``""`` if anything goes wrong.
        """
        try:
            first = id
            hrefs = []
            htmls = etree.HTML(reponce.text)
            datas = htmls.xpath("//a[@class='album-title line-1 lg bold _kc']")
            for data in datas:
                tits = data.xpath('./@title')
                href = data.xpath('./@href')
                if not href:
                    # Without a link there is nothing to select later.
                    continue
                id += 1
                hrefs.append(href[0])
                print(id, tits[0] if tits else '', href[0])
            idnum = int(input('輸入欲抓取該音樂整套輯的id:'))
            position = idnum - first - 1
            # Reject out-of-range choices explicitly; a negative index would
            # otherwise silently select an album from the end of the list.
            if not 0 <= position < len(hrefs):
                raise IndexError('id {} is not in the list'.format(idnum))
            return hrefs[position].split('/')[-2]
        except Exception as f:
            # Best-effort boundary: report the problem and let the caller
            # decide what to do with the empty result.
            print('拼音內容可能有誤,或無法對應該拚音,請重新執行' + str(f))
            return ""

    def getjson(self, reponce):
        """Extract track sources and names from the text of ``reponce``.

        The values are taken with regular expressions: everything between
        ``"src":`` / ``"trackname":`` and the next comma, so each returned
        item still carries its surrounding double quotes.

        Returns:
            A ``(src, trackname)`` tuple of two lists of strings.
        """
        import re
        body = reponce.text
        # NOTE(review): the key is matched case-insensitively because the
        # service presumably spells it ``trackName``; confirm against a real
        # response.
        trackname = re.findall(r'"trackname":(.*?),', body, flags=re.IGNORECASE)
        src = re.findall(r'"src":(.*?),', body)
        print(trackname)
        return src, trackname

    def download(self, src, trackname):
        """Download every track into the ``自定義`` folder under the cwd.

        Args:
            src: Track URLs as returned by ``getjson`` (quotes are stripped).
            trackname: Track names as returned by ``getjson``; used as file
                names with a ``.m4a`` suffix.
        """
        import os
        import re
        # Create the target folder (for the music files) in the current directory.
        paths = os.path.join(os.getcwd(), '自定義')
        print(paths)
        # Only create the folder when it does not exist yet.
        if not os.path.exists(paths):
            print('建立資料夾')
            os.makedirs(paths)
        for m4a, name in zip(src, trackname):
            music = requests.get(
                m4a.strip('"'), headers=self.headers, timeout=self.timeout
            )
            name = name.strip('"')
            # Characters that are illegal in file names on Windows would make
            # ``open`` fail (or create an unintended sub-path); replace them.
            filename = re.sub(r'[\\/:*?"<>|]', '_', name) + '.m4a'
            with open(os.path.join(paths, filename), 'wb') as f:
                f.write(music.content)
            print(name, '下載完畢')
        print('done')
        # Keep the console open until the user presses Enter.
        input('等待')
def pini(music):
    """Convert a (Chinese) category name into a pinyin URL path segment.

    Args:
        music: Text typed by the user; may be empty.

    Returns:
        The pinyin syllables of ``music`` joined without separators and
        followed by ``"/"``, or ``""`` when there is nothing to convert.
    """
    # Blank input means "no category": return early, which also avoids
    # importing pypinyin when it is not needed.
    if not music or not music.strip():
        return ""
    from pypinyin import lazy_pinyin
    # Surrounding whitespace is dropped so it cannot end up in the path.
    syllables = lazy_pinyin(music.strip())
    if not syllables:
        return ""
    return ''.join(syllables) + "/"
if __name__ == '__main__':
    # NOTE(review): both URL templates are empty in this copy of the script
    # (the header comment says to edit them for the content to scrape).
    # Fill them in before running: the first receives the pinyin path
    # segment, the second receives the album id returned by ``main``.
    category_url_template = "{}"
    album_url_template = ""

    idnum = 0
    music = input('請輸入音樂爬取的音樂總類,不輸入表示音樂全部種類:')
    # Use fresh names for the results so the ``pini`` function and the
    # ``spider`` class are not overwritten by their own return values.
    category_path = pini(music)
    url = category_url_template.format(category_path)
    crawler = spider()
    reponce = crawler.getreponce(url)
    album_id = crawler.main(reponce, idnum)
    # ``main`` returns "" (after printing a message) when the listing could
    # not be parsed or the selection was invalid; stop instead of requesting
    # a meaningless URL.
    if album_id:
        idnumber_url = album_url_template.format(album_id)
        src, trackname = crawler.getjson(crawler.getreponce(idnumber_url))
        crawler.download(src, trackname)
# --- Related articles (text scraped with the page; not part of the script) ---
# 喜馬拉雅 音訊爬取
# import requests import parsel headers 介面的 url 每一集 都有相對應的 id 的引數值 def down mp4 player,mp4 name resp requests.get player,headers headers path r c users ...
# Python爬蟲 喜馬拉雅音訊爬取
# 爬取喜馬拉雅三國中的前十章音訊 匯入requests模組 import requests 匯入正規表示式 import re 解決反爬問題,匯入ua header 網頁源 中獲取的前十章id sound ids 64686514,64689648,64695831,64695832,3218935,...
# Python爬蟲實戰案例之爬取喜馬拉雅音訊資料詳解
# 前言 喜馬拉雅是專業的音訊分享平台,匯集了有聲 有聲讀物,有聲書,fm電台,兒童睡前故事,相聲小品,鬼故事等數億條音訊,我最喜歡聽民間故事和德雲社相聲集,你呢?今天帶大家爬取喜馬拉雅音訊資料,一起期待吧!這個案例的 位址在這裡 專案目標 爬取喜馬拉雅音訊資料 受害者位址 本文知識點 1 系統分析網頁...