一個比較簡單的爬蟲,用到 bs4 庫和 requests 庫,話不多說,上程式碼:
import requests
import csv
import os
import random
from bs4 import beautifulsoup
# Module-level accumulators used by the scraper. The literals on the right-hand
# side were lost when this listing was extracted; empty lists are the only
# values consistent with how the names are used (iterated and ''.join-ed).
actors = []
title = []
actorss = []
sengm = []
# HTTP request headers passed to requests.get() in main().
# NOTE(review): the original dict literal was lost in extraction; a
# browser-like User-Agent is presumably what was intended -- confirm and
# replace this placeholder with the real value.
headers = {'User-Agent': 'Mozilla/5.0'}
def main(url=''):
    """Scrape one movie page, append it to ``douban.csv`` and save its poster.

    The page is fetched with the module-level ``headers``, parsed with
    BeautifulSoup, and three fields are extracted:

    * title   -- text of every ``<h1>`` element, concatenated
    * cast    -- text of every ``<a>`` inside elements with class ``attrs``,
                 each followed by a comma
    * summary -- text of every ``<span>`` inside elements with class
                 ``related-info``, concatenated

    One row ``[title, cast, summary]`` is appended to ``douban.csv``; the
    header row is written only when the file is new or empty. The last
    ``<img src>`` found under class ``nbgnbg`` is then passed to
    ``download()``.

    Args:
        url: Address of the movie detail page. The empty default is the
            placeholder left in the original listing and must be replaced
            with a real URL before the request can succeed.

    Raises:
        requests.RequestException: if the request fails or the server
            answers with an HTTP error status (``raise_for_status``).
    """
    # bs4 exports the class as ``BeautifulSoup`` (case-sensitive); importing it
    # here keeps this function independent of the file-level import spelling.
    from bs4 import BeautifulSoup

    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    soup = BeautifulSoup(r.content, "html.parser")

    # Results are collected in locals so repeated calls do not accumulate
    # data from earlier pages.
    titles = [h1.text for h1 in soup.find_all('h1')]

    cast = []
    for attrs in soup.find_all(class_='attrs'):
        for link in attrs.select('a'):
            cast.append(link.text)

    summary_parts = []
    for info in soup.find_all(class_="related-info"):
        for span in info.find_all('span'):
            summary_parts.append(span.text)

    # Each name keeps its trailing comma so new rows match the format of rows
    # already written by earlier versions of this script.
    row = [
        ''.join(titles),
        ''.join(name + ',' for name in cast),
        ''.join(summary_parts),
    ]

    # Keep the last poster URL found, as the original nested loops did.
    imgurl = None
    for anchor in soup.find_all(class_="nbgnbg"):
        for img in anchor.find_all('img'):
            imgurl = img.get('src')

    csv_path = 'douban.csv'
    # The file is opened in append mode, so only emit the header once.
    need_header = (not os.path.exists(csv_path)
                   or os.path.getsize(csv_path) == 0)
    with open(csv_path, 'a', encoding='utf-8', newline='') as f1:
        csv_write = csv.writer(f1, dialect='excel')
        if need_header:
            csv_write.writerow(['電影名', '主演', '簡介'])
        csv_write.writerow(row)
    print("影片資訊已儲存")

    if not imgurl:
        # No poster on the page: nothing to download.
        print("未找到海報圖片")
        return

    # Drop the first character of the first <h1> text
    # (presumably a leading newline -- TODO confirm against a real page).
    og = titles[0][1:] if titles else ''
    download(imgurl, og)
def download(url, look, root=r'c:\users\mrq\desktop\資料\python\桌布爬蟲\\'):
    """Download ``url`` into ``root`` unless the file is already there.

    The file name is the last ``/``-separated component of ``url`` and is
    appended directly to ``root``, so ``root`` must end with a path
    separator. Progress is reported on stdout; failures are reported, not
    raised.

    Args:
        url: Address of the file to fetch.
        look: Label printed before the download starts (the caller passes
            the movie title).
        root: Target directory, created if missing. Defaults to the folder
            hard-coded in the original script.
    """
    path = root + url.split('/')[-1]
    print(look)
    try:
        # makedirs also creates missing parent folders and tolerates an
        # already existing directory.
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("檔案已存在")
            return
        r = requests.get(url, timeout=10)
        # Do not save an HTTP error page under the image's file name.
        r.raise_for_status()
        with open(path, 'wb') as f:
            f.write(r.content)
        print("檔案儲存成功")
    except (OSError, requests.RequestException):
        # Only filesystem and network failures are treated as a failed save;
        # KeyboardInterrupt and programming errors still propagate.
        print("檔案儲存失敗")


if __name__ == '__main__':
    main()
本爬蟲僅供學習使用,不得用於商業用途;若不遵守,後果與本文作者無關。
爬蟲爬取豆瓣電影寫入csv
小白flag10 爬蟲爬取豆瓣電影寫入csv json化 csv檔案操作學習 import requests import json from requests.packages.urllib3.exceptions import insecurerequestwarning 解決警告 class ...
豆瓣電影資訊查詢
電影資訊來源為豆瓣網(搜尋頁面位址在原文中已缺失)。由於其頁面是通過 JS 渲染的,直接通過 requests 請求是拿不到電影查詢結果的,所以我先用 selenium 獲取本頁面的查詢結果並提取出前 10 條電影名和對應的詳情頁面 URL,然後再用 requests 請求需要查詢的電影詳情頁面以獲取電影資訊。在運用 selenium 的時候...
爬取豆瓣網電影資訊
coding utf 8 import urllib2 import bs4 from bs4 import beautifulsoup 爬取豆瓣網電影簡介,包括電影名,導演,評分以及介紹等 class dbtop def init self self.usr agent mozilla 5.0 w...