'◎年 代'):
date = parse_info(info,
'◎年 代'
) movie[
'date'
]= date
elif info.startswith(
'◎產 地'):
country = parse_info(info,
'◎產 地'
) movie[
'country'
]= country
elif info.startswith(
'◎類 別'):
categories = parse_info(info,
'◎類 別'
) movie[
'categories'
]= categories
elif info.startswith(
'◎豆瓣評分'):
score = parse_info(info,
'◎豆瓣評分'
) movie[
'score'
]= score
elif info.startswith(
'◎片 長'):
duration = parse_info(info,
'◎片 長'
) movie[
'duration'
]= duration
elif info.startswith(
'◎主 演'):
actor = parse_info(info,
'◎主 演'
) actors =
for i in
range
(index +1,
len(infos)-1
):if infos[i]
.startswith(
'◎標 籤'):
break
) movie[
'actors'
]= actors
elif info.startswith(
'◎簡 介'):
爬取電影天堂
分析每頁的 url,可以得到規律:第 t 頁的 url 為(原文此處的網址已遺失)。於是可以先分析第一頁,然後對頁數進行迴圈,就可得到所有最新電影的詳細資訊。 from lxml import etree headers defget movie url url resp requests.get url,headers header...
python xpath爬取電影天堂
import requests from lxml import html base domain url html gndy dyzz list 23 1.html headers defspider base url html gndy dyzz list 23 html movies for ...
Python爬取電影天堂資源
from urllib import request,parse from lxml import etree import requests,re url1 req1 request.request url1 response1 request.urlopen req1 html1 respons...