# 定義一個函式獲取貓眼電影的資料
import requests
def main(url='https://maoyan.com/board/4'):
    """Download one page and print its HTML source.

    Args:
        url: Address of the page to fetch.
            NOTE(review): the literal in the original text was empty
            (apparently lost when the article was extracted); the Maoyan
            board URL used as the default is an assumption -- confirm it
            before relying on it.

    Raises:
        requests.RequestException: if the connection fails or times out.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    html = requests.get(url, timeout=10).text
    print(html)


if __name__ == '__main__':
    main()
# 利用正則匹配,獲得我們想要的資訊
"""< dd >
< i class ="board-index board-index-10">10
< a href = "/films/2760" title = "魂斷藍橋" class ="image-link" data-act="boarditem-click"
data-val="" >
< img src = "" alt = "" class ="poster-default" / >
< img data - src = ""
alt = "魂斷藍橋" class ="board-img" / >< / a >
< div class ="board-item-main" >
< div class ="board-item-content" >
< div class ="movie-item-info" >
< p class ="name" > < a href="/films/2760" title="魂斷藍橋"
data-act="boarditem-click" data-val="" > 魂斷藍橋 < / a > < / p >
< p class ="star" >主演:費雯·麗, 羅伯特·泰勒, 露塞爾·沃特森< / p >
< div class ="movie-item-number score-num" >
< p class ="score" > < i class ="integer" > 9. < / i > < i class ="fraction" > 2 < / i > < / p >
< / div >< / div >< / div >
< / dd >
"""
import re

# Pattern for one <dd> movie entry.  It yields seven groups, matching the
# seven fields the loop below unpacks.
# NOTE(review): the tags inside the original pattern were lost when the
# article was extracted, so this pattern is reconstructed from the sample
# markup.  The sample does not show the release-time paragraph; the
# `releasetime` class and its label are assumptions -- confirm against the
# live page.
reg = (
    r'<dd>.*?board-index.*?>(.*?)</i>'                    # 0: rank
    r'.*?data-src="(.*?)"'                                # 1: poster URL
    r'.*?title="(.*?)"'                                   # 2: title
    r'.*?class="star">\s*(?:主演[::])?\s*(.*?)\s*</p>'    # 3: cast
    r'.*?class="releasetime">\s*(?:上映[时時][间間][::])?\s*(.*?)\s*</p>'  # 4: release time
    r'.*?integer.*?>(.*?)</i>'                            # 5: score, integer part
    r'.*?fraction.*?>(.*?)</i>'                           # 6: score, fraction part
    r'.*?</dd>'
)
# re.S lets "." match newlines, because one entry spans several lines.
reg = re.compile(reg, re.S)
items = re.findall(reg, html)
print(items)

# Walk the matches and turn each tuple into a dict.
for item in items:
    index, image, title, actor, time, integer, fraction = item
    # The page splits the score into two <i> tags (e.g. "9." and "2").
    score = integer + fraction
    # NOTE(review): the original dict literal was lost in extraction; the
    # keys below simply mirror the variable names.
    dict1 = {
        'index': index,
        'image': image,
        'title': title,
        'actor': actor,
        'time': time,
        'score': score,
    }
    print(dict1)
# 把獲得的資料儲存在檔案中
import json

# Append the record to result.txt as one JSON object per line.
with open('result.txt', 'a', encoding='utf-8') as f:
    # ensure_ascii=False writes non-ASCII characters as-is instead of
    # \uXXXX escapes; the trailing newline keeps appended records separable.
    f.write(json.dumps(dict1, ensure_ascii=False) + '\n')
# 利用迴圈獲取貓眼電影所有資料
def main(base_url='https://maoyan.com/board/4?offset='):
    """Build the URLs of the ten ranking pages.

    Each page is addressed by appending an offset (0, 10, ..., 90) to
    ``base_url``.

    Args:
        base_url: Prefix the offset is appended to.
            NOTE(review): the literal in the original text was empty
            (apparently lost in extraction); the default here is an
            assumption -- confirm it.

    Returns:
        A list with the ten page URLs, in page order.
    """
    return [base_url + str(i * 10) for i in range(10)]
# 最後程式碼整理如下
import json
import re
from time import sleep
import requests
# NOTE(review): the URL literal in the original text was empty (apparently
# lost when the article was extracted); this board URL is an assumption --
# confirm it before running.
_BOARD_URL = 'https://maoyan.com/board/4?offset='

# Pattern for one <dd> movie entry, compiled once.  re.S lets "." match
# newlines, because one entry spans several lines.
# NOTE(review): the tags inside the original pattern were lost in
# extraction, so this is a reconstruction with the seven groups the original
# loop indexed (item[0]..item[6]).  The `star` / `releasetime` class names
# and their labels are assumptions -- confirm against the live page.
_ITEM_PATTERN = re.compile(
    r'<dd>.*?board-index.*?>(.*?)</i>'                    # rank
    r'.*?data-src="(.*?)"'                                # poster URL
    r'.*?title="(.*?)"'                                   # title
    r'.*?class="star">\s*(?:主演[::])?\s*(.*?)\s*</p>'    # cast
    r'.*?class="releasetime">\s*(?:上映[时時][间間][::])?\s*(.*?)\s*</p>'  # release time
    r'.*?integer.*?>(.*?)</i>'                            # score, integer part
    r'.*?fraction.*?>(.*?)</i>'                           # score, fraction part
    r'.*?</dd>',
    re.S,
)


def _parse_page(html):
    """Return one dict per movie entry matched in *html*."""
    # NOTE(review): the original dict literal was lost in extraction; the
    # keys below mirror the original variable names.
    return [
        {
            'index': index,
            'image': image,
            'title': title,
            'actor': actor,
            'time': released,
            # The page splits the score into two <i> tags (e.g. "9." + "2").
            'score': integer + fraction,
        }
        for index, image, title, actor, released, integer, fraction
        in _ITEM_PATTERN.findall(html)
    ]


def main():
    """Fetch the ten ranking pages and append every entry to result.txt.

    Each entry is written as one JSON object per line so the file can be
    read back record by record.

    Raises:
        requests.RequestException: if a connection fails or times out.
    """
    # Open the output file once instead of once per record.
    with open('result.txt', 'a', encoding='utf-8') as f:
        for i in range(10):
            url = _BOARD_URL + str(i * 10)
            # A timeout keeps the run from hanging on a stalled connection.
            html = requests.get(url, timeout=10).text
            for record in _parse_page(html):
                # ensure_ascii=False writes non-ASCII characters as-is
                # instead of \uXXXX escapes.
                f.write(json.dumps(record, ensure_ascii=False) + '\n')
            # Pause between page requests.
            sleep(1)


if __name__ == '__main__':
    main()
python爬蟲基礎爬取貓眼電影
import requests from requests.exceptions import requestexception from sqlalchemy import create engine from lxml import etree import pandas as pd impor...
Python爬取貓眼電影
不多說,直接上 import requests import re import random import pymysql import time 連線資料庫 db pymysql.connect host localhost port 3306,user root passwd a db pyt...
爬取貓眼電影
有一份工作需要我列出兩個電影院的每天電影排期資訊,我不想每次都要去貓眼上複製貼上。所以做了個爬蟲 功能 能夠知道每天的電影排期資訊 使用限制 只能在當天使用,不能在前一晚上使用,後面我會再考慮修改 coding utf 8 import requests import re from bs4 imp...