.
用 FontCreator 開啟 base.woff,檢視字型編號與數字的對應關係;
初始化時將這些對應關係寫入字典中。
#!/usr/bin/env python
# coding:utf-8
# __author__ = "南樓"

import os
import re

import requests
from fontTools.ttLib import TTFont

class maoyan(object):
    """Decode a font-obfuscated star rating scraped from a web page.

    The page renders digits through a per-request web font: every digit is
    emitted as an HTML entity such as ``&#xe07d;`` whose glyph lives in a
    downloaded ``.woff`` file.  A reference font (``./fonts/base.woff``) with a
    known glyph-name -> digit table is loaded once; glyphs of any newly
    downloaded font are then compared against the reference glyph outlines to
    recover the real digits.

    NOTE(review): this class was recovered from a damaged copy of the script.
    Several literals (page URL, request headers, font download prefix, the
    HTML tags inside the star-rating regex, the letter case of the glyph
    names) did not survive intact.  Every such spot is marked with a
    NOTE(review)/TODO below and must be confirmed before relying on it.
    """

    # Directory holding base.woff and every font downloaded so far.
    FONT_DIR = './fonts'

    # NOTE(review): the original download prefix was lost.  Host and path are
    # taken from FONT_FILE_PATTERN below; the "http://" scheme is an
    # assumption -- confirm.
    FONT_URL_PREFIX = 'http://vfile.meituan.net/colorstone/'

    # Captures the .woff file name referenced by the page.
    FONT_FILE_PATTERN = r'vfile\.meituan\.net\/colorstone\/(\w+\.woff)'

    # NOTE(review): reproduced exactly as it survived.  The HTML tags that
    # originally surrounded the capture group appear to have been stripped,
    # so as written this matches the first whitespace-delimited token of the
    # page.  TODO: restore the tags around the rating element.
    STAR_RATING_PATTERN = r'\s+(.*?)\s+'

    def __init__(self, url='', headers=None):
        """Load the reference font and build the glyph lookup tables.

        Args:
            url: Page to crawl.  TODO: the original hard-coded URL was lost;
                it must be supplied here before ``start_crawl`` can work.
            headers: Optional dict of HTTP request headers.  TODO: the
                original header dict was lost; an empty dict is used when
                omitted.
        """
        self.url = url
        self.headers = {} if headers is None else headers

        # Glyph name -> digit, read off base.woff with a font editor.
        # NOTE(review): the damaged copy showed these names in lowercase
        # ("unif3ba"), but it had lowercased every identifier, and the
        # original lookup code upper-cased the hex part of scraped names.
        # Uppercase hex is therefore restored here -- confirm against the
        # glyph order of base.woff.
        self.base_num = {
            "uniF3BA": "0",
            "uniF2A9": "1",
            "uniE6A5": "2",
            "uniF680": "3",
            "uniE69C": "4",
            "uniE710": "5",
            "uniE07D": "6",
            "uniE5A7": "7",
            "uniEC7A": "8",
            "uniE2A3": "9",
        }
        # Glyph name -> glyph object from the reference font.
        self.base_obj = {}

        self.base_font_file = TTFont(os.path.join(self.FONT_DIR, 'base.woff'))
        self.baseobj()

    def baseobj(self):
        """Fill and return ``self.base_obj`` (glyph name -> reference glyph).

        Each name in ``self.base_num`` is looked up in the ``glyf`` table of
        the reference font.
        """
        glyphs = self.base_font_file['glyf']
        for key in self.base_num:
            self.base_obj[key] = glyphs[key]
        return self.base_obj

    def get_html(self, url):
        """Send a GET request with ``self.headers``; return the body bytes."""
        response = requests.get(url, headers=self.headers)
        return response.content

    def create_font(self, re_font_file):
        """Make sure ``re_font_file`` exists locally, then open it.

        The font is downloaded into ``FONT_DIR`` only when it is not already
        there.  The opened font is stored as ``self.font_file``.

        Args:
            re_font_file: Bare ``.woff`` file name extracted from the page.
        """
        font_path = os.path.join(self.FONT_DIR, re_font_file)

        if not os.path.isfile(font_path):
            # NOTE(review): the original message text was lost.
            print('Font file not cached, downloading:', re_font_file)
            new_file = self.get_html(self.FONT_URL_PREFIX + re_font_file)
            with open(font_path, 'wb') as f:
                f.write(new_file)

        # Open the font file and expose it as self.font_file.
        self.font_file = TTFont(font_path)

    def get_num_from_font_file(self, re_star):
        """Translate an entity-encoded number into its real digits.

        ``re_star`` is expected to contain a run such as
        ``&#xe07d;.&#xf2a9;``.  Each entity becomes the glyph name
        ``"uni" + HEX`` (hex upper-cased), that glyph is fetched from
        ``self.font_file`` and compared with every reference glyph; the digit
        of the matching reference glyph is emitted.  Literal dots are kept, so
        values with any number of digits (e.g. "10.0") are handled.

        NOTE(review): the ``&#x`` entity prefix is reconstructed -- the
        surviving code upper-cased the text, substituted a lost token with
        "uni" and stripped ";".

        Args:
            re_star: Raw text captured from the page.

        Returns:
            The decoded number as a string, e.g. ``"6.1"``.

        Raises:
            ValueError: If ``re_star`` holds no encoded digits, or a glyph
                matches none of the reference glyphs.
            KeyError: If a glyph name is missing from ``self.font_file``.
        """
        entity = r'&#[xX][0-9a-fA-F]+;'
        run = re.search(
            r'(?:%s)+(?:\.(?:%s)+)?' % (entity, entity), re_star)
        if run is None:
            raise ValueError(
                'no font-encoded digits found in %r' % (re_star,))

        # Refresh the reference table once, not once per glyph.
        base_obj = self.baseobj()
        glyphs = self.font_file['glyf']

        pieces = []
        tokens = re.findall(r'&#[xX]([0-9a-fA-F]+);|(\.)', run.group(0))
        for hex_code, dot in tokens:
            if dot:
                pieces.append('.')
                continue
            glyph = glyphs['uni' + hex_code.upper()]
            for key in base_obj:
                if glyph == base_obj[key]:
                    pieces.append(self.base_num[key])
                    break
            else:
                raise ValueError(
                    'glyph uni%s matches no reference glyph'
                    % hex_code.upper())
        return ''.join(pieces)

    def start_crawl(self):
        """Fetch the page, sync its font, then decode and print the rating.

        Raises:
            IndexError: If the font reference or the rating text is not found
                in the page.
        """
        html = self.get_html(self.url).decode('utf-8')

        # Find which font file this response uses.
        re_font_file = re.findall(self.FONT_FILE_PATTERN, html)[0]
        self.create_font(re_font_file)

        # Find the encoded star rating.
        re_star_rating = re.findall(self.STAR_RATING_PATTERN, html)[0]
        star_rating = self.get_num_from_font_file(re_star_rating)
        print("星級評分:", star_rating)


if __name__ == '__main__':
    # Script entry point: build the scraper and run a single crawl.
    m = maoyan()
    m.start_crawl()
Python爬取貓眼電影
不多說,直接上 import requests import re import random import pymysql import time 連線資料庫 db pymysql.connect host localhost port 3306,user root passwd a db pyt...
爬取貓眼電影
有一份工作需要我列出兩個電影院的每天電影排期資訊,我不想每次都要去貓眼上複製貼上,所以做了個爬蟲。功能:能夠知道每天的電影排期資訊。使用限制:只能在當天使用,不能在前一晚上使用,後面我會再考慮修改。 coding utf 8 import requests import re from bs4 imp...
python爬取貓眼電影排行
完整的 如下在這裡 閒著沒事,把解析 HTML 中的正則方法改用了 XPath 與 BeautifulSoup,只能說各有各的優點吧。正則的話,提取資訊可以連貫,一次性提取出所有需要的資訊,當然前提是你的正則式子沒有寫錯,所以說正則寫起來相比 XPath 與 BeautifulSoup 來說要複雜一些,提取出錯後,除...