對base64加密的頁面內容進行解密
"""# 1、提取出字型檔案內容
base64_str = re.search(r';base64,(.*?)\'\)'
, page_content)
.group(1)
font_content = base64.b64decode(base64_str)
font = ttfont(io.bytesio(font_content)
)# 2、獲取文字對照的字典
keys = font.getbestcmap(
)#print(keys.items())
#print(hex(38006))
keys =
#print(keys)
# 3、替換文字內容
for k, v in keys.items():
page_content = page_content.replace(f''
, v)
return page_content
def get_xpath(url):
    """Download *url* with browser-like headers and parse the decoded page.

    The response text is passed through ``base64_decode`` before being
    parsed with ``etree.HTML``.

    :param url: address of the page to download
    :return: the root element returned by ``etree.HTML``
    """
    # NOTE(review): the original header value was lost from this listing.
    # The docstring only says the headers imitate a browser, so a generic
    # User-Agent is used here -- replace it with the intended value.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/120.0 Safari/537.36'
        ),
    }
    response = requests.get(url, headers=headers)
    # Decode the obfuscated page content before parsing it.
    page_content = base64_decode(response.text)
    # lxml exposes the HTML parser entry point as ``etree.HTML`` (upper case).
    return etree.HTML(page_content)
def parse_div_list(div_list):
    """Parse one page of listings.

    The extraction logic is currently disabled: it is kept below only as
    commented-out reference code, so calling this function does nothing and
    returns ``None``.

    :param div_list: list of element objects, one per housing listing
    """
    # NOTE(review): the f-string placeholders in the two lines building
    # ``house_scale`` / ``house_square`` appear to have been stripped from
    # this listing; restore them before re-enabling the code.
    #
    # for div in div_list:
    #     title = div.xpath('.//div[@class="zu-info"]/h3/a/b/text()')[0]
    #     numbers = div.xpath('.//p[@class="details-item tag"]/b/text()')
    #     # print(title)
    #     # print(numbers)
    #     house_scale = f'室廳'
    #     house_square = f'平公尺'
    #     print(house_scale, house_square)
    #     price = div.xpath('.//div[@class="zu-side"]/p/strong/b/text()')[0]
    #     # print(price)
    #     house_floor = div.xpath('.//p[@class="details-item tag"]/text()')[-3]
    #     # print(house_floor)
    #     item = {}
    #     item['house_title'] = title
    #     item['house_scale'] = house_scale
    #     item['house_square'] = house_square
    #     item['house_price'] = price
    #     item['house_floor'] = house_floor.strip()
    #     print(item)
def main():
    """Crawl listing pages in order until a page contains no listings.

    Each page is fetched with ``get_xpath``, its listing ``div`` elements are
    handed to ``parse_div_list``, and the loop stops at the first page whose
    XPath query returns nothing.
    """
    # NOTE(review): this URL template has no scheme or host as written; it
    # was probably truncated in this listing -- confirm the full base URL.
    base_url = 'fangyuan/p%s/'
    # Pagination: start at page 1 and advance until an empty page is hit.
    i = 1
    while True:
        tree = get_xpath(base_url % i)
        div_list = tree.xpath('//div[@class="zu-itemmod"]')
        # print(div_list)
        if not div_list:
            break
        parse_div_list(div_list)
        i += 1
        # NOTE(review): 110-second pause between pages, as written in the
        # listing -- confirm this is the intended delay.
        time.sleep(110)
# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main()
最帥爬蟲 破解有道翻譯CSS加密檔案
基礎路徑 基於xhr檔案判斷這是ajax請求方式 翻譯單詞 param kw 翻譯的單詞內容 return str base url translate o?smartresult dict smartresult rule ts str int time.time 1000 salt ts str...
linux下壓縮解壓 最簡最帥教程
tar 解壓:tar xvf filename.tar;壓縮:tar cvf filename.tar dirname。gz 解壓:gunzip filename.gz;壓縮:gzip filename。tar.gz 解壓:tar zxvf filename.tar.gz;壓縮:tar zcvf fi...
爬蟲 動態字型 破解
爬取的 woff 字型動態變化,且字型座標會隨機偏移一點點(如下圖 1.woff 和 2.woff),無法形成統一的 unicode 與字型座標矩陣的對映,只能求兩矩陣的相似度,這裡使用距離相似度(參考機器學習 KNN 分類演算法的概念)。放大後兩者是有區別的。解決辦法:以 2.woff 為標準,人為設定基礎矩陣字型對映,然...