import requests
# bs4 exports `BeautifulSoup`; the original `from bs4 import beautifulsoup`
# raises ImportError. Alias it so every existing call site keeps working.
from bs4 import BeautifulSoup as beautifulsoup
import os
import time
import random
import useragent  # project-local module with a pool of User-Agent strings

interval = 3  # politeness delay (seconds) between two image downloads
firstdir = 'd:/netbian'  # root directory for all downloaded wallpapers
# NOTE(review): the site root URL was censored out of the scraped source;
# `index` is used throughout as `index + href` — confirm the real host.
index = 'http://www.netbian.com'
classificationdict = {}  # category name -> {'url': ..., 'path': ...}
# Fetch a page and return the elements matching a CSS selector.
def screen(url, select):
    """GET `url` with a random User-Agent and return ``soup.select(select)``.

    The site serves GBK-encoded pages, so the response encoding is forced
    before parsing with the lxml backend.
    """
    # NOTE(review): the original `headers =` line was truncated in extraction;
    # it presumably drew a random User-Agent from the project-local
    # `useragent` module — confirm the attribute name. Falls back to a
    # static UA so the crawler still runs if the attribute is absent.
    ua_pool = getattr(useragent, 'agents', None) or [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    ]
    headers = {'User-Agent': random.choice(ua_pool)}
    response = requests.get(url=url, headers=headers, timeout=30)
    response.encoding = 'gbk'  # site pages are GBK, not UTF-8
    soup = beautifulsoup(response.text, 'lxml')
    return soup.select(select)
# NOTE(review): extraction mangled this region. The surviving body
# (filename de-duplication + chunked write) is the tail of the `download`
# helper that `handleimgs` calls as download(src, name, path); its header
# and the response fetch are reconstructed here — confirm against the
# original article.
def download(src, name, path):
    """Download image `src` into directory `path` as ``<name>.jpg``.

    If the target filename already exists, a random digit suffix is
    appended until a free name is found (original behavior preserved).
    """
    response = requests.get(src, timeout=30)
    path = path + '/' + name + '.jpg'
    while (os.path.exists(path)):  # avoid clobbering an existing file
        path = path.split(".")[0] + str(random.randint(2, 17)) + '.' + path.split(".")[1]
    with open(path, 'wb') as pic:
        # stream to disk in small chunks instead of buffering the whole image
        for chunk in response.iter_content(128):
            pic.write(chunk)


# Get the last page number of a category listing.
# NOTE(review): `select_classification` calls screenpage(url, select), but
# that helper was lost in extraction; this minimal reconstruction returns
# the text node that follows the matched "..." span — confirm.
def screenpage(url, select):
    """Return the text of the node following the first match of `select`."""
    return screen(url, select)[0].next_sibling.text
# Resolve each list-page link down to the 1920x1080 image and download it.
def handleimgs(links, path):
    """Follow every thumbnail anchor in `links`, locate its full-size
    image through two page hops, and save it into directory `path`,
    sleeping `interval` seconds between downloads.
    """
    for link in links:
        href = link.get('href')
        if not href:  # skip ads / empty anchors (original: href == '')
            continue
        # First hop: the wallpaper detail page.
        # NOTE(review): the condition of this branch was lost in extraction;
        # absolute URLs are used as-is, relative ones are joined onto the
        # site root — confirm against the original article.
        if href.startswith('http'):
            url = href
        else:
            url = index + href
        # Anchor pointing at the 1920x1080 resolution download page.
        select = 'div#main div.endpage div.pic div.pic-down a'
        link = screen(url, select)
        if not link:  # original compared against an empty list
            print(url + ' 無此,爬取失敗')
            continue
        href = link[0].get('href')
        # Second hop: the page that embeds the actual image.
        url = index + href
        select = 'div#main table a img'
        link = screen(url, select)
        if not link:
            print(url + " 該需要登入才能爬取,爬取失敗")
            continue
        # Strip characters that are illegal in Windows filenames
        # (one translate pass instead of nine chained .replace calls).
        name = link[0].get('alt').translate(
            {ord(ch): None for ch in '\t|:\\/*?"<>'})
        src = link[0].get('src')
        if requests.get(src).status_code == 404:
            # NOTE(review): the arguments of these two prints were lost in
            # extraction; the messages below are reconstructed.
            print(src + ' 404,爬取失敗')
            continue
        print('正在爬取:' + name)
        download(src, name, path)
        time.sleep(interval)
# Crawl every list page of one category and download its wallpapers.
def select_classification(choice):
    """Walk all pages of the category `choice` and hand each page's
    thumbnail links to handleimgs().
    """
    banner = '---------------------------'
    print(banner)
    print('--------------' + choice + '-------------')
    print(banner)
    entry = classificationdict[choice]
    secondurl = entry['url']
    seconddir = entry['path']
    if not os.path.exists(seconddir):
        os.mkdir(seconddir)  # per-category output directory
    # The span.slh ("...") element precedes the last page number.
    lastpagenum = int(screenpage(secondurl, '#main > div.page > span.slh'))
    for page in range(1, lastpagenum + 1):
        # Page 1 is index.htm; later pages are index_<n>.htm.
        url = secondurl if page == 1 else secondurl + 'index_%d.htm' % page
        print('--------------' + choice + ': ' + str(page) + '-------------')
        links = screen(url, 'div#main div.list ul li a')
        handleimgs(links, seconddir)
# Simple interactive menu: list the categories and crawl the chosen one.
def ui():
    """Prompt for a category name; '全部' crawls every category, an
    unknown name prints an error and re-prompts recursively.
    """
    print('--------------netbian-------------')
    print('全部', end=' ')
    for name in classificationdict:
        print(name, end=' ')
    print()
    choice = input('請輸入分類名:')
    if choice == '全部':
        for name in classificationdict:
            select_classification(name)
        return
    if choice in classificationdict:
        select_classification(choice)
        return
    print("輸入錯誤,請重新輸入!")
    print('----')
    ui()  # retry until a valid category is entered
# Discover the category sub-pages and record them in classificationdict.
def init_classification():
    """Scrape the site header menu and fill `classificationdict` with
    ``{category name: {'url': absolute url, 'path': local directory}}``.
    """
    # #header       -> element with id="header"
    # div.head      -> <div class="head">
    # li:nth-child(1) -> first child category under its parent
    select = '#header > div.head > ul > li:nth-child(1) > div > a'
    classifications = screen(index, select)
    for c in classifications:
        href = c.get('href')  # relative address of the category page
        text = c.string       # category display name
        if text == '4k桌布':  # permission-restricted category, skip it
            continue
        seconddir = firstdir + '/' + text  # per-category directory
        url = index + href                 # absolute category URL
        # NOTE(review): the dict literal was lost in extraction; the keys
        # 'url' and 'path' are reconstructed from the reads in
        # select_classification — confirm against the original article.
        classificationdict[text] = {'url': url, 'path': seconddir}
# Program entry point.
def main():
    """Create the root directory if needed, discover the categories,
    then start the interactive menu.
    """
    root_missing = not os.path.exists(firstdir)
    if root_missing:
        os.mkdir(firstdir)  # root download directory
    init_classification()
    ui()


if __name__ == '__main__':
    main()
python爬取彼岸桌面桌布
1.目標站點分析 進入 經過f12分析,url都儲存在 2.選擇爬取工具,這裡網頁比較簡單,就採用requests庫和正則.import requests import osimport reimport time 主頁 main urls headers ifnot os.path.exists ...
Python 爬取高畫質桌面桌布
今天寫了一個指令碼用來爬取zol桌面桌布 的高畫質 如下 coding utf 8 import urllib import re import time class spider baseurl pic index 0 itemgrouppic def init self,page count t...
框架 MFC 修改桌面 桌布
功能:使用 iactivedesktop 介面獲取、設定和重新整理桌面背景(桌布)。開發環境 vc vs2005 vs2008 vs2010 vs2012 vs2013 新建專案 mfc應用程式 基於對話方塊 #include <shlobj.h> #include <shlwapi.h> 包含了對檔案判別的a...