# User agent (the rest of this comment was redacted in the source text).
# NOTE(review): the HTML markup inside the regex literals below appears to have
# been stripped from the source text; only the surviving fragments are kept.
# Restore the real patterns before running the crawler.

# Outline regexes. getoutline() uses index 0 for the novel name and index 1
# for the chapter links.
patternoutline = ("", ".*?\n")

# Chapter page regexes. chapterdeal() uses index 0 for the chapter title and
# index 1 for the chapter body.
patternbodyset = ("", '(.*?)\n')

# Worker threads; run() joins every thread found in this list.
tklist = []
# 2064 (presumably the number of chapter links found -- TODO confirm)
# Fetch the outline (table-of-contents) links
def getoutline(url, patternoutline):
    """Download the outline page and extract the novel name and chapter links.

    Args:
        url: Address of the outline page. It is also used as the prefix of
            every chapter link that is returned.
        patternoutline: Pair of regexes; index 0 matches the novel name and
            index 1 matches the chapter links.

    Returns:
        A tuple ``(novelname, novellink)``. ``novellink`` is an iterator over
        chapter URLs, each built as ``url`` + the last ``/``-separated
        segment of a matched link.

    Raises:
        IndexError: If the novel-name pattern does not match the page.
    """
    # The re flag constants are upper-case; re.s / re.m / re.i do not exist.
    flags = re.S | re.M | re.I
    # Send the request with one randomly chosen entry of the module-level
    # ``headers`` sequence.
    res = requests.get(url, headers=random.choice(headers))
    novelname = re.findall(patternoutline[0], res.text, flags)[0]
    # Build the link list once and reuse it for both the count and the result.
    links = [
        url + link.rsplit("/", 1)[-1]
        for link in re.findall(patternoutline[1], res.text, flags)
    ]
    print(len(links))
    return novelname, iter(links)
# Initialise the database
def initdb():
    """Connect to MongoDB at localhost:27017 and return the 'biquge' database.

    Returns:
        The ``client['biquge']`` database handle. Callers index it by novel
        name to obtain a collection.
    """
    import pymongo

    # The client class is spelled MongoClient; pymongo.mongoclient does not exist.
    client = pymongo.MongoClient("localhost:27017")
    return client['biquge']
# Chapter processing
def chapterdeal(patternbodyset, html, novelname, cursor, url):
    """Extract one chapter from a downloaded page and store it.

    Args:
        patternbodyset: Pair of regexes; index 0 matches the chapter title
            and index 1 matches the chapter body.
        html: Text of the chapter page.
        novelname: Name of the collection the chapter is written to.
        cursor: Database handle; ``cursor[novelname]`` must provide
            ``insert_one``.
        url: Address the page was downloaded from.

    Raises:
        IndexError: If either pattern does not match ``html``.
    """
    start = time.time()
    # The re flag constants are upper-case; re.s / re.m / re.i do not exist.
    chapter = re.findall(patternbodyset[0], html, re.S | re.M | re.I)[0]
    body = re.findall(patternbodyset[1], html, re.M)[0]
    # insert_one() requires a document; the original call passed none.
    # NOTE(review): the original document layout was lost from the source
    # text -- these field names are an assumption, confirm against readers.
    cursor[novelname].insert_one({"chapter": chapter, "body": body, "url": url})
    print(chapter + ' 存入成功!', time.time() - start)
print('獲取res用時:', time.time() - start)
tk = threading.thread(target=chapterdeal, args=(patternbodyset, await resp.text(encoding="utf-8"), novlename, cursor, url))
tk.start()
except:
pass
# Main program execution
def run(urls, patternbodyset, novelname, cursor):
    """Download every chapter concurrently, then wait for the worker threads.

    Args:
        urls: Iterable of chapter URLs; it is consumed exactly once.
        patternbodyset: Regex pair forwarded to ``chapterdownload``.
        novelname: Collection name forwarded to ``chapterdownload``.
        cursor: Database handle forwarded to ``chapterdownload``.
    """
    async def _download_all():
        # asyncio.wait() rejects bare coroutines on Python 3.11+ and raises
        # ValueError for an empty collection; gather() handles both cases.
        # return_exceptions=True keeps the old behaviour of not propagating a
        # failure of a single download.
        await asyncio.gather(
            *(chapterdownload(url, patternbodyset, novelname, cursor) for url in urls),
            return_exceptions=True,
        )

    # asyncio.run() creates and closes its own event loop, avoiding the
    # deprecated get_event_loop() call made without a running loop.
    asyncio.run(_download_all())
    # Wait for every thread registered in the module-level tklist.
    for tk in tklist:
        tk.join()
    print("執行結束!")
if __name__ == '__main__':
    import time

    start = time.time()
    cursor = initdb()
    # NOTE(review): ``url`` is not defined in the visible part of the file;
    # it must be set to the outline page address before this point.
    novelname, novellink = getoutline(url, patternoutline)
    # Elapsed time is now - start; the original subtracted in the opposite
    # order and therefore printed negative durations.
    print("執行outline,總用時:{}".format(time.time() - start))
    run(novellink, patternbodyset, novelname, cursor)
    print("執行完畢,總用時:{}".format(time.time() - start))
# Execution result (sample output of the original script follows):
執行outline,總用時:-1.6277616024017334
獲取res用時: 1.2178490161895752
...獲取res用時: 15.794588804244995
第1257章 神話大聖決戰 存入成功! 0.001337289810180664
獲取res用時: 15.234058618545532
第四百三十三章 大坑貨 存入成功! 0.0011713504791259766
執行結束!
執行完畢,總用時:-18.238994121551514
說明:
首先說明:本人電腦使用 i7 處理器,也許是因為處理器世代較舊的關係,效能似乎不太理想。
在下面這段程式碼中:
print('獲取res用時:', time.time() - start)
tk = threading.thread(target=chapterdeal, args=(patternbodyset, await resp.text(encoding="utf-8"), novlename, cursor, url))
tk.start()
ThreadPoolExecutor 多執行緒
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED; from queue import Queue; myqueue = Queue()  # 佇列,用於儲存函式執行結果。多執行緒的問題之一:如何儲存函式執行的結果。def thr...
C# 多執行緒例項
using System; using System.Threading; using System.Text; namespace ControlThread { ... } 第二個執行緒正在執行,請輸入 (S)uspend、(R)esume、(I)nterrupt 或 (E)xit。DateTime.Now.ToStrin...
CLLocationManager在多執行緒下使用
似乎定位結果的回呼只能由主執行緒來呼叫,並且這個物件還必須是在主執行緒中建立的。做過以下實驗:1. 在子執行緒中:self.locationManager = [[[CLLocationManager alloc] init] autorelease]; locationManager.delegate = self; loca...