返回載入出來的資料
def gethtmltext(url):
    """Skeleton placeholder for the page downloader.

    Args:
        url: Address of the page to download (unused by this stub).

    Returns:
        Always an empty string in this skeleton version.
    """
    return ""
解析列表
def fillunivlist(ulist, html):
    """Skeleton placeholder for the HTML parser.

    Args:
        ulist: List that the parsed rows are meant to be collected in.
        html: Page text to parse.

    Returns:
        None. This skeleton version does nothing and leaves ``ulist`` untouched.
    """
    pass
列印資料
def printunivlist(ulist, num):
    """Skeleton placeholder for the table printer.

    Args:
        ulist: Parsed rows to print.
        num: Number of rows to print.

    Returns:
        None. This skeleton version prints nothing.
    """
    pass
進行呼叫函式,返回結果
def main():
    """Run the pipeline: download the page, parse it, print the first 20 rows."""
    # Rows collected by fillunivlist; the initial value was missing in the
    # listing (a bare "unifo =" is a syntax error), so start from an empty list.
    uinfo = []
    # NOTE(review): the target URL is empty in this listing -- fill in the
    # address of the ranking page before running.
    url = ''
    # Download the HTML.
    html = gethtmltext(url)
    # Parse the HTML into the list of rows.
    fillunivlist(uinfo, html)
    printunivlist(uinfo, 20)


main()
gethtmltext 函式程式碼如下:
def gethtmltext(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        (invalid URL, connection error, timeout, or a 4xx/5xx status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers receive "" when nothing could be downloaded.
        # Only request failures are swallowed, so Ctrl-C and programming errors
        # still surface.
        return ""
    # requests falls back to ISO-8859-1 for text responses that declare no
    # charset, which garbles non-Latin pages; use the detected encoding then.
    if r.encoding is None or r.encoding.lower() == "iso-8859-1":
        r.encoding = r.apparent_encoding
    return r.text
fillunivlist 程式碼如下:過濾不是 tr 的標籤,找到 tr 下的 td 下的三行
def fillunivlist(ulist, html, columns=(0, 1, 3)):
    """Parse the table body in ``html`` and append one entry per row to ``ulist``.

    Args:
        ulist: List to extend in place; each appended entry is a list holding
            the text of the selected cells of one table row.
        html: Page text to parse.
        columns: Indices of the ``<td>`` cells to keep from every row, in
            output order.
            NOTE(review): the default assumes rank, school name and score sit
            in cells 0, 1 and 3 -- the page layout is not visible here, so
            confirm against the target page.

    Returns:
        None. ``ulist`` is modified in place; nothing is added when the page
        has no ``<tbody>``.
    """
    soup = bs4.BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): nothing to parse.
        return
    cells_needed = max(columns) + 1 if columns else 0
    for tr in tbody.children:
        # Children also include text nodes (whitespace between tags); keep
        # only real tags.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < cells_needed:
            # Row without enough cells -- skip instead of raising.
            continue
        ulist.append([tds[i].get_text(strip=True) for i in columns])
printunivlist 程式碼如下:遍歷前二十列資料;漢字是另一種編碼,因此用全形空白 chr(12288) 作為填充字元來對齊
def printunivlist(ulist, num):
    """Print a header line followed by up to ``num`` rows of ``ulist``.

    Args:
        ulist: Sequence of rows; items 0, 1 and 2 of each row are printed as
            rank, school and score.
        num: Maximum number of rows to print. Fewer rows are printed when
            ``ulist`` is shorter; nothing but the header when ``num`` <= 0.

    Returns:
        None. Output goes to standard output.
    """
    # Field layout: position, fill character, alignment and width.
    # Fields 0 and 2 are centred in 10 columns; field 1 is centred in 10
    # columns using argument 3 as the fill character.
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    # chr(12288) is the full-width (ideographic) space, used to pad the
    # school column.
    fill = chr(12288)
    print(tplt.format("排名", "學校", "總分", fill))
    # Slicing avoids an IndexError when fewer than ``num`` rows are available.
    for u in ulist[:max(num, 0)]:
        print(tplt.format(u[0], u[1], u[2], fill))
整體程式碼如下:
import bs4
import requests
from bs4 import beautifulsoup
def gethtmltext(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        (invalid URL, connection error, timeout, or a 4xx/5xx status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers receive "" when nothing could be downloaded.
        # Only request failures are swallowed, so Ctrl-C and programming errors
        # still surface.
        return ""
    # requests falls back to ISO-8859-1 for text responses that declare no
    # charset, which garbles non-Latin pages; use the detected encoding then.
    if r.encoding is None or r.encoding.lower() == "iso-8859-1":
        r.encoding = r.apparent_encoding
    return r.text
def fillunivlist(ulist, html, columns=(0, 1, 3)):
    """Parse the table body in ``html`` and append one entry per row to ``ulist``.

    Args:
        ulist: List to extend in place; each appended entry is a list holding
            the text of the selected cells of one table row.
        html: Page text to parse.
        columns: Indices of the ``<td>`` cells to keep from every row, in
            output order.
            NOTE(review): the default assumes rank, school name and score sit
            in cells 0, 1 and 3 -- the page layout is not visible here, so
            confirm against the target page.

    Returns:
        None. ``ulist`` is modified in place; nothing is added when the page
        has no ``<tbody>``.
    """
    soup = bs4.BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): nothing to parse.
        return
    cells_needed = max(columns) + 1 if columns else 0
    for tr in tbody.children:
        # Children also include text nodes (whitespace between tags); keep
        # only real tags.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < cells_needed:
            # Row without enough cells -- skip instead of raising.
            continue
        ulist.append([tds[i].get_text(strip=True) for i in columns])
def printunivlist(ulist, num):
    """Print a header line followed by up to ``num`` rows of ``ulist``.

    Args:
        ulist: Sequence of rows; items 0, 1 and 2 of each row are printed as
            rank, school and score.
        num: Maximum number of rows to print. Fewer rows are printed when
            ``ulist`` is shorter; nothing but the header when ``num`` <= 0.

    Returns:
        None. Output goes to standard output.
    """
    # Field layout: position, fill character, alignment and width.
    # Fields 0 and 2 are centred in 10 columns; field 1 is centred in 10
    # columns using argument 3 as the fill character.
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    # chr(12288) is the full-width (ideographic) space, used to pad the
    # school column.
    fill = chr(12288)
    print(tplt.format("排名", "學校", "總分", fill))
    # Slicing avoids an IndexError when fewer than ``num`` rows are available.
    for u in ulist[:max(num, 0)]:
        print(tplt.format(u[0], u[1], u[2], fill))
def main():
    """Run the pipeline: download the page, parse it, print the first 20 rows."""
    # Rows collected by fillunivlist; the initial value was missing in the
    # listing (a bare "unifo =" is a syntax error), so start from an empty list.
    uinfo = []
    # NOTE(review): the target URL is empty in this listing -- fill in the
    # address of the ranking page before running.
    url = ''
    # Download the HTML.
    html = gethtmltext(url)
    # Parse the HTML into the list of rows.
    fillunivlist(uinfo, html)
    printunivlist(uinfo, 20)


main()
大學排名資訊爬取
import requests from bs4 import beautifulsoup import bs4 defgethtmltext url try r requests.get url r.raise for status return r.text except return def ...
python爬取最好大學排名
coding gbk import requests from bs4 import beautifulsoup import bs4 def gethtmltext url 將url資訊爬取,並將html頁面返回給其他程式 try r requests.get url,timeout 30 30m...
python爬取大學排名,電影的排名與評分
這裡爬取的都是靜態的資料,也沒有涉及到跨頁爬取,也沒有用到正規表示式,這就是一個簡單的爬取 爬取最好大學網的排名 coding gbk import requests from bs4 import beautifulsoup import bs4 defgethtmltext url try r ...