這段時間公司人事部門需要爬取一些崗位資訊,利用空閒時間寫了個指令碼,程式碼如下。
#!/usr/bin/python
# Standard library
import csv
import operator
import random
import sys
import time
from urllib.parse import quote

# Third party
import requests
from bs4 import BeautifulSoup  # fixed: the class is CamelCase, not "beautifulsoup"
class main:
    """Crawler for Liepin (獵聘) job listings.

    ``index(i)`` downloads search-result page *i* for a hard-coded
    keyword, extracts one row per listing and appends it to a dated CSV
    through ``write``.  The text file ``data/liepin_data.txt`` holds the
    most recently stored row; when a freshly parsed row equals it, the
    site is assumed to be repeating its final page and the process exits.
    """

    def index(self, i):
        """Scrape search-result page ``i``.

        Calls ``sys.exit()`` when the site demands manual verification,
        when the page contains no listings, or when a duplicate row
        signals that every page has been consumed.
        """
        # Imported locally so this block is self-contained even if the
        # file-level bs4 import still has the broken lowercase spelling.
        from bs4 import BeautifulSoup

        work_name = "行政"  # job keyword being searched
        # NOTE(review): the published post redacted the base URL --
        # confirm this is the current Liepin search endpoint before use.
        link = ("https://www.liepin.com/zhaopin/?key="
                + quote(work_name) + "&curpage=" + str(i))
        # NOTE(review): the original User-Agent pool was redacted; fill
        # in real browser strings before a serious crawl.
        user_agent_list = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
        ]
        headers = {"User-Agent": random.choice(user_agent_list)}

        try:
            response = requests.get(link, headers=headers)
            response.encoding = "utf-8"
            soup = BeautifulSoup(response.text, "html.parser")
            # A captcha/verification page has no result container, so
            # find() returns None and the next line raises -- handled below.
            result_div = soup.find("div", class_="sojob-result")
            list_r = result_div.find_all("li")
        except Exception:  # narrowed from BaseException
            print("抱歉,請您手動開啟獵聘**完成驗證操作")
            sys.exit()

        if not list_r:
            print("恭喜你,本次爬取資料任務已完成啦")
            sys.exit()

        for item in list_r:
            address = ""  # the listing markup carries no address field
            work = item.find("a").get_text().strip()
            edu_span = item.find("span", class_="edu")
            edu = edu_span.get_text().strip()
            # Years-of-experience sits in the <span> right after "edu".
            year = edu_span.find_next_sibling("span").get_text().strip()
            money = item.find("span", class_="text-warning").get_text().strip()
            company = item.find("p", class_="company-name").get_text().strip()
            data = [work, edu, money, company, year, address]

            in_data = str(data)
            with open("data/liepin_data.txt", "r+", encoding="utf-8") as f:
                out_data = f.read()
            if in_data == out_data:
                # Row identical to the last stored one: Liepin repeats the
                # final page for out-of-range page numbers, so we are done.
                # Reset the marker file for the next run.
                with open("data/liepin_data.txt", "w+", encoding="utf-8") as f:
                    f.write(str([]))
                print("恭喜你,本次爬取資料任務已完成啦")
                sys.exit()

            self.write(work_name, data)
            print(data)
            # Remember this row for the duplicate check above.
            with open("data/liepin_data.txt", "w+", encoding="utf-8") as f:
                f.write(in_data)

    def write(self, work_name, data):
        """Append ``data`` as one CSV row to today's per-keyword file.

        The file name embeds the date (``%y-%m-%d``) and the keyword, so
        each day/keyword pair accumulates its own CSV.
        """
        # NOTE(review): "lieping" looks like a typo for "liepin"; kept
        # byte-for-byte so existing output files stay compatible.
        path = ("data/lieping_"
                + time.strftime("%y-%m-%d", time.localtime())
                + "_" + work_name + ".csv")
        with open(path, "a+", encoding="utf-8", newline="") as f:
            csv.writer(f).writerow(data)
        print("寫入成功")
# Script entry: reset the duplicate-detection marker file, then crawl up
# to 200 result pages, sleeping a random 7-16 seconds between requests
# to reduce the chance of triggering Liepin's verification page.
import os

os.makedirs("data", exist_ok=True)  # the script writes into data/
with open("data/liepin_data.txt", "w+", encoding="utf-8") as f:
    f.write(str([]))
for i in range(0, 200):
    main().index(i)
    sys.stdout.flush()  # show progress promptly when output is piped
    time.sleep(random.randint(7, 16))
python爬取前程無憂招聘崗位資訊
首先使用requests獲取前程無憂一級網頁 tatal re.compile 共 頁 findall rst 正則獲取總頁數 tatal int tatal 0 獲取二級網頁的url 所有崗位的url 儲存起來 with open sh1.txt a encoding utf 8 as f for...
python爬取智聯招聘資訊
import random import re from time import sleep import requests from tqdm import tqdm import user_agents import csv def get_page(city, keyword, page) 構造請求位址 paras 完整網...
python爬取智聯招聘資訊
分享今天寫的一個爬取智聯招聘資訊的爬蟲,使用了requests和re模組,沒有寫注釋,但是都比較簡單,不是太難,這是爬取的資訊 coding utf 8 import requests import re from itertools import izip from json import du...