Scraping job postings from 前程無憂

2021-10-24 11:49:48

Recently our company's HR department needed some job-posting data scraped, so I put together a script in my spare time. The code is below.

#!/usr/bin/python
from bs4 import BeautifulSoup
import csv
import time
import random
import requests
import sys
import operator
from urllib.parse import quote


class Main:
    def index(self, i):
        # Job keyword to search for
        work_name = "行政"
        # The base search URL was elided in the original post; prepend it here
        link = "" + quote(work_name) + "&curpage=" + str(i)
        # The User-Agent strings were also elided in the original post
        user_agent_list = []
        headers = {"User-Agent": random.choice(user_agent_list)}
        try:
            response = requests.get(link, headers=headers)
            response.encoding = "utf-8"
            html = response.text
            soup = BeautifulSoup(html, "html.parser")
            sojob_result = soup.find("div", class_="sojob-result")
            list_r = sojob_result.find_all("li")
        except BaseException:
            print("Sorry, please open the Liepin site manually and complete the verification step")
            sys.exit()
        if len(list_r) == 0:
            print("Congratulations, this scraping run is complete")
            sys.exit()
        # Per-job fields
        in_data = []
        out_data = []
        for x in range(0, len(list_r)):
            address = ""
            work = list_r[x].find("a").get_text().strip()
            edu = list_r[x].find("span", class_="edu").get_text().strip()
            year = list_r[x].find("span", class_="edu").find_next_sibling("span").get_text().strip()
            money = list_r[x].find("span", class_="text-warning").get_text().strip()
            company = list_r[x].find("p", class_="company-name").get_text().strip()
            data = [work, edu, money, company, year, address]
            in_data = data
            # Read back the last record we saved; if it matches the current one,
            # the site is serving repeated pages and the crawl is done
            with open("data/liepin_data.txt", "r+", encoding="utf-8") as f:
                out_data = f.read()
            in_data = str(in_data)
            if operator.eq(in_data, out_data):
                with open("data/liepin_data.txt", "w+", encoding="utf-8") as f:
                    f.write(str([]))
                print("Congratulations, this scraping run is complete")
                sys.exit()
            self.write(work_name, data)
            print(data)
            with open("data/liepin_data.txt", "w+", encoding="utf-8") as f:
                f.write(str(in_data))

    def write(self, work_name, data):
        # Append one row to a dated CSV named after the search keyword
        with open("data/lieping_" + time.strftime("%y-%m-%d", time.localtime())
                  + "_" + work_name + ".csv", "a+", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(data)
        print("Row written successfully")


# Reset the dedup file, then crawl up to 200 result pages,
# pausing 7 to 16 seconds between requests to avoid being blocked
with open("data/liepin_data.txt", "w+", encoding="utf-8") as f:
    f.write(str([]))
for i in range(0, 200):
    Main().index(i)
    sys.stdout.flush()
    time.sleep(random.randint(7, 16))
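Two pieces of the script were stripped by the blog platform: the base search URL assigned to link, and the contents of user_agent_list. The URL has to stay blank here, but as a minimal sketch of what the user-agent rotation likely looked like (the exact strings below are placeholders I chose, not the author's), any handful of real desktop browser User-Agent values will do:

# Illustrative placeholders; the original list was elided
user_agent_list = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/15.0 Safari/605.1.15",
]
headers = {"User-Agent": random.choice(user_agent_list)}

Rotating the User-Agent, together with the random 7-16 second pause between pages, is the script's only anti-blocking measure; when Liepin still returns a CAPTCHA page, the parse inside the try block fails and the except branch asks you to complete the verification by hand.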
