import requests
from lxml import etree
import pymysql
# Request headers for the crawler.
# NOTE(review): the original post stripped the dict contents; a User-Agent is
# normally required so the site does not reject the request. Fill in the real
# headers before running — TODO confirm against the target site.
headers = {
    'User-Agent': 'Mozilla/5.0',
}

# Connect to MySQL
db = pymysql.connect(host='127.0.0.1', user='root', password='123456', database='python')
# Create a cursor
cursor = db.cursor()
try:
    # Crawl the first 3 listing pages (10 postings per page).
    for page in range(3):
        # Listing-page URL. NOTE(review): the blog scrape lost the base URL —
        # restore it before running; the page offset is appended below.
        url = ''
        offset = page * 10  # don't rebind the loop variable like the original did
        print(offset)
        url = url + str(offset)
        print(url)
        # Fetch the listing page
        response = requests.get(url, headers=headers)
        # Parse into an element tree (lxml's API is etree.HTML, not etree.html)
        html_obj = etree.HTML(response.text)
        # Collect the href of every job row in the listing table
        html_list = html_obj.xpath('//table[@class="tablelist"]/tr/td[1]/a/@href')
        print(html_list)
        # Visit each job's detail page
        for href in html_list:
            # Build the detail-page URL from the relative href
            # NOTE(review): base URL was also stripped by the blog scrape.
            url_b = '' + href
            response = requests.get(url_b, headers=headers)
            html_obj_b = etree.HTML(response.content)
            # Job title
            html_title = html_obj_b.xpath('//tr[@class="h"]/td')[0].text
            print(html_title)
            # Work location
            html_place = html_obj_b.xpath('//tr[@class="c bottomline"]/td[1]/text()')[0]
            print(html_place)
            # Job duties (list items joined into one comma-separated string)
            html_duty = ','.join(html_obj_b.xpath('//ul[@class="squareli"]/li/text()'))
            print(html_duty)
            # Job requirements
            html_req = ','.join(html_obj_b.xpath('//table[@class="tablelist textl"]/tr[4]/td/ul/li/text()'))
            print(html_req)
            # Parameterized INSERT: never str.format scraped text into SQL —
            # a quote in any field would break the statement (SQL injection).
            data = (html_title, html_place, html_duty, html_req)
            sql = 'insert into zhaopin values (null, %s, %s, %s, %s)'
            cursor.execute(sql, data)
            db.commit()
finally:
    # Release the cursor and connection even if a request/parse/insert fails
    cursor.close()
    db.close()
Scrapy實踐 爬取騰訊社會招聘資訊(文字爬取)
注:爬取後的資訊將以 json 格式儲存,並將檔案命名為 recruit.json,可用 Notepad 開啟。# coding: utf-8  import scrapy  class TxhrItem(scrapy.Item):  # 職位名稱 positionname = scrapy.Field()  # 職位類別 positio...
python 爬取智聯招聘
一個爬取智聯的小爬蟲。python 版本:python3.7;依賴模組:selenium、pyquery。廢話少說,上:from selenium import webdriver  from selenium.webdriver.chrome.options import Options  from...
爬取智聯招聘資訊並儲存
# coding: utf-8  import urllib.request  import os, time  from bs4 import BeautifulSoup  爬取智聯招聘的招聘種類,然後再爬取某個種類的招聘資訊,並將招聘資訊以種類名稱命名儲存。hds、position 存放職位名稱,href 存...