獲取城市名稱存在一個沒有解決的問題:用csv讀寫每個詞都被逗號分開,實在沒有辦法了,用的普通的讀寫檔案
第一個確實可以,但是我不能很好的把第二個中的正規表示式用進去,所以只能這樣了
#獲取城市名稱1
import re
import csv
import time
import requests
from bs4 import beautifulsoup
from selenium import webdriver
from selenium.webdriver import actionchains
from selenium.webdriver.support.select import select
def get_url():
    """Return the URL of the page to scrape.

    NOTE(review): the URL literal was lost when this post was scraped;
    restore the city-list page address before running.
    """
    target_url = ''  #
    return target_url
def get_urltext(url):
    """Fetch *url* and return the response body as text.

    Args:
        url: page address to download.

    Returns:
        str | None: the page HTML, or None if the request failed.
    """
    # NOTE(review): the header dict literal was eaten by the blog extraction
    # (the line read only `kv =`); a browser-like User-Agent is the usual
    # value in this kind of scraper -- confirm against the original post.
    kv = {'user-agent': 'Mozilla/5.0'}
    try:
        # timeout added so a dead server cannot hang the script forever
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
        return r.text
    except requests.RequestException:
        # narrowed from a bare `except:` so Ctrl-C / programming errors
        # are not silently swallowed
        print('error 1')
        return None
def get_parsetext(parse_url):
    """Extract the text of every <a> tag from the page HTML.

    Args:
        parse_url: HTML source string (the body returned by get_urltext).

    Returns:
        list[str] | None: link texts (the city names), or None on failure.
    """
    # Local import: the file-level import line was lowercased by the blog
    # extraction (`from bs4 import beautifulsoup`) and would fail.
    from bs4 import BeautifulSoup
    try:
        city_list = []
        soup = BeautifulSoup(parse_url, 'html.parser')
        for elem in soup.find_all('a'):
            # NOTE(review): the loop body was lost in extraction; collecting
            # each link's text is the evident intent here -- confirm against
            # the original post.
            city_list.append(elem.get_text())
        return city_list
    except Exception:
        # narrowed from a bare `except:`
        print("error 2")
        return None
# --- Driver for version 1: fetch the page, parse it, dump names to CSV. ---
url = get_url()
parse_text = get_urltext(url)
citylist = get_parsetext(parse_text)
# newline='' (not '\n') is what the csv module documents for open();
# the writer then controls line endings itself.
with open('cityname.csv', 'w', newline='', encoding='utf8') as fp:
    writer = csv.writer(fp)  # renamed: the original shadowed the file object `f`
    for row in citylist:
        # Wrap each name in a list: writerow() on a bare string treats it as
        # a sequence of 1-character fields, producing the "every word split
        # by commas" problem described at the top of this post.
        writer.writerow([row])
#獲取城市名稱2
def get_urltext(url):
    """Fetch *url* and return the response body as text (version 2).

    Redefines the version-1 helper of the same name with identical behavior.

    Args:
        url: page address to download.

    Returns:
        str | None: the page HTML, or None if the request failed.
    """
    # NOTE(review): the header dict literal was eaten by the blog extraction
    # (the line read only `kv =`); a browser-like User-Agent is the usual
    # value in this kind of scraper -- confirm against the original post.
    kv = {'user-agent': 'Mozilla/5.0'}
    try:
        # timeout added so a dead server cannot hang the script forever
        r = requests.get(url, headers=kv, timeout=10)
        r.raise_for_status()
        return r.text
    except requests.RequestException:
        # narrowed from a bare `except:` so Ctrl-C / programming errors
        # are not silently swallowed
        print('error 1')
        return None
# Version 2: split the page into province/city names with regular expressions.
# NOTE(review): this block was mangled by the blog-platform extraction --
# every HTML tag inside the re.findall() pattern strings was stripped
# (leaving bare '(.*?)' patterns), the list literal on `lists=` was emptied,
# and the bodies of `for j in city:` and the trailing `else:` were dropped.
# The code below is therefore NOT runnable as-is; it is kept verbatim as a
# record of the original structure.
def get_lists(parse_url):
try:
province= ''
city=''
# NOTE(review): original literal lost -- presumably `lists = []`.
lists=
# 34 iterations: evidently one per Chinese province-level region -- confirm.
for num in range(0, 34):
# The last region apparently needed a different pattern (tag stripped).
if (num == 33):
text = re.findall('',parse_url)
else:
text = re.findall('(.*?)',parse_url)
for i in text:
if (num == 0): # Beijing
# NOTE(review): pattern string was split across lines by extraction;
# the original regex (with its HTML tags) is unrecoverable from here.
province = re.findall('(.*?)
', i)
else:
province = re.findall('(.*?)', i)
city = re.findall('(.*?)', i)
for i in province:
if (len(city) != 0):
# NOTE(review): body dropped by extraction -- presumably appended
# "province + city" strings to `lists`.
for j in city:
# NOTE(review): body dropped -- presumably appended the bare province.
else:
return lists
except:
print("error 2")
return
# --- Driver for version 2: fetch the page and dump names to a text file. ---
# NOTE(review): the URL literal was lost in extraction; restore it.
url = ''
parse_text = get_urltext(url)
citylist = get_lists(parse_text)
# Context manager replaces the manual open()/close() pair so the file is
# closed even if a write raises.
with open('cityname.txt', 'w', encoding='utf-8') as f:
    for elem in citylist:
        f.write(elem)
        f.write('\r')  # original used a bare CR as the record separator
Python爬蟲之pyquery獲取不到元素
今天在做爬蟲專案的時候出現了一個錯誤,通過pyquery獲取不到元素。from pyquery import pyquery as pq html doc pq html element doc warp ul li first child print element 執行結果 none但是pyqu...
Python之爬蟲 中國大學排名
usr bin env python coding utf 8 import bs4 import requests from bs4 import beautifulsoup 通過傳入 資訊建立乙個獲取網頁文字的函式 def gethtmltext url 判斷獲取網頁文字過程中是否有錯誤 try...
python爬蟲之獲取豆瓣電影資訊
本質就是 發起請求 獲取響應內容 解析內容 儲存資料首先,需要做的就是匯入模組pip install requests pip install lxml coding utf 8 import requests from lxml import etree 選取網頁並做解析 這裡以 titanic ...