已開源到 GitHub(專案位址)。專案基於 Spring Boot,在 IDEA 中匯入以下依賴:
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.10.2</version>
</dependency>
資料放入redis中,引入redis
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
開啟定時任務
@enablescheduling
//開啟定時任務
public
class
public
static
void
main(string args) }
實體類
public
class roominfo58
@scheduled(fixedrate = 10000)
public void tongcheng()
//如果id存在 continue
roominfo58.setid(id);
roominfo58.setcomefrom("58同城");
roominfo58.setcreatetime(new date());
roominfo58.settitle("" + next.select("h2").text());
roominfo58.setstyle("" + next.select("p[class=baseinfo]").get(0).text());
roominfo58.setposition("" + next.select("p[class=baseinfo]").get(1).text());
roominfo58.setname("" + next.select("span[class=jjrname-outer]").text());
roominfo58.setprice(next.select("p[class=sum]").text());
roominfo58.setarea(next.select("p[class=unit]").text());
string url = ones.replace("11111", id);
roominfo58.seturl(url);
//獲取手機號
document doc = jsoup.connect(url).get();
roominfo58.setphone(doc.select("p[class=phone-num]").text());
//todo 如果手機號是null 放位址
jediscluster.lpush("roominfo", roominfo58.tostring());
}// jediscluster.ltrim("roominfo",0,10000);
} catch (exception e)
}}
房天下
/**
* created by daitian on 2017/6/1.
*/@component
public class taskfangtest
//如果id存在 continue
roominfo58.setid(id);
roominfo58.setcomefrom("房天下");
roominfo58.setcreatetime(new date());
roominfo58.settitle("" + next.select("p[class=title]").text());
roominfo58.setstyle("" + next.select("p[class=mt12]").text() + next.select("div[class=area alignr]").select("p").first().text().replaceall("�o", "m2"));
roominfo58.setposition("" + next.select("p[class=mt10]").text());
roominfo58.setprice(next.select("p[class=mt5 alignr]").text());
roominfo58.setarea(next.select("p[class=danjia alignr mt5]").text().replaceall("�o", "m2"));
string url = fangs + id;
roominfo58.seturl(url);
document doc = jsoup.connect(url).get();
elements nexts = doc.getelementsbyclass("booktel");
roominfo58.setphone(nexts.select("strong").text());
roominfo58.setname("" + nexts.select("a").text().replaceall("業主", ""));
jediscluster.lpush("roominfo", roominfo58.tostring());
}} catch (ioexception e)
}}
獲取資料 restcontroller
public string tasktest()
Python爬取鏈家二手房資訊
2 資料庫表結構 使用物件導向的方式,搭建專案框架 import requests from bs4 import beautifulsoup import pymysql class lianjiaspider mydb pymysql.connect localhost root 123456 ...
python爬蟲爬取鏈家二手房資訊
問題一 鏈家 也有反爬蟲策略和robots限制,robots限制忽略 不然沒法爬 另外頻繁爬取會直接導致被ban,需要隔天才會解禁。防止被ban的方法有多種,1.禁止cookie 2.設定header 3.加大爬取間隔 4.使用代理 我只用了前三種方法,具體可以在settings.py 和middle...
python爬取鏈家網二手房資訊
朋友請我幫忙做的期末作業,我自己不是愛說話,直接分享 可以直接執行的,其中用的是 python 3.6版本,導包的時候直接在cmd裡面用的pip install 包名,其中有的包安裝失敗,提示pip需要升級,可以看一下這個鏈結 下面是 在這裡插入 片 usr bin env python3 cod...