根據以前的指令碼改的抓取一天內的百度新問題

2021-09-06 22:18:17 字數 4086 閱讀 5964

沒有註釋,太簡單了,一看就懂哈。

只是寫得比較爛的地方是:沒有直接從結果裡發郵件,而是用多執行緒生成檔案,然後再通過郵件發送這個檔案裡的內容。

ui沒有,直接幹!

主要涉及一個轉碼問題,可以直接搜尋。

#!/usr/bin/python

#coding: utf-8

import

os,sys,time,commands

import

urllib

import

urllib2

import

string

from bs4 import

beautifulsoup

import

threading

import

smtplib

from email.mime.text import

mimetext

# Module-level configuration and keyword loading.
# mail_host / mail_user / mail_pwd hold the SMTP server name and the login
# credentials that send_email() reads; the values shown here are placeholders.
# The file "zhidaokeyword" is then read line by line and the trailing '\n' is
# stripped from each line before the file is closed.
# NOTE(review): line breaks and indentation in this listing were lost during
# extraction, so consecutive statements run into each other.
# NOTE(review): the right-hand side of `keywordlist =` and the statement that
# stores each stripped line are missing from this listing - presumably an
# empty list and an append; confirm against the original script.
mail_host = '

smtp.x.x.com

'mail_user = '

[email protected]

'mail_pwd = '

***x

'keywordlist =

f1= open("

zhidaokeyword

",'r')

for line in

f1: line = line.strip('\n'

)f1.close()

def send_email(content, mailto, get_sub):
    """Send ``content`` as a UTF-8 HTML e-mail over an SSL SMTP connection.

    Args:
        content: Message body. Text is encoded to UTF-8; a byte string is
            passed through unchanged (it is assumed to already be UTF-8).
        mailto: List of recipient addresses. Joined with "," for the header
            and handed to ``sendmail`` as the envelope recipients.
        get_sub: Subject line of the message.

    Reads the module-level ``mail_host``, ``mail_user`` and ``mail_pwd``
    settings and connects to ``mail_host`` on port 465.

    Returns:
        None. Failures while connecting, logging in or sending are printed
        rather than raised, so a mail problem never aborts the caller.
    """
    print('setting mimetext')
    # Only encode real text: calling .encode() on a byte string that already
    # holds non-ASCII UTF-8 data triggers an implicit ASCII decode and fails.
    body = content if isinstance(content, bytes) else content.encode('utf8')
    msg = MIMEText(body, _subtype='html', _charset='utf8')
    msg['from'] = mail_user
    msg['subject'] = u'%s' % get_sub
    msg['to'] = ",".join(mailto)

    server = None
    try:
        print('connecting %s' % mail_host)
        server = smtplib.SMTP_SSL(mail_host, 465)
        print('login to mail_host')
        server.login(mail_user, mail_pwd)
        print('send email')
        server.sendmail(mail_user, mailto, msg.as_string())
    except Exception as e:
        print('exception: %s' % (e,))
    finally:
        # Close in ``finally`` so a failed login/send does not leak the socket.
        if server is not None:
            print('close the connection between the mail server')
            server.close()

# searchzhidao: a threading.Thread subclass that is given a list of keywords.
# run() simply calls searchzhidao(). For every keyword that method:
#   1. encodes the keyword as gb2312 and URL-encodes it with urllib.urlencode;
#   2. fetches the resulting URL with urllib2.urlopen and converts the page
#      from gb2312 to utf8;
#   3. parses the page and walks the "li" results; when the time text of a
#      result splits into more than one token and the second token contains
#      u'小時', it builds an HTML fragment from the keyword, the link's href,
#      the link's contents and the time text, and writes it to file_object.
# Any exception raised while handling a keyword is printed together with that
# keyword and the loop moves on to the next one.
# NOTE(review): several literals appear to have been stripped from this
# listing: the value assigned to `str_dic`, the URL prefix before the encoded
# keyword, the attribute filters passed to findall("li", ...) and
# find("div", ...), and the HTML markup inside the html_output strings.
# Recover them from the original script before running this.
# NOTE(review): identifiers look lowercased by extraction (threading.thread,
# beautifulsoup, findall, rendercontents, exception) - confirm the real names.
# NOTE(review): the local name `str` shadows the builtin.
# NOTE(review): file_object is not defined in this class; it is the global
# opened in the __main__ section, and no lock around the write is visible
# here even though several threads call it.
class

searchzhidao(threading.thread):

def__init__

(self,keywordlist):

threading.thread.

__init__

(self)

self.keywordlist =keywordlist

defsearchzhidao(self):

for keyword in

self.keywordlist:

try:

str = keyword.encode('

gb2312')

str_dic =

encode_keyword =urllib.urlencode(str_dic)

url = "

" + encode_keyword + "

&pn=0&cid=0&lm=8960

"htmlpage =urllib2.urlopen(url).read()

htmlpage = unicode(htmlpage, "

gb2312

").encode("

utf8")

soup =beautifulsoup(htmlpage)

for result_li in soup.findall("

li", ):

question_time = result_li.find("

div", )

q_time =question_time.get_text().split()

if len(q_time) > 1 and u'小時'

in q_time[1]:

html_output = "

" + keyword + "

"a_click = result_li.find("a"

) html_output += "

" + a_click.get("

href

") + "

' target='_blank'>

"html_output += a_click.rendercontents() + "

"html_output += "

" + question_time.get_text() + "

"file_object.write(html_output)

else

:

pass

except

exception as e:

print

e,keyword

continue

defrun(self):

self.searchzhidao()

# Script entry point. In order it:
#   1. opens zhidao_html_mail.html for writing (the global file_object that
#      the searchzhidao threads write to);
#   2. builds 20 searchzhidao workers, each given one slice of keywordlist of
#      size (len(keywordlist)+19)/20;
#   3. starts every thread in searchzhidaothreads, then joins them all;
#   4. closes the file, reads it back line by line into html_mail;
#   5. calls send_email() with that HTML, the to_list recipients and a
#      subject made of '' plus the formatted current time.
# NOTE(review): the right-hand side of `searchzhidaothreads =` and the
# statement that adds each `t` to it are missing from this listing -
# presumably an empty list and an append; confirm against the original.
# NOTE(review): the slice bounds use `/`, which only yields an integer under
# Python 2; under Python 3 this would need `//`.
# NOTE(review): the strftime format "%y-%m-%d %x" may be an extraction
# artefact of "%Y-%m-%d %X" (other identifiers here were lowercased) - verify.
# NOTE(review): `curtime` is assigned but not used in the visible lines.
# NOTE(review): the strings passed to file_object.write("") are empty here;
# any HTML wrapper markup they held looks stripped.
if__name__ == "

__main__":

print

"start the programe....

"searchzhidaothreads =

file_object = open('

zhidao_html_mail.html

', 'w'

) file_object.write(""

)

for i in range(20):

t = searchzhidao(keywordlist[((len(keywordlist)+19)/20) * i:((len(keywordlist)+19)/20) * (i+1)])

for i in

range(len(searchzhidaothreads)):

searchzhidaothreads[i].start()

for i in

range(len(searchzhidaothreads)):

searchzhidaothreads[i].join()

file_object.write(""

) file_object.close()

print

"finished this job!

"html_mail = ""

f2= open("

zhidao_html_mail.html

",'r')

for line in

f2: html_mail +=line

f2.close()

time_title = curtime = time.strftime("

%y-%m-%d %x

", time.localtime(time.time()))

to_list = [ '

***@xx.xx

', '

xx@x.***']

send_email( html_mail, to_list,

''+ time_title )

郵件輸出樣子:

懊惱的情緒要在一天內解除

人生有不同的階段,在不同的階段裡又扮演不同的角色,但是自己最喜歡的是哪個角色呢?只有自己知道答案。從初入社會開始,要接受這個社會大熔爐的洗禮,進入某個公司從一個小弟小妹慢慢成長成一個可以獨當一面的大哥大姐,在這個過程中有人走得艱辛,有人走得相對輕鬆,當然不論過程是怎樣的,當走過這段時光,都會發現...

sql查詢一天內的where寫法,sql寫法

sql查詢一天內的寫法 1.where createtime between select date format now y m d 00 00 00 and select date format now y m d 23 59 59 2.select to days now select to ...

百詞斬筆試題 一天內時針 分針 秒針轉過的角度

給定一天內先後的兩個時間,求時針 分針 秒針在時鐘表盤上轉動了多少角度。注意 為了保證計算精度,請使用double型別進行計算。輸入兩行,分別代表兩個時間點a和b,a和b都為24小時制,b大於等於a,且都在同一天,時分秒用冒號 分割 輸出為三行,分別代表時針 分針 秒針在表盤上轉動的角度,結果只保留...