Python 的第一個小程式

#-*-coding:utf-8-*-
import sys
from htmlparser import htmlparser
# Python 2 idiom: reload(sys) is called so that sys.setdefaultencoding can be
# used below; the default codec is switched to UTF-8 only when it is not
# already UTF-8.
reload(sys)
encoding = sys.getdefaultencoding()
if encoding != 'utf-8':
    sys.setdefaultencoding('utf-8')
class contentparser(htmlparser):
    """Collect the body text of a chapter page.

    Text is kept when it appears inside a <p> element that has no ``align``
    attribute and is not nested in an <li> carrying an ``id`` attribute
    (those <li> elements are flagged as comments).  An <a> start tag stops
    collection until the next qualifying <p>.  Every kept chunk is appended
    to ``self.text`` preceded by a newline.

    NOTE(review): the base class name ``htmlparser`` comes from this file's
    import line; the standard library spells it ``HTMLParser`` -- confirm
    the import resolves.
    """

    def __init__(self):
        self.text = ''        # accumulated chapter text
        self.is_comment = 0   # 1 while inside an <li id=...> element
        self.is_content = 0   # 1 while inside a qualifying <p> element
        htmlparser.__init__(self)

    def handle_starttag(self, tag, attr):
        """Update the comment/content flags for an opening tag.

        ``attr`` is the list of (name, value) pairs supplied by the parser.
        """
        if tag == 'li' and any(name == 'id' for name, _ in attr):
            self.is_comment = 1
        if tag == 'a':
            # Link text is never part of the chapter body.
            self.is_content = 0
        if tag == 'p':
            has_align = any(name == 'align' for name, _ in attr)
            if not has_align and not self.is_comment:
                self.is_content = 1

    def handle_endtag(self, tag):
        """Clear the flag that the matching start tag may have set."""
        if tag == 'li':
            self.is_comment = 0
        if tag == 'p':
            self.is_content = 0

    def handle_data(self, text):
        """Append ``text`` to the result when inside a content paragraph.

        Chunks containing either marker string below are skipped.
        """
        if not self.is_content:
            return
        if '稱呼' not in text and '內容' not in text:
            self.text += '\n' + text

    def get_text(self):
        """Return everything collected so far."""
        return self.text
if __name__ == '__main__':
    # Manual check: parse the HTML file named by the first command-line
    # argument.  The parsed text is not printed.  The with-block closes the
    # file even when feed() raises.
    with open(sys.argv[1]) as fd:
        cp = contentparser()
        cp.feed(fd.read())

這個檔案儲存成progress.py

import sys,urllib2,time
from progress import contentparser 
from htmlparser import htmlparser
# Python 2 idiom: reload(sys) is called so that sys.setdefaultencoding can be
# used to switch the process-wide default codec to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
class linkparser(htmlparser):
    """Walk an index page and hand every chapter link to ``progressing``.

    Inside a ``<div class="mulu">`` element:
      * the text of a ``<td colspan=...>`` cell becomes the current section
        name (``self.mulu``), which is passed to ``progressing`` as the
        output file name;
      * the text of each ``<a href=...>`` is passed to ``progressing``
        together with the link target and the current section name.
    The time between reading a section name and the closing ``</div>`` is
    printed.

    NOTE(review): the base class name ``htmlparser`` comes from this file's
    import line; the standard library spells it ``HTMLParser`` -- confirm
    the import resolves.
    """

    def __init__(self):
        self.link = ''        # href of the most recent <a> inside the div
        self.content = ''     # text of the most recent link
        self.mulu = ''        # current section name
        self.has_mulu = 0     # 1 while inside a <td colspan=...> cell
        self.is_mulu = 0      # 1 while inside <div class="mulu">
        self.is_href = 0      # 1 while inside an <a> within that div
        self.start_time = 0   # time.time() when the section name was read
        self.end_time = 0     # time.time() when the div was closed
        htmlparser.__init__(self)

    def handle_starttag(self, tag, attr):
        """Track entry into the index div, its links and its header cells."""
        if tag == 'div':
            if any(k == 'class' and v == 'mulu' for k, v in attr):
                self.is_mulu = 1
        if tag == 'a' and self.is_mulu:
            self.is_href = 1
            for k, v in attr:
                if k == 'href':
                    self.link = v
        if tag == 'td' and self.is_mulu:
            if any(k == 'colspan' for k, _ in attr):
                self.has_mulu = 1

    def handle_endtag(self, tag):
        """Close out the state opened by the matching start tag."""
        if tag == 'div' and self.is_mulu and self.mulu:
            self.is_mulu = 0
            # Single-argument print(...) emits the same text whether it is
            # parsed as a Python 2 statement or a Python 3 call.
            print('end %s' % self.mulu)
            self.mulu = ''
            self.end_time = time.time()
            print('time :  %s' % (self.end_time - self.start_time))
        if tag == 'a':
            self.is_href = 0
        if tag == 'td' and self.is_mulu and self.has_mulu:
            self.has_mulu = 0

    def handle_data(self, text):
        """Process link text or a section name, depending on parser state."""
        if self.is_mulu and self.is_href:
            self.content = text
            progressing(self.link, self.mulu, self.content)
            return
        if self.has_mulu:
            self.mulu = text
            print('begin %s' % self.mulu)
            self.start_time = time.time()
def progressing(url, filename, chaptername):
    """Download one chapter and append it to ``filename``.

    Writes ``chaptername``, then the text returned by
    ``get_chapter_text(url)``, then a newline.  The original call
    ``'\\n'.format(chapter_text)`` evaluated to just a newline, so the
    downloaded text was never written.
    """
    chapter_text = get_chapter_text(url)
    # The with-block closes the file even if a write fails.
    with open(filename, 'a') as fd:
        fd.write(chaptername)
        fd.write(chapter_text)
        fd.write('\n')
def get_chapter_text(url):
    """Fetch ``url`` and return the text extracted by ``contentparser``.

    A parse error is printed and whatever was collected before it is still
    returned.  The HTTP response is closed in every case.
    """
    # Local import: this name is not imported at file level, and the
    # exception lives in the Python 2 stdlib module HTMLParser.
    from HTMLParser import HTMLParseError

    # urllib2 exposes the request class as ``Request``.
    fd = urllib2.urlopen(urllib2.Request(url))
    cp = contentparser()
    try:
        cp.feed(fd.read())
    except HTMLParseError as msg:
        print(msg)
    finally:
        fd.close()
    return cp.get_text()
if __name__ == '__main__':
    # Local import: this name is not imported at file level, and the
    # exception lives in the Python 2 stdlib module HTMLParser.
    from HTMLParser import HTMLParseError

    # The index URL hard-coded here is an empty string.  Take it from the
    # first command-line argument when one is given and keep the original
    # value otherwise.
    index_url = sys.argv[1] if len(sys.argv) > 1 else ''
    # urllib2 exposes the request class as ``Request``.
    fd = urllib2.urlopen(urllib2.Request(index_url))
    lp = linkparser()
    try:
        lp.feed(fd.read())
    except HTMLParseError as msg:
        print(msg)
    finally:
        fd.close()

第一個小程式

2.建立pages目錄檔案作用是放各個頁面的 3.建立頁面給頁面起名字，並建立四個檔案 1 js 邏輯的實現 2 json 負責標題欄和一些狀態列 3 wxml 頁面文字 4 wxss 頁面樣式 4.把內容元素封裝在view內部，寫法內容 5.這節課需要用到三個元件文字按鈕 1 2 文字 ...

Python 的第一個小程式：Hello World

程式的編寫方式有兩種：編譯式和互動式。黑視窗的方式是互動式，互動式執行程式的方法如下：進入環境終端，輸入 python 就進入了 Python 直譯器，輸入 exit 退出 Python 環境。互動式的缺點是寫一行執行一行，無法儲存；編譯式可以統一程式設計，可以儲存和維護程式，所以採用編譯式進行程式編寫。編譯式...

第一個 Python 程式

有人在論壇上問將日誌格式化的方法，剛好學python,就拿這個練手了 09 55 54 error1 tmp error log.3 50 times mon jun 28 00 00 53 2009 09 55 54 error1 tmp error log.3 50 times 09 56 ...

Python 的第一個小程式

第一個小程式

Python 的第一個小程式：Hello World

第一個 Python 程式

相關推薦