#!/usr/bin/python
#-*-coding:utf-8-*-
__author__ = 'lxw'
import string
def loadenglish(path='dictionary.txt'):
    """Load the word list used for English detection.

    Args:
        path: Location of the dictionary file, one word per line.
            Defaults to ``dictionary.txt`` in the current working directory.

    Returns:
        A dict whose keys are the lines of the file; every value is ``None``.
        Only key membership is meaningful.

    Raises:
        IOError / OSError: if the file cannot be opened.
    """
    # The context manager guarantees the handle is closed even if read() fails.
    with open(path) as dicfile:
        lines = dicfile.read().split('\n')
    # A dict (rather than a list) gives hash-based membership tests.
    return dict.fromkeys(lines)
# Load the dictionary once at import time; this module-level mapping is the
# word set that later lookups test membership against.
english_words = loadenglish()
def removenonletters(message):
    """Strip every character that is not an ASCII letter or whitespace.

    Punctuation, digits and any other symbol are dropped; ASCII letters,
    spaces, newlines and tabs are kept in their original order.

    Args:
        message: The text to filter.

    Returns:
        A new string containing only the permitted characters.
    """
    # ascii_letters is locale-independent and exists on both Python 2 and 3,
    # unlike string.uppercase / string.lowercase.
    allowed = string.ascii_letters + ' \n\t'
    return "".join(ch for ch in message if ch in allowed)
def getenglishcount(message):
    """Return the fraction of words in ``message`` found in the dictionary.

    The message is upper-cased, stripped of non-letter characters and split
    on whitespace; each resulting word is looked up in ``english_words``.
    NOTE(review): this presumes the dictionary entries are upper-case.
    Confirm against the dictionary file.

    Args:
        message: The text to analyse.

    Returns:
        A float between 0.0 and 1.0, or 0.0 when the message has no words.
    """
    cleaned = removenonletters(message.upper())
    words = cleaned.split()
    if not words:
        # Avoid dividing by zero for empty or letter-free input.
        return 0.0
    # Test membership on the dict itself: a hash lookup, not a scan of keys().
    match = sum(1 for word in words if word in english_words)
    return float(match) / len(words)
def isenglish(message, wordpercentage=20, letterpercentage=85):
    """Decide whether ``message`` looks like English text.

    Two conditions must both hold: the share of dictionary words reaches
    ``wordpercentage``, and the share of letter/whitespace characters in the
    raw message reaches ``letterpercentage``.

    Args:
        message: The text to test.
        wordpercentage: Minimum percentage of words that must be found in
            the dictionary (default 20).
        letterpercentage: Minimum percentage of characters that must be
            letters or whitespace (default 85).

    Returns:
        True if both thresholds are met, otherwise False. An empty message
        returns False.
    """
    if not message:
        # An empty message would otherwise divide by len(message) == 0.
        return False
    wordmatch = getenglishcount(message) * 100 >= wordpercentage
    removedmsg = removenonletters(message)
    lettermatch = (len(removedmsg) / float(len(message))) * 100 >= letterpercentage
    return wordmatch and lettermatch
Python 英文分詞
pattern = r'''(?x)        # set flag to allow verbose regexps
    ([A-Z]\.)+            # abbreviations, e.g. U.S.A.
  | \w+(-\w+)*            # words with optional internal hyphens
  | \$?\d+(\.\d+)?%?      # currency and percentages, e.g. $12...
python 判斷區分字串是否都是英文 中文
b = 'bilibili站'; b.isalpha()  # 中英混合不適用,結果為 True; b.encode('utf-8').isalpha()  # False; b.encode('utf-8')  # b'bilibili\xe7\xab\x99'; word_1 = '如何再飄搖'; res = True; for w in word_1: if not '\u4e...
python 判斷字元是否為英文,中文,數字。
def is_chinese(uchar): """判斷一個unicode是否是漢字""" if uchar >= u'\u4e00' and uchar <= u'\u9fff': return True else: return False; def is_number(uchar): """判斷一個unicode是否是數字""" 此函式用str.isd...