python判斷unicode字元型別

def is_chinese(uchar):
    """Return True if ``uchar`` is a Chinese character.

    The check is a plain range comparison against U+4E00..U+9FA5
    (inclusive); characters outside that range are reported as not Chinese.

    Args:
        uchar: The unicode character to classify.

    Returns:
        bool: True when ``uchar`` falls inside the range, False otherwise.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'
def is_number(uchar):
    """Return True if ``uchar`` is an ASCII digit (``0`` through ``9``).

    Args:
        uchar: The unicode character to classify.

    Returns:
        bool: True when ``uchar`` lies in U+0030..U+0039, False otherwise.
    """
    return u'0' <= uchar <= u'9'
def is_alphabet(uchar):
    """Return True if ``uchar`` is an ASCII letter (``A``-``Z`` or ``a``-``z``).

    Args:
        uchar: The unicode character to classify.

    Returns:
        bool: True when ``uchar`` lies in U+0041..U+005A or U+0061..U+007A,
        False otherwise.
    """
    return (u'A' <= uchar <= u'Z') or (u'a' <= uchar <= u'z')
def is_other(uchar):
    """Return True if ``uchar`` is not a Chinese character, digit or letter.

    This is the complement of ``is_chinese``, ``is_number`` and
    ``is_alphabet`` combined.

    Args:
        uchar: The unicode character to classify.

    Returns:
        bool: True when none of the three classifiers accepts ``uchar``.
    """
    return not (is_chinese(uchar) or is_number(uchar) or is_alphabet(uchar))
def b2q(uchar):
    """Convert a half-width character to its full-width counterpart.

    Args:
        uchar: A single unicode character.

    Returns:
        The full-width form of ``uchar`` when its code point lies in
        0x20..0x7E; otherwise ``uchar`` itself, unchanged.

    Raises:
        TypeError: If ``uchar`` is not a single character (raised by ``ord``).
    """
    # ``unichr`` only exists on Python 2; ``chr`` is its Python 3 equivalent.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    inside_code = ord(uchar)
    # Not a half-width character: hand the input back untouched.
    if inside_code < 0x0020 or inside_code > 0x7e:
        return uchar
    if inside_code == 0x0020:
        # The space is the one exception to the fixed offset: it maps to
        # U+3000.
        inside_code = 0x3000
    else:
        # Every other character obeys: full-width = half-width + 0xFEE0.
        inside_code += 0xfee0
    return to_char(inside_code)
def q2b(uchar):
    """Convert a full-width character to its half-width counterpart.

    Args:
        uchar: A single unicode character.

    Returns:
        The half-width form of ``uchar`` when the conversion lands in
        0x20..0x7E; otherwise ``uchar`` itself, unchanged.

    Raises:
        TypeError: If ``uchar`` is not a single character (raised by ``ord``).
    """
    # ``unichr`` only exists on Python 2; ``chr`` is its Python 3 equivalent.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    inside_code = ord(uchar)
    if inside_code == 0x3000:
        # The full-width space U+3000 maps to the ASCII space.
        inside_code = 0x0020
    else:
        # Every other character obeys: half-width = full-width - 0xFEE0.
        inside_code -= 0xfee0
    # If the result is not a half-width character, the input was not a
    # full-width one: hand it back untouched.
    if inside_code < 0x0020 or inside_code > 0x7e:
        return uchar
    return to_char(inside_code)
def stringq2b(ustring):
    """Return ``ustring`` with every full-width character made half-width.

    Each character is passed through ``q2b`` and the results are joined
    back together in their original order.
    """
    return "".join(map(q2b, ustring))
def uniform(ustring):
    """Normalize ``ustring``: full-width to half-width, then lower-case."""
    half_width = stringq2b(ustring)
    return half_width.lower()
def string2list(ustring):
    """Split ``ustring`` into runs of Chinese characters, letters and digits.

    Characters for which ``is_other`` returns True act as separators and are
    dropped. Adjacent Chinese characters, letters and digits are kept
    together in one segment; no split is made between those three classes.

    NOTE(review): the list initializers and the append/flush statements were
    missing from this block; they have been reconstructed from the surviving
    control flow. Confirm against the intended behavior.

    Args:
        ustring: The unicode string to split.

    Returns:
        list: The non-empty segments, in order of appearance.
    """
    retlist = []
    utmp = []  # characters of the segment currently being collected
    for uchar in ustring:
        if is_other(uchar):
            # A separator ends the current segment, if there is one.
            if utmp:
                retlist.append("".join(utmp))
                utmp = []
        else:
            utmp.append(uchar)
    # Flush the trailing segment when the string does not end on a separator.
    if utmp:
        retlist.append("".join(utmp))
    return retlist
if __name__ == "__main__":
    # ``unichr`` only exists on Python 2; ``chr`` is its Python 3 equivalent.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    # Test q2b and b2q: round-trip every printable ASCII character and show
    # the recovered half-width form next to the full-width one.
    for i in range(0x0020, 0x007f):
        full_width = b2q(to_char(i))
        # A single pre-formatted argument prints identically on Python 2
        # and Python 3.
        print(u"%s %s" % (q2b(full_width), full_width))

    # Test uniform, then split the normalized text.
    ustring = u'中國 人名ａ高頻ａ'
    ustring = uniform(ustring)
    ret = string2list(ustring)
    print(ret)

python根據unicode判斷語言型別

def is chinese uchar 判斷乙個unicode是否是漢字 if uchar u u4e00 and uchar u u9fa5 return true else return false def is number uchar 判斷乙個unicode是否是數字 if uchar u...

python 學習 unicode 編碼

如果檔案需要指定編碼格式（如 utf-8）：1. 要在檔案開始時寫如下注釋：# coding: utf-8；2. 或者使用以下程式碼：import sys; reload(sys); sys.setdefaultencoding('utf-8')。說明：unicode 支援不同的編碼方式，最著名的是 utf-8。ascii 字元的...

Python學習筆記 Unicode

這裡簡單地說一下。下面內容基本上是從 python.core.programming.2ed 上摘的。unicode 是計算機可以支援這個星球上多種語言的秘密。在 unicode 之前，用的都是 ascii；ascii 碼非常簡單，每個英文字元都用 7 位二進位制數的方式儲存在計算機內，其範圍是 32 到 126。它...

python判斷unicode字元型別

python根據unicode判斷語言型別

python 學習 unicode 編碼

Python學習筆記 Unicode

相關推薦