#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""漢字處理的工具:判斷unicode是否是漢字,數字,英文,或者其他字元。全形符號轉半形符號。"""
def is_chinese(uchar):
    """Return whether a unicode character is a Chinese character.

    A character is treated as Chinese when it lies in the inclusive
    range U+4E00..U+9FA5.

    :param uchar: the unicode character to classify.
    :returns: ``True`` if ``uchar`` is inside the range, else ``False``.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'
def is_chinese_all(ustring):
    """Return whether every character of a unicode string is Chinese.

    :param ustring: the unicode string to inspect.
    :returns: ``True`` if ``is_chinese`` holds for every character
        (an empty string therefore yields ``True``), else ``False``.
    """
    return all(is_chinese(uchar) for uchar in ustring)
def is_number(uchar, but=u''):
    """Return whether a unicode character is an ASCII digit.

    :param uchar: the unicode character to classify.
    :param but: one extra value that is accepted as well. Because the
        equality test against ``but`` comes first, the default ``u''``
        means an empty ``uchar`` is also accepted.
    :returns: ``True`` if ``uchar`` equals ``but`` or lies in the
        inclusive range ``'0'``..``'9'``, else ``False``.
    """
    return uchar == but or u'\u0030' <= uchar <= u'\u0039'
def is_number_all(ustring, but=u''):
    """Return whether a unicode string consists only of digits.

    :param ustring: the unicode string to inspect.
    :param but: extra value forwarded to ``is_number`` as an accepted
        non-digit character.
    :returns: ``True`` if ``is_number`` holds for every character
        (an empty string therefore yields ``True``), else ``False``.
    """
    return all(is_number(uchar, but) for uchar in ustring)
def is_alphabet(uchar, but=u''):
    """Return whether a unicode character is an English letter.

    :param uchar: the unicode character to classify.
    :param but: one extra value that is accepted as well. Because the
        equality test against ``but`` comes first, the default ``u''``
        means an empty ``uchar`` is also accepted.
    :returns: ``True`` if ``uchar`` equals ``but`` or lies in
        ``'a'``..``'z'`` or ``'A'``..``'Z'`` (both inclusive), else
        ``False``.
    """
    return (
        uchar == but
        or u'\u0061' <= uchar <= u'\u007a'
        or u'\u0041' <= uchar <= u'\u005a'
    )
def is_alphabet_all(ustring, but=u''):
    """Return whether a unicode string consists only of English letters.

    :param ustring: the unicode string to inspect.
    :param but: extra value forwarded to ``is_alphabet`` as an accepted
        non-letter character.
    :returns: ``True`` if ``is_alphabet`` holds for every character
        (an empty string therefore yields ``True``), else ``False``.
    """
    return all(is_alphabet(uchar, but) for uchar in ustring)
def is_alphanum(uchar, but=u''):
    """Return whether a unicode character is an English letter or a digit.

    :param uchar: the unicode character to classify.
    :param but: extra value forwarded to both ``is_number`` and
        ``is_alphabet`` as an additionally accepted character.
    :returns: ``True`` if either ``is_number`` or ``is_alphabet``
        accepts ``uchar``, else ``False``.
    """
    return is_number(uchar, but) or is_alphabet(uchar, but)
def is_alpha_or_num_all(ustring, but=u''):
    """Return whether a unicode string holds only letters and/or digits.

    :param ustring: the unicode string to inspect.
    :param but: extra value forwarded to ``is_alphanum`` as an
        additionally accepted character.
    :returns: ``True`` if ``is_alphanum`` holds for every character
        (an empty string therefore yields ``True``), else ``False``.
    """
    return all(is_alphanum(uchar, but) for uchar in ustring)
def is_alpha_and_num_all(ustring, but=u''):
    """Return whether a string mixes letters and digits and nothing else.

    :param ustring: the unicode string to inspect.
    :param but: extra value forwarded to ``is_alphabet`` and
        ``is_number``. ``is_alphabet`` is consulted first, so a
        character equal to ``but`` is counted as a letter.
    :returns: ``True`` only if every character is accepted by
        ``is_alphabet`` or ``is_number`` and at least one of each kind
        was seen; otherwise ``False`` (including for an empty string).
    """
    seen_alphabet = False
    seen_number = False
    for uchar in ustring:
        if is_alphabet(uchar, but):
            seen_alphabet = True
        elif is_number(uchar, but):
            seen_number = True
        else:
            # Any character that is neither a letter nor a digit
            # disqualifies the whole string.
            return False
    return seen_alphabet and seen_number
def is_other(uchar, but=u''):
    """Return whether a character is none of Chinese, digit or letter.

    :param uchar: the unicode character to classify.
    :param but: extra value forwarded to ``is_number`` and
        ``is_alphabet``; a character equal to it is therefore not
        reported as "other".
    :returns: ``True`` if ``is_chinese``, ``is_number`` and
        ``is_alphabet`` all reject ``uchar``, else ``False``.
    """
    return not (
        is_chinese(uchar)
        or is_number(uchar, but)
        or is_alphabet(uchar, but)
    )
def is_other_all(ustring, but=u''):
    """Return whether a string has no Chinese, digit or letter characters.

    :param ustring: the unicode string to inspect.
    :param but: extra value forwarded to ``is_other``.
    :returns: ``True`` if ``is_other`` holds for every character
        (an empty string therefore yields ``True``), else ``False``.
    """
    return all(is_other(uchar, but) for uchar in ustring)
def exist_chinese(ustring):
    """Return whether a unicode string contains any Chinese character.

    :param ustring: the unicode string to inspect.
    :returns: ``True`` if ``is_chinese`` accepts at least one
        character, else ``False`` (including for an empty string).
    """
    return any(is_chinese(uchar) for uchar in ustring)
def exist_number(ustring):
    """Return whether a unicode string contains any digit.

    :param ustring: the unicode string to inspect.
    :returns: ``True`` if ``is_number`` (with its default ``but``)
        accepts at least one character, else ``False`` (including for
        an empty string).
    """
    return any(is_number(uchar) for uchar in ustring)
def exist_alphabet(ustring):
    """Return whether a unicode string contains any English letter.

    :param ustring: the unicode string to inspect.
    :returns: ``True`` if ``is_alphabet`` (with its default ``but``)
        accepts at least one character, else ``False`` (including for
        an empty string).
    """
    return any(is_alphabet(uchar) for uchar in ustring)
def exist_other(ustring, but=u''):
    """Return whether a string contains any non-Chinese/digit/letter char.

    :param ustring: the unicode string to inspect.
    :param but: extra value forwarded to ``is_other``.
    :returns: ``True`` if ``is_other`` accepts at least one character,
        else ``False`` (including for an empty string).
    """
    return any(is_other(uchar, but) for uchar in ustring)
def b2q(uchar):
    """Convert a half-width character to its full-width counterpart.

    :param uchar: a single unicode character.
    :returns: the full-width character when ``uchar`` has a code point
        in 0x20..0x7E; otherwise ``uchar`` itself, unchanged.
    :raises TypeError: propagated from ``ord`` when ``uchar`` is not a
        single character.
    """
    try:
        to_char = unichr  # Python 2: chr() cannot build code points above 0xFF
    except NameError:
        to_char = chr  # Python 3: unichr no longer exists, chr covers it

    inside_code = ord(uchar)
    # Not a half-width character: hand the input back untouched.
    if inside_code < 0x0020 or inside_code > 0x7e:
        return uchar
    if inside_code == 0x0020:
        # Space is the one exception to the fixed-offset rule below.
        inside_code = 0x3000
    else:
        # For everything else: full-width = half-width + 0xFEE0.
        inside_code += 0xfee0
    return to_char(inside_code)
def q2b(uchar):
    """Convert a full-width character to its half-width counterpart.

    :param uchar: a single unicode character.
    :returns: the half-width character when the converted code point
        falls in 0x20..0x7E; otherwise ``uchar`` itself, unchanged.
    :raises TypeError: propagated from ``ord`` when ``uchar`` is not a
        single character.
    """
    try:
        to_char = unichr  # Python 2: chr() cannot build code points above 0xFF
    except NameError:
        to_char = chr  # Python 3: unichr no longer exists, chr covers it

    # Special case handled before the arithmetic conversion.
    # NOTE(review): this maps U+300F to an apostrophe; presumably it stands
    # in for a closing quotation mark -- confirm the intended character.
    if uchar == u'』':
        return u'\''
    inside_code = ord(uchar)
    if inside_code == 0x3000:
        # Full-width space is the one exception to the fixed-offset rule.
        inside_code = 0x0020
    else:
        # For everything else: half-width = full-width - 0xFEE0.
        inside_code -= 0xfee0
    # The result is not a half-width character: return the original input.
    if inside_code < 0x0020 or inside_code > 0x7e:
        return uchar
    return to_char(inside_code)
def stringq2b(ustring):
    """Convert every full-width character of a string to half-width.

    :param ustring: the unicode string to convert.
    :returns: a new string built by applying ``q2b`` to each character
        of ``ustring`` in order.
    """
    return "".join(map(q2b, ustring))
def uniform(ustring):
    """Normalise a string: full-width to half-width, then lower-case.

    :param ustring: the unicode string to normalise.
    :returns: the result of ``stringq2b(ustring)`` converted to lower
        case.
    """
    halfwidth = stringq2b(ustring)
    return halfwidth.lower()
def string2list(ustring):
    """Split a string into runs of Chinese characters, letters and digits.

    Characters accepted by ``is_other`` act as separators and are
    dropped. Each maximal run of the remaining characters becomes one
    item of the result; a run may mix Chinese, letters and digits.

    NOTE(review): the list initialisers and the append/flush statements
    were missing from the source and were reconstructed from the
    surviving control flow -- confirm against the original module.

    :param ustring: the unicode string to split.
    :returns: a list of the non-empty runs, in order of appearance.
    """
    retlist = []
    utmp = []
    for uchar in ustring:
        if is_other(uchar):
            if not utmp:
                # Consecutive or leading separators: nothing to flush.
                continue
            # A separator ends the current run.
            retlist.append("".join(utmp))
            utmp = []
        else:
            utmp.append(uchar)
    if utmp:
        # Flush the final run when the string does not end on a separator.
        retlist.append("".join(utmp))
    return retlist
漢字處理元件
有時候專案中會根據使用者姓名的拼音檢索資料,微軟專門提供了一個元件安裝包來處理非英文的特殊語言,名稱為 預設的安裝路徑為 C:\Program Files (x86)\Microsoft Visual Studio International Pack\Simplified Chinese Pin-Yin...
把16進位制編碼得到其中的漢字處理方法
name = '中国'; print(name.encode('utf8')) 結果輸出 b'\xe4\xb8\xad\xe5\x9b\xbd';b'\xe4\xb8\xad\xe5\x9b\xbd'.decode('utf8') 得到 '中国'。注釋:也就是說只有字串才能進行編碼處理,位元組流進行解碼處理,所以要想知道16進位制下的內容就要...
jsp的分頁技術與訪問資料庫的漢字處理
class.forname com.microsoft.sqlserver.jdbc.sqlserverdriver string url jdbc sqlserver localhost 1433 databasename bookstore string user bookstore strin...