WinAPI 的兩個函式:WideCharToMultiByte、MultiByteToWideChar。
// Prototype of the WinAPI function the article refers to as multibytetowidechar
// (identifiers appear lowercased in this text).
int multibytetowidechar(
uint codepage, // code page
dword dwflags, // character-type options
lpcstr lpmultibytestr, // string to map
int cbmultibyte, // number of bytes in string
lpwstr lpwidecharstr, // wide-character buffer
int cchwidechar // size of buffer
);// converts a multibyte (narrow) character string into wide characters
需要用到的一些函式:cstring cxmlprocess::hextobin(cstring string)//將16進製數轉換成2進製
cstring cxmlprocess::bintohex(cstring binstring)//將2進製數轉換成16進製
int cxmlprocess::bintoint(cstring string)//2進製字元資料轉換成10進製整型
len += tempint*strint;
}return len;
}
utf-8轉換成gb2312:先把utf-8轉換成unicode,然後再把unicode通過函式widechartomultibyte轉換成gb2312。
wchar* cxmlprocess::utf_8tounicode(char *ustart) //把utf-8轉換成unicode
char * cxmlprocess::unicodetogb2312(unsigned short udata) //把unicode 轉換成 gb2312
gb2312轉換成utf-8:先把gb2312通過函式multibytetowidechar轉換成unicode,然後再把unicode拆開後拼裝成utf-8。
wchar * cxmlprocess::gb2312tounicode(char *gbbuffer) //gb2312 轉換成 unicode
char * cxmlprocess::unicodetoutf_8(wchar *unichar) // unicode 轉換成utf-8
例子:將gb2312轉換成utf-8的呼叫:
char * cxmlprocess::translatechartoutf_8(char *xmlstream, int len)
//如果是英文直接複製就可以
else
}newcharbuffer[newcharlen] = ''/0'';
cstring string1 ;
string1.format("%s",newcharbuffer);
finalcharbuffer = new char[newcharlen+1];
memcpy(finalcharbuffer,newcharbuffer,newcharlen+1);
return finalcharbuffer;
}
; ---------------------------------------------------------------------------
; utf8tounicode
;   Converts a NUL-terminated UTF-8 byte string into 16-bit code units
;   (low byte stored first) and appends a 16-bit zero terminator.
;
;   lpszbuf_out  - destination buffer. Each source byte produces at most one
;                  16-bit unit, so 2 * (source length + 1) bytes always suffice.
;   lpszutf8_in  - source string, terminated by a zero byte.
;
;   Decoding rules:
;     0xxxxxxx                      -> 00000000 0xxxxxxx
;     110yyyyy 10xxxxxx             -> 00000yyy yyxxxxxx
;     1110zzzz 10yyyyyy 10xxxxxx    -> zzzzyyyy yyxxxxxx
;   Any other byte >= 80h (stray continuation byte, 4-byte lead) is still run
;   through the 3-byte layout, exactly as the earlier version did; continuation
;   bytes are not validated beyond being non-zero.
;   A multi-byte sequence cut short by the terminator ends the conversion
;   instead of reading past the end of the source string.
;
;   No meaningful value is returned in eax.
; ---------------------------------------------------------------------------
utf8tounicode proc uses esi edi lpszbuf_out,lpszutf8_in
        mov     esi,lpszutf8_in
        mov     edi,lpszbuf_out
        .while  true
            mov     al,[esi]
            .break  .if al == 0                     ; end of source string
            mov     ah,al
            and     ah,11100000b                    ; keep only the lead-byte class bits
            .if     al < 80h
                ; single byte (ASCII): zero-extend to 16 bits
                xor     ah,ah
                stosw
                inc     esi
            .elseif ah == 11000000b
                ; two-byte sequence: 110yyyyy 10xxxxxx
                .break  .if byte ptr [esi+1] == 0   ; truncated sequence
                mov     al,[esi]
                shr     al,2
                and     al,00000111b                ; high byte = 00000yyy
                mov     [edi+1],al
                mov     al,[esi]
                shl     al,6                        ; low two y bits -> bits 7..6
                mov     ah,[esi+1]
                and     ah,00111111b
                or      al,ah                       ; low byte = yyxxxxxx
                mov     [edi+0],al
                add     edi,2
                add     esi,2
            .else
                ; three-byte sequence: 1110zzzz 10yyyyyy 10xxxxxx
                .break  .if byte ptr [esi+1] == 0   ; truncated sequence
                .break  .if byte ptr [esi+2] == 0   ; truncated sequence
                mov     al,[esi]
                and     al,00001111b
                shl     al,4                        ; zzzz -> bits 7..4 of high byte
                mov     [edi+1],al
                mov     al,[esi+1]
                and     al,00111100b
                shr     al,2                        ; upper four y bits -> bits 3..0
                or      [edi+1],al
                mov     al,[esi+1]
                and     al,11b
                shl     al,6                        ; lower two y bits -> bits 7..6 of low byte
                mov     [edi+0],al
                mov     al,[esi+2]
                and     al,00111111b
                or      [edi+0],al                  ; xxxxxx -> bits 5..0
                add     edi,2
                add     esi,3
            .endif
        .endw
        mov     word ptr [edi],0                    ; 16-bit terminator
        ret
utf8tounicode endp
; ---------------------------------------------------------------------------
; unicodetoutf8
;   Converts a string of 16-bit code units (zero-terminated) into UTF-8.
;
;   lpbuf_out    - destination buffer. Each source unit produces at most three
;                  bytes and the terminator takes two, so
;                  3 * (number of units) + 2 bytes always suffice.
;   lpszutf8_in  - source string of 16-bit units ending with a zero unit
;                  (despite the parameter name, this is the 16-bit input).
;
;   Encoding rules:
;     0000h..007Fh -> 0xxxxxxx
;     0080h..07FFh -> 110yyyyy 10xxxxxx
;     0800h..FFFFh -> 1110zzzz 10yyyyyy 10xxxxxx
;   Each unit is encoded independently; surrogate pairs are not combined.
;
;   Fixes relative to the earlier version:
;     - ASCII units are stored with stosb. stosw also stored the zero high
;       byte, which terminated the UTF-8 output after the first ASCII char.
;     - Units 0080h..07FFh now get the two-byte form instead of a raw byte
;       (0080h..00FFh) or an overlong three-byte form (0100h..07FFh).
;
;   No meaningful value is returned in eax.
; ---------------------------------------------------------------------------
unicodetoutf8 proc uses esi edi lpbuf_out,lpszutf8_in
        mov     esi,lpszutf8_in
        mov     edi,lpbuf_out
        .while  true
            mov     ax,[esi]
            .if     ax == 0
                ; Two zero bytes are stored, as before, so the bytes written on
                ; the already-correct three-byte path are unchanged.
                stosw
                .break
            .elseif ax < 80h
                ; single byte (ASCII)
                stosb
                add     esi,2
            .elseif ax < 800h
                ; two bytes: 110yyyyy 10xxxxxx
                shr     ax,6                        ; ax < 20h, so the value fits in al
                or      al,11000000b
                mov     [edi+0],al
                mov     al,[esi+0]
                and     al,00111111b
                or      al,10000000b
                mov     [edi+1],al
                add     edi,2
                add     esi,2
            .else
                ; three bytes: 1110zzzz 10yyyyyy 10xxxxxx
                mov     al,[esi+1]
                shr     al,4                        ; zzzz = top four bits of the high byte
                or      al,11100000b
                mov     [edi+0],al
                mov     al,[esi+1]
                and     al,00001111b
                shl     al,2                        ; upper four y bits
                or      al,10000000b
                mov     ah,[esi+0]
                shr     ah,6                        ; lower two y bits
                or      al,ah
                mov     [edi+1],al
                mov     al,[esi+0]
                and     al,00111111b                ; xxxxxx
                or      al,10000000b
                mov     [edi+2],al
                add     edi,3
                add     esi,2
            .endif
        .endw
        ret
unicodetoutf8 endp
wchar* cxmlprocess::utf_8tounicode(char *ptext)
wchar* cxmlprocess::utf_8tounicode(char *ptext)
gb2312與UTF 8之間的互相轉換
function chinese2unicode salon dim i dim salon one dim salon unicode for i 1 to len salon salon one mid salon,i,1 salon unicode salon unicode chr 38 s...
《UTF 8與GB2312之間的互換》的改進
在utf-8與unicode之間轉換的時候,用二進位制運算,代替了字串的轉換。utf-8一個漢字,用3個位元組,而unicode用2個位元組。對應關係如下 utf-8編碼 1,1,1,0,a5,a6,a7,a8 1,0,b3,b4,b5,b6,b7,b8 1,0,c3,c4,c5,c6,c7,c8...
在PHP中進行GB2312與UTF 8的互換。
因為證書中有中文,所以需要在php中進行gb2312與utf 8的互換。網上搜尋一下這方面相關資料,說是需要php iconv.dll的支援,可是我在php5資料夾中根本找不到這個檔案,但是奇怪的是在php4中有這個,然後我將php4中的php iconv.dll檔案,複製到system32下,卻提...