UNICODE編碼細節與個人使用總結

1、TCHAR、Unicode、char、wchar_t 之間的關係

經常發現有的人愛用strcpy等標準ANSI函式，有的人愛用_tXXX系列函式，這個問題曾經搞得很混亂。為了統一，有必要搞清楚它們之間的關係。

為了搞清這些函式，就必須理清幾種字元型別的寫法。char就不用說了，先說一下wchar_t。wchar_t是Unicode字元的資料型別，它實際定義在標準標頭檔（例如<wchar.h>）裡：

typedef unsigned short wchar_t;

不能使用類似strcpy這樣的ANSI C字串函式來處理wchar_t字串，必須使用wcs字首的函式，例如wcscpy。為了讓編譯器識別Unicode字串，必須在字串常值前面加一個「L」，例如：

wchar_t *sztest = L"this is a unicode string.";

下面再看看TCHAR。如果你希望原始碼能同時編譯成ANSI和Unicode版本，那就要include tchar.h。TCHAR是定義在其中的一個巨集，它視你是否定義了_UNICODE巨集而定義成char或者wchar_t。如果你使用了TCHAR，那麼就不應該使用ANSI的str***函式或者Unicode的wcs***函式了，而必須使用tchar.h中定義的_tcs***函式。另外，為了解決剛才提到的「L」字首問題，tchar.h中定義了一個巨集：「_TEXT」。

以strcpy函式為例子，總結一下:

.如果你想使用ANSI字串，那麼請使用這一套寫法：

char szstring[100];

strcpy(szstring,"test");

.如果你想使用Unicode字串，那麼請使用這一套：

wchar_t szstring[100];

wcscpy(szstring, L"test");

.如果你想通過是否定義_UNICODE巨集，來決定編譯成ANSI或者Unicode字串的程式碼：

TCHAR szstring[100];

_tcscpy(szstring, _TEXT("test"));

2、增加Unicode巨集定義：UNICODE、_UNICODE

3、如何在偵錯程式時顯示Unicode字元：需要在VC開發工具「Tools」—>「Options」—>「Debug」頁中勾選「Display unicode strings」選項。如圖

4、使用Unicode的問題：設定wWinMainCRTStartup為程式入口

Project -> Settings -> Link，在Category中選擇Output，在Entry-point symbol中加上wWinMainCRTStartup

5、幾種編碼之間的轉換

//utf8格式轉換成gb格式

cstring convertutf8togbk(cstring strutf8)

int len=multibytetowidechar(cp_utf8, 0, (lpcstr)strutf8.getbuffer(0), -1, null,0);

unsigned short * wszgbk = new unsigned short[len+1];

memset(wszgbk, 0, len * 2 + 2);

multibytetowidechar(cp_utf8, 0, (lpcstr)strutf8.getbuffer(0), -1, wszgbk, len);

len = widechartomultibyte(cp_acp, 0, wszgbk, -1, null, 0, null, null);

char *szgbk=new char[len + 1];

memset(szgbk, 0, len + 1);

widechartomultibyte (cp_acp, 0, wszgbk, -1, szgbk, len, null,null);

cstring strgbk;

strgbk = szgbk;

delete szgbk;

delete wszgbk;

return strgbk;

//gb格式轉換成utf8格式

cstring convertgbktoutf8(cstring strgbk)

int len=multibytetowidechar(cp_acp, 0, (lpcstr)strgbk.getbuffer(0), -1, null,0);

unsigned short * wszutf8 = new unsigned short[len+1];

memset(wszutf8, 0, len * 2 + 2);

multibytetowidechar(cp_acp, 0, (lpcstr)strgbk.getbuffer(0), -1, wszutf8, len);

len = widechartomultibyte(cp_utf8, 0, wszutf8, -1, null, 0, null, null);

char *szutf8=new char[len + 1];

memset(szutf8, 0, len + 1);

widechartomultibyte (cp_utf8, 0, wszutf8, -1, szutf8, len, null,null);

cstring sutf_8;

sutf_8=szutf8;

delete szutf8;

delete wszutf8;

return sutf_8;

//字串轉換float

float strtofloat(cstring str)

char a[max_path];

memset(a, 0, max_path);

widechartomultibyte(cp_acp, 0,(lpcwstr)str, -1, a, max_path, null, null);

float f = (float)atof(a);

return f;

//utf8轉換成unicdoe

wchar_t* u8tounicode(const char* szu8)

int wcslen = ::multibytetowidechar(cp_utf8, null, szu8, strlen(szu8), null, 0);

wchar_t* wszstring = new wchar_t[wcslen + 1];

::multibytetowidechar(cp_utf8, null, szu8, strlen(szu8), wszstring, wcslen);

wszstring[wcslen] = '/0';

return wszstring;

//unicode轉換成utf8

char* unicodetou8(wchar_t* wszstring)

int u8len = ::widechartomultibyte(cp_utf8, null, wszstring, wcslen(wszstring), null, 0, null, null);

char* szu8 = new char[u8len + 1];

::widechartomultibyte(cp_utf8, null, wszstring, wcslen(wszstring), szu8, u8len, null, null);

szu8[u8len] = '/0';

return szu8;

//unicode轉換成ansi

char* unicodetoansi(wchar_t* wszstring)

int ansilen = ::widechartomultibyte(cp_acp, null, wszstring, wcslen(wszstring), null, 0, null, null);

char* szansi = new char[ansilen + 1];

::widechartomultibyte(cp_acp, null, wszstring, wcslen(wszstring), szansi, ansilen, null, null);

szansi[ansilen] = '/0';

return szansi;

UNICODE編碼細節與個人使用總結

UNICODE編碼細節與個人使用總結

Unicode與UTF 8編碼規則轉換

Unicode是什麼編碼，與ASCII的關係

UNICODE編碼細節與個人使用總結

UNICODE編碼細節與個人使用總結

Unicode與UTF 8編碼規則轉換

Unicode是什麼編碼，與ASCII的關係

相關推薦