C 編碼轉換

最近在 C++ 專案中經常會遇到編碼轉換問題，例如 ASCII 碼、UTF-8、多位元組等等，特此總結下來並附上程式碼，也方便大家學習和使用。

#include enum u78 ;
typedef unsigned short utf16; // 16 bits
typedef unsigned char utf8; // 8 bits
typedef unsigned char ubyte;
enum unimode ;
u78 utf8_7bits_8bits(utf8 *pucbuf, dword dwbuflen);
unimode determineencoding(utf8 *pucbuf, dword dwbuflen);
int convert_to_multibyte(unimode encode, const char *psrcbuf, int isrclen, char *pdstbuf, int idstlen);
// returned value :
// 0 : utf8
// 1 : 7bits
// 2 : 8bits
u78 utf8_7bits_8bits(utf8 *pucbuf, dword dwbuflen)
sx+=2;
} else if (*sx < (0x80 + 0x40 + 0x20 + 0x10))
sx+=3;
} else 
}if (ascii7only) 
return ascii7bits;
if (rv)
return utf8nobom;
return ascii8bits;}/*
【函式功能】轉換編碼格式為多位元組編碼
【函式輸入】encode:待轉換buf的編碼格式，參見unimode
psrcbuf:待轉換的buf
isrclen:待轉換的buf位元組長度
pdstbuf:接收轉換的buf
idstlen:接收轉換的buf長度
【函式輸出】無            
【函式返回】轉換的位元組長度
【修改記錄】
20150401 zhouw utf16編碼格式解析修改
*/

int convert_to_multibyte(unimode encode, const char *psrcbuf, int isrclen, char *pdstbuf, int idstlen)
case uniutf8:      // utf8帶標識頭
case uniutf8_nobom: // utf8不帶標識頭
else if (ptmpbuf[i] < (0x80 + 0x40))
else if (ptmpbuf[i] < (0x80 + 0x40 + 0x20))
i += 2;
}else if (ptmpbuf[i] < (0x80 + 0x40 + 0x20 + 0x10))
i += 3;
}else
// 每次轉換不超過1024位元組
if (j >= 1024 )
}// 轉換最後剩餘的字元
if (j > 0)
break;
}case uni16be:
case uni16be_nobom:
case uniend:
default:
}return ret;}/*
【函式功能】判斷所給內容的編碼型別
【函式輸入】pucbuf:待檢測編碼格式的指標
dwbuflen:待檢測內容的位元組長度
【函式輸出】無            
【函式返回】編碼型別，參見unimode定義，成功，返回文字長度；失敗，返回錯誤碼
*/

unimode determineencoding(utf8 *pucbuf, dword dwbuflen)
,  // unknown
,  // utf8
,  // big endian
,  // little endian
};// 檢測帶標識頭的utf-16大頭編碼
if (dwbuflen > 1 && pucbuf[0] == k_boms[uni16be][0] && pucbuf[1] == k_boms[uni16be][1])
// detect utf-16 little-endian with bom
else if (dwbuflen > 1 && pucbuf[0] == k_boms[uni16le][0] && pucbuf[1] == k_boms[uni16le][1])
// detect utf-8 with bom
else if (dwbuflen > 2 && pucbuf[0] == k_boms[uniutf8][0] && 
pucbuf[1] == k_boms[uniutf8][1] && pucbuf[2] == k_boms[uniutf8][2])
// 檢測不帶標識頭的 utf-16 小頭編碼
/*else if (m_nlen > 1 && m_pbuf[0] != null && m_pbuf[1] == null && istextunicode(m_pbuf, m_nlen, null))
else if (m_nlen > 1 && m_pbuf[0] == null && m_pbuf[1] != null)
*/else
else if (detectedencoding == ascii7bits)
eencoding = uni7bit;
else //(detectedencoding == ascii8bits)
}else}}
sx++;}}
}return eencoding;
}void test_code_type()
;    char szdstbuf[512];
int  idstbuflen = sizeof(szdstbuf);
int iconvlen;
// 直接輸入編碼格式，適用於已知的編碼型別
iconvlen = convert_to_multibyte(uniutf8_nobom, szbuf, sizeof(szbuf), szdstbuf, idstbuflen);
// 未知的編碼格式，要先判斷編碼型別
unimode um = determineencoding((utf8*)szbuf, sizeof(szbuf));
iconvlen = convert_to_multibyte(um, szbuf, sizeof(szbuf), szdstbuf, idstbuflen);
}

C 文字編碼轉換

1. C# 的編碼轉換預設由 System.Text.Encoding 進行操控轉換。引用為 using System.Text 2. C# Encoding 類自帶編碼有 UTF7、UTF8、UTF32、Unicode、ASCII，Encoding 類有一個子類就是 Default，此類會隨操作環境變化而變化，在w...

C 各類編碼轉換

字串轉unicode 源字串 unicode編碼後的字串 internal static string string2unicode string source return stringbuilder.tostring unicode轉字串經過unicode編碼的字串正常字串 internal...

C 中的編碼轉換

好長時間沒有寫編碼轉換。今天寫了下，特地放上來供大家以後用到就不用去找了。將一個字串轉換成 Unicode 型別的 Base64 編碼的字串如下：Convert.ToBase64String(Encoding.Unicode.GetBytes(unicodeString))。紅色標明的是還可以轉換bigendia...

C 編碼轉換

C 文字編碼轉換

C 各類編碼轉換

C 中的編碼轉換

相關推薦