utf8與ansi之間的轉換

2021-06-08 14:25:03 字數 3004 閱讀 9765

//#include "stdafx.h"

#include #include #include #include #include #include namespace fs = boost::filesystem;

// Element count of the fixed-size conversion buffers declared in the
// conversion routines below (wchar_t[count] and char[count<<1]).
const int count = 1024;

// Status strings; presumably the values the conversion routines return
// through their std::string result -- TODO confirm against the full source.
const std::string ret_success = "success";

// Message text: "source file open error".
const std::string ret_src_open = "error:原始檔開啟錯誤";

// Message text: "destination file open error".
const std::string ret_dst_open = "error:目標檔案開啟錯誤";

// Message text: "a line in the file has too many characters".
const std::string ret_over_line = "error:檔案中某行字元數過大";

// Message text: "conversion type is incorrect".
const std::string ret_type_convert = "error:轉換型別不正確";

// NOTE(review): the enumerator list of this enum is not present in the text
// as given (presumably lost when the listing was extracted) -- restore it.
typedef enum type_convert; // conversion type

std::string utf8_ansi(std::string utf8_path, std::string ansi_path); // convert utf8 to ansi

std::string ansi_utf8(std::string ansi_path, std::string utf8_path); // convert ansi to utf8

// NOTE(review): std::vector has no template argument here; it was presumably
// std::vector<std::string> before the angle brackets were stripped -- confirm.
int get_filenames(const std::string& dir, std::vector& filenames); // get all file names under a directory

void code_convert(const std::string& src_dir,

const std::string& dst_dir,

type_convert type_convert,

std::string expanded_names = ".h.cpp.txt"); // convert every file with one of the given extensions in the directory; paths use "//"

// Program entry point.
// NOTE(review): no body follows this header in the text as given; it was
// presumably lost in extraction and must be restored before this compiles.
int _tmain(int argc, _tchar* argv)

// Converts text line by line from UTF-8 to the ANSI code page: each line is
// widened with cp_utf8 and then narrowed with cp_acp.
// Takes the UTF-8 source path and the ANSI destination path; returns `ret`.
// NOTE(review): the braces, the declarations of fsrc/fdst/ret, the statement
// that reads `line`, and the statements that write to fdst are absent from
// this text (presumably lost in extraction). The Win32 names also appear
// lowercased (documented spellings: ZeroMemory, MultiByteToWideChar,
// WideCharToMultiByte, CP_UTF8, CP_ACP) -- confirm against the original.
std::string utf8_ansi(std::string utf8_path, std::string ansi_path)

// Check that the destination stream opened (the handling branch is missing).
if (!fdst.is_open())

std::string line;

// Wide-character scratch buffer holding one converted line.
wchar_t wstr[count];

// Narrow output buffer, twice as many elements as the wide buffer.
char src_path[count<<1];

bool is_first_line = true;

// NOTE(review): looping on eof() usually processes one extra, empty read
// after the last line -- confirm how `line` is read in the full source.
while (!fsrc.eof())

// Both buffers are zeroed each iteration; wcslen/strlen below depend on
// that for their terminators.
::zeromemory(wstr, sizeof(wstr));

::zeromemory(src_path, sizeof(src_path));

// UTF-8 bytes -> wide characters (at most `count` of them).
::multibytetowidechar(cp_utf8, 0, line.c_str(), line.size(), wstr, count);

// Wide characters -> ANSI code page bytes.
::widechartomultibyte(cp_acp, 0, wstr, wcslen(wstr), src_path, count<<1, 0, 0);

int str_len = strlen(src_path);

// Append a newline after the converted text.
// NOTE(review): if the conversion fills the whole buffer, str_len indexes
// past the end -- verify that line length is bounded before this point.
src_path[str_len] = '\n';

if (is_first_line && (0x3f==src_path[0])) // skip the first character of the first line (0x3f is '?'; presumably the BOM after conversion -- confirm)

else

}fsrc.close();

fdst.close();

return ret;

}std::string ansi_utf8(std::string ansi_path, std::string utf8_path)
// Converts text line by line from the ANSI code page to UTF-8: a 3-byte file
// header is written first, then each line is widened with cp_acp and
// narrowed with cp_utf8.
// Takes the ANSI source path and the UTF-8 destination path; returns `ret`.
// NOTE(review): the braces, the declarations of fsrc/fdst/ret, and the
// statement that reads `line` are absent from this text (presumably lost in
// extraction); the Win32 names also appear lowercased -- confirm.

// Check that the destination stream opened (the handling branch is missing).
if (!fdst.is_open())

// NOTE(review): the initializer is missing; presumably the UTF-8 byte-order
// mark 0xEF, 0xBB, 0xBF -- confirm against the original.
unsigned char head[3] = ;

fdst.write((char*)head, 3); // utf8 file header

std::string line;

// Wide-character scratch buffer holding one converted line.
wchar_t wstr[count];

// Narrow output buffer, twice as many elements as the wide buffer.
char str[count<<1];

// NOTE(review): looping on eof() usually processes one extra, empty read
// after the last line -- confirm how `line` is read in the full source.
while (!fsrc.eof())

// Both buffers are zeroed each iteration; wcslen/strlen below depend on
// that for their terminators.
::zeromemory(str, sizeof(str));

::zeromemory(wstr, sizeof(wstr));

// ANSI code page bytes -> wide characters (at most `count` of them).
::multibytetowidechar(cp_acp, 0, line.c_str(), line.size(), wstr, count);

// Wide characters -> UTF-8 bytes.
::widechartomultibyte(cp_utf8, 0, wstr, wcslen(wstr), str, count<<1, 0, 0);

int len = strlen(str);

// Append a newline and write the converted line including it.
// NOTE(review): if the conversion fills the whole buffer, len indexes past
// the end -- verify that line length is bounded before this point.
str[len] = '\n';

fdst.write(str, len+1);

} fsrc.close();

fdst.close();

return ret;

}int get_filenames(const std::string& dir, std::vector& filenames)
// Walks a directory with fs::directory_iterator and returns the resulting
// size of `filenames`.
// NOTE(review): the braces, the declaration of `path` (presumably built from
// `dir`), and the loop body that fills `filenames` are absent from this text;
// std::vector is also missing its template argument -- confirm against the
// original.

// A default-constructed directory_iterator serves as the end marker.
fs::directory_iterator end_iter;

for (fs::directory_iterator iter(path); iter!=end_iter; ++iter)

// Directory entries are tested here; the branch body is missing.
if (fs::is_directory(iter->status()))

}return filenames.size();

}void code_convert(const std::string& src_dir,

const std::string& dst_dir,

type_convert type_convert,

std::string expanded_names)
// Collects the file names under src_dir through get_filenames and then
// iterates over them.
// NOTE(review): the braces, the declarations of src_filenames and iter1, both
// branches of the first if/else, and the whole per-file loop body are absent
// from this text (presumably lost in extraction), so the use of dst_dir,
// type_convert and expanded_names cannot be seen here -- confirm against the
// original.

// Branch on whether any file names were found under src_dir.
if (get_filenames(src_dir, src_filenames) > 0)

else

}int num = 0;

for (iter1=src_filenames.begin(); iter1!=src_filenames.end(); ++iter1)

}} }

}

ANSI與UTF-8編碼轉換

將ansi編碼轉換為utf-8在windows mfc環境下測試下面的 static int ansi2utf8 in const char csrc,out char cdest 以下 將utf-8 轉換為gb2312 int utf8togb2312 const char sourcebuf,si...

UTF-8與GBK字元之間的轉換

1.utf-8轉換為unicode編碼 utf-8編碼不能直接轉換為gbk漢字編碼,中間需要先轉換為unicode編碼,再由unicode編碼轉換為gbk漢字編碼 2.unicode編碼轉換為gbk漢字編碼 unicode漢字編碼與gbk漢字編碼的對照關係為,兩個unicode編碼對應一個漢字,並且...

Unicode和UTF-8之間的轉換

unicode是一個字符集,而utf-8是unicode的其中一種編碼方式,unicode是定長的都為雙位元組,而utf-8是可變的,對於漢字來說unicode占有的位元組比utf-8占用的位元組少1個位元組。unicode為雙位元組,而utf-8中漢字佔三個位元組。utf-8編碼字元理論上可以最多到6個位...