惯性聚合 高效追踪和阅读你感兴趣的博客、新闻、科技资讯
阅读原文 在惯性聚合中打开

推荐订阅源

Simon Willison's Weblog
Simon Willison's Weblog
P
Privacy International News Feed
www.infosecurity-magazine.com
www.infosecurity-magazine.com
T
Troy Hunt's Blog
Hacker News - Newest:
Hacker News - Newest: "LLM"
Attack and Defense Labs
Attack and Defense Labs
S
Secure Thoughts
V2EX - 技术
V2EX - 技术
cs.AI updates on arXiv.org
cs.AI updates on arXiv.org
O
OpenAI News
Cloudbric
Cloudbric
Google Online Security Blog
Google Online Security Blog
Schneier on Security
Schneier on Security
cs.CV updates on arXiv.org
cs.CV updates on arXiv.org
Help Net Security
Help Net Security
Cyberwarzone
Cyberwarzone
G
GRAHAM CLULEY
L
Lohrmann on Cybersecurity
Threat Intelligence Blog | Flashpoint
Threat Intelligence Blog | Flashpoint
Spread Privacy
Spread Privacy
NISL@THU
NISL@THU
N
News and Events Feed by Topic
T
Tenable Blog
S
Security @ Cisco Blogs
N
News and Events Feed by Topic
The Hacker News
The Hacker News
C
CXSECURITY Database RSS Feed - CXSecurity.com
宝玉的分享
宝玉的分享
月光博客
月光博客
酷 壳 – CoolShell
酷 壳 – CoolShell
美团技术团队
奇客Solidot–传递最新科技情报
奇客Solidot–传递最新科技情报
Google DeepMind News
Google DeepMind News
钛媒体:引领未来商业与生活新知
钛媒体:引领未来商业与生活新知
T
Tailwind CSS Blog
V
Visual Studio Blog
P
Proofpoint News Feed
Webroot Blog
Webroot Blog
让小产品的独立变现更简单 - ezindie.com
让小产品的独立变现更简单 - ezindie.com
博客园 - 三生石上(FineUI控件)
cs.CL updates on arXiv.org
cs.CL updates on arXiv.org
Jina AI
Jina AI
雷峰网
雷峰网
T
The Blog of Author Tim Ferriss
Hugging Face - Blog
Hugging Face - Blog
腾讯CDC
L
LangChain Blog
The Register - Security
The Register - Security
OSCHINA 社区最新新闻
OSCHINA 社区最新新闻
博客园 - 聂微东

博客园 - waterflier

下载测试 Blog很久没更新了 好久没更新了 再见了!F1的王者 好久没写blog了 MSVC中的"pseudo register"调试技术 - waterflier - 博客园 最近想写的几篇文章 使用感想:VS2005的优点和缺点 Linux下CVS的配置 Linux下iptables的配置 一个困扰我3天的问题解决了 跨平台程序的UNICODE字符串处理方法。 DeadLine is coming 开始在新公司工作. 在深圳安顿下来了~ 毕业了! 断网两天重生纪念! 在Red Hat Linux9.0下安装PVM3 PVM Group失败的原因
跨平台的UTF8/GBK转换以及GBK/wchar_t转换代码,支持std::string
waterflier · 2006-03-06 · via 博客园 - waterflier

关于unicode、各种编码等国际化的技术原理,可以参看我blog上的文章。最近的项目里要用到GBK->wchar_t、wchar_t->UTF8,所以对这部分功能做了些简单封装。其实对于国际化技术的封装,无非就是:

DBCS <=> wchar_t .

wchar_t <=> 各种unicode编码 比如说UTF8,UTF16等。

这样的转换都是绝对可以成功的。像GBK<->BIG5这种dbcs<=>dbcs的转换就不一定能成功了。

wchar_t作为C++的字符串内部处理用类型,主要原因是各种字符串函数都有wchar_t作为接口的版本,方便使用。wchar_t的长度是由编译器和平台实现决定的,所以请记住:处理wchar_t的时候,千万不要关心它的长度。如果你的代码对wchar_t的长度特别关心,这个时候你需要的应该是一种标准的unicode编码。

(Linux下的wchar_t长度为4byte,好心痛)

罗嗦这么多,各位看官久等了。上代码。

   class string_util
 {
 public:

#ifndef _UNIX
    //我的程序只需要支持GBK,各位可以在这里加上Linux下的编码名字与windows下的codepage的

对应关系进行扩展。
    static inline int codepage(const char* code_page)
    {
        return 936;//"GBK"
    }
 #endif

     static inline int dbcs2wchar(const char* code_page,/*in*/const char* in,int

in_len,
                                                /*out*/wchar_t* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open("WCHAR_T",code_page);
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)

&out_max);
         iconv_close(env);
         return (int) result;
#else
         return ::MultiByteToWideChar(codepage(code_page),0,in,in_len,out,out_max);
#endif
     }

     static inline int dbcs2wchar(const char* code_page,/*in*/const string&

in,/*out*/wstring& out)
     {
         int len = in.length() + 1;
         int result;
         wchar_t* pBuffer = new wchar_t[len];
         memset(pBuffer,0,len*sizeof(wchar_t));
         result = dbcs2wchar(code_page,in.c_str(),in.length(),pBuffer,len*sizeof

(wchar_t));
         if(pBuffer >= 0)
         {
            out = pBuffer;
         }
         else
         {
             out.clear();
         }
         delete[] pBuffer;
         return result;
     }

     static inline int wchar2dbcs(const char* code_page,/*in*/const wchar_t* in,int

in_len,
                                                 /*out*/char* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open(code_page,"WCHAR_T");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)

&out_max);
         iconv_close(env);
         return (int) result;
#else
         BOOL use_def_char;
         use_def_char = FALSE;
         return ::WideCharToMultiByte(codepage(code_page),0,in,in_len/sizeof

(wchar_t),out,out_max,"?",&use_def_char);
#endif  
     }

     static inline int wchar2dbcs(const char* code_page,/*in*/const wstring&

in,/*out*/string& out)
     {
         int len = in.length() + 1;
         int result;
         char* pBuffer = new char[len*3];
         memset(pBuffer,0,len*3);
         result = wchar2dbcs(code_page,in.c_str(),in.length() * sizeof

(wchar_t),pBuffer,len*3);
         if(result >= 0)
         { 
             out = pBuffer;
         }
         else
         {
             out = "";
         }
         delete[] pBuffer;
         return result;
     }

     static inline int wchar2utf8(/*in*/const wchar_t* in,int in_len,
                                               /*out*/char* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
    
         env = iconv_open("UTF8","WCHAR_T");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)

&out_max);    
         iconv_close(env);
         return (int) result;
#else
         BOOL use_def_char;
         use_def_char = FALSE;
         return ::WideCharToMultiByte(CP_UTF8,0,in,in_len/sizeof

(wchar_t),out,out_max,NULL,NULL);
#endif
     }
   
     static inline int wchar2utf8(/*in*/const wstring& in,/*out*/string& out)
     {
         int len = in.length() + 1;
         int result;
         char* pBuffer = new char[len*3];
         memset(pBuffer,0,len*3);  
    
     
         result = wchar2utf8(in.c_str(),in.length() * sizeof(wchar_t),pBuffer,len*3); 
    
   
         if(result >= 0)
         {
             out = pBuffer;
         }
         else
         {
             out = "";
         }
         delete[] pBuffer;
         return result;
     }
   
     static inline int utf82wchar(/*in*/const char* in,int in_len,
                                                /*out*/wchar_t* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open("WCHAR_T","UTF8");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)

&out_max);
         iconv_close(env);
         return (int) result;
#else
         return ::MultiByteToWideChar(CP_UTF8,0,in,in_len,out,out_max);
#endif
     }

     static inline int utf82wchar(/*in*/const string& in,/*out*/wstring& out)
     {
         int len = in.length() + 1;
         int result;
         //wstring temp;
         wchar_t* pBuffer = new wchar_t[len];
         memset(pBuffer,0,len*sizeof(wchar_t));
         result = utf82wchar(in.c_str(),in.length(),pBuffer,len*sizeof(wchar_t));
         //printf("utf82wchar result is %d,errno is %s\n",result,strerror(errno));
         if(result >= 0)
         {
            out = pBuffer;
         }
         else
         {
            out.clear();     
         }
         delete[] pBuffer;
         return result;
     }
};