惯性聚合 高效追踪和阅读你感兴趣的博客、新闻、科技资讯
阅读原文 在惯性聚合中打开

推荐订阅源

cs.AI updates on arXiv.org
cs.AI updates on arXiv.org
C
CERT Recently Published Vulnerability Notes
C
Cybersecurity and Infrastructure Security Agency CISA
P
Proofpoint News Feed
Security Latest
Security Latest
P
Privacy International News Feed
Threat Intelligence Blog | Flashpoint
Threat Intelligence Blog | Flashpoint
AI
AI
Cisco Talos Blog
Cisco Talos Blog
K
Kaspersky official blog
S
Secure Thoughts
PCI Perspectives
PCI Perspectives
Simon Willison's Weblog
Simon Willison's Weblog
D
DataBreaches.Net
GbyAI
GbyAI
让小产品的独立变现更简单 - ezindie.com
让小产品的独立变现更简单 - ezindie.com
大猫的无限游戏
大猫的无限游戏
T
Tailwind CSS Blog
The Cloudflare Blog
阮一峰的网络日志
阮一峰的网络日志
CTFtime.org: upcoming CTF events
CTFtime.org: upcoming CTF events
罗磊的独立博客
V
Visual Studio Blog
aimingoo的专栏
aimingoo的专栏
H
Hackread – Cybersecurity News, Data Breaches, AI and More
IT之家
IT之家
V
V2EX
Last Week in AI
Last Week in AI
有赞技术团队
有赞技术团队
月光博客
月光博客
酷 壳 – CoolShell
酷 壳 – CoolShell
T
Tenable Blog
T
Threat Research - Cisco Blogs
T
Troy Hunt's Blog
V2EX - 技术
V2EX - 技术
S
Security @ Cisco Blogs
Security Archives - TechRepublic
Security Archives - TechRepublic
Project Zero
Project Zero
The GitHub Blog
The GitHub Blog
Recent Commits to openclaw:main
Recent Commits to openclaw:main
L
Lohrmann on Cybersecurity
F
Full Disclosure
H
Help Net Security
博客园 - Franky
Stack Overflow Blog
Stack Overflow Blog
N
Netflix TechBlog - Medium
Engineering at Meta
Engineering at Meta
A
Arctic Wolf
O
OpenAI News
S
Securelist

博客园 - waterflier

下载测试 Blog很久没更新了 好久没更新了 再见了!F1的王者 好久没写blog了 MSVC中的"pseudo register"调试技术 - waterflier - 博客园 最近想写的几篇文章 使用感想:VS2005的优点和缺点 Linux下CVS的配置 Linux下iptables的配置 一个困扰我3天的问题解决了 跨平台程序的UNICODE字符串处理方法。 DeadLine is coming 开始在新公司工作. 在深圳安顿下来了~ 毕业了! 断网两天重生纪念! 在Red Hat Linux9.0下安装PVM3 PVM Group失败的原因
跨平台的UTF8<->GBK转换以及GBK<->wchar_t转换代码,支持std::string
waterflier · 2006-03-06 · via 博客园 - waterflier

关于unicode、各种编码等国际化的技术原理,可以参看我blog上的文章。最近的项目里要用到GBK->wchar_t、wchar_t->UTF8,所以对这部分功能做了些简单封装。其实对于国际化技术的封装,无非就是:

DBCS <=> wchar_t .

wchar_t <=> 各种unicode编码 比如说UTF8,UTF16等。

这样的转换都是绝对可以成功的。像GBK<->BIG5这种dbcs<=>dbcs的转换就不一定能成功了。

wchar_t作为C++的字符串内部处理用类型,主要原因是各种字符串函数都有以wchar_t作为接口的版本,方便使用。wchar_t的长度是由编译器和平台实现决定的,所以请记住:处理wchar_t的时候,千万不要关心它的长度。如果你的代码对wchar_t的长度特别关心,这个时候你需要的应该是一种标准的unicode编码。

(Linux下的wchar_t长度为4byte,好心痛)

罗嗦这么多,各位看官久等了。上代码。

   class string_util
 {
 public:

#ifndef _UNIX
    //我的程序只需要支持GBK,各位可以在这里加上Linux下的编码名字与windows下的codepage的

对应关系进行扩展。
    static inline int codepage(const char* code_page)
    {
        return 936;//"GBK"
    }
 #endif

     static inline int dbcs2wchar(const char* code_page,/*in*/const char* in,int

in_len,
                                                /*out*/wchar_t* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open("WCHAR_T",code_page);
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)

&out_max);
         iconv_close(env);
         return (int) result;
#else
         return ::MultiByteToWideChar(codepage(code_page),0,in,in_len,out,out_max);
#endif
     }

     static inline int dbcs2wchar(const char* code_page,/*in*/const string&

in,/*out*/wstring& out)
     {
         int len = in.length() + 1;
         int result;
         wchar_t* pBuffer = new wchar_t[len];
         memset(pBuffer,0,len*sizeof(wchar_t));
         result = dbcs2wchar(code_page,in.c_str(),in.length(),pBuffer,len*sizeof

(wchar_t));
         if(pBuffer >= 0)
         {
            out = pBuffer;
         }
         else
         {
             out.clear();
         }
         delete[] pBuffer;
         return result;
     }

     static inline int wchar2dbcs(const char* code_page,/*in*/const wchar_t* in,int

in_len,
                                                 /*out*/char* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open(code_page,"WCHAR_T");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)

&out_max);
         iconv_close(env);
         return (int) result;
#else
         BOOL use_def_char;
         use_def_char = FALSE;
         return ::WideCharToMultiByte(codepage(code_page),0,in,in_len/sizeof

(wchar_t),out,out_max,"?",&use_def_char);
#endif  
     }

     static inline int wchar2dbcs(const char* code_page,/*in*/const wstring&

in,/*out*/string& out)
     {
         int len = in.length() + 1;
         int result;
         char* pBuffer = new char[len*3];
         memset(pBuffer,0,len*3);
         result = wchar2dbcs(code_page,in.c_str(),in.length() * sizeof

(wchar_t),pBuffer,len*3);
         if(result >= 0)
         { 
             out = pBuffer;
         }
         else
         {
             out = "";
         }
         delete[] pBuffer;
         return result;
     }

     static inline int wchar2utf8(/*in*/const wchar_t* in,int in_len,
                                               /*out*/char* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
    
         env = iconv_open("UTF8","WCHAR_T");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)

&out_max);    
         iconv_close(env);
         return (int) result;
#else
         BOOL use_def_char;
         use_def_char = FALSE;
         return ::WideCharToMultiByte(CP_UTF8,0,in,in_len/sizeof

(wchar_t),out,out_max,NULL,NULL);
#endif
     }
   
     static inline int wchar2utf8(/*in*/const wstring& in,/*out*/string& out)
     {
         int len = in.length() + 1;
         int result;
         char* pBuffer = new char[len*3];
         memset(pBuffer,0,len*3);  
    
     
         result = wchar2utf8(in.c_str(),in.length() * sizeof(wchar_t),pBuffer,len*3); 
    
   
         if(result >= 0)
         {
             out = pBuffer;
         }
         else
         {
             out = "";
         }
         delete[] pBuffer;
         return result;
     }
   
     static inline int utf82wchar(/*in*/const char* in,int in_len,
                                                /*out*/wchar_t* out,int out_max)
     {
#ifdef _UNIX
         size_t result;
         iconv_t env;
         env = iconv_open("WCHAR_T","UTF8");
         result = iconv(env,(char**)&in,(size_t*)&in_len,(char**)&out,(size_t*)

&out_max);
         iconv_close(env);
         return (int) result;
#else
         return ::MultiByteToWideChar(CP_UTF8,0,in,in_len,out,out_max);
#endif
     }

     static inline int utf82wchar(/*in*/const string& in,/*out*/wstring& out)
     {
         int len = in.length() + 1;
         int result;
         //wstring temp;
         wchar_t* pBuffer = new wchar_t[len];
         memset(pBuffer,0,len*sizeof(wchar_t));
         result = utf82wchar(in.c_str(),in.length(),pBuffer,len*sizeof(wchar_t));
         //printf("utf82wchar result is %d,errno is %s\n",result,strerror(errno));
         if(result >= 0)
         {
            out = pBuffer;
         }
         else
         {
            out.clear();     
         }
         delete[] pBuffer;
         return result;
     }
};