首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > C++ >

宽字符有关问题

2012-02-22 
宽字符问题 #include <fstream> #include <string> void main() { wstring s = L"123"; wofstream of("1.dat"); of << s; …

宽字符问题
#include   <fstream>
#include   <string>

// Minimal reproduction from the question: stream the wide string L"123"
// into "1.dat" through a wofstream that uses the default locale.
// According to the discussion on this page the resulting file holds 3 bytes,
// not 6, because the default wchar_t -> char conversion emits one byte per
// character.
int main()
{
    std::wstring s = L"123";
    std::wofstream of("1.dat");

    of << s;
    of.close();
    return 0;
}

上面代码输出的文件为什么只有3个字节?而不是6个字节?
std对宽字符怎么处理的?

[解决办法]
一年多前,曾经烦恼过这个wofstream和wifstream。
追踪fstream源码,可知这与codecvt有关。使用wofstream时,默认的codecvt把每个宽字符只转换成一个字节再输出到文件;同样,使用wifstream时,每读入一个字节便扩展成一个宽字符(高位字节补0;大致如此,具体细节记不太清楚)。因此,你需要重新设计一个codecvt,来处理输入和输出都是wchar_t的情形。幸运的是,编写VC的STL库的P.J. Plauger曾经写过这样的一个转换程序,见下面:
using namespace std;
typedef codecvt<wchar_t, char, mbstate_t> Mybase;

// Conversion facet that stores each wchar_t as one 16-bit unit (two bytes)
// in the external byte stream instead of narrowing it to a single byte.
//
// Byte order:
//   * The conversion state starts out little-endian.
//   * Writing wchar_t 0xFEFF emits the byte order mark FF FE and selects
//     little-endian output; writing 0xFFFE emits FE FF and selects
//     big-endian output.
//   * On input a byte order mark is consumed without producing a character;
//     a byte-swapped mark additionally flips the byte order used for the
//     rest of the stream.
//
// The current byte order is kept in the first byte of the mbstate_t that the
// stream hands to every call (0 = little-endian, non-zero = big-endian), so a
// value-initialised state is the little-endian initial state.
//
// Values that do not fit in 16 bits (possible where wchar_t is wider than
// 16 bits) make do_out() return `error` rather than being truncated.
class Fancier_codecvt : public Mybase {
public:
    // Kept so that code written against the original declaration still builds.
    typedef wchar_t _E;
    typedef char _To;
    typedef mbstate_t _St;

    // refs follows the locale::facet convention: 0 lets the owning locale
    // delete the facet, non-zero leaves its lifetime to the caller.
    explicit Fancier_codecvt(size_t refs = 0)
        : Mybase(refs) {}

protected:
    // Decodes as many complete two-byte units from [from, from_end) as fit
    // into [to, to_end).
    // Returns ok when all input was consumed, partial when a trailing odd
    // byte or a full output buffer stopped the conversion early.
    result do_in(state_type& state,
                 const extern_type* from, const extern_type* from_end,
                 const extern_type*& from_next,
                 intern_type* to, intern_type* to_end,
                 intern_type*& to_next) const override
    {
        from_next = from;
        to_next = to;
        bool big_endian = is_big_endian(state);
        result outcome = ok;

        while (from_next != from_end) {
            if (from_end - from_next < 2) {
                // Only half of a unit is available; wait for the other byte.
                outcome = partial;
                break;
            }
            const unsigned int unit = decode_unit(from_next, big_endian);
            if (unit == swapped_bom_unit) {
                // A mark read with the wrong byte order: flip and skip it.
                big_endian = !big_endian;
                from_next += 2;
                continue;
            }
            if (unit == bom_unit) {
                from_next += 2;
                continue;
            }
            if (to_next == to_end) {
                outcome = partial;
                break;
            }
            *to_next++ = static_cast<intern_type>(unit);
            from_next += 2;
        }

        set_big_endian(state, big_endian);
        return outcome;
    }

    // Encodes every character of [from, from_end) as two bytes in the
    // current byte order.
    // Returns ok when all input was written, partial when the output buffer
    // has fewer than two bytes left, error for a value above 0xFFFF.
    result do_out(state_type& state,
                  const intern_type* from, const intern_type* from_end,
                  const intern_type*& from_next,
                  extern_type* to, extern_type* to_end,
                  extern_type*& to_next) const override
    {
        from_next = from;
        to_next = to;
        bool big_endian = is_big_endian(state);
        result outcome = ok;

        while (from_next != from_end) {
            const unsigned long code = static_cast<unsigned long>(*from_next);
            if (code > max_unit) {
                outcome = error;
                break;
            }
            if (to_end - to_next < 2) {
                outcome = partial;
                break;
            }

            unsigned int unit = static_cast<unsigned int>(code);
            if (unit == swapped_bom_unit) {
                // Request for big-endian output: emit the mark as FE FF.
                big_endian = true;
                unit = bom_unit;
            } else if (unit == bom_unit) {
                big_endian = false;
            }

            const unsigned char low = static_cast<unsigned char>(unit & 0xFFu);
            const unsigned char high = static_cast<unsigned char>((unit >> 8) & 0xFFu);
            *to_next++ = static_cast<extern_type>(big_endian ? high : low);
            *to_next++ = static_cast<extern_type>(big_endian ? low : high);
            ++from_next;
        }

        set_big_endian(state, big_endian);
        return outcome;
    }

    // No terminating sequence is needed for this encoding; nothing is written.
    result do_unshift(state_type&,
                      extern_type* to, extern_type*,
                      extern_type*& to_next) const override
    {
        to_next = to;
        return ok;
    }

    // Returns the number of external bytes do_in() would consume to produce
    // at most `max` characters, updating `state` the same way do_in() does.
    int do_length(state_type& state, const extern_type* from,
                  const extern_type* from_end, size_t max) const noexcept override
    {
        const extern_type* next = from;
        bool big_endian = is_big_endian(state);
        size_t produced = 0;

        while (produced < max && from_end - next >= 2) {
            const unsigned int unit = decode_unit(next, big_endian);
            next += 2;
            if (unit == swapped_bom_unit)
                big_endian = !big_endian;
            else if (unit != bom_unit)
                ++produced;
        }

        set_big_endian(state, big_endian);
        return static_cast<int>(next - from);
    }

    bool do_always_noconv() const noexcept override
    {
        return false;
    }

    int do_max_length() const noexcept override
    {
        return 2;
    }

    int do_encoding() const noexcept override
    {
        return 0;
    }

private:
    static const unsigned int bom_unit = 0xFEFFu;          // byte order mark
    static const unsigned int swapped_bom_unit = 0xFFFEu;  // mark seen byte-swapped
    static const unsigned int max_unit = 0xFFFFu;          // largest 16-bit value

    // Reads the byte-order flag stored in the first byte of the state.
    static bool is_big_endian(const state_type& state)
    {
        return *reinterpret_cast<const unsigned char*>(&state) != 0;
    }

    // Resets the state and stores the byte-order flag in its first byte.
    static void set_big_endian(state_type& state, bool big_endian)
    {
        state = state_type();
        *reinterpret_cast<unsigned char*>(&state) =
            static_cast<unsigned char>(big_endian ? 1 : 0);
    }

    // Combines two bytes into one 16-bit unit; the bytes go through
    // unsigned char so a negative char cannot sign-extend into the result.
    static unsigned int decode_unit(const extern_type* bytes, bool big_endian)
    {
        const unsigned int first = static_cast<unsigned char>(bytes[0]);
        const unsigned int second = static_cast<unsigned char>(bytes[1]);
        return big_endian ? ((first << 8) | second) : ((second << 8) | first);
    }
};

使用的时候非常简单,采用这种方式:
// Build a locale that is the classic "C" locale plus the UTF-16 facet.
// The facet is created with the default reference count of 0, so the locale
// takes ownership and destroys it.
locale loc(locale::classic(), new Fancier_codecvt);
// Imbue before opening so the file buffer uses the facet from the first byte.
debugFile.imbue(loc);
// Binary mode keeps the runtime from translating newline bytes inside the
// two-byte units.
debugFile.open("xxx.txt", ios::binary);
// Writing 0xFEFF first emits the byte order mark (FF FE) and selects
// little-endian output.
wchar_t ch = 0xfeff;
debugFile << ch;
记得先输出 UTF-16 的 BOM(即上面写入的 0xfeff)。

热点排行