首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > VC/MFC >

CHttpFile读取GBK网页时乱码,该怎么处理

2012-01-22 
CHttpFile读取GBK网页时乱码在VS2008下,项目字符集是Unicode,CHttpFile读取GBK网页时乱码. 我查看字符串是

CHttpFile读取GBK网页时乱码
在VS2008下,项目字符集是Unicode,CHttpFile读取GBK网页时乱码. 我查看字符串是用的断点,在运行时看字符串值的。
读取gb2312网页strHtml直接就是中文。读取UTF-8网页时,用ConvertUTF8toGB2312可以转成中文。
读取gbk,如sohu网页时就乱码了。我仿ConvertUTF8toGB2312写了个ConvertUTF8toGBK没效果,还是乱码。

C/C++ code
#include <stdafx.h>#include <afxinet.h>//将UTF8字符串转换为gb2312    CString ConvertUTF8toGB2312(const char *pData, size_t size){    size_t n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, NULL, 0);    WCHAR   *   pChar   =   new   WCHAR[n+1];    n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, pChar, n);    pChar[n]=0;    n = WideCharToMultiByte(936, 0, pChar, -1, 0, 0, 0, 0);    char *p = new char[n+1];    n = WideCharToMultiByte(936, 0, pChar, -1, p, (int)n, 0, 0);    CString result(p);    delete []pChar;    delete []p;    return result;} CString ConvertUTF8toGBK(const char *pData, size_t size){    size_t n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, NULL, 0);    WCHAR   *   pChar   =   new   WCHAR[n+1];    n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, pChar, n);    pChar[n]=0;    n = WideCharToMultiByte(20936, 0, pChar, -1, 0, 0, 0, 0);    char *p = new char[n+1];    n = WideCharToMultiByte(20936, 0, pChar, -1, p, (int)n, 0, 0);    CString result(p);    delete []pChar;    delete []p;    return result;} CString GetPageHtml(CString strUrl) {    CString strHtml = _T("");//获取HTML    CInternetSession session(L"HttpClient");    //CString strUrl = L"http://www.yahoo.com.cn";    CHttpFile* pFile = (CHttpFile*)session.OpenURL((LPCTSTR)strUrl);    DWORD dwStatusCode;       pFile -> QueryInfoStatusCode(dwStatusCode);       if(dwStatusCode == HTTP_STATUS_OK)       {           char sRecived[1024];        while(pFile->Read((LPTSTR)sRecived, 1024) != NULL)        {            strHtml += sRecived;        }        //if(strHtml.Find(L"utf-8") > -1)        {            USES_CONVERSION;            char *pChar = W2A(strHtml.GetBuffer());            CString tt= UTF82Ansi(pChar);                        [color=#FF0000]//CString strTest = ConvertUTF8toGBK(pChar,strlen(pChar));[/color]            CString strTemp = ConvertUTF8toGB2312(pChar,strlen(pChar));            strHtml = strTemp;        }    }    pFile -> Close();       delete pFile;     session.Close();    return strHtml;}int _tmain(int argc, _TCHAR* argv[]){    CString x = GetPageHtml("http://tv.sohu.com/20111017/n322381354.shtml");    return   0 ;}


[解决办法]
那个20936是怎么得到的,直接使用CP_ACP试试。

热点排行