【获取网页源码的方法】解决大家的问题同时提出一个问题
网上很多获取网页源代码的方法都要求事先知道页面的编码方式,如果编码方式未知呢?
采用我的代码可以解决这个问题:
' Returns the text found between the first occurrence of divBegin and the
' next occurrence of divEnd that follows it.
'   code     - text to search
'   divBegin - opening delimiter (not included in the result)
'   divEnd   - closing delimiter (not included in the result)
' Returns "" when either delimiter cannot be found.
Function GetByDiv2(ByVal code As String, ByVal divBegin As String, ByVal divEnd As String)
    Dim beginPos As Integer = InStr(1, code, divBegin)
    If beginPos = 0 Then Return ""

    ' Content starts right after the opening delimiter (InStr is 1-based).
    Dim contentStart As Integer = beginPos + Len(divBegin)
    Dim endPos As Integer = InStr(contentStart, code, divEnd)
    If endPos = 0 Then Return ""

    Return Mid(code, contentStart, endPos - contentStart)
End Function

' Downloads strURL and returns its source text, decoded with the charset the
' page declares ("charset=..."), or as UTF-8 when no usable charset is declared.
' 2012-08-14: deals with gb2312 and utf-8.
' The page is fetched once; the same bytes are used both to sniff the charset
' and to produce the final text.
' Returns "" when the page cannot be downloaded.
Public Function getHtmlStr(ByVal strURL As String) As String
    Dim rawBytes As Byte() = DownloadPageBytes(strURL)
    If rawBytes Is Nothing Then Return ""

    ' Charset names are plain ASCII, so a UTF-8 decode is good enough to find
    ' the declaration even when the page is really in another encoding.
    Dim probeText As String = System.Text.Encoding.UTF8.GetString(rawBytes)
    Dim pageEncoding As System.Text.Encoding = ResolveEncodingOrUtf8(SniffCharset(probeText))

    Return DecodePageBytes(rawBytes, pageEncoding)
End Function

' Downloads strURL and returns its content decoded with the encoding named by
' codeType (e.g. "UTF-8", "GB2312").
' An empty or unsupported codeType falls back to UTF-8.
' Returns "" when the page cannot be downloaded; never throws.
Function PreGetHtml(ByVal strURL As String, Optional ByVal codeType As String = "")
    Dim rawBytes As Byte() = DownloadPageBytes(strURL)
    If rawBytes Is Nothing Then Return ""

    Return DecodePageBytes(rawBytes, ResolveEncodingOrUtf8(codeType))
End Function

' Performs an HTTP GET on strURL and returns the raw response body,
' or Nothing when the request fails for any reason.
Private Function DownloadPageBytes(ByVal strURL As String) As Byte()
    Try
        Dim request As System.Net.HttpWebRequest = CType(System.Net.WebRequest.Create(New System.Uri(strURL)), System.Net.HttpWebRequest)
        request.Method = "GET"
        ' Fixed date in the past, built without culture-dependent string parsing.
        request.IfModifiedSince = New Date(1990, 9, 21)

        Using response As System.Net.HttpWebResponse = CType(request.GetResponse(), System.Net.HttpWebResponse)
            Using body As System.IO.Stream = response.GetResponseStream()
                Using collected As New System.IO.MemoryStream()
                    Dim chunk(8191) As Byte
                    Dim readCount As Integer = body.Read(chunk, 0, chunk.Length)
                    While readCount > 0
                        collected.Write(chunk, 0, readCount)
                        readCount = body.Read(chunk, 0, chunk.Length)
                    End While
                    Return collected.ToArray()
                End Using
            End Using
        End Using
    Catch ex As System.Exception
        ' Deliberately best-effort: callers rely on an empty result instead of
        ' an exception (bad URL, non-HTTP scheme, network or protocol error).
        Return Nothing
    End Try
End Function

' Extracts the charset name from the first "charset=" declaration in html.
' Accepts bare, double-quoted and single-quoted values, in any letter case.
' Returns "" when no declaration is found.
Private Function SniffCharset(ByVal html As String) As String
    If String.IsNullOrEmpty(html) Then Return ""

    Dim found As System.Text.RegularExpressions.Match = System.Text.RegularExpressions.Regex.Match(html, "charset\s*=\s*[""']?\s*([A-Za-z0-9_.:\-]+)", System.Text.RegularExpressions.RegexOptions.IgnoreCase)
    If found.Success Then Return found.Groups(1).Value

    Return ""
End Function

' Maps an encoding name to an Encoding instance, falling back to UTF-8 when
' the name is empty or not supported on this platform.
Private Function ResolveEncodingOrUtf8(ByVal encodingName As String) As System.Text.Encoding
    If Not String.IsNullOrEmpty(encodingName) Then
        Try
            Return System.Text.Encoding.GetEncoding(encodingName.Trim())
        Catch ex As System.ArgumentException
            ' Unknown or unsupported name: use the UTF-8 fallback below.
        Catch ex As System.NotSupportedException
            ' Encoding not available on this platform: use the fallback below.
        End Try
    End If

    Return System.Text.Encoding.UTF8
End Function

' Decodes rawBytes with pageEncoding. A StreamReader is used so that a byte
' order mark, if present, is honoured and stripped.
Private Function DecodePageBytes(ByVal rawBytes As Byte(), ByVal pageEncoding As System.Text.Encoding) As String
    Using reader As New System.IO.StreamReader(New System.IO.MemoryStream(rawBytes), pageEncoding)
        Return reader.ReadToEnd()
    End Using
End Function