首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > .NET > C# >

这个网站如何不让抓取呢

2013-08-09 
这个网站怎么不让抓取呢?http://read.10086.cn/booklist?nodeId=0&fee=0&order=1&bookListType=1&view=2&page=2

这个网站怎么不让抓取呢?
http://read.10086.cn/booklist?nodeId=0&fee=0&order=1&bookListType=1&view=2&page=2


我抓取的时候老报,“远程服务器返回错误: (500) 内部服务器错误。”

你们能帮我测试下吗?我的代码:

 HttpWebResponse res;
            string charSet = "";
            try
            {
                WebClient myWebClient = new WebClient();//创建WebClient实例myWebClient 
                myWebClient.Credentials = CredentialCache.DefaultNetworkCredentials;
                byte[] myDataBuffer = myWebClient.DownloadData(url);
                string strWebData = Encoding.Default.GetString(myDataBuffer);

                //获取网页字符编码描述信息 
                Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
                string webCharSet = charSetMatch.Groups[2].Value.Replace(""", "");
                if (charSet == null || charSet == "")
                    charSet = webCharSet;

                if (charSet != null && charSet != "" && Encoding.GetEncoding(charSet) != Encoding.Default)
                    strWebData = Encoding.GetEncoding(charSet).GetString(myDataBuffer);
                return strWebData;
            }
            catch (WebException ex)
            {


                res = (HttpWebResponse)ex.Response;
               
            }
            
            StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.Default);
            return sr.ReadToEnd();

可以的啊
[解决办法]
主要看那个 Helper 的方法就行了。如果你是 .NET 4.5 版本的,删掉这个引用。
[解决办法]
引用:
能把那段代码单独贴出来吗?好像还是不行。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;

namespace App
{
    class HttpHelper
    {
        private static readonly string DefaultUserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.8.1.16) Gecko/20080702 Firefox/2.0.0.16";
        /// <summary>         
        /// 创建POST方式的HTTP请求          
        /// </summary>         
        /// <param name="url">请求的URL</param>          
        /// <param name="parameters">随同请求POST的参数名称及参数值字典</param>          


        /// <param name="userAgent">请求的客户端浏览器信息,可以为空</param>         
        /// <param name="requestEncoding">发送HTTP请求时所用的编码</param>         
        /// <param name="cookies">随同HTTP请求发送的Cookie信息,如果不需要身份验证可以为空</param>    
        /// <returns></returns>         
        public static Task<WebResponse> CreatePostHttpResponse(string url, IDictionary<string, string> parameters, string userAgent, Encoding requestEncoding, CookieContainer cookieContainer)
        {
            if (string.IsNullOrEmpty(url))
            {
                throw new ArgumentNullException("url");
            }
            if (requestEncoding == null)
            {
                throw new ArgumentNullException("requestEncoding");
            }
            HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            if (!string.IsNullOrEmpty(userAgent))
            {
                request.UserAgent = userAgent;
            }
            else
            {
                request.UserAgent = DefaultUserAgent;


            }
            if (cookieContainer == null)
            {
                request.CookieContainer = new CookieContainer();
            }
            else
            {
                request.CookieContainer = cookieContainer;
            }
            //如果需要POST数据             
            if (!(parameters == null 
[解决办法]
 parameters.Count == 0))
            {
                StringBuilder buffer = new StringBuilder();
                int i = 0;
                foreach (string key in parameters.Keys)
                {
                    if (i > 0)
                    {
                        buffer.AppendFormat("&{0}={1}", key, parameters[key]);
                    }
                    else
                    {
                        buffer.AppendFormat("{0}={1}", key, parameters[key]);


                    }
                    i++;
                }
                byte[] data = requestEncoding.GetBytes(buffer.ToString());
                var task = Task.Factory.FromAsync<Stream>(request.BeginGetRequestStream, request.EndGetRequestStream, request, TaskCreationOptions.None);               //等待任务完成               
                task.Wait();                //执行完本任务后再连续执行写入留和返回response对象           '
                using (Stream stream = task.Result)//如果上面没有等待任务完成那一句,在这里直接获取结果也是可以的           
                {
                    stream.Write(data, 0, data.Length);
                }
            }
            return Task.Factory.FromAsync<WebResponse>(request.BeginGetResponse, request.EndGetResponse, request, TaskCreationOptions.None);
        }

        /// <summary>          
        /// 创建GET方式的HTTP请求        
        /// </summary>        
        /// <param name="url">请求的URL</param>          
        /// <param name="timeout">请求的超时时间</param>         
        /// <param name="userAgent">请求的客户端浏览器信息,可以为空</param>       


        /// <param name="cookies">随同HTTP请求发送的Cookie信息,如果不需要身份验证可以为空</param>         
        /// <returns></returns>        
        public static Task<WebResponse> CreateGetHttpResponse(string url, string userAgent, CookieContainer cookieContainer)
        {
            if (string.IsNullOrEmpty(url))
            {
                throw new ArgumentNullException("url");
            }
            HttpWebRequest request = WebRequest.Create(new Uri(url)) as HttpWebRequest;
            request.Method = "GET";
            request.UserAgent = DefaultUserAgent;
            if (!string.IsNullOrEmpty(userAgent))
            {
                request.UserAgent = userAgent;
            }
            if (cookieContainer == null)
            {
                request.CookieContainer = new CookieContainer();
            }
            else
            {
                request.CookieContainer = cookieContainer;
            }
            return Task.Factory.FromAsync<WebResponse>(request.BeginGetResponse, request.EndGetResponse, request, TaskCreationOptions.None);
        }
    }
}

热点排行