Help 网页抓取源码----神奇网址求抓
抓取不到源码的网址
http://www1.macys.com/catalog/product/index.ognc?ID=596761
用 HttpWebRequest 怎么都抓不到这个网址的源码,报错"重定向太多"。我监视了一下 cookie,手动加了一堆,还是没解决。请教有人能抓得到吗?
-------------------------
PS:同样的产品页,比如 http://www1.macys.com/catalog/product/index.ognc?ID=603770 ,抓取就没问题;但用一样的代码抓取上面的网址就不行。网上能搜到的代码我都逐一试验过,均不行,没有一个能抓到上面网址的源码。
-------------------------
测试方法代码:
/// <summary>
/// Fetches <paramref name="Url"/> with an HTTP GET request and returns the response body as text.
/// </summary>
/// <param name="Url">Absolute HTTP/HTTPS address of the page to download.</param>
/// <returns>
/// The response body on success. On any failure the message of the thrown exception is
/// returned instead, so callers cannot tell the two cases apart from the return value alone.
/// </returns>
private static string getContent(string Url)
{
    try
    {
        HttpWebRequest wreq = (HttpWebRequest)WebRequest.Create(Url);

        // A cap of 4 hops makes GetResponse fail with "too many automatic redirections" on
        // pages that bounce through several redirects before serving content. 50 is the
        // framework default and is the likely fix for the failing product URL.
        wreq.MaximumAutomaticRedirections = 50;

        // This limit is expressed in kilobytes. 4 KB is tight for responses that set many
        // cookies; 64 is the framework default.
        wreq.MaximumResponseHeadersLength = 64;

        wreq.Method = "GET";
        wreq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        wreq.UserAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.122 Safari/534.30";

        // Start with an empty container: cookies set by the server are stored here and sent
        // back on later hops, so hand-seeding cookie values should not be required.
        // NOTE(review): confirm against the live site that no pre-seeded cookie is needed.
        wreq.CookieContainer = new CookieContainer();

        // Dispose the response, its stream and the reader so the underlying connection is
        // released even when reading fails part-way through.
        using (HttpWebResponse wresp = (HttpWebResponse)wreq.GetResponse())
        using (Stream body = wresp.GetResponseStream())
        using (StreamReader sr = new StreamReader(body))
        {
            return sr.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Kept for backward compatibility: existing callers expect the error text as the result.
        return ex.Message;
    }
}