HttpClient模拟登录新浪微博求助!!!!!!!!!!
以下是参考网上找到的代码修改后的源码,可是登录不成功,望指教或探讨!!!
QQ:940154985
package cn.vangdo.search;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
public class SinaLogin {
private final static HttpClient client = new DefaultHttpClient();
/**
* 抓取网页
*
* @param url
* @throws IOException
*/
static String get(String url) throws IOException {
HttpGet get = new HttpGet(url);
HttpResponse response = client.execute(get);
System.out.println(response.getStatusLine());
HttpEntity entity = response.getEntity();
String result = dump(entity);
get.abort();
return result;
}
/**
* 执行登录过程
*
* @param user
* @param pwd
* @param debug
* @throws IOException
*/
static void login(String user, String pwd) throws IOException {
HttpPost post = new HttpPost(
"http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.3.17)");
post.setHeader("User-Agent",
"Mozilla/5.0 (X11; Linux i686; rv:5.0) Gecko/20100101 Firefox/5.0");
post.setHeader("Referer", "http://weibo.com/");
post.setHeader("Content-Type", "application/x-www-form-urlencoded"); // 登录表单的信息
List<NameValuePair> qparams = new ArrayList<NameValuePair>();
qparams.add(new BasicNameValuePair("entry", "account"));
qparams.add(new BasicNameValuePair("gateway", "1"));
qparams.add(new BasicNameValuePair("from", ""));
qparams.add(new BasicNameValuePair("savestate", "0"));
qparams.add(new BasicNameValuePair("useticket", "0"));
qparams.add(new BasicNameValuePair("ssosimplelogin", "1"));
qparams.add(new BasicNameValuePair("service", "account"));
qparams.add(new BasicNameValuePair("servertime", "1323790225"));
qparams.add(new BasicNameValuePair("nonce", "FUD0TL"));
// servertime=1309164392
// nonce=PJZCHM
qparams.add(new BasicNameValuePair("pwencode", "wsse"));
qparams.add(new BasicNameValuePair("encoding", "UTF-8"));
qparams.add(new BasicNameValuePair(
"url",
"http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack"));
qparams.add(new BasicNameValuePair("returntype", "IFRAME"));
qparams.add(new BasicNameValuePair("setdomain", "1"));
qparams.add(new BasicNameValuePair("username", user));
qparams.add(new BasicNameValuePair("password", pwd));
UrlEncodedFormEntity params = new UrlEncodedFormEntity(qparams, "UTF-8");
post.setEntity(params); // Execute the request
HttpResponse response = client.execute(post);
post.abort();
// 新浪微博登录没有301,302之类的跳转;而是返回200,然后用javascript实现的跳转
// int statusCode = response.getStatusLine().getStatusCode();
// if ((statusCode == HttpStatus.SC_MOVED_PERMANENTLY)
// || (statusCode == HttpStatus.SC_MOVED_TEMPORARILY)
// || (statusCode == HttpStatus.SC_SEE_OTHER)
// || (statusCode == HttpStatus.SC_TEMPORARY_REDIRECT)) {
// // 此处重定向处理 此处还未验证
// String newUri = response.getLastHeader("Location").getValue();
// get(newUri);
// } // Get hold of the response entity
HttpEntity entity = response.getEntity();
// 取出跳转的url
// location.replace("http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack&ticket=ST-MTkxODMxOTI0Nw==-1309224549-xd-263902F174B27BAB9699691BA866EFF2&retcode=0");
String location = getRedirectLocation(dump(entity));
System.out.println("entity:"+dump(entity));
System.out.println("location:"+location);
//get(location);
}
private static String getRedirectLocation(String content) {
String regex = "location\\.replace\\("(.*?)"\\)";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(content);
String location = null;
if (matcher.find()) {
location = matcher.group(1);
} return location;
}
/**
* 打印页面
*
* @param entity
* @throws IOException
*/
private static String dump(HttpEntity entity) throws IOException {
//System.out.println("content11:"+entity.getContent());
BufferedReader br = new BufferedReader(new InputStreamReader(entity.getContent(),"utf-8"));
File fl = new File("D:\\1.txt");//写入到本地的文件名称
FileOutputStream fos=new FileOutputStream(fl);
int data = br.read();
while(data!=-1) {
fos.write(data);
data=br.read();
}
fos.close();
System.out.println("content:"+data);
//return IOUtils.toString(br);
return "aa";
}
public static void main(String[] args) throws IOException {
//System.out.println("begin999");
login("username", "password");
String result = get("http://t.sina.com.cn/pub/tags");
System.out.println(result);
}
}
[最优解释]
我在研究爬虫程序时,也有需要爬取新浪微博的模块,同样遇到了类似的问题。以下为我目前登录的方式,结果是能够登录成功并爬取内容的。
// Sample login flow from the reply (appears to use the Commons HttpClient 3.x
// style API: HttpClient / PostMethod / GetMethod). getServerTime, makeNonce,
// encodeAccount and SinaSSOEncoder are helpers posted separately in the thread.
HttpClient client = new HttpClient();
client.getHttpConnectionManager().getParams().setConnectionTimeout(5000);
PostMethod post = new PostMethod(
        "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.3.16)");
String data = getServerTime();
String nonce = makeNonce(6);
NameValuePair[] nvps = new NameValuePair[] {
        new NameValuePair("entry", "weibo"),
        new NameValuePair("gateway", "1"),
        new NameValuePair("from", ""),
        new NameValuePair("savestate", "7"),
        new NameValuePair("useticket", "1"),
        new NameValuePair("ssosimplelogin", "1"),
        new NameValuePair("vsnf", "1"),
        new NameValuePair("vsnval", ""),
        new NameValuePair("su", encodeAccount(微博用户名)),
        new NameValuePair("service", "miniblog"),
        new NameValuePair("servertime", data),
        new NameValuePair("nonce", nonce),
        new NameValuePair("pwencode", "wsse"),
        new NameValuePair("sp", new SinaSSOEncoder().encode(微博密码, data, nonce)),
        new NameValuePair("encoding", "UTF-8"),
        new NameValuePair("returntype", "META"),
        new NameValuePair(
                "url",
                "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack") };
post.setRequestBody(nvps);
client.executeMethod(post);
// For testing: pull the redirect URL out of the "logging in" page. Requesting
// that URL is the step that actually completes the login and yields the cookie.
// The body is fetched once and reused instead of being requested three times.
String body = post.getResponseBodyAsString();
int start = (body == null) ? -1 : body.indexOf("http://weibo.com/ajaxlogin.php?");
int end = (body == null) ? -1 : body.indexOf("code=0");
if (start < 0 || end < start) {
    // Without both markers there is no redirect URL to follow.
    throw new IllegalStateException("login response does not contain the ajaxlogin redirect URL");
}
String url = body.substring(start, end + 6);
// Connect to the extracted URL.
GetMethod getMethod = new GetMethod(url);
client.executeMethod(getMethod);
GetMethod weiBoMethod = new GetMethod("http://需要登录才能访问的微博地址");
// Request the page that needs a login (the original post executed getMethod
// here by mistake; the author's follow-up says it should be weiBoMethod).
status = client.executeMethod(weiBoMethod);
at java.io.BufferedReader.read(Unknown Source)
at cn.vangdo.search.Sian.dump(Sian.java:129)
at cn.vangdo.search.Sian.login(Sian.java:96)
at cn.vangdo.search.Sian.main(Sian.java:142)
[其他解释]
最直接还是抓包,根据实际成功登录情况来模拟
需要加哪些head field,分别是什么值,加密方式等等
你上面returntype是不是应该加META值呢,服务端没法获取相关信息了
[其他解释]
= 128 << (24 - r % 32);
A[((r + 64 >> 9) << 4) + 15] = r;
int[] B = new int[80];
int z = 1732584193;
int y = -271733879;
int v = -1732584194;
int u = 271733878;
int s = -1009589776;
for (int o = 0; o < A.length; o += 16) {
int q = z;
int p = y;
int n = v;
int m = u;
int k = s;
for (int l = 0; l < 80; l++) {
if (l < 16) {
B[l] = A[o + l];
} else {
B[l] = d(B[l - 3] ^ B[l - 8] ^ B[l - 14] ^ B[l - 16], 1);
}
int C = e(e(d(z, 5), a(l, y, v, u)), e(e(s, B[l]), c(l)));
s = u;
u = v;
v = d(y, 30);
y = z;
z = C;
}
z = e(z, q);
y = e(y, p);
v = e(v, n);
u = e(u, m);
s = e(s, k);
}
return new int[] { z, y, v, u, s };
}
private int a(int k, int j, int m, int l) {
if (k < 20) {
return (j & m)
[其他解释]
上面的回复中,最后一行写错了。。抱歉。
应该是status = client.executeMethod(weiBoMethod);
另附上:新浪加密算法的代码
/**
 * URL-encodes the account name and then Base64-encodes the result.
 *
 * <p>UTF-8 is named explicitly for both steps so the output does not depend on
 * the platform default charset.
 *
 * @param account account name to encode
 * @return the Base64 text of the URL-encoded account name
 */
private static String encodeAccount(String account) {
    try {
        String urlEncoded = URLEncoder.encode(account, "UTF-8");
        return Base64.encode(urlEncoded.getBytes("UTF-8"));
    } catch (java.io.UnsupportedEncodingException e) {
        // Every Java platform is required to support UTF-8.
        throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
    }
}
/**
 * Builds a random token made of upper-case letters A-Z and digits 0-9.
 *
 * @param len number of characters to produce; zero or a negative value
 *            yields an empty string
 * @return the generated token
 */
private static String makeNonce(int len) {
    final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    StringBuilder token = new StringBuilder();
    int remaining = len;
    while (remaining-- > 0) {
        double roll = Math.ceil(Math.random() * 1000000);
        int pick = (int) (roll % alphabet.length());
        token.append(alphabet.charAt(pick));
    }
    return token.toString();
}
/**
 * Returns the current time as whole seconds since the epoch, in decimal text.
 *
 * <p>Despite the name, the value is read from the local clock.
 *
 * @return epoch seconds as a string
 */
private static String getServerTime() {
    return Long.toString(System.currentTimeMillis() / 1000L);
}
public class SinaSSOEncoder {
private boolean i = false;
private int g = 8;
/** Creates an encoder; the constructor performs no work. */
public SinaSSOEncoder() {
}
/**
 * Produces the encoded password: {@code hex_sha1} applied twice to the
 * password, then the result concatenated with {@code servertime} and
 * {@code nonce} and passed through {@code hex_sha1} once more.
 *
 * @param psw        plain password
 * @param servertime time value sent along with the login form
 * @param nonce      random token sent along with the login form
 * @return the final digest text returned by {@code hex_sha1}
 */
public String encode(String psw, String servertime, String nonce) {
    String twiceHashed = hex_sha1(hex_sha1(psw));
    return hex_sha1(twiceHashed + servertime + nonce);
}
/**
 * Runs the text through {@code f}, {@code b} and {@code h} in that order,
 * passing the input size (character count times {@code g}) to the first two.
 *
 * @param j text to digest
 * @return the text produced by {@code h}
 */
private String hex_sha1(String j) {
    int sizeInBits = j.length() * g;
    int[] words = f(j, sizeInBits);
    int[] digest = b(words, sizeInBits);
    return h(digest);
}
/**
 * Renders each int as eight hexadecimal digits, most significant byte first.
 * Upper-case digits are used when the {@code i} flag is set, lower-case
 * otherwise.
 *
 * @param l words to render
 * @return the concatenated hexadecimal text, eight characters per word
 */
private String h(int[] l) {
    String digits = i ? "0123456789ABCDEF" : "0123456789abcdef";
    StringBuilder hex = new StringBuilder();
    for (int word : l) {
        // Walk the four bytes from the top (shift 24) down to the bottom (shift 0).
        for (int shift = 24; shift >= 0; shift -= 8) {
            hex.append(digits.charAt((word >> (shift + 4)) & 15));
            hex.append(digits.charAt((word >> shift) & 15));
        }
    }
    return hex.toString();
}
private int[] b(int[] A, int r) {
A[r >> 5]
[其他解释]
(j & l)
[其他解释]
((~j) & l);
}
;
if (k < 40) {
return j ^ m ^ l;
}
;
if (k < 60) {
return (j & m)
[其他解释]
(l & 65535);
}
private int d(int j, int k) {
return (j << k)
[其他解释]
(m & l);
}
;
return j ^ m ^ l;
}
/**
 * Selects one of four fixed constants according to which range the index
 * falls in: below 20, below 40, below 60, or anything else.
 *
 * @param j index to classify
 * @return the constant for that range
 */
private int c(int j) {
    if (j < 20) {
        return 1518500249;
    }
    if (j < 40) {
        return 1859775393;
    }
    if (j < 60) {
        return -1894007588;
    }
    return -899497514;
}
private int e(int j, int m) {
int l = (j & 65535) + (m & 65535);
int k = (j >> 16) + (m >> 16) + (l >> 16);
return (k << 16)
[其他解释]
= (m.charAt(k / g) & j) << (24 - k % 32);
}
return l;
}
}
QQ请教一下?我的QQ:940154985
[其他解释]
“Content has been consumed”的意思是响应实体的内容流已经被读取(消费)过一次了,再次去读就拿不到数据了。应当只读取一次,把结果保存到变量里再重复使用。
[其他解释]
有点繁琐啊
[其他解释]
很好!搞定!
[其他解释]
我也很想知道19楼大牛是怎么知道加密算法的!!!
[其他解释]
关键就是新浪加密算法啊。
19楼(afengsakura) 我挺好奇你怎么知道算法的。
牛逼破解分析 Or 新浪老员工 Or 认识新浪开发人员 ?