首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > 编程 >

使用httpClient跟httpParser获取指定网址的title

2013-10-24 
使用httpClient和httpParser获取指定网址的title：package com.xinhuanet.cloudDesk.controller; import java…

使用httpClient和httpParser获取指定网址的title

package com.xinhuanet.cloudDesk.controller;import java.util.regex.Matcher;import java.util.regex.Pattern;import org.apache.commons.httpclient.HttpClient;import org.apache.commons.httpclient.HttpConnectionManager;import org.apache.commons.httpclient.methods.GetMethod;import org.apache.commons.httpclient.params.HttpConnectionManagerParams;import org.htmlparser.Parser;import org.htmlparser.visitors.HtmlPage;public class R {public static void main(String[] args) throws Exception {HttpClient httpClient = new HttpClient();httpClient.getHostConfiguration().setProxy("202.84.17.41", 8080);HttpConnectionManager httpConnManager = httpClient.getHttpConnectionManager();if (httpConnManager != null) {HttpConnectionManagerParams mgrParams = new HttpConnectionManagerParams();mgrParams.setSoTimeout(20000000);mgrParams.setTcpNoDelay(true);mgrParams.setConnectionTimeout(20000000);mgrParams.setLinger(0);mgrParams.setStaleCheckingEnabled(false);httpConnManager.setParams(mgrParams);}String url = "http://www.poetry4cn.com";GetMethod methodGet = new GetMethod(url);httpClient.executeMethod(methodGet);String charset = getCharSet(new String(methodGet.getResponseBody()));System.out.println("getCharSet:" + charset);String responseGet = new String(methodGet.getResponseBody(), charset);System.out.println(responseGet);Parser myParser = Parser.createParser(responseGet.toString(), charset);HtmlPage visitor = new HtmlPage(myParser);myParser.visitAllNodesWith(visitor);String textInPage = visitor.getTitle();System.out.println("title:" + textInPage);}public static String getCharSet(String content) {// String regex = ".*charset=([^;]*).*";String regex = "<meta.+?charset=[^\\w]?([-\\w]+)";Pattern pattern = Pattern.compile(regex);Matcher matcher = pattern.matcher(content);if (matcher.find())return matcher.group(1);elsereturn null;}}

热点排行