使用jsoup解析一篇文章示例
jar包下载地址:http://jsoup.org/download
Utils方法://\\u4E00-\\u9FA5\\uF900-\\uFA2D是指汉字的Unicode编码范围 private static final Pattern REGEX_NUM = Pattern.compile("(^[\\u4E00-\\u9FA5\\uF900-\\uFA2D])(\\d+)([\\u4E00-\\u9FA5\\uF900-\\uFA2D])?"); public static int regexNum(String str){ Matcher matcher = REGEX_NUM.matcher(str); if(matcher.matches()){ return Integer.parseInt(matcher.group(2)); } return 0; }