首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > JAVA > J2SE开发 >

j2se 抓取网页上图片

2012-08-29 
j2se 抓取网页上图片：package com.lee.test; import java.io.BufferedReader; import java.io.Fi…

j2se 抓取网页上图片

package com.lee.test;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads the images referenced by {@code <img>} tags on one web page, or on a
 * numbered series of pages, into a fixed local folder.
 *
 * <p>Original author's note: there was a lot more I wanted to polish here; what
 * remains is mostly a matter of putting in the effort.
 *
 * @author Lee
 */
public class GetImagesFromWeb {

    /** Directory that downloaded images are written to (created on class load). */
    private static final File folder;

    /** Number of image files started so far; used to build unique file names. */
    private static long counter = 0;

    /** Prefix of every generated file name (the value is kept from the original code). */
    private static final String FILE_NAME_PREFIX = "default_filename_suffix_";

    /** Extension used when the image URL's path does not carry a usable one. */
    private static final String DEFAULT_EXT = "jpg";

    /**
     * Matches an {@code <img ...>} tag and captures the value of its {@code src}
     * attribute (group 1 for a double-quoted value, group 2 for a single-quoted one).
     * Requiring whitespace before {@code src} keeps attributes such as
     * {@code data-src} from being mistaken for it.
     */
    private static final Pattern IMG_SRC = Pattern.compile(
            "<img\\b[^>]*?\\ssrc\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')",
            Pattern.CASE_INSENSITIVE);

    static {
        String path = "C:\\Documents and Settings\\Administrator\\桌面\\MyImagesFolder";
        folder = new File(path);
        // mkdirs() also creates missing parent directories; mkdir() would fail silently.
        if (!folder.exists() && !folder.mkdirs()) {
            System.out.println("建立文件夹" + folder.getAbsolutePath() + " 失败!");
        }
    }

    /**
     * Derives a file extension from the path component of an image URL.
     *
     * @param path the URL path (no query string or fragment), e.g. {@code /a/b/pic.gif}
     * @return the text after the last '.' of the last path segment, or
     *         {@link #DEFAULT_EXT} when there is none or it contains characters
     *         other than letters and digits
     */
    private static String getExtName(String path) {
        // Only look at the last segment so a '.' in the host or a directory is ignored.
        String lastSegment = path.substring(path.lastIndexOf('/') + 1);
        int dot = lastSegment.lastIndexOf('.');
        if (dot < 0 || dot == lastSegment.length() - 1) {
            return DEFAULT_EXT;
        }
        String ext = lastSegment.substring(dot + 1);
        for (int i = 0; i < ext.length(); i++) {
            if (!Character.isLetterOrDigit(ext.charAt(i))) {
                return DEFAULT_EXT;
            }
        }
        return ext;
    }

    /**
     * Fetches a page and collects the {@code src} of every {@code <img>} tag in it.
     *
     * @param url address of the page to scan
     * @return absolute image URLs in document order; empty (never {@code null}) when
     *         the address is malformed or the page cannot be read
     */
    private static List<String> getImageUrls(String url) {
        List<String> urls = new ArrayList<String>();

        URL page;
        try {
            page = new URL(url);
        } catch (MalformedURLException e) {
            System.out.println(url + "  不合法!");
            return urls;
        }

        URLConnection connection;
        try {
            connection = page.openConnection();
        } catch (IOException e) {
            System.out.println("网络连接错误!");
            return urls;
        }

        BufferedReader br;
        try {
            // Decode with a fixed charset so the result does not depend on the
            // platform default; only the (ASCII) markup and URLs matter here.
            br = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "UTF-8"));
        } catch (IOException e) {
            System.out.println("IO设备错误");
            return urls;
        }

        // Read the whole page first so that tags split across lines are still found.
        StringBuilder html = new StringBuilder();
        try {
            String line;
            while ((line = br.readLine()) != null) {
                html.append(line).append('\n');
            }
        } catch (IOException e) {
            // Keep going with whatever was read before the failure.
            System.out.println("流读写错误");
        } finally {
            close(br);
        }

        Matcher m = IMG_SRC.matcher(html);
        while (m.find()) {
            String src = m.group(1) != null ? m.group(1) : m.group(2);
            src = src.trim();
            if (src.length() == 0) {
                continue;
            }
            try {
                // Resolve relative references against the page address.
                urls.add(new URL(page, src).toString());
            } catch (MalformedURLException e) {
                System.out.println(src + "不合法!");
            }
        }
        return urls;
    }

    /**
     * Downloads every image referenced by the given page into the target folder.
     * Failures are reported on standard output and the affected image is skipped.
     *
     * @param url address of the page whose images should be downloaded
     */
    public static void getImagesFromSinglePage(String url) {
        for (String imageUrl : getImageUrls(url)) {
            downloadImage(imageUrl);
        }
    }

    /** Copies one image to a newly numbered file, closing both streams afterwards. */
    private static void downloadImage(String imageUrl) {
        URL u;
        try {
            u = new URL(imageUrl);
        } catch (MalformedURLException e) {
            System.out.println(imageUrl + "不合法!");
            return;
        }

        InputStream is = null;
        FileOutputStream fos = null;
        try {
            try {
                URLConnection connection = u.openConnection();
                is = connection.getInputStream();
            } catch (IOException e) {
                System.out.println("IO 错误!");
                return;
            }

            // The counter advances only once the remote stream is open, so
            // unreachable images do not consume a file number.
            File file = new File(folder,
                    FILE_NAME_PREFIX + (counter++) + "." + getExtName(u.getPath()));
            try {
                fos = new FileOutputStream(file);
            } catch (FileNotFoundException e) {
                System.out.println("文件 " + file.getAbsolutePath() + "不存在!");
                return;
            }

            try {
                byte[] buffer = new byte[1024];
                int len;
                while ((len = is.read(buffer)) != -1) {
                    fos.write(buffer, 0, len);
                }
                fos.flush();
            } catch (IOException e) {
                System.out.println("IO错误!");
                return;
            }
            // Reached only when the whole image was copied.
            System.out.println(file.getName() + " 获取成功!");
        } finally {
            close(fos);
            close(is);
        }
    }

    /** Closes a stream if it is non-null, reporting (not propagating) any failure. */
    private static void close(Closeable stream) {
        if (stream != null) {
            try {
                stream.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Downloads the images of a numbered series of pages whose addresses have the
     * form {@code urlFirst + i + urlLast}.
     *
     * @param urlFirst   part of the address before the page number
     * @param urlLast    part of the address after the page number
     * @param beginIndex first page number (inclusive)
     * @param endIndex   last page number (inclusive)
     */
    public static void batchGetImages(String urlFirst, String urlLast, int beginIndex, int endIndex) {
        for (int i = beginIndex; i <= endIndex; i++) {
            getImagesFromSinglePage(urlFirst + i + urlLast);
        }
    }

    /**
     * Example run: downloads the images of pages 1 to 30 of a paginated listing.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Fetch all images of a single page:
        // getImagesFromSinglePage("http://www.qiushibaike.com/new2/pic/20/page/6/");

        // Fetch the images of all pages whose addresses follow a numeric pattern.
        // Taking http://www.qiushibaike.com/new2/pic/20/page/350/ as an example,
        // the number 350 means page 350, and 1 means the first page.
        // The call below fetches the images of pages 1 through 30
        // (roughly 600 images).
        batchGetImages("http://www.qiushibaike.com/new2/pic/20/page/", "/", 1, 30);
    }
}

热点排行