视频地址挖掘抓取
年前做的视频地址挖掘工具,不知道现在关键标签还对不对,放上来算是个温习吧
代码又臭又长,现在看了很汗颜……本人菜鸟,请轻拍砖。
//得到视频标题 public String getVideoTitle(String beginTitleStr,int beginTextNum,String endTitleStr){int beginTitleNum = videoStr.indexOf(beginTitleStr,beginTextNum)+beginTitleStr.length();int endTitleNum = videoStr.indexOf(endTitleStr,beginTitleNum);String videoTitle = new ToolsSubString().mySubString(videoStr,beginTitleNum,endTitleNum);return videoTitle; }//得到视频地址ID public String getVideoId(String beginIdStr,int beginTextNum,String endIdStr){int beginIdNum = videoStr.indexOf(beginIdStr,beginTextNum)+beginIdStr.length();int endIdNum = videoStr.indexOf(endIdStr,beginIdNum);String videoId = new ToolsSubString().mySubString(videoStr,beginIdNum,endIdNum);return videoId; } //抓取土豆网视频信息的方法 public void catchTudouVideo(int neadCatchNum,String beginSearchVideoNumStr){ //抓取内容定位String endSearchVideoNumStr = "</em>";String beginTextStr = "<a class="inner" target="new";String beginIdStr = "href="http://www.tudou.com/programs/view/";String endIdStr = "/"";String beginTitleStr = "title="";String endTitleStr = """;int beginTextNum = 0;//得到搜索到视频个数,循环得到视频信息int searchVideoNum = getVideoNum(neadCatchNum,beginSearchVideoNumStr, endSearchVideoNumStr);for(int i=0;i<searchVideoNum;i++){//内容定位beginTextNum = videoStr.indexOf(beginTextStr,beginTextNum)+beginTextStr.length();//得到视频信息String videoTitle = getVideoTitle(beginTitleStr, beginTextNum, endTitleStr);String videoId = getVideoId(beginIdStr, beginTextNum, endIdStr);String videoUrl = "http://www.tudou.com/v/"+videoId+"/v.swf";//创建video对象保存视频信息,并添加到video集合,为存入数据库做准备VideoEntity video = new VideoEntity(videoTitle,"土豆网",videoUrl);videoList.add(video);} }