正则表达式匹配替换网址
有这么一个需求,
把网页里所有形如 src="/web/inde.jsp" 的属性（即 src 后面引号里的网址）都加上域名前缀，变成 "www.baidu.com/web/inde.jsp"。
另外，<link> 标签里的 href 属性也做同样的处理。
如果引号里的网址以 http:、https: 或 www. 开头，则不做替换。
正则式为
\\s*(<LINK\\s+.+?href=|src=)\\s*['|"]\\s*((?!http:|https:|www\\.).+?)['|"]
关键点为：使用惰性（非贪婪）匹配 .+? 和负向预查 (?!...)
改为
/**
 * Rewrites an HTML file line by line, prefixing relative URLs with a fixed
 * host string.
 *
 * <p>A URL is rewritten when it is the quoted value of a {@code src=}
 * attribute, or of the {@code href=} attribute inside a {@code <link ...>}
 * tag, and it does not start with {@code http:}, {@code https:} or
 * {@code www.}. Everything else is copied to the output unchanged.
 */
public class mainClass {

    /** Output file used by the two-argument {@link #parseTool(String, String)}. */
    private static final String DEFAULT_RESULT_PATH = "e:/result.txt";

    /**
     * Matches one rewritable attribute; the URL is capture group 3.
     *
     * <ul>
     *   <li>{@code [^>]*?} keeps the {@code href} search inside the
     *       {@code <link>} tag and also accepts {@code <link href=...>}.</li>
     *   <li>{@code (['"])} with the back-reference {@code \2} requires the
     *       closing quote to be the same character as the opening one.</li>
     *   <li>{@code \s*+} is possessive so the lookahead cannot be bypassed by
     *       backtracking into leading blanks.</li>
     *   <li>{@code [^'"]+} stops at the first quote, so an empty attribute
     *       never swallows the following one.</li>
     * </ul>
     * Compiled once and case-insensitive, because HTML tag and attribute
     * names are not case-sensitive.
     */
    private static final Pattern URL_ATTRIBUTE = Pattern.compile(
            "(<link\\s[^>]*?href=|src=)\\s*(['\"])\\s*+((?!http:|https:|www\\.)[^'\"]+)\\2",
            Pattern.CASE_INSENSITIVE);

    /**
     * Rewrites {@code file} into the default output file {@code e:/result.txt}.
     *
     * @param file        path of the HTML file to read
     * @param replaceText text inserted in front of every rewritable URL
     * @return {@code "ok."} on success, {@code "FileNotFinded."} if a file
     *         could not be opened, {@code "IOError."} on any other I/O failure
     */
    public String parseTool(String file, String replaceText) {
        return parseTool(file, replaceText, DEFAULT_RESULT_PATH);
    }

    /**
     * Rewrites {@code file} into {@code resultPath}. Every output line is
     * terminated with {@code "\r\n"}. Files are read and written with the
     * platform default charset.
     *
     * @param file        path of the HTML file to read
     * @param replaceText text inserted in front of every rewritable URL
     * @param resultPath  path of the file to write
     * @return {@code "ok."} on success, {@code "FileNotFinded."} if a file
     *         could not be opened, {@code "IOError."} on any other I/O failure
     */
    public String parseTool(String file, String replaceText, String resultPath) {
        // try-with-resources closes both streams on every path, including failures.
        try (Scanner scanner = new Scanner(new FileInputStream(file));
             BufferedWriter writer = new BufferedWriter(new FileWriter(resultPath))) {
            while (scanner.hasNextLine()) {
                writer.write(prefixUrls(scanner.nextLine(), replaceText));
                writer.write("\r\n");
            }
            // Scanner swallows read errors; surface them instead of reporting "ok.".
            IOException readFailure = scanner.ioException();
            if (readFailure != null) {
                throw readFailure;
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return "FileNotFinded.";
        } catch (IOException e) {
            e.printStackTrace();
            return "IOError.";
        }
        return "ok.";
    }

    /**
     * Returns {@code line} with {@code prefix} inserted before each matched URL.
     * The prefix is spliced in at the match position, so neither the URL nor
     * the prefix is interpreted as a regex, and each occurrence is prefixed
     * exactly once.
     */
    private static String prefixUrls(String line, String prefix) {
        Matcher matcher = URL_ATTRIBUTE.matcher(line);
        StringBuilder rewritten = new StringBuilder(line.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            int urlStart = matcher.start(3);
            rewritten.append(line, copiedUpTo, urlStart).append(prefix);
            copiedUpTo = urlStart;
        }
        rewritten.append(line, copiedUpTo, line.length());
        return rewritten.toString();
    }

    /** Demo entry point: rewrites {@code e:/to.txt} and prints the status. */
    public static void main(String[] args) {
        String result = new mainClass().parseTool("e:/to.txt", "www.baidu.com");
        System.out.println("--" + result);
    }
}