从字符串中提取超链接地址
现有如下文本(只列出一小部分):
function checksq()
{
if (document.sq.user.value.length <1)
{alert( "请输入用户信息! ");
return false;}
return true;}
function check()
{
if (document.se.text.value.length <1)
{alert( "请输入关键字! ");
return false;}
return true;
}
//-->
<!--//
function check0()
{
if ((document.jstj.sm.value.length <1) && (document.jstj.zz.value.length <1) && (document.jstj.cbs.value.length <1) && (document.jstj.cbrq.value.length <1) )
{
alert( "请输入检索信息! ");
return false;
}
return true;
<img src= "images/folder.gif "> <span class= "p6 "> <font color= "#669900 "> yinqsj </font> </span> <br>
<img src= "images/yd.gif " width= "11 " height= "14 "> <a href=rcount.asp?ssnum=1002> yued </a>
<img src= "images/down.gif " width= "11 " height= "14 "> <a href=dcount.asp?ssnum=1002> xiaz </a>
<img src= "images/pl.gif " width= "11 " height= "14 "> <a href=guestbook/index.asp?ssnum=1002&bookname=yinqsj发表评论 </a>
<img src= "images/sq.gif " width= "11 " height= "14 "> <a href=sq/add.asp?ssnum=1002&bookname=yinqsj > 添加个人书签 </a> </font>
</span> </td>
<img src= "images/down.gif " width= "11 " height= "14 "> <a href=dcount.asp?ssnum=2745> xiaz </a>
<img src= "images/folder.gif "> <span class= "p6 "> <font color= "#669900 "> yinqsj </font> </span> <br>
<img src= "images/down.gif " width= "11 " height= "14 "> <a href=dcount.asp?ssnum=1279> xiaz </a>
.......
现要提取出所有 xiaz 所在行的 "dcount.asp?ssnum=XXXX",应如何修改下面的处理代码才能达到目的:
<html>
<head>
<title>从字符串中提取超链接地址(by howwa@sina.com)</title>
<META http-equiv=Content-Type content= "text/html; charset=gb2312 ">
<script language=javascript>
// <!--
/**
 * Collects the href values of all <a> elements found in a piece of HTML text.
 *
 * Each anchor that carries an href attribute is cut out as
 * "href=... >link text</a>" and handed to getHref(); every non-empty result
 * is appended to the output followed by "\n\r".
 *
 * @param {string} str - HTML text to scan; null/undefined is treated as empty.
 * @returns {string} The accepted href values, each terminated by "\n\r"
 *                   ("" when nothing was found).
 */
function getHrefs(str) {
    var links = "";
    var text, match, href;
    // "<a", whitespace, optionally other attributes of the SAME tag ([^>]
    // never crosses the closing ">"), then "href=" up to the nearest "</a>".
    // Keeping the search inside one tag avoids picking up the href of a later
    // <link>/<area>/... element for an anchor that has no href of its own.
    var anchorRe = /<a\s(?:[^>]*?\s)?(href\s*=.*?<\/a>)/gi;

    if (str == null) { return links; }

    // Turn line breaks into spaces so "." can span an anchor that is spread
    // over several lines without gluing neighbouring tokens together.
    text = String(str).replace(/\r|\n/g, " ");

    anchorRe.lastIndex = 0;
    while ((match = anchorRe.exec(text)) !== null) {
        // match[1] has the form: href=aaaa xxx>yyyy</a>
        href = getHref(match[1]);
        // The "\n\r" separator is kept as-is for compatibility with existing output.
        if (href) { links += href + "\n\r"; }
    }
    return links;
}
/**
 * Extracts the href value from the tail of an anchor element.
 *
 * @param {string} str - Text of the form "href=aaaa xxx>yyyy</a>"
 *                       (leading whitespace is tolerated).
 * @returns {string} The href value without surrounding quotes, or "" when
 *                   the text is not a well-formed anchor tail, the quoted
 *                   value is unterminated, or the link is of a kind that is
 *                   deliberately skipped (mailto:, "#", javascript:,
 *                   vbscript:, file:, or a value starting with one of
 *                   ' " | \ < > &).
 */
function getHref(str) {
    var anchorTail = /href\s*=\s*([^>]+)>[\s\S]*<\/a>/i;
    var noHref = /mailto:|#|javascript:|vbscript:|file:/i;
    var wr = /^['"|\\<>&]/;
    var match, attrs, char0, pos, strhref;

    if (str == null || str === "") { return ""; }

    match = anchorTail.exec(String(str));
    // No match means the input is not "href=...>...</a>"; returning the raw
    // text here would report garbage as a link.
    if (!match) { return ""; }

    // Everything between "href=" and the closing ">" of the opening tag.
    attrs = match[1].replace(/(^\s+)|(\s+$)/g, "");
    char0 = attrs.charAt(0);
    strhref = "";
    if (char0 === "'" || char0 === "\"") {
        // Quoted value: take what lies between the matching quotes.
        pos = attrs.indexOf(char0, 1);
        if (pos > 0) { strhref = attrs.substring(1, pos); }
    } else {
        // Unquoted value: it ends at the first whitespace (more attributes
        // follow) or spans the whole remainder (href=aaaa> form).
        pos = attrs.search(/\s/);
        strhref = pos > 0 ? attrs.substring(0, pos) : attrs;
    }
    strhref = strhref.replace(/(^\s+)|(\s+$)/g, "");

    // Filter on the href value itself so that other attributes
    // (e.g. an onclick containing "#") cannot veto a valid link.
    if (strhref.search(noHref) >= 0) { return ""; }
    if (strhref.search(wr) >= 0) { return ""; }
    return strhref;
}
//-->
</script>
</head>
<body>
<h3> 从字符串中提取超链接地址 </h3>
<p>
作者:howwa(howwa@sina.com) <br>
时间:2007-7-1 <br>
blog: <a target=_blank href=http://blog.sina.com.cn/u/1495389370> http://blog.sina.com.cn/u/1495389370 </a> <br>
</p>
请在这里提供文本: <br>
<textarea cols=80 rows=10 name=mytext>
<p>
作者:howwa(howwa@sina.com) <br>
时间:2007-7-1 <br>
blog: <a target=_blank href=http://blog.sina.com.cn/u/1495389370> http://blog.sina.com.cn/u/1495389370 </a> <br>
</p>
<link href=scs.css type=text/css>
ns <a name=e2d> ss1234b </a> sd
nmm <a target=_blank href= 'ftp://abcd.com.cn ' onmouseover= "av1() "> naar <img src= "aaa.gif "> eeriera </a> ll <a onclick= "inputme() " href=b1234a target=_blank> aas </a>
oo <a href=/mailnet onclick= "a11234() "> opp </a> pqkjds
aa <a href=#fgs> aan </a> mm;sasa
<a name=ee/> qaaaad </a>
</textarea>
<br>
<input type=button value= "OK,Please click me! " onclick= "document.all.mylinks.value=getHrefs(document.all.mytext.value) ">
<br> <br> 这里是分析出来的链接地址: <br>
<textarea cols=80 rows=10 name=mylinks>
</textarea>
</body>
</html>
[解决办法]
try
<html>
<head>
<title>从字符串中提取超链接地址(by howwa@sina.com)</title>
<META http-equiv=Content-Type content= "text/html; charset=gb2312 ">
<script language=javascript>
// <!--
/**
 * Scans HTML text for <a> elements that have an href attribute and returns
 * the hrefs accepted by getHref().
 *
 * For every such anchor the fragment "href=... >link text</a>" is passed to
 * getHref(); each non-empty return value is appended to the result,
 * followed by "\n\r".
 *
 * @param {string} str - HTML text to scan; null/undefined is treated as empty.
 * @returns {string} Accepted href values, each terminated by "\n\r"
 *                   ("" when there are none).
 */
function getHrefs(str) {
    var links = "";
    var text, match, href;
    // The attribute part uses [^>] so the search never leaves the opening
    // <a ...> tag; an anchor without href therefore cannot pick up the href
    // of some later element. The body runs lazily to the nearest "</a>".
    var anchorRe = /<a\s(?:[^>]*?\s)?(href\s*=.*?<\/a>)/gi;

    if (str == null) { return links; }

    // Line breaks become spaces so that "." can match across what used to be
    // several lines while adjacent tokens stay separated.
    text = String(str).replace(/\r|\n/g, " ");

    anchorRe.lastIndex = 0;
    while ((match = anchorRe.exec(text)) !== null) {
        // match[1] has the form: href=aaaa xxx>yyyy</a>
        href = getHref(match[1]);
        // The "\n\r" separator is kept as-is for compatibility with existing output.
        if (href) { links += href + "\n\r"; }
    }
    return links;
}
/**
 * Extracts the href value from the tail of an anchor element, but only when
 * the anchor's link text equals the wanted text.
 *
 * @param {string} str - Text of the form "href=aaaa xxx>yyyy</a>"
 *                       (leading whitespace is tolerated).
 * @param {string} [linkText="xiaz"] - Link text an anchor must have (after
 *                       trimming surrounding whitespace) to be accepted.
 * @returns {string} The href value without surrounding quotes, or "" when
 *                   the text is not a well-formed anchor tail, the link text
 *                   differs, the quoted value is unterminated, or the link is
 *                   of a skipped kind (mailto:, "#", javascript:, vbscript:,
 *                   file:, or a value starting with one of ' " | \ < > &).
 */
function getHref(str, linkText) {
    var anchorTail = /href\s*=\s*([^>]+)>([\s\S]*)<\/a>/i;
    var noHref = /mailto:|#|javascript:|vbscript:|file:/i;
    var wr = /^['"|\\<>&]/;
    var trimRe = /(^\s+)|(\s+$)/g;
    var wanted = (linkText == null) ? "xiaz" : String(linkText);
    var match, attrs, char0, pos, strhref;

    if (str == null || str === "") { return ""; }

    // Use the match object of THIS call. The legacy static RegExp.$2 keeps
    // the value of the last successful match, so a failed match right after
    // a wanted link would wrongly pass the text filter.
    match = anchorTail.exec(String(str));
    if (!match) { return ""; }
    if (match[2].replace(trimRe, "") !== wanted) { return ""; }

    // Everything between "href=" and the closing ">" of the opening tag.
    attrs = match[1].replace(trimRe, "");
    char0 = attrs.charAt(0);
    strhref = "";
    if (char0 === "'" || char0 === "\"") {
        // Quoted value: take what lies between the matching quotes.
        pos = attrs.indexOf(char0, 1);
        if (pos > 0) { strhref = attrs.substring(1, pos); }
    } else {
        // Unquoted value: it ends at the first whitespace (more attributes
        // follow) or spans the whole remainder (href=aaaa> form).
        pos = attrs.search(/\s/);
        strhref = pos > 0 ? attrs.substring(0, pos) : attrs;
    }
    strhref = strhref.replace(trimRe, "");

    // Filter on the href value itself, not on the other attributes.
    if (strhref.search(noHref) >= 0) { return ""; }
    if (strhref.search(wr) >= 0) { return ""; }
    return strhref;
}
//-->
</script>
</head>
<body>
<h3> 从字符串中提取超链接地址 </h3>
<p>
作者:howwa(howwa@sina.com) <br/>
时间:2007-7-1 <br/>
blog: <a target=_blank href=http://blog.sina.com.cn/u/1495389370> http://blog.sina.com.cn/u/1495389370 </a> <br/>
</p>
请在这里提供文本: <br/>
<textarea cols=80 rows=10 name=mytext>
function checksq()
{
if (document.sq.user.value.length <1)
{alert( "请输入用户信息! ");
return false;}
return true;}
function check()
{
if (document.se.text.value.length <1)
{alert( "请输入关键字! ");
return false;}
return true;
}
//-->
<!--//
function check0()
{
if ((document.jstj.sm.value.length <1) && (document.jstj.zz.value.length <1) && (document.jstj.cbs.value.length <1) && (document.jstj.cbrq.value.length <1) )
{
alert( "请输入检索信息! ");
return false;
}
return true;
<img src= "images/folder.gif "> <span class= "p6 "> <font color= "#669900 "> yinqsj </font> </span> <br/>
<img src= "images/yd.gif " width= "11 " height= "14 "> <a href=rcount.asp?ssnum=1002> yued </a>
<img src= "images/down.gif " width= "11 " height= "14 "> <a href=dcount.asp?ssnum=1002> xiaz </a>
<img src= "images/pl.gif " width= "11 " height= "14 "> <a href=guestbook/index.asp?ssnum=1002&bookname=yinqsj发表评论 </a>
<img src= "images/sq.gif " width= "11 " height= "14 "> <a href=sq/add.asp?ssnum=1002&bookname=yinqsj > 添加个人书签 </a> </font>
</span> </td>
<img src= "images/down.gif " width= "11 " height= "14 "> <a href=dcount.asp?ssnum=2745> xiaz </a>
<img src= "images/folder.gif "> <span class= "p6 "> <font color= "#669900 "> yinqsj </font> </span> <br/>
<img src= "images/down.gif " width= "11 " height= "14 "> <a href=dcount.asp?ssnum=1279> xiaz </a>
</textarea>
<br/>
<input type=button value= "OK,Please click me! " onclick= "document.all.mylinks.value=getHrefs(document.all.mytext.value) ">
<br/> <br/> 这里是分析出来的链接地址: <br/>
<textarea cols=80 rows=10 name=mylinks>
</textarea>
</body>
</html>