【求助】关于正则表达式的匹配问题,请大神们帮忙!!!
有以下几个正则表达式的问题:
1、用正则搜索TABLE表格,只保留新闻列表所在表格
2、将A标签代码和日期分离开存入数组
3、将A标签新闻链接前加上域名,补全地址。
主要是第1个问题,后两个问题如果能顺带解决就更好了。请大神们帮忙,正则新手,研究一天了。
原始代码如下:
<table border="1" width="99%" id="table204" cellspacing="0" cellpadding="0" style="border-collapse:collapse" bordercolor="#D2D2D7">
<tr>
<td background="/images/a_2.jpg">
<table border="0" width="100%" id="table207" cellspacing="0" cellpadding="0">
<tr>
<td width="27" height="26" align="center">
<img border="0" src="/images/4965789.gif" width="16" height="16"></td>
<td width="399" class="style27">
<span style="font-size:10pt"><a href="./" style=' color:#006600; '>重大新闻</a></span></td>
<td width="552" align="left">
<iframe src="/search.htm" scrolling="no" width="552" height="25" marginwidth=0 marginheight=0 frameborder="0"></iframe>
</td>
</tr>
</table></td>
</tr>
<tr>
<td valign="top" background="/images/s4573.jpg" height="500">
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td width="80%">
·<a href="./201303/20130302_356057.htm" target="_blank">新闻标题</a>
</td>
<td>
(2013-03-02)
</td>
</tr>
<tr>
<td width="80%">
·<a href="./201302/20130228_355996.htm" target="_blank">新闻标题</a>
</td>
<td>(2013-02-28)</td>
</tr>
<tr>
<td width="80%">
·<a href="./201302/20130228_355992.htm" target="_blank">新闻标题</a>
</td>
<td>(2013-02-28)</td>
</tr>
<tr>
<td width="80%">
·<a href="./201302/20130228_355985.htm" target="_blank">新闻标题</a>
</td>
<td>(2013-02-28)</td>
</tr>
</table>
</td>
</tr>
<tr>
<td>
</td>
</tr>
</table>
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td width="80%">
·<a href="./201303/20130302_356057.htm" target="_blank">新闻标题</a>
</td>
<td>
(2013-03-02)
</td>
</tr>
<tr>
<td width="80%">
·<a href="./201302/20130228_355996.htm" target="_blank">新闻标题</a>
</td>
<td>(2013-02-28)</td>
</tr>
<tr>
<td width="80%">
·<a href="./201302/20130228_355992.htm" target="_blank">新闻标题</a>
</td>
<td>(2013-02-28)</td>
</tr>
<tr>
<td width="80%">
·<a href="./201302/20130228_355985.htm" target="_blank">新闻标题</a>
</td>
<td>(2013-02-28)</td>
</tr>
</table>
' Classic ASP / VBScript answer script.
' Steps:
'   1. Cut the news-list table out of the page HTML (it is the table that
'      directly follows the cell with background "/images/s4573.jpg").
'   2. Print every link URL together with its date, one pair per line.
'   3. Rewrite relative "./..." links to absolute links under `domain`
'      and print the resulting HTML fragment.
'
' `s` below is a shortened sample standing in for the fetched page source.
s="<td valign=""top"" background=""/images/s4573.jpg"" height=""500""><table><tr>"&_
"<td width=""80%"">"&_
" ·<a href=""./201303/20130302_356057.htm"" target=""_blank"">新闻标题</a>"&_
"</td>"&_
"<td>(2013-03-02)</td>"&_
"</tr></table>"

domain="http://www.coding123.net"

set rx=new regexp
rx.Ignorecase=true
rx.global=true
' The dot in the file name is escaped so it only matches a literal ".".
' [\s\S]+? is a lazy "any character including newline" run: it stops at the
' first closing </table>, i.e. the end of the news-list table.
rx.pattern="/images/s4573\.jpg"" height=""500"">([\s\S]+?)</table>"
set m=rx.execute(s)

if m.count>0 then
    ' The closing tag is consumed by the pattern but is not part of the
    ' capture group, so it is appended again to keep the fragment balanced.
    s=m.item(0).submatches(0)&"</table>"

    '======== Collect link URLs and dates
    rx.pattern="href=""([^""]+)"""
    set mcurl=rx.execute(s)

    set rxtm=new Regexp ' matches dates written as (yyyy-mm-dd)
    rxtm.global=true
    rxtm.Ignorecase=true
    rxtm.pattern="\((\d{4}(-\d{2}){2})\)"
    set mctime=rxtm.execute(s)
    set rxtm=nothing

    ' URLs and dates are paired by position. Iterate only over the pairs that
    ' exist in both collections so a row without a date cannot raise an
    ' out-of-range error on mctime.item(i).
    pairCount=mcurl.count
    if mctime.count<pairCount then pairCount=mctime.count

    for i=0 to pairCount-1
        ' NOTE(review): the separator literal was garbled in the original
        ' post; "------" is a reconstruction - adjust if another one is wanted.
        response.write mcurl.item(i).submatches(0)&"------"&mctime.item(i).submatches(0)&"<br/>"
    next

    '======== Prefix relative links with the domain
    ' "\." matches only the literal leading dot of "./path"; the captured
    ' "/path" part is re-emitted after the domain via $1.
    rx.pattern="href=""\.(/[^""]+)"""
    s=rx.replace(s,"href="""&domain&"$1""")
    response.write s
end if
set rx=nothing