正则表达式取table的数据
请问如何用正则表达式取出下面这个 table 中各行、各列的数据值?
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml" ><head><title> title</title></head><body> <form name="form1" method="post" action="Index.aspx" id="form1"> <span id="XmlIndex1"><table width="100%" border="0" cellspacing="0" cellpadding="0" id="Table2" class="selPage" xmlns:fo="http://www.w3.org/1999/XSL/Format"> <tr> <td width="428" height="30" class="tabTitleLabel pdLeft">指数</td> <td width="25"> </td> <td width="220" align="center" class="tabTitleLabel">LAST</td> <td align="center" class="tabTitleLabel">%CHG</td> </tr> <tr> <td height="27" class="tabNote pdLeft bgF1">美国道琼斯工业指数</td> <td align="center" class="bgF1"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote bgF1">12949.87</td> <td align="center" class="tabNote bgF1">0.35</td> </tr> <tr> <td height="27" class="tabNote pdLeft">纳斯达克指数</td> <td align="center"><img alt="" src="/images/downArraw.jpg"></td> <td align="center" class="tabNote">2951.78</td> <td align="center" class="tabNote">-0.27</td> </tr> <tr> <td height="27" class="tabNote pdLeft bgF1">日本日经指数</td> <td align="center" class="bgF1"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote bgF1">9485.09</td> <td align="center" class="tabNote bgF1">1.08</td> </tr> <tr> <td height="27" class="tabNote pdLeft">香港恒生指数</td> <td align="center"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote">21566.42</td> <td align="center" class="tabNote">0.35</td> </tr> <tr> <td height="27" class="tabNote pdLeft bgF1">上证指数</td> <td align="center" class="bgF1"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote bgF1">2370.23</td> <td align="center" class="tabNote bgF1">0.55</td> </tr> <tr> <td height="27" class="tabNote pdLeft">深证综指</td> <td align="center"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote">9654.27</td> <td 
align="center" class="tabNote">0.41</td> </tr> <tr> <td height="27" class="tabNote pdLeft bgF1">澳洲综合指数</td> <td align="center" class="bgF1"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote bgF1">4332.80</td> <td align="center" class="tabNote bgF1">1.39</td> </tr> <tr> <td height="27" class="tabNote pdLeft">NZSE50</td> <td align="center"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote">3315.57</td> <td align="center" class="tabNote">0.81</td> </tr> <tr> <td height="27" class="tabNote pdLeft bgF1">伦敦金融时报</td> <td align="center" class="bgF1"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote bgF1">5905.07</td> <td align="center" class="tabNote bgF1">0.33</td> </tr> <tr> <td height="27" class="tabNote pdLeft">S&P 500</td> <td align="center"><img alt="" src="/images/upArraw.jpg"></td> <td align="center" class="tabNote">1361.23</td> <td align="center" class="tabNote">0.23</td> </tr> <tr> <td height="27" class="tabNote pdLeft bgF1">多伦多综合指数</td> <td align="center" class="bgF1"><img alt="" src="/images/downArraw.jpg"></td> <td align="center" class="tabNote bgF1">12458.30</td> <td align="center" class="tabNote bgF1">-0.22</td> </tr></table></span> </form></body></html>
// Load the saved page from disk, decoding it as GB2312.
string html = File.ReadAllText(@"E:\1.txt", Encoding.GetEncoding("gb2312"));

// The pattern matches the whole Table2 element. Group 1 sits inside the repeated
// row/cell groups, and .NET keeps every capture of a repeated group, so
// Groups[1].Captures holds the body of each <td> in document order.
Regex tablePattern = new Regex(@"(?is)<table[^>]*?id=""Table2""[^>]*?>(?:\s*<tr>(?:\s*<td[^>]*?>(.*?)</td>){4}\s*</tr>)*\s*</table>");
Match tableMatch = tablePattern.Match(html);
CaptureCollection cells = tableMatch.Groups[1].Captures;

// Print one cell body per line; nothing is printed when the table is not found.
for (int k = 0; k < cells.Count; k++)
{
    Console.WriteLine(cells[k].Value);
}

// Sample output as pasted by the author. Each value is written with
// Console.WriteLine, i.e. on its own line; the line breaks are not shown below.
/*指数 LAST%CHG美国道琼斯工业指数<img alt="" src="/images/upArraw.jpg">12949.870.35纳斯达克指数<img alt="" src="/images/downArraw.jpg">2951.78-0.27日本日经指数<img alt="" src="/images/upArraw.jpg">9485.091.08香港恒生指数<img alt="" src="/images/upArraw.jpg">21566.420.35上证指数<img alt="" src="/images/upArraw.jpg">2370.230.55深证综指<img alt="" src="/images/upArraw.jpg">9654.270.41澳洲综合指数<img alt="" src="/images/upArraw.jpg">4332.801.39NZSE50<img alt="" src="/images/upArraw.jpg">3315.570.81伦敦金融时报<img alt="" src="/images/upArraw.jpg">5905.070.33S&P 500<img alt="" src="/images/upArraw.jpg">1361.230.23多伦多综合指数<img alt="" src="/images/downArraw.jpg">12458.30-0.22*/
[解决办法]
// Parses the saved page with HtmlAgilityPack and writes every row of the table
// inside <span id="XmlIndex1"> to the response, one row per "<br/>"-terminated line.
// For data rows (every row after the header) the second cell is written as the
// src of its arrow image instead of its text.
string s = File.ReadAllText(Server.MapPath("~/test.txt"));
HtmlDocument htmlDoc = new HtmlDocument();
htmlDoc.LoadHtml(Server.HtmlDecode(s));

// SelectNodes returns null (not an empty collection) when nothing matches,
// so the counts are taken defensively to avoid a NullReferenceException.
HtmlNodeCollection trs = htmlDoc.DocumentNode.SelectNodes(@"//span[@id='XmlIndex1']/table/tr");
int rowCount = trs == null ? 0 : trs.Count;
for (int i = 0; i < rowCount; i++)
{
    HtmlNodeCollection tds = trs[i].SelectNodes(@"td");
    int cellCount = tds == null ? 0 : tds.Count;
    for (int j = 0; j < cellCount; j++)
    {
        // The XPath must be relative to the current cell (".//img"). An absolute
        // "//img" is evaluated from the document root and always yields the first
        // image of the page, so every row would report the same arrow.
        HtmlNode img = (i > 0 && j == 1) ? tds[j].SelectSingleNode(".//img") : null;
        if (img != null)
        {
            Response.Write(img.GetAttributeValue("src", string.Empty) + " ");
        }
        else
        {
            // Header row, ordinary cells, or an image cell without an <img>.
            Response.Write(tds[j].InnerText + " ");
        }
    }
    Response.Write("<br/>");
    // NOTE(review): the closing brace of the outer loop is not visible in this excerpt.