正则表达式问题 html取值
<div class="fans_top" id="weibo_head"><a href="http://baidu.com/" target="_blank">评论(0)</a><div>dddd</div></div><div class="ddd" id="ccc"><a href="http://baidu.com/" target="_blank">评论(20)</a><a href="http://baidu.com/" target="_blank">评论(150)</a></div>
// Sample markup: the target <div class="fans_top" id="weibo_head"> contains a nested <div>.
string sampleHtml = @"<div class=""fans_top"" id=""weibo_head""><a href=""http://baidu.com/"" target=""_blank"">评论(0)</a><div>dddd</div></div><div class=""ddd"" id=""ccc""><a href=""http://baidu.com/"" target=""_blank"">评论(20)</a><a href=""http://baidu.com/"" target=""_blank"">评论(150)</a></div>";

// (?is)     : ignore case, and let '.' match newlines as well.
// (?<=...)  : the match must be preceded by the opening tag of the target div.
// .*?       : lazy, so it stops at the FIRST "</div>" that follows. For the sample above
//             that is the closing tag of the nested <div>dddd</div>, not the closing tag
//             of the target div, so the captured text ends with "<div>dddd".
// (?=</div>): the closing tag itself is not included in the match.
string innerPattern = "(?is)(?<=<div\\s*class=\"fans_top\"\\s*id=\"weibo_head\">).*?(?=</div>)";

Match firstHit = Regex.Match(sampleHtml, innerPattern, RegexOptions.None);

Console.WriteLine(firstHit.Value);
// Keep the console window open until Enter is pressed.
Console.ReadLine();
[解决办法]
// Load the HTML to search from a text file, decoding it as GB2312.
Encoding sourceEncoding = Encoding.GetEncoding("GB2312");
string html = File.ReadAllText(@"C:\Documents and Settings\Administrator\桌面\Test.txt", sourceEncoding);

// Extracts the inner HTML of <div class="fans_top" id="weibo_head">, keeping nested <div>s intact.
//   (?i)                          : case-insensitive.
//   (?<=<div ... class ... id ...>): lookbehind for the opening tag; quotes around the attribute
//                                   values are optional, and class must appear before id.
//   (?> A | B | C )*              : atomic alternation, repeated:
//       A  <div[^>]*>(?<Open>)    : a nested opening tag pushes onto the "Open" stack;
//       B  </div>(?<-Open>)       : a closing tag pops the stack, and fails when the stack is
//                                   empty, so an unmatched "</div>" is never consumed;
//       C  (?:(?!</?div\b)[\s\S])*: any run of characters that does not start a div tag.
//   (?=</div>)                    : the match must be followed by a closing tag, which is
//                                   left out of the result.
string nestedDivPattern = @"(?i)(?<=<div[^>]*?class=['""]?fans_top['""]?[^>]*?id=['""]?weibo_head['""]?[^>]*?>)(?><div[^>]*>(?<Open>)|</div>(?<-Open>)|(?:(?!</?div\b)[\s\S])*)*(?=</div>)";

Match innerMatch = Regex.Match(html, nestedDivPattern);
string innerHtml = innerMatch.Value;
// Result reported by the original author for their test file:
// <a href=\"http://baidu.com/\" target=\"_blank\">评论(0)</a>\r\n<div>dddd</div>