正则表达式(兔子党理事)
<ul>
<li>
<div class="members">
<a class="avatar_50 q_namecard" href="http://www.xxx.com/47110868" link="nameCard_47110868"
target="_blank">
<img src="http://www.xxx.com/qzone/47110868/47110868/50" /></a>
<div class="members_text">
<a class="member_name text_overflow" href="http://www.xxx.com/47110868" target="_blank">
鱼儿</a><span class="member_id">(47110868)</span>
<p class="member_role">
女</p>
</div>
</div>
</li>
<li>
<div class="members">
<a class="avatar_50 q_namecard" href="http://www.xxx.com/15729936" link="nameCard_15729936"
target="_blank">
<img src="http://www.xxx.com/qzone/15729936/15729936/50" /></a>
<div class="members_text">
<a class="member_name text_overflow" href="http://www.xxx.com/15729936" target="_blank">
狠好、很嗨</a><span class="member_id">(15729936)</span>
<p class="member_role">
男</p>
</div>
</div>
</li>
<li>
<div class="members">
<a class="avatar_50 q_namecard" href="http://www.xxx.com/156436857" link="nameCard_156436857"
target="_blank">
<img src="http://www.xxx.com/qzone/156436857/156436857/50" /></a>
<div class="members_text">
<a class="member_name text_overflow" href="http://www.xxx.com/156436856" target="_blank">
可乐、很傻</a><span class="member_id">(156436857)</span>
<p class="member_role">
男</p>
</div>
</div>
</li>
<li>
<div class="members">
<a class="avatar_50 q_namecard" link="nameCard_635593526"
target="_blank">
<img src="http://www.xxx.com/qzone/635593526/635593526/50" /></a>
<div class="members_text">
<a class="member_name text_overflow" href="http://www.xxx.com/635593526" target="_blank">
微笑、狠瘦 </a><span class="member_id">(635593526)</span>
<p class="member_role">
男</p>
</div>
</div>
</li>
</ul>
// Pulls each member's display name, id and gender out of the saved member-list
// HTML and prints one record per match, followed by a separator line.
//
// Pattern notes:
//   (?is)    - case-insensitive; '.' also matches line breaks, because the
//              markup for one member spans several lines.
//   atxt     - text inside <a class="member_name text_overflow">.
//   phone    - the value between the parentheses in <span class="member_id">.
//   sex      - text inside <p class="member_role">.
string pattern = @"(?is)<div\s*class=""members_text"">\s*<a\s*class=""member_name text_overflow""[^>]*?>(?<atxt>.*?)</a>\s*<span\s*class=""member_id""[^>]*?>\((?<phone>.*?)\)</span>\s*<p\s*class=""member_role""[^>]*?>(?<sex>.*?)</p>\s*</div>";

// The page is read from a fixed path and decoded as GB2312.
string htmlsource = File.ReadAllText(@"C:\1.txt", Encoding.GetEncoding("GB2312"));

foreach (Match m in Regex.Matches(htmlsource, pattern))
{
    // The tag bodies start on the line after the opening tag, so the lazy
    // captures include the line break (and sometimes trailing padding).
    // Trim them so each field prints on exactly one clean line.
    Console.WriteLine(m.Groups["atxt"].Value.Trim());
    Console.WriteLine(m.Groups["phone"].Value.Trim());
    Console.WriteLine(m.Groups["sex"].Value.Trim());
    Console.WriteLine("------------------------------------");
}
// Alternative extraction: reads the same HTML from "1.txt" (system default
// encoding) and projects every member entry into a single formatted line
// using positional groups: 1 = name, 2 = id span content, 3 = gender.
string txt = File.ReadAllText("1.txt", Encoding.Default);

// A verbatim literal is required here: the pattern contains double quotes
// around the HTML attribute values, which must be written as "" so the C#
// string does not terminate early. (?is) makes '.' span line breaks.
var vls = Regex.Matches(txt, @"(?is)<a class=""member_name text_overflow"".*?>(.*?)</a>\s*<span class=""member_id"">(.*?)</span>\s*<p class=""member_role"">(.*?)</p>")
    .OfType<Match>()
    .Select(x => "名字:" + x.Groups[1].Value + " 号码:" + x.Groups[2].Value + " 性别:" + x.Groups[3].Value);

foreach (var item in vls)
{
    Console.WriteLine(item);
}

// Keep the console window open until a key is pressed.
Console.Read();