首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 网站开发 > CSS >

过滤HTML代码,只留下文本

2012-11-22 
过滤HTML代码,只留下文本:using System; using System.Web; using System.Text.RegularExpressions; pu……

过滤HTML代码,只留下文本

    using System;
    using System.Web;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Filters HTML markup out of a string and keeps only the text.
    /// Script blocks, tags and comment markers are removed, a small set of
    /// character entities is decoded, any leftover angle brackets and CR/LF
    /// pairs are dropped, and the result is HTML-encoded and trimmed.
    /// </summary>
    /// <param name="Htmlstring">The HTML source to filter. May be null or empty.</param>
    /// <returns>
    /// The HTML-encoded, trimmed text; an empty string when
    /// <paramref name="Htmlstring"/> is null or empty.
    /// </returns>
    public static string NoHTML(string Htmlstring)
    {
        if (string.IsNullOrEmpty(Htmlstring))
        {
            return string.Empty;
        }

        // Remove script blocks. Singleline lets '.' cross line breaks so that
        // multi-line script bodies are removed together with their tags.
        Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Remove HTML tags.
        Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

        // Remove a line break together with the whitespace that follows it.
        Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

        // Remove what is left of HTML comments.
        Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

        // Decode common character entities (named and numeric forms).
        Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
        Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

        // Drop every other numeric character reference.
        Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

        // string.Replace returns a new string; the result must be assigned back,
        // otherwise these three calls have no effect.
        Htmlstring = Htmlstring.Replace("<", "");
        Htmlstring = Htmlstring.Replace(">", "");
        Htmlstring = Htmlstring.Replace("\r\n", "");

        // HttpUtility.HtmlEncode does not need an active HttpContext, so the
        // method also works outside of a web request.
        Htmlstring = HttpUtility.HtmlEncode(Htmlstring).Trim();
        return Htmlstring;
    }

?

using System;
using System.Text.RegularExpressions;

/// <summary>
/// Small console demo around <see cref="StripHTML"/>, a C# function that
/// extracts the text from HTML code.
/// </summary>
public class StripHTMLTest
{
    /// <summary>
    /// Strips a sample HTML document and prints the remaining text.
    /// </summary>
    public static void Main()
    {
        string s = StripHTML("<HTML><HEAD><TITLE>资料测试</TITLE></HEAD><BODY>信息</BODY></HTML>");
        Console.WriteLine(s);
    }

    /// <summary>
    /// Removes HTML markup.
    /// </summary>
    /// <param name="strHtml">Source text containing HTML. May be null or empty.</param>
    /// <returns>
    /// The text left after removal; an empty string when
    /// <paramref name="strHtml"/> is null or empty.
    /// </returns>
    public static string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        // Each row is { pattern, replacement }. Rows are applied in order, and
        // the order matters: "-->" is turned into a line break first so that
        // the following "<!--.*\n" rule can remove the whole comment.
        string[,] rules =
        {
            // (?s) lets '.' cross line breaks so multi-line scripts are removed.
            { @"(?s)<script[^>]*?>.*?</script>", "" },
            { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
            { @"([\r\n])[\s]+", "" },
            { @"&(quot|#34);", "\"" },
            { @"&(amp|#38);", "&" },
            { @"&(lt|#60);", "<" },
            { @"&(gt|#62);", ">" },
            { @"&(nbsp|#160);", " " },
            { @"&(iexcl|#161);", "\xa1" }, // chr(161)
            { @"&(cent|#162);", "\xa2" },  // chr(162)
            { @"&(pound|#163);", "\xa3" }, // chr(163)
            { @"&(copy|#169);", "\xa9" },  // chr(169)
            { @"&#(\d+);", "" },
            { @"-->", "\r\n" },
            { @"<!--.*\n", "" }
        };

        string strOutput = strHtml;
        for (int i = 0; i < rules.GetLength(0); i++)
        {
            strOutput = Regex.Replace(strOutput, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
        }

        // string.Replace returns a new string; the result must be assigned back,
        // otherwise these three calls have no effect.
        strOutput = strOutput.Replace("<", "");
        strOutput = strOutput.Replace(">", "");
        strOutput = strOutput.Replace("\r\n", "");
        return strOutput;
    }
}

// Written as a static method:
#region Remove HTML tags
/// <summary>
/// Removes HTML tags.
/// </summary>
/// <param name="HTMLStr">Text that may contain HTML tags. May be null or empty.</param>
/// <returns>
/// The text with every <c>&lt;...&gt;</c> run removed; an empty string when
/// <paramref name="HTMLStr"/> is null or empty.
/// </returns>
public static string ParseTags(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", "");
}
#endregion

?

 // Extract the image address from text
 #region Extract the image address from text
 /// <summary>
 /// Extracts the image address from text: returns the <c>src</c> value of
 /// the first <c>&lt;img&gt;</c> tag that has one.
 /// </summary>
 /// <param name="HTMLStr">Text that may contain HTML. May be null or empty.</param>
 /// <returns>
 /// The <c>src</c> value without surrounding quotes and with its original
 /// letter case; an empty string when no such tag is found or when
 /// <paramref name="HTMLStr"/> is null or empty.
 /// </returns>
 public static string GetImgUrl(string HTMLStr)
 {
     if (string.IsNullOrEmpty(HTMLStr))
     {
         return string.Empty;
     }

     // IgnoreCase replaces the former ToLower() call so that the URL keeps its
     // case. The three alternatives cover double-quoted, single-quoted and
     // unquoted attribute values; the quotes are left out of the capture.
     // "\ssrc" requires whitespace before the attribute name, so "data-src"
     // is not mistaken for "src".
     Match m = Regex.Match(
         HTMLStr,
         @"<img\b[^>]*?\ssrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>""']+))",
         RegexOptions.IgnoreCase);

     if (m.Success)
     {
         return m.Groups["url"].Value;
     }

     return string.Empty;
 }
 #endregion

?

热点排行