正则替换 html
2010年11月16日
// Sample input for the tag-stripping demo in Page_Load.
// A verbatim literal (@"...") is used because a regular C# string literal may not
// contain raw line breaks.
// NOTE(review): the sample contains no markup, so any HTML it once held appears to
// have been lost — confirm against the original page.
protected string str = @"sdasasdsdd
sdsaaassss
说是道 ";
protected void Page_Load(object sender, EventArgs e)
{
    // Removes every match of `regexstr` from the `str` field (case-insensitive)
    // and stores the result back into `str`.
    //
    // Alternative patterns that can be assigned to `regexstr` instead.
    // NOTE(review): the patterns in the original comments were truncated; the ones
    // below are reconstructions based on the descriptions next to them — verify.
    //   @"<[^>]*>"                      remove every tag
    //   @"<script[^>]*?>.*?</script>"   remove script elements together with their content
    //                                   (add RegexOptions.Singleline for multi-line scripts)
    //   @"<img[^>]*>"                   remove image tags
    //   @"<(?!/?br\b)[^>]*>"            remove every tag except <br>
    //   @"<table[^>]*?>.*?</table>"     remove tables together with their content
    //                                   (add RegexOptions.Singleline for multi-line tables)
    //
    // Active pattern: remove every tag except <img>, <br> and <p> (opening or closing).
    // The negative lookahead rejects those three names; \b stops "p" from also
    // whitelisting tags such as <pre> or <param>.
    string regexstr = @"<(?!/?(?:img|br|p)\b)[^>]*>";
    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}

' ExecReg
'   Deletes every match of a regular expression from a string.
'   re      - pattern text handed to RegExp.Pattern
'   content - string to search
'   Returns content with all matches replaced by the empty string.
'   Matching is global and case-sensitive (IgnoreCase is left at its default).
Function ExecReg(re, content)
    Dim matcher
    Set matcher = New RegExp
    matcher.Pattern = re
    matcher.Global = True
    ExecReg = matcher.Replace(content, "")
End Function
' DecodeFilter
'   Lower-cases the input and then removes, one pattern after another, every match
'   of a fixed list of regular expressions by calling ExecReg.
'   html - text to filter. Declared ByVal so the caller's variable is not
'          overwritten (VBScript parameters are ByRef by default and this
'          function assigns to html repeatedly).
'   Returns the filtered, lower-cased text.
'
'   NOTE(review): most patterns below read "]*>" and one is empty. They look
'   truncated (tag names such as "<table[^>" appear to be missing) and are kept
'   exactly as found — restore them from the original source before relying on
'   this function.
Function DecodeFilter(ByVal html)
    ' ExecReg matches case-sensitively, so the text is lower-cased first.
    ' NOTE(review): this also lower-cases all visible text in the result.
    html = LCase(html)

    ' Remove client-side scripting: javascript, vbscript, jscript, js, vbs, event handlers.
    ' NOTE(review): the next statement was part of the comment line above and
    ' therefore never executed; it is left disabled — confirm whether it should run:
    '   html = ExecReg("]*>", html)
    html = ExecReg("(javascript|jscript|vbscript|vbs):", html)
    html = ExecReg("on(mouse|exit|error|click|key)", html)
    html = ExecReg("", html)

    ' Remove tables
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)

    ' Remove class="" attributes
    html = ExecReg("(]+) class=[^ |^>]*([^>]*>)", html)

    ' Remove style="" attributes
    html = ExecReg("(]+) style=""[^""]*""([^>]*>)", html)

    ' Remove XML
    html = ExecReg("]*>", html)

    ' Remove namespaces
    html = ExecReg("]*>", html)

    ' Remove fonts
    html = ExecReg("]*>", html)

    ' Remove marquees
    html = ExecReg("]*>", html)

    ' Remove objects
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)
    html = ExecReg("]*>", html)

    DecodeFilter = html
End Function
' nohtml
'   Strips HTML tags from a string and removes all space characters.
'   str - text to clean. Declared ByVal so the caller's variable is not
'         overwritten (VBScript parameters are ByRef by default and this
'         function assigns to str).
'   Returns the cleaned text.
Function nohtml(ByVal str)
    Dim re
    Set re = New RegExp
    re.IgnoreCase = True
    re.Global = True

    ' The previous patterns "(\)" were not valid regular expressions (the escaped
    ' ")" left the group unclosed), so Replace raised a runtime error. A single
    ' pattern matching any opening or closing tag replaces both passes.
    re.Pattern = "<[^>]*>"
    str = re.Replace(str, " ")

    ' NOTE(review): both calls are identical as written, so the second is a no-op;
    ' one of them may originally have targeted another character (e.g. &nbsp;) — confirm.
    str = Replace(str, " ", "")
    str = Replace(str, " ", "")

    nohtml = str
    Set re = Nothing
End Function