var okTags = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;
var okLinks = /^(<a\shref="(\#\d+|(https?|ftp):\/\/[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+)"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;
var okImg = /^(<img\ssrc="https?:(\/\/[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+)"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;
text = text.replace(/<[^<>]*>?/gi, function (tag) {
return (tag.match(okTags) || tag.match(okLinks) || tag.match(okImg)) ? tag : ""
})
var okTags = /^(<\/?(b|blockquote|code|del|dd|dl|dt|em|h1|h2|h3|i|kbd|li|ol|p|pre|s|sup|sub|strong|strike|ul)>|<(br|hr)\s?\/?>)$/i;
var okLinks = /^(<a\shref="(\#\d+|(https?|ftp):\/\/[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+)"(\stitle="[^"<>]+")?\s?>|<\/a>)$/i;
var okImg = /^(<img\ssrc="https?:(\/\/[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+)"(\swidth="\d{1,3}")?(\sheight="\d{1,3}")?(\salt="[^"<>]*")?(\stitle="[^"<>]*")?\s?\/?>)$/i;
text = text.replace(/<[^<>]*>?/gi, function (tag) {
return (tag.match(okTags) || tag.match(okLinks) || tag.match(okImg)) ? tag : ""
})
Share
Improve this question
asked May 11, 2011 at 5:58
Dipesh KCDipesh KC
3,3173 gold badges35 silver badges50 bronze badges
2 Answers
Reset to default 4replace()
can be replaced by preg_replace()
and match()
would be preg_match() which is very powerful
Check the manual to see how it would work with your code, they are not that different, and you can use your call back function too.
It looks like you are parsing HTML with regex.
In that case, PHP provides DOMDocument which is pretty good for doing DOM manipulation. You could use it ensure your string contains safe HTML.
A more domain specific solution is HTML Purifier.