What do you think?
Code: Select all
<?php
#cleanUpHTML function by Kelly Ehret
#http://www.phpfreaks.com/quickcode/Clean_Up_Word_HTML/45.php
/**
 * Reduce Word-exported HTML to a small whitelist of plain tags.
 *
 * Steps, in order:
 *   1. Remove escape slashes from the input.
 *   2. Discard everything before the "<div class=Section1>" wrapper, if present.
 *   3. Strip every tag except p, b, i, img, u, a, table, tr, td and h1-h6.
 *   4. Remove all attributes from p, b, i, u and h1-h6 opening tags
 *      (img, a, table, tr and td keep their attributes).
 *
 * @param string $text Raw (slash-escaped) HTML to clean.
 * @return string The cleaned HTML fragment.
 */
function cleanUpHTML($text)
{
    // Remove escape slashes. NOTE(review): this presumably exists for input
    // escaped by magic quotes; it will also eat genuine backslashes.
    $text = stripslashes($text);

    // Trim everything before the content wrapper. stristr() returns false when
    // the marker is missing; in that case keep the full text instead of
    // silently turning the whole document into an empty string.
    $fromSection = stristr($text, "<div class=Section1>");
    if ($fromSection !== false) {
        $text = $fromSection;
    }

    // Strip tags, still leaving attributes; second argument is the allow-list.
    // This also removes the closing </body> and </html> tags, so no separate
    // replacement is needed for them.
    $text = strip_tags(
        $text,
        '<p><b><i><img><u><a><table><tr><td><h1><h2><h3><h4><h5><h6>'
    );

    // Remove the attributes of the simple one-letter tags. The lookahead forces
    // the tag name to end right after the letter, so "<img ...>" is no longer
    // mistaken for "<i ...>" and collapsed into "<i>".
    $text = preg_replace('/<(p|b|i|u)(?=[\s\/>])[^>]*>/i', '<\\1>', $text);

    // Same for heading tags, whose names are two characters long.
    $text = preg_replace('/<(h[1-6])(?=[\s\/>])[^>]*>/i', '<\\1>', $text);

    return $text;
}
?>