I built a stack-based parser for HTML tags, but it's pretty sloppy and bad (and is probably really inefficient). But... it works!
Code: Select all
/**
 * Filters a fragment of untrusted HTML against a whitelist of tags.
 *
 * The input is scanned once, left to right:
 *  - "<name>" / "<name attrs>" and "</name>" are passed through verbatim when
 *    "name" is in the whitelist (matching is case-sensitive, and an opening
 *    tag name must be followed directly by a space or ">");
 *  - every other "<" is rewritten to "&lt;" and every ">" that does not
 *    terminate an accepted tag is rewritten to "&gt;";
 *  - accepted tags that are still open at the end of the input are closed,
 *    innermost first, so the result is properly nested;
 *  - finally, any remaining tag that carries an inline event handler
 *    attribute (on...=) is replaced by a warning message.
 *
 * NOTE(review): this is a best-effort filter, not a complete HTML sanitiser.
 * It does not inspect attribute values (e.g. href="javascript:..."), and it
 * allows <style>. For hostile input prefer a vetted sanitising library.
 *
 * @param string $render Raw, possibly hostile, HTML fragment.
 * @return string The filtered fragment.
 */
function chtml($render) {
    $render = (string) $render;

    // Tags that never take a closing tag; accepted but never auto-closed.
    $selfclose = array("hr","img","br","!");
    $chtml_acceptedtags = array(
        //Self Closing
        "hr","img","br","!",
        //Normal
        "b","i","u","s","span","div","a",
        "style","blockquote",
        "h1","h2","h3","h4","h5","h6","p","address","big",
        "cite","code","dfn","em","kbd","listing",
        "plaintext","pre","strike","samp","small",
        "strong","sub","sup","tt","var","ol","li","ul",
        "table","td","tr");

    // name => index maps, so membership tests are a single isset().
    $accepted = array_flip($chtml_acceptedtags);
    $void = array_flip($selfclose);

    $output = "";
    $openstack = array(); // accepted, non-void tags currently open (innermost last)
    $length = strlen($render);
    $pos = 0;

    while ($pos < $length) {
        // Copy plain text up to the next angle bracket in one go.
        $textlen = strcspn($render, "<>", $pos);
        if ($textlen > 0) {
            $output .= substr($render, $pos, $textlen);
            $pos += $textlen;
            continue;
        }

        // A ">" reached here does not terminate an accepted tag: escape it.
        if ($render[$pos] === ">") {
            $output .= "&gt;";
            $pos++;
            continue;
        }

        // We are on a "<". The candidate tag runs to the next bracket; it only
        // counts as a tag when that bracket is ">" (another "<", or the end of
        // the input, means this "<" was never closed).
        $span = ($pos + 1 < $length) ? strcspn($render, "<>", $pos + 1) : 0;
        $end = $pos + 1 + $span;
        $accept = false;

        if ($end < $length && $render[$end] === ">") {
            $inner = (string) substr($render, $pos + 1, $span);

            if ($inner !== "" && $inner[0] === "/") {
                // Closing tag: must be exactly "</name>".
                $name = (string) substr($inner, 1);
                if ($name !== "" && isset($accepted[$name])) {
                    $accept = true;
                    // Close the most recently opened tag of that name, if any.
                    // A stray closing tag is passed through but cancels nothing.
                    for ($i = count($openstack) - 1; $i >= 0; $i--) {
                        if ($openstack[$i] === $name) {
                            array_splice($openstack, $i, 1);
                            break;
                        }
                    }
                }
            } else {
                // Opening tag: the name ends at the first space (or at ">").
                $name = (string) substr($inner, 0, strcspn($inner, " "));
                if ($name !== "" && isset($accepted[$name])) {
                    $accept = true;
                    if (!isset($void[$name])) {
                        $openstack[] = $name;
                    }
                }
            }
        }

        if ($accept) {
            // Emit the whole tag, including its terminating ">".
            $output .= substr($render, $pos, $end - $pos + 1);
            $pos = $end + 1;
        } else {
            $output .= "&lt;";
            $pos++;
        }
    }

    // Close whatever is still open, innermost first.
    while (!empty($openstack)) {
        $output .= "</" . array_pop($openstack) . ">";
    }

    // Kill any tag carrying a JavaScript event attribute. This matches
    // case-insensitively, tolerates whitespace before "=", and does not depend
    // on the shape of the attribute value, so it blocks everything the former
    // POSIX pattern did plus variants such as ONCLICK= or onclick=(f)(1).
    $filtered = preg_replace(
        '~<[a-z][^>]*[\s\x0B"\'/]on[a-z0-9_]+\s*=[^>]*>~i',
        "<b>ATTENTION! JavaScript Entity Blocked!</b>",
        $output
    );

    if ($filtered === null) {
        // The regex engine failed (e.g. backtrack limit): fail closed by
        // escaping the entire input rather than returning unfiltered markup.
        return htmlspecialchars($render, ENT_QUOTES);
    }

    return $filtered;
}