You need to use a tokenizer to do that. Looking over the BBCode parsing functions in the phpBB source might help.
Here's a simple BBCode parser I wrote, though. Sorry, you'll need to suss it out from the code, but basically: look at the method called "tokenize()" and then at any of the parse methods, such as parseQuotes().
Code: Select all
<?php
/*
A custom BBCode handler with support
for plugins for new BBCode tags.
Chris Corbyn (Jan 2006)
*/
/**
 * Minimal BBCode-to-HTML converter built around a regex tokenizer.
 *
 * Each parse*() method splits the text into tokens (tags and the plain text
 * between them) with tokenize(), verifies the tags are balanced with
 * checkClosure(), and then re-assembles the tokens into HTML. If the tags are
 * not balanced the text is returned untouched.
 *
 * The class does not HTML-escape plain text; it only escapes values it places
 * inside HTML attributes. Text-level escaping is left to the caller.
 */
class BBCode
{
    /** Text supplied to the constructor; used when a parse method gets no argument. */
    private $source;

    /** Result of the most recent parse call that stores its output (see fetchResult()). */
    private $output;

    /** Tokens produced by the most recent tokenize() call made by a parse method. */
    private $tokens = array();

    /** Tag name => regular expression (without delimiters) matching the OPENING tag. */
    private $tags = array(
        'b' => '\[b\]',
        'u' => '\[u\]',
        'i' => '\[i\]',
        'size' => '\[size=([^\]]+)\]',
        'color' => '\[color=([^\]]+)\]',
        'img' => '\[img\]',
        'url' => '\[url=([^\]]+)\]',
        'quote' => '\[quote(?:="([^"]+)")?\]',
        'code' => '\[code(?:=([^\]]+))?\]',
        'list' => '\[list(?:=([^\]]+))?\]',
        'smilies' => ':dense:|:drunk:|:\)|:-\)|:smile:|;\)|;-\)|:wink:|:p|:P|:-p|:-P|:\(|:-\(|:sad:|:razz:|:\?|:confused:|:\||:-\||:blank:|:nod:|:shake:|:reading:|:cool:|:D|:-D|:grin:|:hit:|:smart:|:laugh:|:lol:|:thumbsup:|:thumbsdown:|:cry:|:wavecry:|:arrow:|:angry:|:woot:|:love:|:rolleyes:|:unsure:|:angel:|:clap:|:buzz:|:shocked:|:food:|:sleep:|:smirk:|:wiz:|:wizard:'
    );

    /** Directory (relative URL) that holds the smiley images. */
    private $smiliePath = 'sys/img/smilies';

    /**
     * Smiley token => array(image file name, alt text).
     * Lookup is case-sensitive, so a token that only matched the tokenizer
     * regex case-insensitively (e.g. ":d") is emitted as plain text.
     */
    private $smilies = array(
        ':)' => array('smile1.gif', 'smile'),
        ':-)' => array('smile1.gif', 'smile'),
        ':smile:' => array('smile1.gif', 'smile'),
        ';)' => array('wink.gif', 'wink'),
        ';-)' => array('wink.gif', 'wink'),
        ':wink:' => array('wink.gif', 'wink'),
        ':p' => array('tongue.gif', 'razz'),
        ':P' => array('tongue.gif', 'razz'),
        ':-p' => array('tongue.gif', 'razz'),
        ':-P' => array('tongue.gif', 'razz'),
        ':razz:' => array('tongue.gif', 'razz'),
        ':(' => array('sad.gif', 'sad'),
        ':-(' => array('sad.gif', 'sad'),
        ':sad:' => array('sad.gif', 'sad'),
        ':nod:' => array('yes.gif', 'yes'),
        ':shake:' => array('no.gif', 'no'),
        ':reading:' => array('coffee.gif', 'coffee'),
        ':cool:' => array('cool2.gif', 'cool'),
        ':D' => array('grin.gif', 'grin'),
        ':-D' => array('grin.gif', 'grin'),
        ':grin:' => array('grin.gif', 'grin'),
        ':laugh:' => array('laugh.gif', 'laugh'),
        ':lol:' => array('laugh.gif', 'laugh'),
        ':hit:' => array('hit.gif', 'wollop'),
        ':thumbsup:' => array('thumbsup.gif', 'thumbs up'),
        ':thumbsdown:' => array('thumbsdown.gif', 'thumbs down'),
        ':dense:' => array('dense.gif', 'dense'),
        ':smart:' => array('smartass.gif', 'smart'),
        ':?' => array('huh.gif', 'confused'),
        ':confused:' => array('huh.gif', 'confused'),
        ':arrow:' => array('arrow.gif', 'arrow'),
        ':cry:' => array('cry.gif', 'cry'),
        ':wavecry:' => array('wavecry.gif', 'waving crying'),
        ':angry:' => array('wag.gif', 'angry'),
        ':love:' => array('wub.gif', 'lovey dovey'),
        ':woot:' => array('w00t.gif', 'woot!'),
        ':rolleyes:' => array('rolleyes.gif', 'rolling eyes'),
        ':unsure:' => array('hmmm.gif', 'hmmm'),
        ':angel:' => array('angel.gif', 'angel'),
        ':clap:' => array('clap2.gif', 'clapping'),
        ':drunk:' => array('drunk.gif', 'drunk'),
        ':buzz:' => array('mml.gif', 'buzzing'),
        ':|' => array('noexpression.gif', 'blank'),
        ':-|' => array('noexpression.gif', 'blank'),
        ':blank:' => array('noexpression.gif', 'blank'),
        ':shocked:' => array('ohmy.gif', 'shocked'),
        ':food:' => array('pizza.gif', 'pizza'),
        ':sleep:' => array('sleeping.gif', 'sleeping'),
        ':smirk:' => array('smirk.gif', 'smirk'),
        ':wiz:' => array('wizard.gif', 'wizard'),
        ':wizard:' => array('wizard.gif', 'wizard')
    );

    /**
     * @param string $input BBCode text used whenever a parse method is called without an argument.
     */
    function __construct($input)
    {
        $this->setSource($input);
    }

    /**
     * Registers or overrides the opening-tag pattern for a tag.
     *
     * @param string $tag  Tag name; also used verbatim to build the closing-tag pattern "[/tag]".
     * @param string $eval Regular expression (no delimiters) matching the opening tag.
     */
    public function setBBTag($tag, $eval)
    {
        $this->tags[$tag] = $eval;
    }

    /**
     * @param string $source Text to use as the default parse input.
     */
    protected function setSource($source)
    {
        $this->source = $source;
    }

    /**
     * Splits text into an ordered list of tokens for one tag.
     *
     * A token is an opening tag, a closing tag ("[/tag]"), a complete block of
     * an ignored tag (opening tag through its closing tag, kept as a single
     * token so its contents are not examined), or the plain text in between.
     *
     * Implemented as a loop rather than one recursive call per tag, so the
     * call depth does not grow with the number of tags in the text. The
     * $stack and $recursing parameters are kept for backward compatibility.
     *
     * @param string       $tag       Key into the tag table.
     * @param string|false $text      Text to split; the constructor text is used when empty.
     * @param array        $ignore    Tag names whose complete blocks are passed through as one token.
     * @param array        $stack     Tokens collected so far; new tokens are appended.
     * @param mixed        $recursing When truthy and $text is empty, $stack is returned unchanged.
     * @return array List of string tokens in document order.
     */
    protected function tokenize($tag, $text = false, $ignore = array(), $stack = array(), $recursing = false)
    {
        // Explicit emptiness checks: a loose !$text would also treat the string "0" as empty.
        if ($text === false || $text === null || $text === '') {
            if ($recursing) {
                return $stack; //Nothing left to do
            }
            $text = $this->source;
        }
        $text = (string) $text;

        $block = '';
        foreach ($ignore as $t) {
            $block .= $this->tags[$t].'.*?\[/'.$t.'\]|';
        }
        $re = '@'.$block.$this->tags[$tag].'|\[/'.$tag.'\]@is'; //Ignored block, opening tag or closing tag

        do {
            // A failed/erroring match and a zero-length match (possible with a
            // custom pattern) both end tokenizing; the latter would never advance.
            if (!preg_match($re, $text, $found, PREG_OFFSET_CAPTURE) || $found[0][0] === '') {
                $stack[] = $text; //No match, the remainder is plain text
                break;
            }
            $chunk = $found[0][0];
            $offset = $found[0][1];
            if ($offset > 0) {
                $stack[] = substr($text, 0, $offset); //Text before the tag
            }
            $stack[] = $chunk; //The actual tag (or ignored block)
            // Cast because substr() returns false instead of '' at end-of-string on older PHP versions.
            $text = (string) substr($text, $offset + strlen($chunk));
        } while ($text !== '');

        return $stack;
    }

    /**
     * Returns the text a parse method should work on.
     *
     * @param mixed $source Argument passed to the parse method.
     * @return mixed The constructor text when $source is '', null or false; otherwise $source.
     *               Unlike empty(), the string "0" counts as real input.
     */
    private function resolveSource($source)
    {
        if ($source === '' || $source === null || $source === false) {
            return $this->source;
        }
        return $source;
    }

    /**
     * Tests whether a token is, in its entirety, the opening tag for $tag.
     *
     * The match is anchored at both ends so that an ignored block token such
     * as "[code][b]x[/b][/code]" is not mistaken for a "[b]" tag.
     *
     * @param string $tag Key into the tag table.
     * @param string $tok Token to test.
     * @return array|false preg_match() groups on success, false otherwise.
     */
    private function matchOpeningTag($tag, $tok)
    {
        if (preg_match('@^(?:'.$this->tags[$tag].')\z@is', $tok, $groups)) {
            return $groups;
        }
        return false;
    }

    /**
     * Tests whether a token is, in its entirety, the closing tag "[/tag]".
     *
     * @param string $tag Tag name.
     * @param string $tok Token to test.
     * @return bool
     */
    private function isClosingTag($tag, $tok)
    {
        return preg_match('@^\[/'.$tag.'\]\z@is', $tok) === 1;
    }

    /**
     * Escapes the characters that could terminate an HTML attribute value or open a tag.
     *
     * Done with str_replace() rather than htmlspecialchars() so the result does
     * not depend on the text's character encoding, and so entities that are
     * already present (e.g. "&amp;") are left alone.
     *
     * @param string $value Raw attribute value.
     * @return string Value safe to place between double quotes in an attribute.
     */
    private function escapeAttribute($value)
    {
        return str_replace(
            array('"', "'", '<', '>'),
            array('&quot;', '&#039;', '&lt;', '&gt;'),
            $value
        );
    }

    /**
     * Converts a tag that takes no parameter and maps straight to an HTML element pair.
     *
     * Blocks of [code] are skipped. Stores the result as the current output.
     *
     * @param string $tag    Key into the tag table ('b', 'i', 'u').
     * @param string $open   HTML emitted for the opening tag.
     * @param string $close  HTML emitted for the closing tag.
     * @param string $source Text to convert.
     * @return string Converted text, or $source unchanged when the tags are not balanced.
     */
    private function parseSimpleTag($tag, $open, $close, $source)
    {
        $this->tokens = $this->tokenize($tag, $source, array('code')); //Fetch tokens
        if (!$this->checkClosure($tag)) {
            $this->output = $source;
            return $this->output;
        }
        $ret = '';
        foreach ($this->tokens as $tok) {
            if ($this->matchOpeningTag($tag, $tok) !== false) {
                $ret .= $open;
            } elseif ($this->isClosingTag($tag, $tok)) {
                $ret .= $close;
            } else {
                $ret .= $tok; //Insert the text
            }
        }
        $this->output = $ret;
        return $this->output;
    }

    /**
     * Runs every handler in sequence, feeding each one the previous result.
     *
     * @param string $source Text to convert; the constructor text is used when empty.
     * @return string Converted text (also stored as the current output).
     */
    public function parseComplete($source = '')
    {
        $source = $this->resolveSource($source);
        $source = $this->parseSmilies($source);
        $source = $this->parseBold($source);
        $source = $this->parseItalic($source);
        $source = $this->parseUnderline($source);
        $source = $this->parseQuotes($source);
        $source = $this->parseUrl($source);
        $source = $this->parseCode($source); //Last, so the earlier passes can still recognise [code] blocks
        $this->output = $source;
        return $this->output;
    }

    /**
     * Replaces smiley codes with <img> elements. Blocks of [code] are skipped.
     *
     * Does not update the stored output; the result is only returned.
     *
     * @param string $source Text to convert; the constructor text is used when empty.
     * @return string Converted text.
     */
    public function parseSmilies($source = '')
    {
        $source = $this->resolveSource($source);
        $this->tokens = $this->tokenize('smilies', $source, array('code')); //Fetch tokens
        $ret = '';
        foreach ($this->tokens as $tok) {
            if (isset($this->smilies[$tok])) {
                $smiley = $this->smilies[$tok];
                $ret .= '<img src="'.$this->smiliePath.'/'.$smiley[0].'" alt="'.$smiley[1].'" />';
            } else {
                $ret .= $tok;
            }
        }
        return $ret;
    }

    /**
     * Converts [url=...]text[/url] into links that open in a new window.
     * Blocks of [code] are skipped.
     *
     * @param string $source Text to convert; the constructor text is used when empty.
     * @return string Converted text, or the input unchanged when the tags are not balanced.
     */
    public function parseUrl($source = '')
    {
        $source = $this->resolveSource($source);
        $this->tokens = $this->tokenize('url', $source, array('code')); //Fetch tokens
        if (!$this->checkClosure('url')) {
            $this->output = $source;
            return $this->output;
        }
        $ret = '';
        foreach ($this->tokens as $tok) {
            $groups = $this->matchOpeningTag('url', $tok);
            if ($groups !== false) { //Opening tag
                // The URL is user input going into an attribute: escape it so it cannot break out of href="...".
                $href = $this->escapeAttribute($this->makeAbsoluteUrl($groups[1]));
                $ret .= '<a href="'.$href.'" target="_blank">';
            } elseif ($this->isClosingTag('url', $tok)) {
                $ret .= '</a>'; //Close the link
            } else {
                $ret .= $tok; //Insert the text
            }
        }
        $this->output = $ret;
        return $this->output;
    }

    /**
     * Checks that the current tokens contain properly paired tags for $tag.
     *
     * @param string $tag Key into the tag table.
     * @return bool True when every closing tag follows an unmatched opening tag
     *              and the number of opening and closing tags is equal.
     */
    private function checkClosure($tag)
    {
        $depth = 0;
        foreach ($this->tokens as $tok) {
            if ($this->matchOpeningTag($tag, $tok) !== false) {
                $depth++;
            } elseif ($this->isClosingTag($tag, $tok)) {
                $depth--;
                if ($depth < 0) {
                    return false; //Closing tag before its opening tag
                }
            }
        }
        return $depth === 0;
    }

    /**
     * Ensures a URL carries a scheme, defaulting to http://.
     *
     * URLs whose scheme can execute script when used as a link target
     * (javascript, vbscript, data) are also prefixed, which turns them into
     * an ordinary, harmless http link.
     *
     * @param string $url URL as written in the tag.
     * @return string URL beginning with "scheme://".
     */
    private function makeAbsoluteUrl($url)
    {
        $hasScheme = preg_match('@^[a-z]+://@i', $url);
        $isScriptScheme = preg_match('@^(?:javascript|vbscript|data):@i', $url);
        if (!$hasScheme || $isScriptScheme) {
            $url = 'http://'.$url;
        }
        return $url;
    }

    /**
     * Converts [quote] / [quote="name"] blocks into styled boxes.
     * Blocks of [code] are skipped.
     *
     * The name is inserted as written, like the rest of the text content.
     *
     * @param string $source Text to convert; the constructor text is used when empty.
     * @return string Converted text, or the input unchanged when the tags are not balanced.
     */
    public function parseQuotes($source = '')
    {
        $source = $this->resolveSource($source);
        $this->tokens = $this->tokenize('quote', $source, array('code')); //Fetch tokens
        if (!$this->checkClosure('quote')) {
            $this->output = $source;
            return $this->output;
        }
        $ret = '';
        foreach ($this->tokens as $tok) {
            $groups = $this->matchOpeningTag('quote', $tok);
            if ($groups !== false) { //Opening quote
                $info = 'Quote';
                if (!empty($groups[1])) {
                    $info = $groups[1].' wrote'; //Name parameter given
                }
                $ret .= '<div style="font-style: italic; border: 1px dotted #777777; background: #FFFFF8; padding: 4px; margin: 4px;">'
                    ."\n"
                    .'<div style="font-weight: bold; font-style: normal;">'.$info.':</div>';
            } elseif ($this->isClosingTag('quote', $tok)) {
                $ret .= '</div>'; //Close the quote box
            } else {
                $ret .= $tok; //Insert the text
            }
        }
        $this->output = $ret;
        return $this->output;
    }

    /**
     * Converts [b]...[/b] into <strong>...</strong>.
     *
     * @param string $source Text to convert; the constructor text is used when empty.
     * @return string Converted text, or the input unchanged when the tags are not balanced.
     */
    public function parseBold($source = '')
    {
        return $this->parseSimpleTag('b', '<strong>', '</strong>', $this->resolveSource($source));
    }

    /**
     * Converts [i]...[/i] into <em>...</em>.
     *
     * @param string $source Text to convert; the constructor text is used when empty.
     * @return string Converted text, or the input unchanged when the tags are not balanced.
     */
    public function parseItalic($source = '')
    {
        return $this->parseSimpleTag('i', '<em>', '</em>', $this->resolveSource($source));
    }

    /**
     * Converts [u]...[/u] into <u>...</u>.
     *
     * @param string $source Text to convert; the constructor text is used when empty.
     * @return string Converted text, or the input unchanged when the tags are not balanced.
     */
    public function parseUnderline($source = '')
    {
        return $this->parseSimpleTag('u', '<u>', '</u>', $this->resolveSource($source));
    }

    /**
     * Converts [code] / [code=language] blocks into styled <code> boxes.
     *
     * For [code=js] and [code=javascript], the text token immediately after the
     * opening tag is passed through JSHighlight (a class defined elsewhere;
     * presumably a syntax highlighter) after stripping "<br />" from it.
     *
     * @param string $source Text to convert; the constructor text is used when empty.
     * @return string Converted text, or the input unchanged when the tags are not balanced.
     */
    public function parseCode($source = '')
    {
        $source = $this->resolveSource($source);
        $this->tokens = $this->tokenize('code', $source); //Fetch tokens
        if (!$this->checkClosure('code')) {
            $this->output = $source;
            return $this->output;
        }
        $ret = '';
        $type = false; //Language of the most recent opening tag, when recognised
        $last = 0;     //1-based position of the most recent opening tag that named a language
        $i = 0;
        foreach ($this->tokens as $tok) {
            $i++;
            $groups = $this->matchOpeningTag('code', $tok);
            if ($groups !== false) { //Opening code block
                $type = false;
                if (!empty($groups[1])) {
                    $language = strtolower($groups[1]);
                    if ($language === 'javascript' || $language === 'js') {
                        $type = 'js';
                    }
                    $last = $i;
                }
                $ret .= '<div style=" border: 1px solid #AAAAAA; padding: 4px; margin: 4px; background: #FFFFFF;">'
                    ."\n"
                    .'<code style="white-space: pre; font-family: courier,monospace; color: #007700;">';
            } elseif ($this->isClosingTag('code', $tok)) {
                $ret .= '</code></div>'; //Close the code box
            } else {
                // Only the token directly following the opening tag is the block's body.
                if ($type === 'js' && $last === $i - 1) {
                    $js = new JSHighlight(str_replace('<br />', '', $tok));
                    $tok = $js->Generate(1);
                }
                $ret .= $tok; //Insert the text
            }
        }
        $this->output = $ret;
        return $this->output;
    }

    /**
     * Prints the tokens from the most recent parse call. Just for debugging.
     */
    public function dumpTokens()
    {
        echo '<pre>'.print_r($this->tokens, 1).'</pre>';
    }

    /**
     * Internal accessor for the stored output.
     *
     * @return string|null Output of the last parse call that stored one; null if none has.
     */
    protected function getOutput()
    {
        return $this->output;
    }

    /**
     * @return string|null Output of the last parse call that stored one; null if none has.
     */
    public function fetchResult()
    {
        return $this->getOutput();
    }
}
?>
Hmm... I think I must have removed the plugin support at some point.