Yikes, I've just wasted a good hour or so on this
Pattern Nester
Code: Select all
<?php
/**
* Thrown if something bad happens in the nester.
*/
class PatternNesterException extends Exception
{
}
/**
* Takes a series of regular expressions and allows them to be built "into" each other recursively.
*/
class PatternNester
{
/**
* All supported PCRE modifiers.
* @var array
*/
protected $allModifiers = array('i', 's', 'x', 'u', 'e', 'U', 'm');
/**
* Standard modifiers which can be set with (?modifier) or (?-modifier).
* @var array
*/
protected $stdModifiers = array('i', 's', 'x', 'u', 'm');
/**
* Patterns which have been added.
* @var array
*/
protected $patterns = array();
/**
* The delimiter consistent across all patterns.
* @var string
*/
protected $delimiter;
/**
* Ctor.
* @param string The PCRE delimiter.
* @throws PatternNesterException If the delimiter is not usable.
*/
public function __construct($delimiter='/')
{
if (strlen($delimiter) != 1)
{
throw new PatternNesterException('PCRE delimiters must be a single character. `' . $delimiter . '` given.');
}
$this->delimiter = $delimiter;
}
/**
* Get the delimiter which is being used.
* @return string
*/
public function getDelimiter()
{
return $this->delimiter;
}
/**
* Strip delimiters from a given pattern and substitute modifiers with inline modifiers.
* @param string Pattern wit delimiter equal to PatternNester::getDelimiter()
* @return string
* @throws PatternNesterException If the pattern has the wrong delimiters.
*/
public function stripDelimiters($pattern)
{
$delimiter_re = '~^' . preg_quote($this->getDelimiter(), '~') .
'(.*?)' . preg_quote($this->getDelimiter(), '~') .
'([' . implode($this->allModifiers) . ']*)$~D';
if (!preg_match($delimiter_re, $pattern, $matches))
{
throw new PatternNesterException('Patterns must be delimited with the ' . $this->getDelimiter() . ' delimiter.');
}
$expr = $matches[1];
$modifiers = $matches[2];
$len = strlen($modifiers);
for ($i = 0; $i < $len; $i++)
{
$char = $modifiers{$i};
if (!in_array($char, $this->stdModifiers))
{
continue;
}
$expr = '(?' . $char . ')' . $expr . '(?-' . $char . ')';
}
return '(?:' . $expr . ')';
}
/**
* Add a pattern which may be nested, identified by $key
* @param string $key
* @param string Pattern
* @throws PatternNesterException If the key is not valid.
*/
public function addPattern($key, $pattern)
{
if ($key == 'this')
{
throw new PatternNesterException('Cannot use special key `this` as a pattern identifier.');
}
elseif (!preg_match('/^[a-zA-Z0-9_]+$/D', $key))
{
throw new PatternNesterException('Key `' . $key . '` is not a safe identifier.');
}
$this->patterns[$key] = $this->stripDelimiters($pattern);
}
/**
* Get all added patterns as an associative array.
* @return array
*/
public function getPatterns()
{
return $this->patterns;
}
/**
* Get an individual pattern by its key.
* @param string Key name
* @return string
* @throws PatternNesterException If no pattern is found with the key given
*/
public function getPattern($key)
{
if (array_key_exists($key, $patterns = $this->getPatterns()))
{
return $patterns[$key];
}
throw new PatternNesterException('Pattern with key `' . $key . '` does not exist.');
}
/**
* Remove keys from a pattern.
* Keys are found in a pattern in the form (?{key})
* This replaces them with .*? if the key is valid, or simply quotes them if not valid.
* @param string Pattern
* @return string
*/
public function stripKeys($pattern)
{
if (preg_match_all('/\(\?\{[a-zA-Z0-9_]+\}\)/', $pattern, $matches))
{
foreach ($matches[0] as $match)
{
$len = strlen($match);
$pos = strpos($pattern, $match);
$key = substr($match, 3, -2);
$patterns = $this->getPatterns();
if ($key == 'this' || array_key_exists($key, $patterns))
{
$replace = '.*?';
}
else
{
$replace = preg_quote($match, $this->getDelimiter());
}
$pattern = substr_replace($pattern, $replace, $pos, $len);
}
}
return $pattern;
}
/**
* Create a nested pattern from the patterns added.
* @param string Pattern to scan
* @param int Maximum depth of recursion
* @return string
*/
public function buildNestedPattern($pattern, $depth, $current_depth=0, $orig_pattern=null)
{
if ($depth <= 0)
{
return $pattern;
}
if ($orig_pattern === null)
{
$orig_pattern = $pattern;
}
$current_depth++;
if (preg_match_all('/\(\?\{[a-zA-Z0-9_]+\}\)/', $pattern, $matches))
{
foreach ($matches[0] as $match)
{
$len = strlen($match);
$pos = strpos($pattern, $match);
$key = substr($match, 3, -2);
if ($key == 'this')
{
$replace = $this->stripDelimiters($orig_pattern);
}
else
{
$patterns = $this->getPatterns();
if (!array_key_exists($key, $patterns))
{
$replace = preg_quote($match, $this->getDelimiter());
}
else
{
$replace = $patterns[$key];
}
}
$pattern = substr_replace($pattern, $replace, $pos, $len);
}
}
if ($current_depth < $depth)
{
return $this->buildNestedPattern($pattern, $depth, $current_depth, $orig_pattern);
}
return $this->stripKeys($pattern);
}
}
Usage:
Code: Select all
$p = new PatternNester('/');
$p->addPattern('foo', '/abc .*? [xyz] 123/is');
$pattern = $p->buildNestedPattern('/something (?{this}) here with a (?{foo}) in it/m', 2);
echo $pattern;
Code: Select all
/something (?:(?m)something (?:(?m)something .*? here with a (?:(?s)(?i)abc .*? [xyz] 123(?-i)(?-s)) in it(?-m)) here with a (?:(?s)(?i)abc .*? [xyz] 123(?-i)(?-s)) in it(?-m)) here with a .*? in it/m
*sigh*