Here is my solution so far.
It is case insensitive. There are still some small bugs in it.
Does not consider if ending tag is /> instead of </tagname>
It should implement a better blacklist/whitelist for what to scan, i.e. whether to include all constraints or not.
Kind of <span style='color:blue' title='I'm naughty, are you naughty?'>smurf</span> but it is a first shot
Code: Select all
<?php
// Sample markup fed to GrepTagContent by the demo code at the end of this file.
// It contains nested <div> elements with different class/id properties, <br/>
// tags that must be ignored when searching for 'div', and a trailing
// <div final="foo"> whose value "foo" belongs to a property other than 'class'.
$string = <<<EOD
Something is here
<div class="editor" id="newsBox1">
<div class="foo">
Yadda yadda yadda
</div>
<div class="newsItem">
<div class="newsTitle">
THE TORONTO TOY AUCTION
</div>
November 19, 2005 <br/>
Preview 9am Sale 10am<br/>
International Centre, <br/>
Hall 4<br/>
6900 Airport Rd, Mississauga
</div>
</div>
Blah blah blah blah
<div final="foo">
Yadda yadda yadda
</div>
EOD;
/**
 * Finds every element of one tag name in an HTML-like string and reports the
 * byte range each element covers (opening tag through matching closing tag).
 *
 * Opening and closing tags are paired with a stack, so nested elements of the
 * same tag are handled; results are listed in the order in which the elements
 * are closed (inner elements before the element that contains them).
 * Tag names, property names and property values are compared
 * case-insensitively. Self-closing tags (<div ... />) are reported as
 * elements that cover just that one tag.
 *
 * This is a lightweight scanner, not an HTML parser: a '>' inside a quoted
 * property value, comments and script bodies get no special treatment.
 *
 * <code>
 * $grepper = new GrepTagContent($string);
 * $grepper->setTag('div');
 * $grepper->setPropertyConstraints(
 *     array('class' => array('foo', 'newsitem'),
 *           'id'    => array(7)
 *     )
 * );
 * $grepper->process();
 * $result = $grepper->getFinalResult();
 * foreach ($result as $data) {
 *     echo "=======================\n";
 *     echo $grepper->getText($data['start'], $data['end']) . "\n";
 *     echo "=======================\n";
 * }
 * </code>
 */
class GrepTagContent
{
    /**
     * The string to scan.
     * @var string
     */
    private $data = '';

    /**
     * Every element found by process(), before property constraints are applied.
     * Each entry is array('start' => int, 'end' => int, 'key' => int, 'properties' => string).
     * @var array
     */
    private $rawResult = array();

    /**
     * The entries of $rawResult that satisfy the property constraints.
     * @var array
     */
    private $finalResult = array();

    /**
     * Tag to search for.
     * @todo make for multiple tags.
     * @var string
     */
    private $searchForTag = 'div';

    /**
     * Properties to search for on this tag: property name => list of accepted values.
     * e.g.
     * array('class' => array('foo', 'newsitem'),
     *       'id'    => array(7)
     * );
     * An element is accepted as soon as ONE property has ONE of its accepted values.
     * @var array
     */
    private $propertiesConstraints = array('class' => array('foo', 'newsitem'));

    /**
     * @param string $stringToManipulate The markup to scan.
     */
    public function __construct($stringToManipulate)
    {
        $this->data = (string) $stringToManipulate;
    }

    /**
     * Scans the data for the configured tag and pairs opening with closing tags.
     *
     * Must be called before getFinalResult(), and again after setTag() changes
     * the tag. Every call starts from a clean state, so repeated calls never
     * duplicate results. Closing tags without an opening tag and opening tags
     * that are never closed are ignored.
     *
     * @return void
     */
    public function process()
    {
        $this->rawResult = array();
        $this->finalResult = array();

        if ($this->searchForTag === '' || $this->data === '') {
            return;
        }

        // Group 1 holds '/' for a closing tag and '' for an opening tag.
        // The look-ahead stops 'div' from matching longer names such as '<divider>'.
        $pattern = '#<(/?)' . preg_quote($this->searchForTag, '#') . '(?=[\s/>])[^>]*>#i';
        $tokens = array();
        if (!preg_match_all($pattern, $this->data, $tokens, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
            return;
        }

        $openElements = array(); // stack of opening tags still waiting for their closing tag
        $ordinal = 0;            // 1-based count of opening tags seen so far

        foreach ($tokens as $token) {
            $tagText = $token[0][0];
            $tagStart = $token[0][1];
            $tagEnd = $tagStart + strlen($tagText) - 1; // offset of the tag's '>'

            if ($token[1][0] === '/') {
                if (empty($openElements)) {
                    continue; // stray closing tag: nothing to pair it with
                }
                $element = array_pop($openElements);
                $element['end'] = $tagEnd;
                $this->rawResult[] = $element;
                continue;
            }

            $ordinal++;
            $element = array(
                'start' => $tagStart,
                'end' => $tagEnd,
                'key' => $ordinal,
                'properties' => $tagText,
            );

            if (substr($tagText, -2) === '/>') {
                // Self-closing tag: the element is complete already.
                $this->rawResult[] = $element;
            } else {
                $openElements[] = $element;
            }
        }
    }

    /**
     * Returns the elements found by process() that satisfy the property constraints.
     *
     * Each entry has the keys 'start' and 'end' (inclusive byte offsets, usable
     * with getText()), 'key' (1-based position of the element's opening tag among
     * all opening tags of the searched name) and 'properties' (the text of the
     * opening tag). Calling this method repeatedly returns the same list.
     *
     * @return array
     */
    public function getFinalResult()
    {
        $this->finalResult = array();
        foreach ($this->rawResult as $data) {
            if ($this->isValidPropertyConstraint($this->propertiesConstraints, $data['properties'])) {
                $this->finalResult[] = $data;
            }
        }
        return $this->finalResult;
    }

    /**
     * Tells whether an opening tag satisfies at least one property constraint.
     *
     * A constraint value matches when it equals, ignoring case, either the whole
     * property value or one of its whitespace-separated parts, so
     * class="foo bar" satisfies 'foo'. Values are compared as strings, which
     * keeps a numeric constraint such as 0 from matching arbitrary text.
     * A property that is absent from the tag is treated as the empty string.
     *
     * @param array  $aConstraints      Property name => accepted value(s).
     * @param string $properties_string Text of the opening tag.
     * @return bool True when there are no constraints or when one is satisfied.
     */
    private function isValidPropertyConstraint($aConstraints, $properties_string)
    {
        if (empty($aConstraints)) {
            return true;
        }

        foreach ($aConstraints as $property_name => $aValue) {
            $actual = strtolower($this->getPropertyValue($property_name, $properties_string));
            $candidates = preg_split('/\s+/', $actual, -1, PREG_SPLIT_NO_EMPTY);
            $candidates[] = $actual;

            foreach ((array) $aValue as $accepted) {
                if (is_scalar($accepted) && in_array(strtolower((string) $accepted), $candidates, true)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns the value of one property of an opening tag, without its quotes.
     *
     * @param string $propertyName Property to look up (case-insensitive).
     * @param string $string       Text of the opening tag.
     * @return string The value, or '' when the tag does not carry the property.
     */
    private function getPropertyValue($propertyName, $string)
    {
        $wanted = strtolower(trim((string) $propertyName));
        $properties = $this->parseProperties($string);
        return isset($properties[$wanted]) ? $properties[$wanted] : '';
    }

    /**
     * Splits the text of an opening tag into a map of lower-cased property name => value.
     *
     * Properties are read one after another, so text inside a quoted value is
     * never mistaken for another property. Double-quoted, single-quoted and
     * unquoted values are supported; a property without a value maps to ''.
     * When a name occurs more than once, the first occurrence wins.
     *
     * @param string $tagText Text of the opening tag, e.g. '<div class="foo">'.
     * @return array
     */
    private function parseProperties($tagText)
    {
        // Strip the leading '<tagname' and the trailing '>' or '/>'.
        $body = (string) preg_replace('#^<[^\s/>]+#', '', (string) $tagText);
        $body = (string) preg_replace('#/?>$#', '', $body);

        // Group 1: name, 2: double-quoted value, 3: single-quoted value, 4: unquoted value.
        $pattern = '#([^\s=/"\'<>]+)(?:\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+)))?#';
        $found = array();
        preg_match_all($pattern, $body, $found, PREG_SET_ORDER);

        $properties = array();
        foreach ($found as $match) {
            $name = strtolower($match[1]);
            if (isset($properties[$name])) {
                continue;
            }
            if (isset($match[4]) && $match[4] !== '') {
                $value = $match[4];
            } elseif (isset($match[3]) && $match[3] !== '') {
                $value = $match[3];
            } elseif (isset($match[2])) {
                $value = $match[2];
            } else {
                $value = '';
            }
            $properties[$name] = $value;
        }
        return $properties;
    }

    /**
     * Sets the tag name to search for; call process() afterwards.
     *
     * @param string $sting Tag name without angle brackets, e.g. 'div'.
     * @return void
     */
    public function setTag($sting)
    {
        $this->searchForTag = trim((string) $sting);
    }

    /**
     * Sets the property constraints applied by getFinalResult().
     *
     * @param array $array Property name => list of accepted values; an empty
     *                     array accepts every element.
     * @return void
     */
    public function setPropertyConstraints($array)
    {
        $this->propertiesConstraints = (array) $array;
    }

    /**
     * Returns the part of the scanned string between two offsets, both inclusive.
     *
     * @param int $start Offset of the first byte.
     * @param int $end   Offset of the last byte.
     * @return string
     */
    public function getText($start, $end)
    {
        return (string) substr($this->data, $start, $end - $start + 1);
    }
}
// Demo run: look for <div> elements in $string whose class is 'foo' or
// 'newsitem', or whose id is 7, and print each one between separator lines.
$grepper = new GrepTagContent($string);
$grepper->setTag('div');

$constraints = array(
    'class' => array('foo', 'newsitem'),
    'id' => array(7),
);
$grepper->setPropertyConstraints($constraints);
$grepper->process();

$separator = "=======================\n";
foreach ($grepper->getFinalResult() as $match) {
    echo $separator . $grepper->getText($match['start'], $match['end']) . "\n" . $separator;
}
?>