Page 1 of 1

DOMXPath and embedding HTML - HELP !

Posted: Fri Apr 17, 2009 9:06 am
by amk1212
Hi,

I'm using the script below to read an XML file that contains embedded HTML text, but the script extracts all the HTML tags into the array. Is there any way I can specify which nodes to query and get all the data inside a tag, regardless of whether it is HTML or not?

See my XML below as an example: the footer contains inline HTML, and I want the returned array value to be "2009 <b>Copyright</b>".

Code: Select all

<?xml version="1.0" encoding="UTF-8"?>
    <cms>
        <common>
            <footer>2009 <b>Copyright</b></footer>
            <backgroundimage>back.jpg</backgroundimage> 
        </common></cms>

Code: Select all

 
<?php 
/**
 * Turns the supplied XML data into a nested array keyed by element name.
 * Attributes are dropped.  When an element has child elements, only those
 * child elements are converted; any text mixed in between them is not
 * included in the result.
 *
 * Example source XML:
 * <xmlData>
 *   <author>
 *     <name>John Doe</name>
 *     <magazine>Time</magazine>
 *     <magazine>National Geographic</magazine>
 *   </author>
 * </xmlData>
 *
 * Example result array with:
 * [xmlData][author][name] => John Doe
 * [xmlData][author][magazine][0] => Time
 * [xmlData][author][magazine][1] => National Geographic
 *
 * The parsed document is kept in static variables so that the recursive calls
 * (which pass an empty $xml_data) can reuse it.  As a consequence, a call with
 * an empty $xml_data operates on the document loaded by the most recent call
 * that supplied one.
 *
 * @author Michael  ( http://www.msbware.com )
 * @param string $xpath          The node whose child elements are converted.  Trailing slashes are
 *                               stripped and "/*" is appended unless the path already ends in "/*".
 *                               It can be left empty if you want to start at the root node.
 * @param string $xml_data       The XML string we're going to parse.  Leave empty to reuse the
 *                               document loaded by a previous call.
 * @param DOMNode $context_node  This isn't required, but can be specified if you want to use a relative $xpath
 * @return array Multi-dimensional array of elements from the XML file (empty if nothing matched)
 * @throws InvalidArgumentException If no XML has been loaded by this or an earlier call
 */
function xml2Array($xpath = "/*", $xml_data = "", $context_node = NULL)
{
  static $xml;
  static $xp;

  // Add /* to the end of $xpath if it's not there.
  // (substr/rtrim replace the ereg functions, which were removed in PHP 7.)
  if (substr($xpath, -2) !== "/*")
    $xpath = rtrim($xpath, "/") . "/*";

  // Create our temporary array that we'll use to build the final array piece by piece
  $tmp_array = array();

  // If XML was supplied, (re)initialize the DOM objects shared with the recursive calls
  if ($xml_data)
  {
    $xml = new DOMDocument;
    $xml->loadXML($xml_data);
    $xp = new DOMXPath($xml);
  }

  // Without a loaded document there is nothing to query; fail with a clear message
  // instead of a fatal "call to a member function on null".
  if (!$xp)
    throw new InvalidArgumentException("xml2Array() needs \$xml_data on its first call");

  // Get the appropriate nodes for the current path
  $nodelist = ($context_node) ? $xp->query($xpath, $context_node) : $xp->query($xpath);

  // query() yields false for an expression it cannot evaluate; treat that as "no nodes"
  if ($nodelist === false)
    return $tmp_array;

  // This variable is used to keep track of how many times a node with the same name
  // has appeared.  It puts the appropriate count after each one.  ie: [<nodeName>][<count>]
  $counter = array();

  // Loop through the current list of nodes
  // If there is more than one child node in the current node with the same name,
  // create a sub array for them and add a counter.
  // E.g. [<element>] = <value>  would become [<element>][0] = <value1> and [<element>][1] = <value2>
  foreach ($nodelist AS $node)
  {
    $name = $node->nodeName;
    $counter[$name] = (isset($counter[$name])) ? $counter[$name] + 1 : 0;
    $index = $counter[$name];

    // Does this node share its name with a sibling?  If so it goes into a numbered sub array.
    $has_siblings = $xp->evaluate('count('.$name.')', $node->parentNode) > 1;

    if ($xp->evaluate('count(./*)', $node) > 0)
    {
      // The node has child elements: recurse, addressing it relative to its parent.
      // XPath positions are 1-based, hence $index + 1.
      if ($has_siblings)
        $tmp_array[$name][$index] = xml2Array($name."[".($index + 1)."]", "", $node->parentNode);
      else
        $tmp_array[$name] = xml2Array($name, "", $node->parentNode);
    }
    else
    {
      // Leaf node: store its text value
      if ($has_siblings)
        $tmp_array[$name][$index] = $node->nodeValue;
      else
        $tmp_array[$name] = $node->nodeValue;
    }
  }

  return $tmp_array;
}
?>