I was bored enough today to try this out.
text_attr.php
Code: Select all
<?php
/**
* Text_Attr Class
*
* @author Zoxive - kyle@zoxive.com
* @date 27th August 2009
*/
class Text_Attr{
    /** @var string|null Raw HTML handed to the constructor. */
    protected $doc = NULL;
    /** @var DOMDocument Parsed form of $doc (an empty document when $doc is empty). */
    protected $dom = NULL;
    /** @var string|null Location of the document; used to resolve linked stylesheets. */
    protected $url = NULL;

    /**
     * Constructs a new Text_Attr object.
     *
     * @param string HTML document
     * @param string URL of such document, if not local
     * @return void
     */
    public function __construct($doc = NULL, $url = NULL)
    {
        $this->doc = $doc;
        $this->url = $url;
        // loadHTML() is an instance method; calling it statically is an error
        // on PHP 8, so build the document explicitly.
        $this->dom = new DOMDocument();
        // An empty source makes loadHTML() fail (ValueError on PHP 8), so an
        // empty/absent document is simply left as an empty DOM.
        if(is_string($doc) && trim($doc) !== ''){
            // Real-world HTML is rarely valid; collect parser complaints
            // internally instead of emitting a warning for each one.
            $previous = libxml_use_internal_errors(true);
            $this->dom->loadHTML($doc);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }
    }

    /**
     * Locates the Attribute inside the document
     *
     * Sources are consulted in this order and the first one that yields a
     * value wins: the element's own HTML attribute, its inline style, an HTML
     * attribute on an ancestor, an ancestor's inline style, embedded <style>
     * rules, and finally linked stylesheets (only fetched if nothing else
     * matched).
     *
     * @param string name of HTML ID
     * @param string name of HTML/CSS Attr
     * @return string value of Attr, or NULL when it cannot be found
     */
    public function get_attr($selector, $attr)
    {
        $lookups = array(
            'inline_attr',       // Inline html
            'inline_css_attr',   // Inline CSS
            'parent_attr',       // Parent html
            'parent_css_attr',   // Parent CSS
            'embedded_css_attr', // Embedded CSS
            'linked_css_attr',   // Linked CSS
        );
        foreach($lookups as $lookup){
            $result = $this->$lookup($selector, $attr);
            // Compare against NULL/'' rather than truthiness so that a
            // legitimate value such as "0" is not mistaken for "not found".
            if($this->has_value($result)) return $result;
        }
        return NULL;
    }

    /**
     * Tells whether a lookup produced a usable value.
     *
     * @param mixed lookup result
     * @return bool FALSE for NULL and the empty string, TRUE otherwise
     */
    protected function has_value($value)
    {
        return $value !== NULL && $value !== '';
    }

    /**
     * Searches the local dom for the Attr
     *
     * @param string name of HTML ID
     * @param string name of HTML/CSS Attr
     * @return string value of Attr, or NULL
     */
    protected function inline_attr($selector, $attr)
    {
        $ele = $this->dom->getElementById($selector);
        if(!$ele || !$ele->hasAttribute($attr)) return NULL;
        return $ele->getAttribute($attr);
    }

    /**
     * Looks at inline CSS for the Attr
     *
     * @param string name of HTML ID
     * @param string name of HTML/CSS Attr
     * @param DOMElement element to inspect instead of looking up the ID
     * @return string value of Attr, or NULL
     */
    protected function inline_css_attr($selector, $attr, $ele = NULL)
    {
        if(empty($ele))
            $ele = $this->dom->getElementById($selector);
        if(empty($ele) || !$ele->hasAttribute('style')) return NULL;
        // Parse every declaration of the style attribute, not just the first.
        $declarations = $this->parse_declarations($ele->getAttribute('style'));
        $name = strtolower(trim($attr));
        return isset($declarations[$name]) ? $declarations[$name] : NULL;
    }

    /**
     * Looks at all the parents for the attr
     *
     * @param string name of HTML ID
     * @param string name of HTML/CSS Attribute
     * @return string value of Attr, or NULL
     */
    protected function parent_attr($selector, $attr)
    {
        $ele = $this->dom->getElementById($selector);
        if(!$ele) return NULL;
        // Walk towards the root; the DOMDocument node at the top is skipped
        // because it is not a DOMElement.
        for($node = $ele->parentNode; $node; $node = $node->parentNode){
            if(!($node instanceof DOMElement) || !$node->hasAttribute($attr)) continue;
            $value = $node->getAttribute($attr);
            if($this->has_value($value)) return $value;
        }
        return NULL;
    }

    /**
     * Looks at all parents for a inline CSS Attr
     *
     * @param string name of HTML ID
     * @param string name of HTML/CSS Attribute
     * @return string value of Attr, or NULL
     */
    protected function parent_css_attr($selector, $attr)
    {
        $ele = $this->dom->getElementById($selector);
        if(!$ele) return NULL;
        for($node = $ele->parentNode; $node; $node = $node->parentNode){
            if(!($node instanceof DOMElement)) continue;
            $value = $this->inline_css_attr($selector, $attr, $node);
            if($this->has_value($value)) return $value;
        }
        return NULL;
    }

    /**
     * Searches for an embedded style for the given ID
     *
     * @param string name of HTML ID
     * @param string name of HTML/CSS Attribute
     * @return string value of Attr, or NULL
     */
    protected function embedded_css_attr($selector, $attr)
    {
        $css = $this->prep_css($this->get_embedded_css());
        return $this->css_array_id_value($css, $selector, $attr);
    }

    /**
     * Looks for attr in linked css files
     *
     * @param string name of HTML ID
     * @param string name of HTML/CSS Attribute
     * @return string value of Attr, or NULL
     */
    protected function linked_css_attr($selector, $attr)
    {
        $hrefs = array_filter(array_map('trim', explode("\n", $this->get_linked_css())), 'strlen');
        // Nothing linked: return before any file access is attempted.
        if(empty($hrefs)) return NULL;
        $base = $this->find_url_root();
        $merged = '';
        foreach($hrefs as $href){
            $merged .= $this->fetch_css($href, $base) . "\n";
        }
        $css = $this->prep_css($merged);
        return $this->css_array_id_value($css, $selector, $attr);
    }

    /**
     * Downloads/reads one stylesheet.
     *
     * The href is tried as written first; if that fails and a base location
     * is known, it is retried relative to that base.
     *
     * NOTE(review): hrefs come straight from the analysed document, so this
     * reads whatever location that document names. Restrict the allowed
     * schemes/hosts before exposing this to untrusted input.
     *
     * @param string href of the stylesheet
     * @param string base location of the document, '' when unknown
     * @return string stylesheet contents, '' when it could not be read
     */
    protected function fetch_css($href, $base)
    {
        $candidates = array($href);
        if($base !== '') $candidates[] = $base . '/' . $href;
        foreach($candidates as $candidate){
            // Best-effort: a missing stylesheet must not abort the lookup, so
            // the warning is suppressed and the FALSE return is checked.
            $content = @file_get_contents($candidate);
            if($content !== false) return $content;
        }
        return '';
    }

    /**
     * Return Attr value from Css Array
     *
     * @param array css array
     * @param string id value
     * @param string attr to get
     * @return string attr value, or NULL
     */
    protected function css_array_id_value($css, $selector, $attr)
    {
        $rule = '#' . $selector;
        // prep_css() stores property names in lower case.
        $name = strtolower(trim($attr));
        return isset($css[$rule][$name]) ? $css[$rule][$name] : NULL;
    }

    /**
     * Looks for linked CSS files
     *
     * Only <link> elements whose rel contains "stylesheet" and whose href is
     * not empty are reported.
     *
     * @return string names of CSS files, each followed by a newline
     */
    protected function get_linked_css()
    {
        $merged = '';
        foreach($this->dom->getElementsByTagName('link') as $link){
            $rel = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
            if(!in_array('stylesheet', $rel, true)) continue;
            $href = trim($link->getAttribute('href'));
            if($href === '') continue;
            $merged .= $href . "\n";
        }
        return $merged;
    }

    /**
     * Converts given CSS into an array
     *
     * The result maps each selector text to an array of property => value,
     * with property names lower-cased. A selector that appears several times
     * is merged, later declarations overriding earlier ones. Selector groups
     * ("#a, #b") are kept as one key, and nested blocks such as @media are
     * not understood.
     *
     * @param css string Css
     * @return array array of CSS
     */
    protected function prep_css($css)
    {
        $css_array = array();
        // Remove comments: non-greedy so two comments do not swallow the rules
        // between them, and /s so comments may span several lines.
        $stripped = preg_replace('#/\*.*?\*/#s', '', (string) $css);
        if($stripped !== NULL) $css = $stripped;
        foreach(explode('}', $css) as $rule){
            $parts = explode('{', $rule);
            // Skip chunks without a declaration block (e.g. trailing whitespace).
            if(count($parts) < 2) continue;
            $name = trim($parts[0]);
            $param_array = $this->parse_declarations($parts[1]);
            if(empty($param_array)) continue;
            // Cascade
            // If it exists merge with the newest value dominate.
            if(isset($css_array[$name]))
                $css_array[$name] = array_merge($css_array[$name], $param_array);
            else
                $css_array[$name] = $param_array;
        }
        return $css_array;
    }

    /**
     * Splits a CSS declaration list ("a: b; c: d") into an array.
     *
     * Pieces without a colon or without a property name are ignored, so a
     * missing or doubled semicolon is harmless. Only the first colon separates
     * name and value, which keeps values such as url(http://...) intact.
     *
     * @param string declaration list
     * @return array lower-cased property name => trimmed value
     */
    protected function parse_declarations($text)
    {
        $declarations = array();
        foreach(explode(';', (string) $text) as $declaration){
            $pair = explode(':', $declaration, 2);
            if(count($pair) < 2) continue;
            $name = strtolower(trim($pair[0]));
            if($name === '') continue;
            $declarations[$name] = trim($pair[1]);
        }
        return $declarations;
    }

    /**
     * Grabs all <style> in the document
     *
     * @return string all of the styles merged together
     */
    protected function get_embedded_css()
    {
        $merged = '';
        foreach($this->dom->getElementsByTagName('style') as $style){
            $merged .= $style->nodeValue . "\n";
        }
        return $merged;
    }

    /**
     * Finds the root of the Document
     * If linked styles dont have the relative path, we have to guess
     *
     * The query string and fragment are dropped, then everything after the
     * last slash of the path is removed. A URL that consists of a host only
     * is returned unchanged.
     *
     * @return string url relative path, '' when none can be derived
     */
    protected function find_url_root()
    {
        $url = preg_replace('/[?#].*$/s', '', (string) $this->url);
        if($url === NULL || $url === '') return '';
        $scheme_end = strpos($url, '://');
        $path_start = ($scheme_end === false) ? 0 : $scheme_end + 3;
        $last_slash = strrpos($url, '/');
        if($last_slash === false || $last_slash < $path_start){
            // No path component: either a bare host or a bare file name.
            return ($scheme_end === false) ? '' : $url;
        }
        return substr($url, 0, $last_slash);
    }
}
testcolor.php
Code: Select all
<?php
// Demo page: prints the "color" found for element ?id=... of document ?url=...
require_once 'text_attr.php';
/*
$url = 'example5.html';
$selector = 'mytext';
*/
// Take the parameters as-is. addslashes() is an SQL-era escape that would only
// corrupt a URL or an id containing quotes; escaping is done at output time.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : '';
$selector = (isset($_GET['id']) && is_string($_GET['id'])) ? $_GET['id'] : '';
if($url === '' || $selector === ''){
    die('Both the "url" and "id" parameters are required.');
}
// NOTE(review): this fetches whatever location the visitor supplies (remote
// URL or local path). Restrict the allowed schemes/hosts before deploying
// this anywhere public.
$html = @file_get_contents($url);
if($html === false){
    die('Could not read the requested document.');
}
$text = new Text_Attr($html, $url);
// User-supplied values are HTML-escaped before being echoed back.
echo '<strong>URL:</strong> ' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
echo "\n<br/>\n";
echo '<strong>ID:</strong> ' . htmlspecialchars($selector, ENT_QUOTES, 'UTF-8');
echo "\n<br/>\n<br/>\n";
var_dump($text->get_attr($selector, 'color'));
Examples found at :
http://textcolor.zoxive.com/examples.html (I just made this CNAME a few hours ago, so it may not work yet for you..)
Few things..
My code is pretty sloppy, and I took a few shortcuts (explodes and such) which result in some inaccuracies down the line. I haven't really coded anything from scratch recently; most of my recent work has been done inside an MVC framework environment.
I did not validate any of the data coming in, since this most likely will not be used the way my examples use it. Cascading styles
*kind of* work, as long as a new attribute is added under the same ID. It cannot handle multiple names/IDs in one rule yet, e.g. #myid, #someother { color:#FF0000; }. I believe I can add that functionality by having a regex search through my CSS array, or by rewriting how it currently finds values in the CSS.
I spent about 4 hours on this today; maybe tomorrow I'll get around to messing with real cascading.
Edit: Found a typo in the code