building a URL string from parts
Posted: Sat Oct 21, 2006 2:52 pm
Code: Select all
function GetUrls($url)
{
$info = @parse_url($url); // parse the url
$html = $this->temp_everything; // gets what was sent back
if (!$html) // check it's not false
{
return false; // if it is return false
}
$pieces = preg_split ("/(\r\n\r\n|\r\r|\n\n)/", $html, 2); // split the HTML from the headers
$html = $pieces[1]; // save the HTML
unset($pieces); // unset everything else
// find all the urls
preg_match_all("|href\=\"?'?`?([[]:?=&@/;._-]+)\"?'?`?|i", $html, &$matches);
$links = array(); // make an array to store them in
$ret = $matches[1];
for($i=0;isset($ret[$i]);$i++)
{
// if it starts with http:// save it without editing
if(preg_match("|^http://(.*)|i",$ret[$i]))
{
$links[] = $ret[$i];
}
// if it matches ../place.html
elseif(preg_match("|^../(.*)|i",$ret[ $i]))
{
$links[] = 'http://'.$info["host"].''.$info["path"].''.substr($ret[$i], 3);
}
// if it matches ./place.html
elseif(preg_match("|^./(.*)|i",$ret[ $i]))
{
$links[] = 'http://'.$info["host"].''.$info["path"].''.substr($ret[$i], 2);
}
// if it matches /place.html
elseif(preg_match("|^/(.*)|i",$ret[ $i]))
{
$links[] = 'http://'.$info["host"].''.$ret[$i];
}
// if it matches place.html
elseif(preg_match("|^(.*)|i",$ret[ $i]))
{
$links[] = 'http://'.$info["host"].''.$info["path"].''.$ret[$i];
}
// if it maches mailto:
elseif(preg_match("/^mailto:(.*)/i",$ret[$i]))
{
// could save email addresses here
}
}
return $links ; // return the array of links
}I need to reconstruct them into a full URL: http://www.server.com/directory/sub/file.html
The main problem reconstructing URL is with the following two:
Code: Select all
// if it matches ../place.html
elseif(preg_match("|^../(.*)|i",$ret[ $i]))
{
$links[] = 'http://'.$info["host"].''.$info["path"].''.substr($ret[$i], 3);
}
// if it matches ./place.html
elseif(preg_match("|^./(.*)|i",$ret[ $i]))
{
$links[] = 'http://'.$info["host"].''.$info["path"].''.substr($ret[$i], 2);
}