I have tried to think of as many combinations of strange URLs as possible, but I might not have thought of everything. Naturally, the URLs need to be well-formed, but that's another function, and another late night!
Two points:
If a link starts with #, the same page will be viewed, just from a different point; the URL is not really different.
If a link has too many ../ segments, what is supposed to happen? This function just places the page directly after the host name.
Code: Select all
<?php
/**
 * Resolve a link found on a page into an absolute URL.
 *
 * Handled link forms:
 *  - "#anchor" (or empty)      -> the base URL itself, unchanged
 *  - "scheme:..."              -> returned as-is (http, https, ftp, mailto, ...)
 *  - "//host/path"             -> base scheme + link
 *  - "/path"                   -> base scheme/authority + path
 *  - "?query"                  -> base path (or "/") + query
 *  - "file", "./file", "../x"  -> resolved against the directory of the base path
 *
 * Any "#fragment" on the link is discarded. "." and ".." segments are
 * collapsed; a link with more "../" than the base has directories is placed
 * directly under the host instead of escaping above the root.
 *
 * @param string $base_url Absolute URL of the page the link was found on.
 * @param string $link_url The link to resolve (absolute or relative).
 * @return string The resolved URL. If $base_url has no scheme/host to resolve
 *                against, a relative $link_url is returned without its fragment.
 */
function construct_url($base_url, $link_url)
{
    $link_url = (string) $link_url;

    // A fragment only selects a position inside the page, so drop it.
    $hash_pos = strpos($link_url, '#');
    if ($hash_pos !== false)
    {
        $link_url = (string) substr($link_url, 0, $hash_pos);
    }
    if ($link_url === '')
    {
        return $base_url;
    }

    // Anything that begins with a URI scheme is already fully specified.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $link_url))
    {
        return $link_url;
    }

    // Split the base URL into components; without scheme and host there is
    // nothing to resolve against.
    $base = parse_url((string) $base_url);
    if (!is_array($base) || !isset($base['scheme'], $base['host']))
    {
        return $link_url;
    }

    // Network-path reference: only the scheme is inherited from the base.
    if (strncmp($link_url, '//', 2) === 0)
    {
        return $base['scheme'] . ':' . $link_url;
    }

    // Re-assemble "scheme://[user[:pass]@]host[:port]" from the base URL.
    $root = $base['scheme'] . '://';
    if (isset($base['user']))
    {
        $root .= $base['user'];
        if (isset($base['pass']))
        {
            $root .= ':' . $base['pass'];
        }
        $root .= '@';
    }
    $root .= $base['host'];
    if (isset($base['port']))
    {
        $root .= ':' . $base['port'];
    }

    $base_path = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';

    // Query-only link: keep the base path and replace the query.
    if ($link_url[0] === '?')
    {
        return $root . $base_path . $link_url;
    }

    // Separate the query so that "." / ".." inside it are left untouched.
    $query = '';
    $query_pos = strpos($link_url, '?');
    if ($query_pos !== false)
    {
        $query = substr($link_url, $query_pos);
        $link_url = substr($link_url, 0, $query_pos);
    }

    if ($link_url[0] === '/')
    {
        // Root-relative link: the base path is ignored.
        $merged = $link_url;
    }
    else
    {
        // Relative link: append to the directory part of the base path,
        // i.e. everything up to and including its last "/".
        $slash_pos = strrpos($base_path, '/');
        $directory = ($slash_pos === false) ? '/' : substr($base_path, 0, $slash_pos + 1);
        $merged = $directory . $link_url;
    }

    // Collapse "." and ".." segments. $merged always starts with "/", so
    // $resolved[0] is the empty root segment, which is never popped.
    $segments = explode('/', $merged);
    $last_index = count($segments) - 1;
    $resolved = array();
    foreach ($segments as $index => $segment)
    {
        if ($segment === '.' || $segment === '..')
        {
            if ($segment === '..' && count($resolved) > 1)
            {
                array_pop($resolved);
            }
            // A trailing "." or ".." names a directory: keep the final "/".
            if ($index === $last_index)
            {
                $resolved[] = '';
            }
            continue;
        }
        $resolved[] = $segment;
    }

    return $root . implode('/', $resolved) . $query;
}
/**
 * Test whether a string begins with the given regular-expression fragment.
 *
 * The fragment is embedded in a "/"-delimited pattern, so any "/" inside
 * $regex must already be escaped by the caller.
 *
 * @param string $string Text to examine.
 * @param string $regex  Regular-expression fragment (not a literal string).
 * @return int|false Result of preg_match(): 1 on a match, 0 otherwise, false on error.
 */
function starts_with($string, $regex)
{
    // Anchor the fragment to the start of the subject.
    return preg_match("/(^{$regex})/", $string);
}
/**
 * Test whether a string ends with the given regular-expression fragment.
 *
 * The fragment is embedded in a "/"-delimited pattern, so any "/" inside
 * $regex must already be escaped by the caller.
 *
 * @param string $string Text to examine.
 * @param string $regex  Regular-expression fragment (not a literal string).
 * @return int|false Result of preg_match(): 1 on a match, 0 otherwise, false on error.
 */
function ends_with($string, $regex)
{
    // Anchor the fragment to the end of the subject.
    return preg_match("/({$regex}\$)/", $string);
}
?>