For example, suppose a file is located at http://www.example.org/example/test.php
and that page contains a link that points to ../lol/file.php
The function should then produce this URL: http://www.example.org/lol/file.php
This function should work on both absolute and relative URLs, but the base URL must be absolute!
Suggestions wanted to make this better.
Thank you,
the DtTvB
.
Code (See some example and results below)
Code: Select all
<?php
//
// URL Combining Function
// by the DtTvB
// =======================
// This script will make a URL based on a base URL that must be an absolute
// URL, and it will take another argument that can be either relative or
// absolute, and the returned path will be absolute. An example is that you
// have an URL: "http://www.example.org/path/to/file.html" and there is a
// link that links to "../lol.php", and you want to do something that will
// get you the following: "http://www.example.org/path/lol.php", this
// function can help you to do it.
//
// This builds URL from the component.
// Rebuilds a URL string from an array of components shaped like the result
// of parse_url(). Only the components that are set are emitted, in the order
// scheme, user, pass, host, port, path, query, fragment.
//
// $u      : array that may contain the keys 'scheme', 'user', 'pass', 'host',
//           'port', 'path', 'query' and 'fragment'.
// Returns : the assembled URL as a string ('' when none of the keys is set).
function urlCombine__BuildURL($u) {
    $url = '';

    // The scheme is always followed by '://'.
    if (isset($u['scheme'])) {
        $url .= $u['scheme'] . '://';
    }

    // User info. A password may appear without a user name, because PHP
    // accepts http://:password@domain/, so either part triggers the '@'.
    $hasUserInfo = false;
    if (isset($u['user'])) {
        $url .= $u['user'];
        $hasUserInfo = true;
    }
    if (isset($u['pass'])) {
        $url .= ':' . $u['pass'];
        $hasUserInfo = true;
    }
    if ($hasUserInfo) {
        $url .= '@';
    }

    // Authority: host and optional port.
    if (isset($u['host'])) {
        $url .= $u['host'];
    }
    if (isset($u['port'])) {
        $url .= ':' . $u['port'];
    }

    // A path that is set but empty is written as the root path.
    if (isset($u['path'])) {
        $url .= ((string) $u['path'] !== '') ? $u['path'] : '/';
    }

    if (isset($u['query'])) {
        $url .= '?' . $u['query'];
    }
    if (isset($u['fragment'])) {
        $url .= '#' . $u['fragment'];
    }

    return $url;
}
// This function receives $old as an absolute URL, and $new as any type of URL.
// It then combines $old and $new to make a final absolute URL.
// Example: urlCombine('http://dttvb.yi.org/test/hello?hi#lowls', '../index.php')
// returns 'http://dttvb.yi.org/index.php'.
// Resolves the reference $new against the absolute base URL $old and returns
// the resulting absolute URL.
//
// $old    : the base URL; it must be absolute.
// $new    : the reference to resolve. It may be empty, absolute, a
//           network-path reference ("//host/path"), an absolute path
//           ("/path"), a relative path ("../x", "x/y"), a query-only
//           reference ("?q") or a fragment-only reference ("#f").
// Returns : the combined URL as a string, or false when parse_url() cannot
//           parse either argument. An empty $new returns $old unchanged and
//           an absolute $new is returned as given.
function urlCombine($old, $new) {
    // An empty reference means "the current page".
    if ((string) $new === '') {
        return $old;
    }
    $old = (string) $old;
    $new = (string) $new;

    // Split both URLs into components.
    $first = parse_url($old);
    $second = parse_url($new);
    if (empty($first) || empty($second)) {
        return false;
    }

    // A reference that carries its own scheme is already absolute.
    if (isset($second['scheme'])) {
        return $new;
    }

    $lead = $new[0];

    // Fragment-only reference: keep everything of the base up to its fragment.
    if ($lead === '#') {
        $hashAt = strpos($old, '#');
        return ($hashAt === false ? $old : substr($old, 0, $hashAt)) . $new;
    }

    // Query-only reference: drop the base fragment first, then the base query,
    // so the new query never ends up behind an old fragment.
    if ($lead === '?') {
        $base = $old;
        $hashAt = strpos($base, '#');
        if ($hashAt !== false) {
            $base = substr($base, 0, $hashAt);
        }
        $queryAt = strpos($base, '?');
        if ($queryAt !== false) {
            $base = substr($base, 0, $queryAt);
        }
        return $base . $new;
    }

    // Network-path reference ("//host/path"): only the scheme is inherited.
    if (substr($new, 0, 2) === '//') {
        return (isset($first['scheme']) ? $first['scheme'] . ':' : '') . $new;
    }

    // From here on the base query and fragment never survive.
    unset($first['query'], $first['fragment']);

    // Absolute path: replace the whole path. $new is used verbatim, so a
    // query or fragment written in it is carried along inside the path
    // component and comes out unchanged when the URL is rebuilt.
    if ($lead === '/') {
        $first['path'] = $new;
        return urlCombine__BuildURL($first);
    }

    // Relative path: the query and fragment come from the reference only.
    if (isset($second['query'])) {
        $first['query'] = $second['query'];
    }
    if (isset($second['fragment'])) {
        $first['fragment'] = $second['fragment'];
    }

    // Drop the file name of the base path (/path/to/file.html -> /path/to/)
    // and append the reference path. A base without a path counts as "/".
    $basePath = isset($first['path']) ? $first['path'] : '/';
    $slashAt = strrpos($basePath, '/');
    $directory = ($slashAt === false) ? '' : substr($basePath, 0, $slashAt + 1);
    $referencePath = isset($second['path']) ? $second['path'] : '';

    // Strip the leading slashes, collapse repeated slashes and split the
    // path into segments.
    $merged = ltrim($directory . $referencePath, '/');
    $segments = explode('/', preg_replace('~/+~', '/', $merged));

    // Resolve "." and ".." with a stack, so that e.g.
    // d/t/t/v/b/.././../.././hey/lol/.. becomes d/t/hey.
    // A ".." at the root is simply dropped.
    $kept = array();
    foreach ($segments as $segment) {
        if ($segment === '.') {
            continue;
        }
        if ($segment === '..') {
            array_pop($kept);
            continue;
        }
        $kept[] = $segment;
    }

    // Put the path back together behind a single leading slash.
    $path = '/' . implode('/', $kept);

    // A reference ending in "." or ".." names a directory, so make sure the
    // path ends with exactly one slash (the root path already does).
    $last = $segments[count($segments) - 1];
    if (($last === '.' || $last === '..') && substr($path, -1) !== '/') {
        $path .= '/';
    }
    $first['path'] = $path;

    return urlCombine__BuildURL($first);
}
?>Some tests (in the following format: base + link = real)
Code: Select all
http://dttvb.yi.org:8081/path/to/file.html
+ ../../d/t/t/v/b/.././../.././hey/lol/../
= http://dttvb.yi.org:8081/d/t/hey/
http://dttvb.yi.org/path/to/file.html
+ .
= http://dttvb.yi.org/path/to/
http://dttvb.yi.org/path/to/file.html
+ /
= http://dttvb.yi.org/
http://dttvb.yi.org/path/to/file.html
+ ?yo
= http://dttvb.yi.org/path/to/file.html?yo
http://dttvb.yi.org/path/to/file.html
+ #cool
= http://dttvb.yi.org/path/to/file.html#cool
http://sasd@dttvb.yi.org/path/to/file.html?yo
+ ?wow#cool
= http://sasd@dttvb.yi.org/path/to/file.html?wow#cool
http://sasd:lowl@dttvb.yi.org/path/to/file.html?yo
+ /?wow#cool
= http://sasd:lowl@dttvb.yi.org/?wow#cool
http://:lowl@dttvb.yi.org/path/to/file.html?yo
+ /index.php
= http://:lowl@dttvb.yi.org/index.php
http://dttvb.yi.org/path/to/file.html?yo
+ http://www.google.com/
= http://www.google.com/
file:///etc/passwd
+ shadow
= file:///etc/shadow
file:///etc/passwd
+ ../home/dttvb/file.txt
= file:///home/dttvb/file.txt
http://www.google.com/
+ javascript:alert('Wow');
= javascript:alert('Wow');