I am trying to use the program "site_search.php" for an intranet. It keeps coming back with "No pages found". I think it has to do with the - $search_dirs = array() - line. Shouldn't it search the whole directory and all the subfolders if I leave it blank? I have tried so many paths and links already. I am not sure if the path has to be slightly different because it is searching through an intranet. Also, what exactly does DOCUMENT_ROOT refer to: the root of the whole server, or the directory that the code is in? Any help would be greatly appreciated! The code is shown below.
Code: Select all
<?php
/**
 * Recursively search a directory tree for files whose contents match the
 * global $body_regex and return them as link targets.
 *
 * Reads the globals $body_regex (PCRE pattern applied to each file's full
 * contents) and $seen (map of real path => true, used to skip files and
 * directories that were already visited, e.g. through symlinks). $seen is
 * updated as the walk proceeds. $_SERVER['DOCUMENT_ROOT'] is stripped from
 * the front of each matching path to turn it into a site-relative URI.
 *
 * @param string $dir Filesystem path of the directory to search.
 * @return array List of array(uri, title) pairs; when a file has no
 *               <title> element the URI is used as the title.
 */
function pc_search_dir ($dir){
    global $body_regex,$seen;
    $pages = array();
    // Sub-directories found in this pass; they are searched after the
    // directory handle has been closed.
    $dirs = array();

    $realdir = realpath($dir);
    if (false === $realdir) {
        // The path does not exist, so there is nothing to search.
        return $pages;
    }
    $seen[$realdir] = true;

    // Drop a trailing slash so that joined paths never contain '//',
    // which would otherwise end up at the start of the generated URIs.
    $base = rtrim($dir,'/');
    $doc_root = isset($_SERVER['DOCUMENT_ROOT']) ? rtrim($_SERVER['DOCUMENT_ROOT'],'/') : '';
    $root_len = strlen($doc_root);

    if (is_readable($dir) && ($d = dir($dir))) {
        while (false !== ($f = $d->read())) {
            if (('.' == $f) || ('..' == $f)) {
                continue;
            }
            $path = $base.'/'.$f;
            if (is_file($path) && is_readable($path)) {
                $realpath = realpath($path);
                if (isset($seen[$realpath])) {
                    continue;
                }
                $seen[$realpath] = true;
                $file = file_get_contents($path);
                if (false === $file) {
                    continue;
                }
                if (preg_match($body_regex,$file)) {
                    // Only strip the document root when the path really
                    // starts with it; otherwise keep the path unchanged.
                    $uri = $path;
                    if ($root_len && (0 === strncmp($path,$doc_root,$root_len))) {
                        $uri = substr($path,$root_len);
                    }
                    if (preg_match('#<title>(.*?)</title>#Sis',$file,$match)) {
                        $pages[] = array($uri,$match[1]);
                    } else {
                        $pages[] = array($uri,$uri);
                    }
                }
            } elseif (is_dir($path)) {
                $dirs[] = $path;
            }
        }
        $d->close();
    }

    foreach ($dirs as $subdir) {
        $realsub = realpath($subdir);
        // Skip directories already visited so symlink loops terminate.
        if ((false !== $realsub) && ! isset($seen[$realsub])) {
            $pages = array_merge($pages,pc_search_dir($subdir));
        }
    }
    return $pages;
}
/**
 * usort() comparator for search results: order by title, then by URI.
 *
 * A comparator must return an integer that is negative, zero or positive;
 * returning a boolean (as a bare `>` comparison does) can never signal
 * "less than" and yields an unreliable order.
 *
 * @param array $a Result pair array(uri, title).
 * @param array $b Result pair array(uri, title).
 * @return int Negative if $a sorts first, positive if $b sorts first,
 *             zero if both title and URI are equal.
 */
function pc_page_sort($a,$b) {
    // Byte-wise string comparison keeps the ordering consistent even for
    // titles that look like numbers.
    $by_title = strcmp($a[1],$b[1]);
    if (0 !== $by_title) {
        return $by_title;
    }
    return strcmp($a[0],$b[0]);
}
// Results collected from every searched directory, as array(uri, title).
$matching_pages = array();
// Shared with pc_search_dir() through `global`: real paths already visited.
$seen = array();
// Directories to search, given relative to the web server's document root
// ($_SERVER['DOCUMENT_ROOT']), e.g. array('docs', 'news/archive').
$search_dirs = array ();
if (! count($search_dirs)) {
    // With an empty list the loop below would run zero times and the
    // script would always report "No pages found". Fall back to searching
    // the whole document root instead.
    $search_dirs = array('');
}

// The search term comes from the request; accept only a non-empty string.
$term = (isset($_REQUEST['term']) && is_string($_REQUEST['term'])) ? trim($_REQUEST['term']) : '';

if ('' !== $term) {
    // Also read by pc_search_dir() through `global`.
    $body_regex = '#<body>(.*' .preg_quote($term,'#').'.*)</body>#Sis';
    $doc_root = rtrim($_SERVER['DOCUMENT_ROOT'],'/');
    foreach ($search_dirs as $dir) {
        // Join without producing a double or trailing slash.
        $dir = trim($dir,'/');
        $target = ('' === $dir) ? $doc_root : $doc_root.'/'.$dir;
        $matching_pages = array_merge($matching_pages, pc_search_dir($target));
    }
}

if (count($matching_pages)){
    usort($matching_pages,'pc_page_sort');
    print '<ul>';
    foreach ($matching_pages as $v) {
        // Escape for HTML output; double_encode is off so entities that
        // are already present in a page title are not encoded twice.
        $href = htmlspecialchars($v[0],ENT_QUOTES | ENT_SUBSTITUTE,'UTF-8',false);
        $label = htmlspecialchars($v[1],ENT_QUOTES | ENT_SUBSTITUTE,'UTF-8',false);
        print sprintf('<li> <a href="%s">%s</a>',$href,$label);
    }
    print '</ul>';
}else {
    print 'No pages found. Please try your search again';
}
?>