read md5 directory recursively and return duplicate files
Posted: Mon Aug 30, 2010 9:41 pm
Hey there, I'm trying to write a script which takes in an md5 hash, reads a directory recursively and returns files in the directory with similar md5 hashes.
I've managed to get the user input as well as the md5 hash of the files recursively, but I've got a problem with trying to compare the input value to the indexed array of md5 values.
I've tried comparing it as a string to an array, as well as from an index array to another using the in_array, diff_array, array_search and array_intersect functions but could not get it to work.
Here's what I've got so far.
Any form of help is really much appreciated, I'm pretty new to this so I hope you guys could point me in the right direction! Thanks!
I've managed to get the user input as well as the md5 hash of the files recursively, but I've got a problem with trying to compare the input value to the indexed array of md5 values.
I've tried comparing it as a string to an array, as well as from an index array to another using the in_array, diff_array, array_search and array_intersect functions but could not get it to work.
Here's what I've got so far.
Code: Select all
<?php
set_time_limit(99999);
ini_set("max_execution_time",99999);
print "Input your md5 hash. \n";
{
$fh = fopen('php://stdin', 'r');
$last_line = false;
$message = '';
$fh_string = fgets($fh, 1024); // read the special file to get the user input from keyboard
function findDuplicateFiles($dirName){
global $fh_string;
$fh_array = array("$fh_string");
$dirName=trim($dirName);
if(empty($dirName)){ die("Fatal Error 0x01: Directory Name can NOT be empty"); }
if(!is_dir($dirName)){ die("Fatal Error 0x02: $dir is not a valid or readable Directory"); }
$filesArray=parseDirectory($dirName);
$c=count($filesArray);
for($i=0;$i<$c;$i++){
$md5FilesArray[$i]=md5_file($filesArray[$i]);
}
$duplicateFilesArray=array();
$duplicateFiles=array_values($md5FilesArray);
$identical=array_intersect_assoc($fh_array, $md5FilesArray);
print_r($identical);
if($identical==1){
foreach($duplicateFiles as $key=>$value){
if($value!==1){
$names=array_values($md5FilesArray);
$duplicate=array();
foreach($names as $name){
$duplicate[]=$filesArray[$name];
}
$duplicateFilesArray[]=$duplicate;
}
}
}
return $duplicateFilesArray;
}
function parseDirectory($rootPath,$returnOnlyFiles=true, $seperator="/"){
$fileArray=array();
if (($handle = opendir($rootPath))!==false) {
while( ($file = readdir($handle))!==false) {
if($file !='.' && $file !='..'){
if (is_dir($rootPath.$seperator.$file)){
$array=parseDirectory($rootPath.$seperator.$file);
$fileArray=array_merge($array,$fileArray);
if($returnOnlyFiles!==true){
$fileArray[]=$rootPath.$seperator.$file;
}
}
else {
$fileArray[]=$rootPath.$seperator.$file;
}
}
}
}
return $fileArray;
}
}
$dir="C:\Documents and Settings\Administrator\Desktop\php-5.3.3-Win32-VC6-x86/";
$duplicateFiles=findDuplicateFiles($dir);
//Display Results
$c=count($duplicateFiles);
if($c===0){ echo "No Duplicate Files were found"; }
else{
echo "Duplicate files found in ";
$col=0;
foreach($duplicateFiles as $duplicate){
$c=count($duplicate);
for($a=0;$a<$c;$a++){
$file=$duplicate[$a];
echo $file;
}
$col++;
}
}
?>