Page 1 of 1

Cant we reduce the parse time of 381MB file from 38 minutes

Posted: Sat Jul 31, 2004 2:21 am
by perumal
NO CROSS POSTING

feyd | Please use

Code: Select all

tags when posting code. Read:  [url=http://forums.devnetwork.net/viewtopic.php?t=21171]Posting Code in the Forums[/url][/color]


Actually im parsing a 381MB log file using php script .

Its taking too much time arounf 38 minutes.


is there any way to reduce it to below 5 minutes

__________ my code below

Code: Select all

class FileReader
{

var $filename;
var $totallines;
var $filepointer;
var $filesize;
var $errors;
var $count;
var $mFilterString;
var $uploadpath;
/*
var $datas1;
var $datas2;
var $datas3;
var $datas4;

var $position;
var $content;

*/


function __construct()
{
$this->filename='';
$this->totallines=0;
$this->filepointer='';
$this->filesize=0;
$this->errors='';
$this->count=0;
$this->uploadpath='';
$this->mFilterString='';
}

# +----------------------------------------------------------------------+
# | Create instance of FileReader takes filename as a input parameter |
# +----------------------------------------------------------------------+


function NewFileReader($filename)
{

/*
$this->datas1=array();
$this->datas2=array();
$this->datas3=array();
$this->datas4=array();
$this->position=-1;
$this->content="";
*/
$this->filename="";
$this->totallines=0;
$this->filepointer=NULL;
$this->filesize=0;
$this->errors="";
$this->count=0;


if(is_file($filename))
{
$this->filename=$filename;
}
else
{
$this->errors="This is not a file......";


}

# +----------------------------------------------------------------------+
# | If the File is exist ,find the size of the file & open for read mode | |
# +----------------------------------------------------------------------+

if (file_exists($this->filename))
{


$this->filesize=filesize($this->filename);
$this->filepointer = fopen ($this->filename, "r+");
rewind($this->filepointer);


# +----------------------------------------------------------------------+
# | Find the Total Lines from the File |
# +----------------------------------------------------------------------+

while(!feof($this->filepointer))
{
fgets($this->filepointer );
$this->totallines++;
}

rewind($this->filepointer);
}
else
{
$this->errors="File not Exsist.......";

}



}


# +-----------------------------------------------------------------------------------+
# | DoParse function filter the Datas into ip ,url ,time,browser type,referrer, etc., |
# | Note: This Parser will we perform only upto 400000 records |
# +-----------------------------------------------------------------------------------+

function DoParse($str)
{
$astring= str_replace("[en]","",$str);
$astring= str_replace("POST","GET",$astring );
$astring= str_replace("HEAD","GET",$astring );

$astring = preg_replace("/(\n|\r|\t)/", " ", $astring);

$records = preg_split("/(\n)/", $astring, -1, PREG_SPLIT_DELIM_CAPTURE);


$sizerecs = sizeof($records);


$i = 0;


$fpp=fopen($this->uploadpath."/databank","a+");
while($i<$sizerecs)
{




// $ip = $records[$i];
$all = $records[$i];

preg_match("/([0-9]{1,3}+\.[0-9]{1,3}+\.[0-9]{1,3}+\.[0-9]{1,3})/",$all,$match);
$ip=$match[0];

preg_match("/\[(.+)\]/", $all, $match);
$access_time = $match[1];

//take care
// preg_match("/(.*)(\+|\-(.*))/",$access_time,$match);
// $access_time = $match[1];
;
//print_r($all);
// $all = str_replace($match[1], "", $all);

$date_time=explode("/",trim(str_replace(":","/",$access_time )));



switch(trim(strtolower($date_time[1])))
{
case "jan":
$mm=1;
break;
case "feb":
$mm=2;
break;
case "mar": 
$mm=3;
break;
case "apr":
$mm=4;
break; 
case "may":
$mm=5;
break;
case "jun":
$mm=6;
break;
case "jul":
$mm=7;
break;
case "aug":
$mm=8;
break;
case "sep":
$mm=9;
break;
case "oct":
$mm=10;
break;
case "nov":
$mm=11;
break;
case "dec":
$mm=12;
break;
default:
break;

}






$dd=$date_time[0];

$yyyy=$date_time[2];
$hh=$date_time[3];
$min=$date_time[4];
$sec=$date_time[5];

$access_time=$yyyy."-".$mm."-".$dd." ".$hh.":".$min.":".$sec;



preg_match("/"GET (.[^"]+)/", $all, $match);


$http = $match[1];
$link = explode(" ", $http);
$all = str_replace(""GET $match[1]"", "", $all);
$all=substr($all,strpos($all,"[]"));



$pu=parse_url(trim($link[0]));

$page=$pu[path];
$querystring=$pu[query];




preg_match("/(( [0-9]{3})\s([0-9]+\b|-))/", $all, $match);
$success_code = $match[2];
$bytes = $match[3];
$all = str_replace($match[1], "", $all);
$all = str_replace("GET", "", $all);

if(!$bytes || $bytes=="-")
{
$bytes=0;
}



preg_match("/"(.[^"]+)/", $all, $match);

$ref = $match[1];
$all = str_replace(""$match[1]"", "", $all);
preg_match("/"(.[^"]+)/", $all, $match);
$browser = trim($match[1]);
$all = str_replace(""$match[1]"", "", $all);



if($browser==' ' || $browser==NULL ) 
{
$browser=$ref;
$ref="";
}
else

{
preg_match("/(http:\/\/)+(.*)/",trim($ref),$match);
if($match[2]=="")
{
$ref="http://localhost/";
} 

$preff=parse_url($ref); 
$referrer=$preff[host];

$str=strtolower($page);
} 


$mdatas="NULL\t";
$mdatas.="".addslashes(trim($ip))."\t";
$mdatas.="".addslashes(trim($page))."\t";
$mdatas.="".addslashes(trim($querystring))."\t";
$mdatas.="".addslashes(trim($referrer))."\t";
$mdatas.="".addslashes(trim($ref))."\t";
$mdatas.="".addslashes(trim($browser))."\t";
$mdatas.="".addslashes(trim($access_time))."\t";
$mdatas.=trim($success_code)."\t";
$mdatas.=trim($bytes)."\t";
$mdatas.="".addslashes(trim($cookies));
$mdatas.="\n\r";
fwrite($fpp,$mdatas);


// ob_end_flush();

$this->count++;
$i = $i + 2;
$all="";
}


fclose($fpp);
}//end DoParse function




# +-----------------------------------------------------------------------------------+
# | FirstLine function Read the first line of the file |
# +-----------------------------------------------------------------------------------+

function FirstLine()
{

if(!$this->filepointer)
{

$this->filepointer = fopen($this->filename, "r+");
rewind($this->filepointer);
$this->content = fgets($this->filepointer);
if($this->content!="" && $this->content && !(preg_match("/$this->mFilterString|\.js|\.gif|\.jpeg|\.jpg|\.css|\.swf|\.ico/", $this->content)) )
$this->DoParse($this->content);

}
else
{
$this->content = fgets($this->filepointer);
if($this->content!="" && $this->content && !(preg_match("/$this->mFilterString|\.js|\.gif|\.jpeg|\.jpg|\.css|\.swf|\.ico/", $this->content)) )
$this->DoParse($this->content);

}


}

# +-----------------------------------------------------------------------------------+
# |NextLine function Read the next line of the file |
# +-----------------------------------------------------------------------------------+


function NextLine()
{
if(!$this->filepointer)
{

$this->filepointer = fopen($this->filename, "r+");
rewind($this->filepointer);
$this->content = fgets($this->filepointer);
if($this->content!="" && $this->content && !(preg_match("/$this->mFilterString|\.js|\.gif|\.jpeg|\.jpg|\.css|\.swf|\.ico/", $this->content)) )
$this->DoParse($this->content);

}
else
{
$this->content = fgets($this->filepointer);
if($this->content!="" && $this->content && !(preg_match("/$this->mFilterString|\.js|\.gif|\.jpeg|\.jpg|\.css|\.swf|\.ico/", $this->content)) )
$this->DoParse($this->content);

}

}


# +--------------------------------------------------------------------------------------------------+
# | This Destructor function will be called automatically when the object is going to be destroyed |
# +--------------------------------------------------------------------------------------------------+

function __FileReader()
{

if($this->filepointer!='' )
{
fclose($this->filepointer);
}

}

function toString()
{

}
function totallines()
{
echo $this->totallines;
}


function __destruct()
{
if($this->filepointer!='' )
{
fclose($this->filepointer);
}
unset($this);
}

} // End of the FileReader Class
___________________





Thanks
Perumal.


feyd | Please use

Code: Select all

tags when posting code. Read:  [url=http://forums.devnetwork.net/viewtopic.php?t=21171]Posting Code in the Forums[/url][/color]

Posted: Sat Jul 31, 2004 9:38 am
by Buddha443556
In DoParse() this might be slowing you down some - opening and closing the databank file for every line parsed:

Code: Select all

$fpp=fopen($this->uploadpath."/databank","a+");
That file must get pretty big? I doubt fixing this will get you anywhere near your 5 minute goal though.