all links from website
Posted: Tue Dec 23, 2008 4:53 pm
Hello,
I am trying to get all links from website. Below is the code i tried. I am getting Allowed memory size of 8388608 bytes exhausted. If there is any alteration has to be done in the code please let me know.
Thanks.
I am trying to get all links from website. Below is the code i tried. I am getting Allowed memory size of 8388608 bytes exhausted. If there is any alteration has to be done in the code please let me know.
Code: Select all
<?php
$first_link = mysql_query("SELECT * FROM urls WHERE url='http://localhost/mysite/'");
if(mysql_num_rows($first_link) == 0)
{
mysql_query("INSERT INTO urls SET url='http://localhost/mysite/'");
}
callMain();
function callMain()
{
usleep(100000);
$get_links_res = mysql_query("SELECT * FROM urls WHERE depth < 2");
if(mysql_num_rows($get_links_res) >0)
{
while($get_links_ret = mysql_fetch_assoc($get_links_res))
{
$url = $get_links_ret['url'];
$url_id = $get_links_ret['url_id'];
$fl = @fopen($url, "r");
if ($fl) {
while ($buffer = @fgets($fl, 4096)) {
$contents .= $buffer;
}
} else {
echo 'error in reading file <br/>';
}
fclose ($fl);
preg_match_all("/href\s*=\s*[\'\"]?([+:%\/\?~=&;\\\(\),._a-zA-Z0-9-]*)(#[.a-zA-Z0-9-]*)?[\'\" ]?(\s*rel\s*=\s*[\'\"]?(nofollow)[\'\"]?)?/i", $contents, $regs, PREG_SET_ORDER);
mysql_query("UPDATE urls SET depth = depth+1 WHERE url='$url'");
foreach($regs as $val)
{
$check_outer_links_res = mysql_query("SELECT * FROM urls WHERE url = '$val[1]'");
if(mysql_num_rows($check_outer_links_res)== 0)
{
echo "INSERT INTO urls SET url = '$val[1]' <br/>";
mysql_query("INSERT INTO urls SET url = '$val[1]'");
}
}
}
callMain();
}
}