Page 1 of 1

Program Stops after 10 Minutes

Posted: Wed May 26, 2004 8:15 am
by chvol
I am trying to run my PHP program that uses class Snoopy to access the HTML of web pages. It goes through about 300 pages for 10 minutes, then stops. A program that uses only CPU, periodically writing the current time, runs for hours with no problem. If I can’t fix it, an alternate solution would be for another program to start it up, and restart it after it stops, if possible.

Charlie chvol@aol.com

Posted: Wed May 26, 2004 8:43 am
by lostboy
Is it the script timing out? The limit can be changed with the max_execution_time setting in the php.ini file.

Posted: Wed May 26, 2004 9:26 am
by malcolmboston
if it says something to the effect of:

"max execution time exceeded"

then it can definitely be changed in php.ini. However, 10 minutes for a script seems to be a very long time, so maybe there's something wrong?

Posted: Wed May 26, 2004 10:31 am
by kettle_drum

Code: Select all

set_time_limit(0);

Posted: Wed May 26, 2004 6:52 pm
by chvol
1. max_executiontime=0
2. no set_time_limit
3. no error messages
4. the time it takes is random (even when rerun)
5. Remember: the program with only CPU use runs indefinitely - for hours.
6. It is going through thousands of URLs

Posted: Wed May 26, 2004 7:17 pm
by jason
Maybe you could show some code?

Posted: Wed May 26, 2004 7:30 pm
by lostboy
Could it be some network issue that isn't handled correctly? Some unanticipated header or protocol not defined in the code? Just tossing out some ideas?

programs using

Posted: Wed May 26, 2004 7:34 pm
by chvol
Main Program:

Code: Select all

include "Class Fetch http://www.php" ;

/**
 * Frequency of a word as reported by a search engine (Google).
 *
 * Fetches the search page for $word, lower-cases it, strips tabs and line
 * breaks, then reads the number that follows the start marker. Commas inside
 * the number (thousands separators) are skipped. Because the page is
 * lower-cased before searching, the prompt and start marker must be
 * lower-case to match.
 *
 * spar() and the webpage class are defined elsewhere. spar() is used here as
 * returning a list: [0] URL, [1] prompt text, [2] start marker, [3] end
 * character.
 *
 * @param string $word word to look up
 * @param mixed  $se   search-engine selector, passed through to spar()
 * @return string the digits found, or "?" when the number cannot be located
 */
function freq($word,$se) {
    $spar   = spar($se, $word);
    $url    = $spar[0];
    $prompt = $spar[1];
    $start  = $spar[2];
    $end    = $spar[3];

    $webpg = new webpage;
    $res   = $webpg->html($url);

    // Lower-case the page and drop TAB / LF / CR so markers match across line breaks.
    $html = strtr(
        strtolower((string) $res["results"]),
        array(chr(9) => "", chr(10) => "", chr(13) => "")
    );

    // strpos() returns 0 for a match at the very start of the string, so only
    // a strict comparison with false means "not found".
    $fr = strpos($html, $prompt);
    if ($fr === false) return "?";
    $to = strpos($html, $start, $fr);
    if ($to === false) return "?";

    $len  = strlen($html);
    $freq = "";
    for ($to += strlen($start); $to < $len; ++$to) {
        $ch = $html[$to];
        if (ctype_digit($ch)) {
            // Explicit digit test: a loose "$ch > 0" comparison gives
            // different answers for letters depending on the PHP version.
            $freq .= $ch;
        } elseif ($ch == $end) {
            return $freq;
        } elseif ($ch != ",") {
            // Anything other than a digit, a comma or the end character
            // means this is not the number we were looking for.
            return "?";
        }
    }

    // Ran off the end of the page without seeing the end character.
    return "?";
}

Class Fetch http://www.php:

include "snoopy.class.inc";

/**
 * URL => HTML.
 *
 * Creates a fresh snoopy object, fetches $url with it, and returns what that
 * object recorded. The return value of fetch() itself is not inspected.
 *
 * @param string $url address to fetch
 * @return array "results" => the snoopy object's results property,
 *               "error"   => the snoopy object's error property
 */
function html($url)
{
    $client = new snoopy;
    $client->fetch($url);

    $outcome = array();
    $outcome["results"] = $client->results;
    $outcome["error"]   = $client->error;

    return $outcome;
}

snoopy.class.inc:


	/**
	 * Fetch the document at $URI, then follow any redirect and fetch any
	 * frames that the request recorded on this object.
	 *
	 * Only the "http" and "https" schemes are handled. The request helpers
	 * (_connect, _httprequest, _httpsrequest, _disconnect) are defined
	 * elsewhere in the class; this method only drives them.
	 *
	 * A redirect is followed while maxredirs > _redirectdepth, and only if
	 * the target starts with "http://<current host>" or offsiteok is set.
	 * Frames are fetched while _framedepth < maxframes. The results of these
	 * recursive fetch() calls are not checked.
	 *
	 * @param string $URI the full address to fetch
	 * @return bool false if the scheme is unsupported (error is set), the
	 *              connection could not be made, or the curl binary needed
	 *              for https is missing or not executable; true otherwise
	 */
	function fetch($URI)
	{
		$URI_PARTS = parse_url($URI);
		// parse_url() returns false for seriously malformed URLs; treat that
		// as "no parts" so it ends up in the invalid-protocol branch.
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();

		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		// Optional components are simply absent from the parse_url() result,
		// so they have to be checked before they are read.
		$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
		$path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
		if (!empty($URI_PARTS["query"]))
			$path .= "?".$URI_PARTS["query"];

		switch($scheme)
		{
			case "http":
				$this->host = $URI_PARTS["host"];
				if(!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				// $fp is handed to _connect() uninitialised; presumably it is
				// filled in by reference -- confirm against _connect().
				if(!$this->_connect($fp))
					return false;

				// using proxy: send entire URI; no proxy: send only the path
				$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
				$this->_disconnect($fp);
				break;

			case "https":
				if(!$this->curl_path || (!is_executable($this->curl_path)))
					return false;
				$this->host = $URI_PARTS["host"];
				if(!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				// using proxy: send entire URI; no proxy: send only the path
				$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
				break;

			default:
				// not a valid protocol
				// The newline is double-quoted so it is a real line break and
				// not a literal backslash followed by "n".
				$this->error	=	'Invalid protocol "'.$scheme.'"'."\n";
				return false;
		}

		// From here on both schemes are handled the same way.

		if($this->_redirectaddr)
		{
			/* url was redirected, check if we've hit the max depth */
			if($this->maxredirs > $this->_redirectdepth)
			{
				// only follow redirect if it's on this site, or offsiteok is true
				if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
				{
					/* follow the redirect */
					$this->_redirectdepth++;
					$this->lastredirectaddr=$this->_redirectaddr;
					$this->fetch($this->_redirectaddr);
				}
			}
		}

		if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// Work on a copy and clear the property first, because the
			// recursive fetch() calls below may collect new frame URLs.
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			// foreach instead of each(): each() no longer exists in PHP 8.
			foreach($frameurls as $frameurl)
			{
				if($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
edit patrikG: added code tags for readability

Posted: Wed May 26, 2004 7:41 pm
by John Cartwright
only added php tags for readability

Code: Select all

<?php
include "Class Fetch http://www.php" ; 

/**
 * Frequency of a word as reported by a search engine (Google).
 *
 * Fetches the search page for $word, lower-cases it, strips tabs and line
 * breaks, then reads the number that follows the start marker. Commas inside
 * the number (thousands separators) are skipped. Because the page is
 * lower-cased before searching, the prompt and start marker must be
 * lower-case to match.
 *
 * spar() and the webpage class are defined elsewhere. spar() is used here as
 * returning a list: [0] URL, [1] prompt text, [2] start marker, [3] end
 * character.
 *
 * @param string $word word to look up
 * @param mixed  $se   search-engine selector, passed through to spar()
 * @return string the digits found, or "?" when the number cannot be located
 */
function freq($word,$se) {
    $spar   = spar($se, $word);
    $url    = $spar[0];
    $prompt = $spar[1];
    $start  = $spar[2];
    $end    = $spar[3];

    $webpg = new webpage;
    $res   = $webpg->html($url);

    // Lower-case the page and drop TAB / LF / CR so markers match across line breaks.
    $html = strtr(
        strtolower((string) $res["results"]),
        array(chr(9) => "", chr(10) => "", chr(13) => "")
    );

    // strpos() returns 0 for a match at the very start of the string, so only
    // a strict comparison with false means "not found".
    $fr = strpos($html, $prompt);
    if ($fr === false) return "?";
    $to = strpos($html, $start, $fr);
    if ($to === false) return "?";

    $len  = strlen($html);
    $freq = "";
    for ($to += strlen($start); $to < $len; ++$to) {
        $ch = $html[$to];
        if (ctype_digit($ch)) {
            // Explicit digit test: a loose "$ch > 0" comparison gives
            // different answers for letters depending on the PHP version.
            $freq .= $ch;
        } elseif ($ch == $end) {
            return $freq;
        } elseif ($ch != ",") {
            // Anything other than a digit, a comma or the end character
            // means this is not the number we were looking for.
            return "?";
        }
    }

    // Ran off the end of the page without seeing the end character.
    return "?";
}

Class Fetch http://www.php: 

include "snoopy.class.inc"; 

/**
 * URL => HTML.
 *
 * Creates a fresh snoopy object, fetches $url with it, and returns what that
 * object recorded. The return value of fetch() itself is not inspected.
 *
 * @param string $url address to fetch
 * @return array "results" => the snoopy object's results property,
 *               "error"   => the snoopy object's error property
 */
function html($url)
{
    $client = new snoopy;
    $client->fetch($url);

    $outcome = array();
    $outcome["results"] = $client->results;
    $outcome["error"]   = $client->error;

    return $outcome;
}

snoopy.class.inc: 


/**
 * Fetch the document at $URI, then follow any redirect and fetch any
 * frames that the request recorded on this object.
 *
 * Only the "http" and "https" schemes are handled. The request helpers
 * (_connect, _httprequest, _httpsrequest, _disconnect) are defined
 * elsewhere in the class; this method only drives them.
 *
 * A redirect is followed while maxredirs > _redirectdepth, and only if
 * the target starts with "http://<current host>" or offsiteok is set.
 * Frames are fetched while _framedepth < maxframes. The results of these
 * recursive fetch() calls are not checked.
 *
 * @param string $URI the full address to fetch
 * @return bool false if the scheme is unsupported (error is set), the
 *              connection could not be made, or the curl binary needed
 *              for https is missing or not executable; true otherwise
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    // parse_url() returns false for seriously malformed URLs; treat that
    // as "no parts" so it ends up in the invalid-protocol branch.
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    // Optional components are simply absent from the parse_url() result,
    // so they have to be checked before they are read.
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
    $path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
    if (!empty($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];

    switch($scheme)
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if(!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];
            // $fp is handed to _connect() uninitialised; presumably it is
            // filled in by reference -- confirm against _connect().
            if(!$this->_connect($fp))
                return false;

            // using proxy: send entire URI; no proxy: send only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);
            break;

        case "https":
            if(!$this->curl_path || (!is_executable($this->curl_path)))
                return false;
            $this->host = $URI_PARTS["host"];
            if(!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            // using proxy: send entire URI; no proxy: send only the path
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
            break;

        default:
            // not a valid protocol
            // The newline is double-quoted so it is a real line break and
            // not a literal backslash followed by "n".
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }

    // From here on both schemes are handled the same way.

    if($this->_redirectaddr)
    {
        /* url was redirected, check if we've hit the max depth */
        if($this->maxredirs > $this->_redirectdepth)
        {
            // only follow redirect if it's on this site, or offsiteok is true
            if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
            {
                /* follow the redirect */
                $this->_redirectdepth++;
                $this->lastredirectaddr=$this->_redirectaddr;
                $this->fetch($this->_redirectaddr);
            }
        }
    }

    if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
    {
        // Work on a copy and clear the property first, because the
        // recursive fetch() calls below may collect new frame URLs.
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        // foreach instead of each(): each() no longer exists in PHP 8.
        foreach($frameurls as $frameurl)
        {
            if($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }

    return true;
}
?>