help with php cURL to get data from the site
Posted: Mon Jul 03, 2006 8:33 pm
I have been using PHP cURL successfully to gather data from websites, but one site is returning garbled output like this: ‰‹dÈ/åLó²²ó{‡iÚºfzû;Tg`²ÿ ŠsäƒFB¢Ž‘à8Uù–¤e¦„Ò ý¦ðR¢|¬ä2¡1%
A community of PHP developers offering assistance, advice, discussion, and friendship.
http://forums.devnetwork.net/
Code: Select all
/**
 * CurlAbs - a thin convenience wrapper around PHP's cURL extension.
 *
 * One instance keeps a single cURL handle that is created lazily and reused
 * across requests. Every request URL is built as base URL + relative path,
 * and cookies are read from and written to "cookie.txt" in the working
 * directory that was current when the object was constructed.
 */
class CurlAbs {
    /** Wrapper version; embedded in the default User-Agent string. */
    var $version;
    /** Prefix prepended to every URL passed to curl(). */
    var $baseURL;
    /** Path of the cookie file used as both cookie source and cookie jar. */
    var $cookie;
    /** Stored as 3600 by the constructor; not read anywhere in this class. */
    var $cookieexpirytime;
    /** User-Agent header value sent with every request. */
    var $agent;
    /** When truthy, _debug() echoes its messages. */
    var $debug;
    /** Lazily created cURL handle; null until the first curl() call and after close(). */
    var $curl = null;

    /**
     * Set up the wrapper.
     *
     * @param string $baseURL Prefix for every request URL.
     * @param string $ua      Short browser code understood by set_agent()
     *                        (e.g. "FF15"); anything else selects the default agent.
     * @param mixed  $debug   Truthy to echo debug messages.
     */
    function __construct($baseURL, $ua = "", $debug = 0) {
        // version must be set before set_agent(), whose default string embeds it.
        $this->version = 0.1;
        $this->baseURL = $baseURL;
        $this->cookie = getcwd()."/cookie.txt";
        $this->cookieexpirytime = 3600; //1 hour
        $this->agent = $this->set_agent($ua);
        $this->debug = $debug;
        $this->_debug("CurlAbs initiated");
    }

    /**
     * Legacy class-named constructor.
     *
     * Kept so existing code that calls CurlAbs() explicitly keeps working.
     * PHP 8 no longer treats a class-named method as the constructor, which
     * is why the real initialisation lives in __construct().
     */
    function CurlAbs($baseURL, $ua = "", $debug = 0) {
        $this->__construct($baseURL, $ua, $debug);
    }

    /**
     * Fetch baseURL + $url and return the response body.
     *
     * @param string $url  Path appended to the base URL.
     * @param mixed  $post Array of fields to send as a POST request; any
     *                     non-array value results in a GET request.
     * @return mixed The response body, or false when the handle could not be
     *               created or the transfer failed.
     */
    function curl($url, $post = "") {
        if (!$this->curl) {
            $this->curl = curl_init();
            if (!$this->curl) {
                $this->_debug("curl_init() failed");
                return false;
            }
        }
        $curl = $this->curl;

        curl_setopt($curl, CURLOPT_URL, $this->baseURL.$url);
        if (is_array($post)) {
            curl_setopt($curl, CURLOPT_POST, TRUE);
            curl_setopt($curl, CURLOPT_POSTFIELDS, $post);
        } else {
            // The handle is reused, so explicitly switch back to GET in case
            // the previous request on it was a POST.
            curl_setopt($curl, CURLOPT_HTTPGET, TRUE);
        }
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 1);
        curl_setopt($curl, CURLOPT_USERAGENT, $this->agent);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($curl, CURLOPT_COOKIEFILE, $this->cookie);
        curl_setopt($curl, CURLOPT_COOKIEJAR, $this->cookie);
        // An empty string advertises every encoding cURL supports and makes it
        // decode the response. Without this, a server that sends gzip/deflate
        // content hands back the raw compressed bytes, which look like garbage.
        curl_setopt($curl, CURLOPT_ENCODING, "");

        $page = curl_exec($curl);
        if (curl_errno($curl)) { echo curl_error($curl); }
        return $page;
    }

    /**
     * Release the cURL handle, if one was created.
     *
     * The property is reset so that a later curl() call creates a fresh
     * handle instead of reusing the closed one.
     */
    function close() {
        if ($this->curl) {
            curl_close($this->curl);
            $this->curl = null;
        }
    }

    /**
     * Translate a short browser code into a full User-Agent string.
     *
     * Despite its name this method only returns the string; the constructor
     * is what stores the result in $this->agent.
     *
     * @param string $ua One of NS6, NS7, IE6, IE7, FF1, FF15, OP8, OP9, SA2,
     *                   CA1 or LYNX.
     * @return string The matching User-Agent, or "CurlAbs/<version> (PHP/cURL)"
     *                for any other value.
     */
    function set_agent($ua = "") {
        $agents = array(
            "NS6"  => "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:0.9.2) Gecko/20020508 Netscape6/6.1",
            "NS7"  => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.4) Gecko/20030624 Netscape/7.1 (ax)",
            "IE6"  => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)",
            "IE7"  => "Mozilla/4.0 (compatible; MSIE 7.0b; Win32)",
            "FF1"  => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.10) Gecko/20050716 Firefox/1.0.6",
            "FF15" => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8) Gecko/20051107 Firefox/1.5",
            "OP8"  => "Opera/8.00 (Windows NT 5.1; U; en)",
            "OP9"  => "Opera/9.00 (Macintosh; PPC Mac OS X; U; en)",
            "SA2"  => "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/412 (KHTML, like Gecko) Safari/412",
            "CA1"  => "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.1) Gecko/20060214 Camino/1.0",
            "LYNX" => "Lynx/2.8.4rel.1 libwww-FM/2.14"
        );
        // Only exact string codes select a browser; this avoids the loose
        // comparison of a switch, under which e.g. integer 0 could match "NS6".
        if (is_string($ua) && isset($agents[$ua])) {
            return $agents[$ua];
        }
        return "CurlAbs/".$this->version." (PHP/cURL)";
    }

    /**
     * @return string The User-Agent string currently in use.
     */
    function get_agent() {
        return $this->agent;
    }

    /**
     * Echo a debug message followed by "<br>" and flush output, but only
     * when debugging was enabled in the constructor.
     *
     * @param string $message Text to print.
     */
    function _debug($message = "") {
        if ($this->debug) {
            echo $message."<br>";
            flush();
        }
    }
}
// Example: fetch http://www.ooer.com/index.php while identifying as
// Firefox 1.5, with debug messages switched on, then print the result.
$client = new CurlAbs("http://www.ooer.com/", "FF15", 1);
$html = $client->curl("index.php");
// Release the cURL handle before emitting the fetched page.
$client->close();
echo $html;