Page 1 of 1

Scrape login-required page using curl

Posted: Tue Feb 22, 2011 3:17 pm
by benjiw
Hi all, I'm trying to scrape the contents of a page that is behind a login screen, namely http://my.mail.ru/apps. My code is below. It almost works, but it doesn't appear to be logging in properly -- when I download the URL I just get the login screen back. Any ideas? Thanks much.


Here's my code

Code: Select all


<?php
 
// Log in first, then reuse the same cURL handle (which carries the session
// cookies) to fetch the protected page and print its HTML.
$session = login();
echo downloadUrl('http://my.mail.ru/apps', $session);
  
/**
 * Fetch a URL through an already-initialised cURL handle.
 *
 * The handle is switched out of POST mode, given the login page as referer
 * and a fixed user agent, and told to return the body rather than print it.
 *
 * @param string $Url URL to download.
 * @param mixed  $ch  cURL handle, e.g. the one returned by login().
 *
 * @return mixed Whatever curl_exec() returns for this handle: the response
 *               body on success, false on failure.
 */
function downloadUrl($Url, $ch)
{
    // Options are applied in one call, in the same order as before.
    curl_setopt_array($ch, [
        CURLOPT_URL            => $Url,
        CURLOPT_POST           => 0, // the handle was last used for a POST
        CURLOPT_REFERER        => "http://my.mail.ru/cgi-bin/login?noclear=1&page=http%3a%2f%2fmy.mail.ru%2fapps%2f",
        CURLOPT_USERAGENT      => "MozillaXYZ/1.0",
        CURLOPT_HEADER         => 0, // body only, no response headers
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 10, // seconds
    ]);

    return curl_exec($ch);
}
 
 
 
/**
 * Submit the my.mail.ru login form and return the cURL handle used for it.
 *
 * The same handle should be reused for subsequent requests: setting
 * CURLOPT_COOKIEJAR turns on cURL's cookie handling for the handle, so the
 * session cookies received here are sent with later requests made through it
 * (and are written to cookie.txt when the handle is closed).
 *
 * A failed transfer is reported with an E_USER_WARNING; the handle is still
 * returned so existing callers keep working.
 *
 * @return mixed The cURL handle created by curl_init().
 */
function login(){
    $loginUrl = 'http://my.mail.ru/cgi-bin/login?noclear=1&page=http%3a%2f%2fmy.mail.ru%2fapps%2f';

    // Build the body with http_build_query() instead of a multi-line string
    // literal: the literal's newlines and indentation were being sent as part
    // of the field names and values, so the server never saw a clean
    // Login/Password pair. The '&' separator is passed explicitly so the
    // result does not depend on the arg_separator.output ini setting.
    $postData = http_build_query([
        'page'     => 'http://my.mail.ru/apps/',
        'Login'    => 'username',
        'Domain'   => 'mail.ru',
        'Password' => 'password',
    ], '', '&');

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $loginUrl);
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
    curl_setopt($ch, CURLOPT_COOKIEJAR, 'cookie.txt');
    // The login endpoint redirects after a successful sign-in; follow it so
    // cookies set along the redirect chain are collected too.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 10);
    // Keep the login response out of the script's output.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    if (curl_exec($ch) === false) {
        trigger_error('Login request failed: ' . curl_error($ch), E_USER_WARNING);
    }

    return $ch;
}


?>