Pull data from website
Posted: Fri Dec 03, 2004 3:06 am
I would like to know what keyword I should use to search this forum for information on pulling data from another website to be displayed on my own site.
Thanks ya.
Thanks ya.
A community of PHP developers offering assistance, advice, discussion, and friendship.
http://forums.devnetwork.net/
Code: Select all
<?php
require_once 'ES_SimpleTimer.php';
require_once 'ES_URLManipulator.php';
/**
 * Fetches the body of a web page over a raw HTTP/1.0 socket connection.
 *
 * Relies on ES_URLManipulator (URL splitting) and ES_SimpleTimer (overall
 * time budget), both loaded by the require_once lines above this class.
 */
class ES_WebPageFetcher {

    /**
     * PHP 4 style constructor; there is no state to initialise.
     */
    function ES_WebPageFetcher() {
    }

    /**
     * Download a page and return its body.
     *
     * The connection is always made to port 80 of the URL's host. Each socket
     * read is limited to 5 seconds and the whole fetch to 15 seconds.
     *
     * @param string $webPageURL Address of the page to fetch.
     * @return string|null The response body, or NULL when the connection
     *                     fails, a read times out, the overall time budget is
     *                     exceeded, or the status code is not 200 or 302.
     */
    function fetch ($webPageURL) {
        $maxTime = 15;        // overall budget for the whole fetch, in seconds
        $packetSize = 1024;   // maximum bytes per read
        $streamTimeOut = 5;   // connect / per-read timeout, in seconds

        // Plain assignment: "=& new" is a parse error from PHP 7 onwards.
        $urlManipulator = new ES_URLManipulator();
        $timer = new ES_SimpleTimer();
        $timer->start();

        $domain = $urlManipulator->extractDomain($webPageURL);
        $protocolessURL = $urlManipulator->removeProtocol($webPageURL);

        // connect to the server
        $socketResource = @fsockopen($domain, 80, $errNo, $errStr, $streamTimeOut);
        if (!is_resource($socketResource)) {
            return NULL;
        }
        stream_set_timeout($socketResource, $streamTimeOut);

        // HTTP requires CRLF line endings; the empty line ends the header block.
        $socketRequest  = 'GET http://'.$protocolessURL.' HTTP/1.0'."\r\n";
        $socketRequest .= 'Host: '.$domain."\r\n";
        $socketRequest .= 'User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)'."\r\n\r\n";
        fputs($socketResource, $socketRequest);

        // receive the status line
        $statusLine = fgets($socketResource, $packetSize);
        $headContent = ($statusLine === false) ? '' : trim($statusLine);
        if ($this->_outOfTime($socketResource, $timer, $maxTime)) {
            fclose($socketResource);
            return NULL;
        }

        // Only a well-formed status line carrying exactly 200 or 302 is accepted.
        $statusPattern = '#^http/\d+\.\d+ (\d+)#i';
        if (!preg_match($statusPattern, $headContent, $statusMatch)
            || ($statusMatch[1] !== '200' && $statusMatch[1] !== '302')) {
            fclose($socketResource);
            return NULL;
        }

        // skip the remaining header lines, up to the blank separator line
        while ($headContent !== '') {
            $headerLine = fgets($socketResource, $packetSize);
            $headContent = ($headerLine === false) ? '' : trim($headerLine);
            if ($this->_outOfTime($socketResource, $timer, $maxTime)) {
                fclose($socketResource);
                return NULL;
            }
        }

        // first line of the body
        $contents = fgets($socketResource, $packetSize);
        if ($this->_outOfTime($socketResource, $timer, $maxTime)) {
            fclose($socketResource);
            return NULL;
        }
        // A first line that does not start with "<" is discarded.
        if ($contents === false || !preg_match('#^<#', $contents)) {
            $contents = '';
        }

        // rest of the body
        while (!feof($socketResource)) {
            $chunk = fread($socketResource, $packetSize);
            if ($chunk !== false) {
                $contents .= $chunk;
            }
            if ($this->_outOfTime($socketResource, $timer, $maxTime)) {
                fclose($socketResource);
                return NULL;
            }
        }

        fclose($socketResource);
        return $contents;
    }

    /**
     * Internal helper: TRUE when the last socket read timed out or the overall
     * time budget has been used up.
     *
     * @param resource       $socketResource Open socket stream.
     * @param ES_SimpleTimer $timer          Timer started at the beginning of the fetch.
     * @param int            $maxTime        Overall budget in seconds.
     * @return bool
     */
    function _outOfTime($socketResource, $timer, $maxTime) {
        $streamMetaData = stream_get_meta_data($socketResource);
        if ($streamMetaData['timeout']) {
            return true;
        }
        return $timer->fetchRunningTime() > $maxTime;
    }
}
?>
Code: Select all
<?php
/**
 * Small stopwatch based on microtime().
 *
 * Times are reported as strings with five decimal places and no thousands
 * separator, so they remain valid numeric strings for comparisons.
 */
class ES_SimpleTimer {
    /** Timestamp recorded by start(), in seconds with a fractional part. */
    var $startTime;
    /** Timestamp recorded by stop(), in seconds with a fractional part. */
    var $stopTime;

    /**
     * PHP 4 style constructor; nothing to initialise.
     */
    function ES_SimpleTimer() {
    }

    /**
     * Record the current time as the start of the measurement.
     */
    function start() {
        $this->startTime = $this->_now();
    }

    /**
     * Record the current time as the end of the measurement.
     */
    function stop() {
        $this->stopTime = $this->_now();
    }

    /**
     * Time between start() and stop().
     *
     * @return string Seconds, formatted with five decimals.
     */
    function fetchTime() {
        return $this->_format($this->stopTime - $this->startTime);
    }

    /**
     * Time elapsed since start(), without stopping the timer.
     *
     * @return string Seconds, formatted with five decimals.
     */
    function fetchRunningTime() {
        return $this->_format($this->_now() - $this->startTime);
    }

    /**
     * Internal helper: current time in seconds as a float, assembled from the
     * "usec sec" string that microtime() returns.
     */
    function _now() {
        list($_usec, $_sec) = explode(" ", microtime());
        return (float)$_usec + (float)$_sec;
    }

    /**
     * Internal helper: format a duration with five decimals. The thousands
     * separator is explicitly empty; the default "," would turn 1234.5 into
     * "1,234.50000", which is no longer a numeric string.
     */
    function _format($seconds) {
        return number_format($seconds, 5, '.', '');
    }
}
?>
Code: Select all
<?php
/**
 * String helpers for taking a URL apart.
 */
class ES_URLManipulator {

    /**
     * PHP 4 style constructor; nothing to initialise.
     */
    function ES_URLManipulator () {
    }

    /**
     * Return the host part of a URL: everything after the scheme and before
     * the first "/", "?" or "#".
     *
     * @param string $url
     * @return string|null The host, or NULL when the URL contains none.
     */
    function extractDomain ($url) {
        $url = $this->removeProtocol($url);
        // "~" delimiters so that "#" can appear inside the character class.
        $urlParts = preg_split('~[/?#]~', $url, -1, PREG_SPLIT_NO_EMPTY);
        // Guard against empty input instead of reading an undefined offset.
        return isset($urlParts[0]) ? $urlParts[0] : NULL;
    }

    /**
     * Strip a leading http, https, ftp, ftps or smb scheme from a URL.
     *
     * Only a scheme at the start of the string is removed, so a URL embedded
     * later on (for example in a query string) is left untouched. Leading
     * whitespace is preserved.
     *
     * @param string $url
     * @return string The URL without its scheme.
     */
    function removeProtocol ($url) {
        $pattern = '#^(\s*)(?:http|https|ftp|ftps|smb)://#i';
        return preg_replace($pattern, '$1', $url);
    }
}
?>
Code: Select all
<?php
// Give up with no result when the connection attempt did not produce a stream resource.
if (is_resource($socketResource) === false) {
    return null;
}
?>
Code: Select all
<?php
// Open a TCP connection to port 80 of $domain, waiting at most $streamTimeOut
// seconds; warnings are suppressed and failure is detected via the return value.
$socketResource = @fsockopen($domain, 80, $errNo, $errStr, $streamTimeOut);
if (is_resource($socketResource) === false) {
    // No usable connection: report "no result".
    return null;
}
?>
Code: Select all
// Build the raw request text: a GET request line with an absolute URI, a Host
// header, and a browser-like User-Agent header followed by the blank line that
// ends the header block.
// NOTE(review): lines are terminated with a bare "\n"; HTTP specifies CRLF ("\r\n") - confirm the target servers accept this.
$socketRequest = 'GET http://'.$protocolessURL.' HTTP/1.0'."\n";
$socketRequest .= 'Host: '.$domain."\n";
$socketRequest .= 'User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)'."\n\n";Code: Select all
<?php
// Print every HTTP header of the current request, to see what a real browser sends.
echo print_r(apache_request_headers(), true);
?>
Code: Select all
<?php
// Stand-alone connectivity probe: try to reach the host on port 80 with a
// 5 second timeout. Warnings are suppressed because the failure reason is
// reported explicitly below.
$socketResource = @fsockopen("www.mechg.com", 80, $errNo, $errStr, 5);
if (!is_resource($socketResource)) {
    // Keep the original "Error!" marker, but add the reason reported by fsockopen().
    echo "Error! (".$errNo.") ".$errStr;
} else {
    // The probe only checks reachability, so release the connection right away.
    fclose($socketResource);
}
?>