Help Needed : Magpie RSS - PHP RSS Parser Error
Posted: Thu Jul 09, 2009 11:38 am
I tried to use Magpie RSS (magpierss.sourceforge.net) in PHP to crawl RSS feeds, but I could not get any RSS content from http://rss.news.yahoo.com/rss/business using the PHP code below. Can anyone help? Thanks.
$URL = http://rss.news.yahoo.com/rss/business
$URL = http://rss.news.yahoo.com/rss/business
Code: Select all
// Download the feed at $url, parse it and insert its entries into the
// `baentries` table, then print a one-line HTML status.
//
// Inputs taken from the surrounding scope: $url (feed address) and $id
// (second column of every inserted row). PHP variable names are
// case-sensitive, so the address must be assigned to $url, not $URL.
// Outputs left in scope: $kolko (number of rows inserted without a MySQL
// error) on success, or $ierr (the parser's error value) on a parse failure.
// When the download itself fails nothing is printed here; get_site_content()
// stores the reason in the global $err.
if ( $data = get_site_content( $url ) )
{
    $items = parse_feed_contents( $data );
    if ( is_array( $items ) )
    {
        $kolko = 0;
        foreach ( $items as $item )
        {
            // Import only entries that carry a title or some content. The
            // earlier test was inverted and let through nothing but entries
            // where both were empty.
            if ( empty( $item['title'] ) && empty( $item['content'] ) )
            {
                continue;
            }

            // md5 of the title is stored in the third column.
            $mdsum = md5( $item['title'] );

            // pubdate comes from strtotime()/date_timestamp or is empty, so
            // an integer cast keeps it from injecting anything into the SQL.
            // NOTE(review): prep_for_mysql() is presumably the escaping
            // helper and $id is presumably a trusted integer - confirm both.
            $sql = "INSERT into baentries values('',".$id.",'{$mdsum}','"
                .@prep_for_mysql( $item['title'] )."','"
                .@prep_for_mysql( $item['link'] )."','"
                .@prep_for_mysql( $item['content'] )."','"
                .( (int) $item['pubdate'] )."')";

            // Warnings are silenced on purpose: the outcome is read back via
            // mysql_error() and a failed insert is simply not counted.
            @mysql_unbuffered_query( $sql );
            if ( !mysql_error( ) )
            {
                ++$kolko;
            }
        }
        echo "<b><font color=\"blue\">OK</font> (Imported ".$kolko." new entries)</b><br>";
    }
    else
    {
        // A non-array result is the parser's error value; escape it before
        // it is embedded in the HTML status line.
        echo "<b><font color=\"red\">Parser Error: </font>".htmlspecialchars( (string) $items )."</b><br>";
        $ierr = $items;
    }
}
/**
 * Fetch the body of a URL.
 *
 * Uses the cURL extension when it is loaded and falls back to
 * file_get_contents() otherwise. On failure the reason is written to the
 * global $err and FALSE is returned.
 *
 * @param string $url Address to download.
 * @return string|false Response body, or FALSE on failure.
 */
function get_site_content( $url = "" )
{
    global $err;

    // Fallback path: no cURL available.
    if ( !extension_loaded( "curl" ) )
    {
        $body = @file_get_contents( $url );
        if ( !$body )
        {
            $err = "Can't Connect";
            return false;
        }
        return $body;
    }

    // Options are applied one by one, in this order.
    // NOTE(review): peer certificate verification is switched off here.
    $options = array(
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => "Mozilla/4.0 (compatible; Windows XP 5.1)",
        CURLOPT_HEADER         => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_TIMEOUT        => 20,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_ENCODING       => "gzip"
    );

    $handle = curl_init( );
    foreach ( $options as $option => $value )
    {
        curl_setopt( $handle, $option, $value );
    }

    $body = curl_exec( $handle );
    if ( curl_errno( $handle ) )
    {
        // Record the transport error before the handle is released.
        $err = curl_error( $handle );
        $body = false;
    }
    curl_close( $handle );

    return $body;
}
/**
 * Parse a raw feed document into a flat list of entries.
 *
 * The document is handed to the `magpierss` class loaded from
 * templates/rss_parse.inc. Only feeds whose type is reported as RSS or ATOM
 * (case-insensitive) produce entries; items of any other type are skipped.
 *
 * @param string $data Raw feed document.
 * @return array|mixed List of entries, each an array with the keys title,
 *                     link, content and pubdate (RSS entries also carry
 *                     guid). If the parser reports an error, its ERROR value
 *                     is returned instead of an array.
 */
function parse_feed_contents( $data = "" )
{
    // "@" is kept because it also silences notices raised while the include
    // file runs; removing it could change the page output.
    @require_once( "templates/rss_parse.inc" );

    // Swap the "<!--more-->" marker for a placeholder before parsing; it is
    // put back into each entry's content below. str_ireplace() is the
    // literal, case-insensitive equivalent of the eregi_replace() call used
    // previously, which no longer exists as of PHP 7.
    $data = str_ireplace( "<!--more-->", "{MORE_TAG}", $data );

    $rss = new magpierss( $data );
    if ( $rss->ERROR )
    {
        return $rss->ERROR;
    }

    // The feed type is the same for every item, so normalise it once.
    $feed_type = strtoupper( (string) $rss->feed_type );

    $for_return = array( );
    foreach ( $rss->items as $item )
    {
        // Reset every field per item so that an entry with a missing field
        // never inherits the previous entry's value.
        $title = "";
        $link = "";
        $description = "";
        $contentencoded = "";
        $content = "";
        $pubdate = "";
        $guid = "";

        if ( $feed_type === "RSS" )
        {
            if ( isset( $item['title'] ) )
            {
                $title = trim( $item['title'] );
            }
            if ( isset( $item['link'] ) )
            {
                $link = trim( $item['link'] );
            }
            if ( isset( $item['description'] ) )
            {
                $description = trim( $item['description'] );
            }
            if ( isset( $item['content']['encoded'] ) )
            {
                $contentencoded = trim( $item['content']['encoded'] );
            }
            if ( isset( $item['pubdate'] ) )
            {
                $pubdate = strtotime( trim( $item['pubdate'] ) );
            }
            else if ( isset( $item['date_timestamp'] ) )
            {
                $pubdate = trim( $item['date_timestamp'] );
            }
            if ( isset( $item['guid'] ) )
            {
                $guid = trim( $item['guid'] );
            }

            // content:encoded wins over description when both are present.
            if ( !empty( $contentencoded ) )
            {
                $content = $contentencoded;
            }
            else if ( !empty( $description ) )
            {
                $content = $description;
            }

            $for_return[] = array(
                "title" => $title,
                "link" => $link,
                "content" => str_ireplace( "{MORE_TAG}", "<!--more-->", $content ),
                "pubdate" => $pubdate,
                "guid" => $guid
            );
        }
        else if ( $feed_type === "ATOM" )
        {
            if ( isset( $item['title'] ) )
            {
                $title = trim( $item['title'] );
            }
            if ( isset( $item['link'] ) )
            {
                $link = trim( $item['link'] );
            }
            if ( isset( $item['atom_content'] ) )
            {
                $content = trim( $item['atom_content'] );
            }
            if ( isset( $item['published'] ) )
            {
                $pubdate = strtotime( trim( $item['published'] ) );
            }
            else if ( isset( $item['date_timestamp'] ) )
            {
                $pubdate = trim( $item['date_timestamp'] );
            }

            // The placeholder was inserted into the whole document, so it
            // has to be restored for Atom content as well.
            $for_return[] = array(
                "title" => $title,
                "link" => $link,
                "content" => str_ireplace( "{MORE_TAG}", "<!--more-->", $content ),
                "pubdate" => $pubdate
            );
        }
    }

    return $for_return;
}