Parsing XML file, not getting all data
Moderator: General Moderators
Parsing XML file, not getting all data
I have an XML file that I'm looping through with simplexml; there are a total of 46,000 nodes... when looping through, I'm adding each node to a database... The problem I'm having is that after the script is done running, I only have 22,000 items in my DB... meaning it must have skipped some for whatever reason. I have set_time_limit set to zero, and I tried adding sleep(1) to the bottom of the loop, which got me an extra 15 or so...
Any idea why some would be missing? I'm escaping all text fields also..
Any idea why some would be missing? I'm escaping all text fields also..
I've had this same problem before with large XML files... Not really sure what the deal is... here is what I have...
Any ideas? I tried upping the sleep to 2, but that made no difference, it's just not adding any more... and no errors..
Code: Select all
<?php
// Reads test.xml with SimpleXML and writes one row per product into the
// `products` table using REPLACE INTO.
// NOTE(review): no MySQL connection is opened in this snippet; the mysql_*
// calls below presumably rely on one opened elsewhere -- confirm.

// Send PHP errors to error.log, remove the execution time limit and raise
// the memory limit to 256M.
ini_set("error_log" , "error.log");
set_time_limit(0);
ini_set("memory_limit","256M");
if (file_exists('test.xml')) {
// The return value is not checked before it is used below.
$xml = simplexml_load_file('test.xml');
// $items is not assigned anywhere above this loop in the snippet; it is
// first assigned inside the loop body on the next line.
foreach($items as $item){
$items = $xml->children();
// $productId (lower-case p) is the zero-based index into $xml->product.
$productId = 0;
// $item itself is never read; each field is looked up again by index.
foreach($items as $item){
// $ProductId (capital P) is the product's own <ProductId> element.
$ProductId = $xml->product[$productId]->ProductId;
$name = $xml->product[$productId]->name;
$description = $xml->product[$productId]->description;
$imageUrl = $xml->product[$productId]->imageUrl;
$productUrl = $xml->product[$productId]->productUrl;
// The id is read from <Categories> but the name from <TDCategories>.
// NOTE(review): confirm which element name the feed actually uses.
$categoryId = $xml->product[$productId]->Categories->Category[0]->id;
$categoryName = $xml->product[$productId]->TDCategories->Category[0]->name;
// First and second <field> entries are taken as actor and directors.
$actor = $xml->product[$productId]->fields->field[0]->value;
$directors = $xml->product[$productId]->fields->field[1]->value;
// Only these five values are escaped; $ProductId, $imageUrl, $productUrl
// and $categoryId go into the query unescaped.
$name = mysql_real_escape_string($name);
$description = mysql_real_escape_string($description);
$categoryName = mysql_real_escape_string($categoryName);
$actor = mysql_real_escape_string($actor);
$directors = mysql_real_escape_string($directors);
// Any query error stops the whole script via die().
mysql_query("replace into products (ProductId, name, description, imageUrl, productUrl, categoryId, categoryName, actors, directors) VALUES ('$ProductId','$name','$description','$imageUrl','$productUrl','$categoryId','$categoryName','$actor','$directors')")or die(mysql_error());
$productId++;
// Pause one second after every row.
sleep(1);
}
}
} else {
exit('Failed to open test.xml.');
}
?>
Any ideas? I tried upping the sleep to 2, but that made no difference; it's just not adding any more... and no errors.
Look for my comments in the code.
Code: Select all
<?php
// Reviewer-annotated copy of the import script: reads test.xml with
// SimpleXML and writes one row per product into `products`.
// NOTE(review): no MySQL connection is opened in this snippet -- presumably
// done elsewhere; confirm.

// Log to error.log, no execution time limit, 256M memory limit.
ini_set("error_log" , "error.log");
set_time_limit(0);
ini_set("memory_limit","256M");
if (file_exists('test.xml')) {
$xml = simplexml_load_file('test.xml');
// What is this? How is $items getting set with it's initial value?
// If this is all the code then this foreach loop should fail immediately.
foreach($items as $item){
// First assignment of $items in the snippet happens here, inside the loop
// that already iterates over it.
$items = $xml->children();
// Zero-based index into $xml->product, advanced at the end of the inner loop.
$productId = 0;
foreach($items as $item){
$ProductId = $xml->product[$productId]->ProductId;
$name = $xml->product[$productId]->name;
$description = $xml->product[$productId]->description;
$imageUrl = $xml->product[$productId]->imageUrl;
$productUrl = $xml->product[$productId]->productUrl;
// Id comes from <Categories>, name from <TDCategories>.
// NOTE(review): confirm both element names against the feed.
$categoryId = $xml->product[$productId]->Categories->Category[0]->id;
$categoryName = $xml->product[$productId]->TDCategories->Category[0]->name;
$actor = $xml->product[$productId]->fields->field[0]->value;
$directors = $xml->product[$productId]->fields->field[1]->value;
// Only the five values below are escaped before being placed in the query.
$name = mysql_real_escape_string($name);
$description = mysql_real_escape_string($description);
$categoryName = mysql_real_escape_string($categoryName);
$actor = mysql_real_escape_string($actor);
$directors = mysql_real_escape_string($directors);
// A failing query terminates the script through die().
mysql_query("replace into products (ProductId, name, description, imageUrl, productUrl, categoryId, categoryName, actors, directors) VALUES ('$ProductId','$name','$description','$imageUrl','$productUrl','$categoryId','$categoryName','$actor','$directors')")or die(mysql_error());
$productId++;
// One-second pause per row.
sleep(1);
}
}
} else {
exit('Failed to open test.xml.');
}
?>
EricS wrote: Look for my comments in the code.
Code: Select all
<?php
// Quoted import script with its line breaks restored. When the whole script
// sits on one line, the first "//" comment runs up to the closing tag and
// hides every statement after it, leaving the if-block unclosed.
// The statements themselves are unchanged.
// NOTE(review): no MySQL connection is opened here; presumably done elsewhere.

// Log to error.log, no execution time limit, 256M memory limit.
ini_set("error_log" , "error.log");
set_time_limit(0);
ini_set("memory_limit","256M");
if (file_exists('test.xml')) {
    $xml = simplexml_load_file('test.xml');
    $items = $xml->children();
    $productId = 0;
    // What is this? How is $items getting set with it's initial value?
    // If this is all the code then this foreach loop should fail immediately.
    foreach($items as $item){
        // $items and $productId are assigned again here, and a second loop
        // over the same children is started inside the first one.
        $items = $xml->children();
        $productId = 0;
        foreach($items as $item){
            // $productId indexes $xml->product; $ProductId is the element value.
            $ProductId = $xml->product[$productId]->ProductId;
            $name = $xml->product[$productId]->name;
            $description = $xml->product[$productId]->description;
            $imageUrl = $xml->product[$productId]->imageUrl;
            $productUrl = $xml->product[$productId]->productUrl;
            $categoryId = $xml->product[$productId]->Categories->Category[0]->id;
            $categoryName = $xml->product[$productId]->TDCategories->Category[0]->name;
            $actor = $xml->product[$productId]->fields->field[0]->value;
            $directors = $xml->product[$productId]->fields->field[1]->value;
            $name = mysql_real_escape_string($name);
            $description = mysql_real_escape_string($description);
            $categoryName = mysql_real_escape_string($categoryName);
            $actor = mysql_real_escape_string($actor);
            $directors = mysql_real_escape_string($directors);
            mysql_query("replace into products (ProductId, name, description, imageUrl, productUrl, categoryId, categoryName, actors, directors) VALUES ('$ProductId','$name','$description','$imageUrl','$productUrl','$categoryId','$categoryName','$actor','$directors')")or die(mysql_error());
            $productId++;
            sleep(1);
        }
    }
} else {
    exit('Failed to open test.xml.');
}
?>
Sorry, I updated it... was changing a few values before posting, forgot to add it.
Look for my comments.
Code: Select all
<?php
// Second reviewer-annotated copy of the import script: reads test.xml with
// SimpleXML and writes one row per product into `products`.
// NOTE(review): no MySQL connection is opened in this snippet -- presumably
// done elsewhere; confirm.

// Log to error.log, no execution time limit, 256M memory limit.
ini_set("error_log" , "error.log");
set_time_limit(0);
ini_set("memory_limit","256M");
if (file_exists('test.xml')) {
$xml = simplexml_load_file('test.xml');
$items = $xml->children();
$productId = 0;
// Okay you fixed this.
foreach($items as $item){
// But now you have recursion bug here!
// (Strictly a nested loop rather than recursion: the two lines below reset
// the state and the inner foreach walks all children again, so the whole
// import is repeated once for every iteration of the outer loop.)
$items = $xml->children();
$productId = 0;
foreach($items as $item){
// $productId indexes $xml->product; $ProductId is the element's value.
$ProductId = $xml->product[$productId]->ProductId;
$name = $xml->product[$productId]->name;
$description = $xml->product[$productId]->description;
$imageUrl = $xml->product[$productId]->imageUrl;
$productUrl = $xml->product[$productId]->productUrl;
// Id comes from <Categories>, name from <TDCategories>.
// NOTE(review): confirm both element names against the feed.
$categoryId = $xml->product[$productId]->Categories->Category[0]->id;
$categoryName = $xml->product[$productId]->TDCategories->Category[0]->name;
$actor = $xml->product[$productId]->fields->field[0]->value;
$directors = $xml->product[$productId]->fields->field[1]->value;
// Only the five values below are escaped before being placed in the query.
$name = mysql_real_escape_string($name);
$description = mysql_real_escape_string($description);
$categoryName = mysql_real_escape_string($categoryName);
$actor = mysql_real_escape_string($actor);
$directors = mysql_real_escape_string($directors);
// A failing query terminates the script through die().
mysql_query("replace into products (ProductId, name, description, imageUrl, productUrl, categoryId, categoryName, actors, directors) VALUES ('$ProductId','$name','$description','$imageUrl','$productUrl','$categoryId','$categoryName','$actor','$directors')")or die(mysql_error());
$productId++;
// One-second pause per row.
sleep(1);
}
}
} else {
exit('Failed to open test.xml.');
}
?>
lol.... WHOOPSIE.. that shouldn't be in there, this is what I have.... I fixed it in your quote.
EricS wrote: Look for my comments.
Code: Select all
<?php
// Import every <product> element of test.xml into the `products` table,
// one REPLACE INTO per product.
//
// NOTE(review): assumes a mysql_* connection was opened before this point;
// mysql_real_escape_string() and mysql_query() both depend on it -- confirm.
// NOTE(review): REPLACE overwrites an existing row that shares a primary or
// unique key, so if ProductId is such a key and the feed repeats ids, the
// table will end up with fewer rows than the file has nodes -- verify.

// Log to error.log, no execution time limit, 256M memory limit.
ini_set("error_log", "error.log");
set_time_limit(0);
ini_set("memory_limit", "256M");

if (!file_exists('test.xml')) {
    exit('Failed to open test.xml.');
}

// Collect libxml errors instead of letting them pass as warnings, so that a
// malformed document is reported with line numbers rather than going unnoticed.
libxml_use_internal_errors(true);
$xml = simplexml_load_file('test.xml');
if ($xml === false) {
    $problems = array();
    foreach (libxml_get_errors() as $error) {
        $problems[] = 'line ' . $error->line . ': ' . trim($error->message);
    }
    exit('Failed to parse test.xml. ' . implode('; ', $problems));
}

// Iterate the <product> elements directly. Looking each one up again as
// $xml->product[$i] repeats the search through the sibling list on every
// access, and looping over children() would run past the end of the product
// list if the root ever held other element types.
foreach ($xml->product as $product) {
    // Cast to string so plain values, not SimpleXMLElement objects, reach the
    // query. Optional nested nodes fall back to '' when they are absent.
    // NOTE(review): the id is read from <Categories> and the name from
    // <TDCategories>, as in the original -- confirm both names against the feed.
    $values = array(
        (string) $product->ProductId,
        (string) $product->name,
        (string) $product->description,
        (string) $product->imageUrl,
        (string) $product->productUrl,
        isset($product->Categories->Category[0]) ? (string) $product->Categories->Category[0]->id : '',
        isset($product->TDCategories->Category[0]) ? (string) $product->TDCategories->Category[0]->name : '',
        isset($product->fields->field[0]) ? (string) $product->fields->field[0]->value : '',
        isset($product->fields->field[1]) ? (string) $product->fields->field[1]->value : '',
    );

    // Escape every value, not only the free-text ones: ids and URLs come from
    // the same file and a single quote in any of them would break the query.
    $escaped = array_map('mysql_real_escape_string', $values);

    $sql = "replace into products (ProductId, name, description, imageUrl, productUrl, categoryId, categoryName, actors, directors) VALUES ('"
        . implode("','", $escaped)
        . "')";

    mysql_query($sql) or die(mysql_error());

    // No sleep() between rows: a one-second pause per product adds roughly
    // 12.8 hours to a 46,000-product import.
}
?>
Yes, I checked it out.. everything looks good.. and what's weird is that the log stopped at 597, yet I've been able to get 12,391 items in the product table. The XML file is 56 MB also, so I'm wondering if it's just too big..
EricS wrote: Did you examine the XML in the document you're loading around the node id you're stopping on? Sounds like SimpleXML is reading in something it doesn't like and considers that the end of the document.