I need to identify the similar text in two web pages and remove it.
I need to store the text that is between <b> and </b> in an array. For example, given <b> hello everyone</b>, I need to store " hello everyone" in an array.
I will do this for all <b> tags in page 1 and then page 2, and then compare the text from both pages to find matches and remove them.
Please help me code this in PHP.
identifying same texts in 2 web pages
Moderator: General Moderators
Re: identifying same texts in 2 web pages
I found all the b tags, but how do I take the text content from the b tags and store it in an array so that I can compare them and remove the repeated text? Please help.
<?php
// Loads two web pages, removes from the first page every <b> element whose
// (trimmed) text also appears inside a <b> element on the second page, and
// prints the resulting HTML of the first page.

// Real-world HTML is rarely valid; collect libxml's parse diagnostics
// internally instead of hiding every error (including load failures) with "@".
$previousLibxmlSetting = libxml_use_internal_errors(true);

$doc = new DOMDocument();
if (!$doc->loadHTMLFile('http://www.web-source.net/web_design_tips/')) {
    throw new RuntimeException('Could not load the first page.');
}

$doc2 = new DOMDocument();
if (!$doc2->loadHTMLFile('http://www.web-source.net/html_codes_chart.htm')) {
    throw new RuntimeException('Could not load the second page.');
}

libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// Store the text of each <b> element. array_intersect() compares string
// values, so the arrays must hold strings, not DOMNodeList objects.
$List = array();
foreach ($doc->getElementsByTagName('b') as $bold) {
    $List[] = trim($bold->textContent);
}

$List2 = array();
foreach ($doc2->getElementsByTagName('b') as $bold) {
    $List2[] = trim($bold->textContent);
}

// Texts that occur in <b> elements of both pages.
$textBoth = array_intersect($List, $List2);

// getElementsByTagName() returns a live list that shrinks as nodes are
// removed; copy it to a plain array first so no element is skipped.
$boldNodes = iterator_to_array($doc->getElementsByTagName('b'), false);
foreach ($boldNodes as $bold) {
    $isShared = in_array(trim($bold->textContent), $textBoth, true);
    if ($isShared && $bold->parentNode !== null) {
        $bold->parentNode->removeChild($bold);
    }
}

echo $doc->saveHTML();
?>
<?php
// Loads two web pages, removes from the first page every <b> element whose
// (trimmed) text also appears inside a <b> element on the second page, and
// prints the resulting HTML of the first page.

// Keep libxml's complaints about malformed HTML out of the output without
// using "@", which would also hide a failed download.
$previousLibxmlSetting = libxml_use_internal_errors(true);

$doc = new DOMDocument();
$doc2 = new DOMDocument();
$firstLoaded = $doc->loadHTMLFile('http://www.web-source.net/web_design_tips/');
$secondLoaded = $doc2->loadHTMLFile('http://www.web-source.net/html_codes_chart.htm');

libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

if (!$firstLoaded || !$secondLoaded) {
    throw new RuntimeException('Could not load one of the pages to compare.');
}

$xpath = new DOMXPath($doc);
$xpath2 = new DOMXPath($doc2);

// Index the second page's <b> texts by value so each lookup below is a
// constant-time isset() instead of a scan of the whole list.
$List2 = array();
foreach ($xpath2->query('//b') as $bold) {
    $List2[trim($bold->textContent)] = true;
}

// Walk the first page's <b> elements: record each text, and remember the
// elements whose text also occurs on the second page.
$List = array();
$textBoth = array();
foreach ($xpath->query('//b') as $bold) {
    $content = trim($bold->textContent);
    $List[] = $content;
    if (isset($List2[$content])) {
        $textBoth[] = $bold;
    }
}

// Removal happens in a separate pass over a plain array, so the document is
// never modified while a node list is still being iterated.
foreach ($textBoth as $text) {
    if ($text->parentNode !== null) {
        $text->parentNode->removeChild($text);
    }
}

echo $doc->saveHTML();
?>