I run a link site; people submit their pages (always just a single page at a time) for me to post on my website, but before I post them I want to check for broken images on that page. I have no way of knowing the names of the images, though they will always be JPG files. I know it's possible to do, but I am having no luck figuring out how to do it.
Any help?
checking if files exist
Moderator: General Moderators
Code: Select all
// Open the URL purely to trigger the HTTP request; the body is never read.
// PHP's HTTP stream wrapper fills $http_response_header with the response
// headers in the current scope as a side effect of this call.
$fp = fopen($url, 'r');
// fopen() returns false when the request fails (for example a 404 on a
// broken image), and fclose() must only be given a valid stream resource.
if (false !== $fp) {
    fclose($fp);
}
print_r($http_response_header);
Well, you have to fetch the page, then get all the image tags, and then check each image.
You still need to resolve relative URLs into absolute URLs so you can check them, which isn't very hard. You'll also need to check the server response, but this should get you going.
You still need to resolve relative URLs into absolute URLs so you can check them, which isn't very hard. You'll also need to check the server response, but this should get you going.
Code: Select all
<?php
/**
 * Collect every <img ...> tag found in an HTML document.
 *
 * The search for "<img" is case-insensitive, but the returned tags keep the
 * document's original letter case so that case-sensitive image URLs inside
 * them are not altered.
 *
 * @param string $document Raw HTML markup. Non-string or empty input (such
 *                         as false from a failed download) yields no tags.
 * @return array List of tag strings, each running from "<img" up to and
 *               including the next ">". An unterminated trailing "<img"
 *               is ignored.
 */
function extract_img_tags($document)
{
    $img_tags = array();
    if (!is_string($document) || '' === $document) {
        return $img_tags;
    }

    $pointer = 0;
    while (false !== ($open_pos = stripos($document, '<img', $pointer))) {
        $close_pos = strpos($document, '>', $open_pos);
        if (false === $close_pos) {
            // Unterminated tag: nothing complete is left to extract. Stopping
            // here also prevents the scan position from being reset, which
            // would make the loop find the same tag forever.
            break;
        }
        $close_pos++; // include the closing '>' in the extracted tag
        $img_tags[] = substr($document, $open_pos, $close_pos - $open_pos);
        $pointer = $close_pos;
    }
    return $img_tags;
}
/**
 * Pull the value of the src attribute out of a single <img> tag.
 *
 * The attribute name is matched case-insensitively; the URL itself is
 * returned with its original letter case because URL paths can be
 * case-sensitive. A double-quoted src is looked for first, then a
 * single-quoted one.
 *
 * @param string $tag One tag string, e.g. '<img src="a.jpg">'.
 * @return string|false The attribute value, or false when the tag has no
 *                      quoted src attribute or the closing quote is missing.
 */
function extract_img_url($tag)
{
    if (!is_string($tag)) {
        return false;
    }

    foreach (array('"', "'") as $quote) {
        $start_pos = stripos($tag, 'src=' . $quote);
        if (false === $start_pos) {
            continue;
        }
        // Skip past 'src=' plus the opening quote (5 characters).
        $url_begin = $start_pos + 5;
        $url_end = strpos($tag, $quote, $url_begin);
        if (false === $url_end) {
            // No closing quote: the value cannot be delimited reliably.
            return false;
        }
        return substr($tag, $url_begin, $url_end - $url_begin);
    }
    return false;
}
// Page whose images should be listed.
$page = 'http://cnn.com';

// The warning is suppressed on purpose: a failed download is reported
// explicitly below instead of being passed on as if it were markup.
$document = @file_get_contents($page);

$img_urls = array();
if (false === $document) {
    echo "Could not fetch {$page}\n";
} else {
    foreach (extract_img_tags($document) as $tag) {
        $url = extract_img_url($tag);
        // Skip tags without a usable src so the list holds only real URLs.
        if (false !== $url) {
            $img_urls[] = $url;
        }
    }
}
print_r($img_urls);
?>