SQL command to search for full file name on the web
Posted: Sat Oct 18, 2008 4:48 am
Hello all,
I am building a search engine for my college project.
I based my code on Daniel Solin's examples:
http://www.onlamp.com/pub/a/php/2002/10 ... tml?page=3
It is a good example, but I am wondering how to make it more useful by indexing whole tokens, such as complete file names.
The current code populates the index one word at a time.
E.g. it stores the keyword "image" instead of "image.jpg".
1.) How do I tweak the SQL command so that it stores both the bare keyword and the keyword with its file extension?
The search is also not effective for partial terms: when I search for "buil", "building" does not appear.
2.) How do I tweak the SQL command so that a search for "buil" also matches "building"?
Thanks. I am a real beginner here.
Here is my populate.php code:
I am building a search engine for my college project.
I based my code on Daniel Solin's examples:
http://www.onlamp.com/pub/a/php/2002/10 ... tml?page=3
It is a good example, but I am wondering how to make it more useful by indexing whole tokens, such as complete file names.
The current code populates the index one word at a time.
E.g. it stores the keyword "image" instead of "image.jpg".
1.) How do I tweak the SQL command so that it stores both the bare keyword and the keyword with its file extension?
The search is also not effective for partial terms: when I search for "buil", "building" does not appear.
2.) How do I tweak the SQL command so that a search for "buil" also matches "building"?
Thanks. I am a real beginner here.
Here is my populate.php code:
Code: Select all
<?php
/*
 * populate.php
 *
 * Script for populating the search database with words,
 * pages and word-occurrences.
 *
 * Flow:
 *   1. connect to MySQL and empty the page/word/occurrence tables,
 *   2. read the URL to index from the POST field "url",
 *   3. fetch that URL line by line, strip markup, split it into words,
 *   4. store every word (once) in `word` and every hit in `occurrence`.
 *
 * NOTE: this script uses the legacy mysql_* extension, which was removed
 * in PHP 7. It is kept here so the script still matches the rest of the
 * project; new code should use mysqli or PDO with prepared statements.
 */
/* populating eg.: http://localhost/fyp/populate.php?url=h ... chedet.com */
/* NOTE: the URL is read from $_POST['url'], so it has to be submitted by a
 * form using method="post"; a plain ?url=... query string is not picked up. */

/* Connect to the database: */
mysql_pconnect("localhost","admin","password")
    or die("ERROR: Could not connect to database!");
mysql_select_db("fyp")
    or die("ERROR: Could not select database!");

// clear the previous saved session
mysql_query(" TRUNCATE page; ");
mysql_query(" TRUNCATE word; ");
mysql_query(" TRUNCATE occurrence; ");

/* Shown whenever the URL is missing or cannot be fetched. */
$open_error = "<center> <br>Could not open URL! <br><br> <a href=\"javascript:history.back()\">Back</a></center> <br><br><br>";

/* Define the URL that should be processed: */
$url = isset($_POST['url']) ? trim($_POST['url']) : '';
if( $url === '' )
{
    die( $open_error );
}
/* Add a scheme only when none is present; https:// URLs are left alone
 * (previously they were turned into "http://https://..."). */
if( !preg_match('#^https?://#i', $url) )
{
    $url = "http://$url";
}

/* Escape for SQL separately: the raw $url is what gets fetched, the
 * escaped copy is only ever used inside query strings. */
$url_sql = mysql_real_escape_string($url);

/* Does this URL already have a record in the page-table? */
$result = mysql_query("SELECT page_id FROM page WHERE page_url = '$url_sql'");
$row = $result ? mysql_fetch_array($result) : false;
if( $row && $row['page_id'] )
{
    /* If yes, use the old page_id: */
    $page_id = (int) $row['page_id'];
}
else
{
    /* If not, create one: */
    mysql_query("INSERT INTO page (page_url) VALUES ('$url_sql')");
    $page_id = (int) mysql_insert_id();
}

/* Start parsing through the text, and build an index in the database: */
if( !($fd = fopen($url,"r")) )
{
    die( $open_error );
}

/* Compare against false explicitly so a line containing only "0" does not
 * end the loop early. */
while( ($buf = fgets($fd,1024)) !== false )
{
    /* Remove whitespace from beginning and end of string: */
    $buf = trim($buf);
    /* Try to remove all HTML-tags: */
    $buf = strip_tags($buf);
    /* Drop HTML entities such as &amp; or &#160;. They are replaced by a
     * space so the words on either side are not glued together. (The old
     * ereg_replace() call was given a PCRE-style pattern and never matched.) */
    $buf = preg_replace('/&#?\w+;/', ' ', $buf);

    /* Extract all words matching the regexp from the current line: */
    preg_match_all("/(\b[\w+]+\b)/",$buf,$words);

    /* Loop through all words and insert them into the database.
     * Only $words[0] (the full matches) is walked: $words[1] holds the same
     * strings again for the capture group, and iterating both indexed every
     * word twice. foreach also handles the token "0" correctly. */
    foreach( $words[0] as $word )
    {
        $cur_word = strtolower($word);
        $word_sql = mysql_real_escape_string($cur_word);

        /* Does the current word already have a record in the word-table? */
        $result = mysql_query("SELECT word_id FROM word
            WHERE word_word = '$word_sql'");
        $row = $result ? mysql_fetch_array($result) : false;
        if( $row && $row['word_id'] )
        {
            /* If yes, use the old word_id: */
            $word_id = (int) $row['word_id'];
        }
        else
        {
            /* If not, create one: */
            mysql_query("INSERT INTO word (word_word) VALUES ('$word_sql')");
            $word_id = (int) mysql_insert_id();
        }

        /* And finally, register the occurrence of the word: */
        mysql_query("INSERT INTO occurrence (word_id,page_id)
            VALUES ($word_id,$page_id)");

        /* $cur_word only contains characters matched by [\w+], so it cannot
         * carry HTML markup into the page. */
        print "Indexing: $cur_word<br>";
    }
}
fclose($fd);
?>
<!-- <script language=javascript>
setTimeout("location.href='http://localhost/fyp/search.php'",2);
</script> -->
<?php
/*
 * Once indexing is done, emit a small script that sends the browser on to
 * a search page. Which page is chosen depends on which POST field is set
 * (presumably the names of the two submit buttons on the calling form --
 * confirm against that form). If both fields are present, both scripts are
 * printed, in the order listed below.
 */
$redirect_for = array(
    'submit1' => 'search.php',
    'submit2' => 'search2.php',
);
foreach( $redirect_for as $button => $page )
{
    if( !isset($_POST[$button]) )
    {
        continue;
    }
    /* setTimeout's second argument is the delay in milliseconds. */
    print "\n<script language=javascript>\n"
        . "setTimeout(\"location.href='http://localhost/fyp/" . $page . "'\",2);\n"
        . "</script>\n";
}