Homepage
Demos
Overview
Downloads
Tutorials
Reference
Credits

roboop/search.php

Go to the documentation of this file.
00001 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
00002 <html>
00003   <head>
00004     <meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
00005     <title>Search</title>
00006     <link href="doxygen.css" rel="stylesheet" type="text/css">
00007     <link rel="home" href="../index.html">
00008     <link rel="up" href="../index.html">
00009     <link rel="SHORTCUT ICON" href="favicon.ico">
00010   </head>
00011   <body>
00012     <!-- <img src="aibosmall.jpg" width=92 height=75 align=right> -->
00013     <table cellpadding="1" cellspacing="6" border="0"
00014            style="text-align: left; margin-left: auto; margin-right: auto;">
00015       <tbody>
00016         <tr>
00017           <!-- #Homepage# --> <td style="vertical-align: top;"><a target="_top" href="../../index.html">Homepage</a></td>
00018           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00019           <!-- #Demos# --> <td style="vertical-align: top;"><a target="_top" href="../../Samples.html">Demos</a></td>
00020           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00021           <!-- #Overview# --> <td style="vertical-align: top;"><a target="_top" href="../../Overview.html">Overview</a></td>
00022           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00023           <!-- #Downloads# --> <td style="vertical-align: top;"><a target="_top" href="../../VersionHistory.html">Downloads</a></td>
00024           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00025           <!-- #Tutorials# --> <td style="vertical-align: top;"><a target="_top" href="../../Tutorials.html">Tutorials</a></td>
00026           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00027           <!-- #Reference# --> <td style="vertical-align: top;"><a target="_top" href="../index.html">Reference</a></td>
00028           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"><br></td>
00029           <!-- #Credits# --> <td style="vertical-align: top;"><a target="_top" href="../../Credits.html">Credits</a> </td>
00030         </tr>
00031       </tbody>
00032     </table>
00033 <table style="text-align: left; margin-left: auto; margin-right: auto; width: 700px;" border="0" cellspacing="2" cellpadding="2">
00034   <tbody>
00035     <tr>
00036       <td style="vertical-align: top;">
00037 <!-- Generated by Doxygen 1.4.0 -->
00038 <div class="qindex">  <form class="search" action="search.php" method="get">
00039 <table border=0 class="qindex"><tr><td width="20%" align="center"><a class="qindex" href="main.html">Main&nbsp;Page</a></td>
00040 <td width="0px" bgcolor="000000"></td>
00041 <td valign="top" width="20%" align="left"><b>Classes: </b><center><a class="qindex" href="annotated.html">List</a>, <a class="qindex" href="classes.html">Index</a>, <a class="qindex" href="hierarchy.html">Hierarchy</a>, <a class="qindex" href="functions.html">Members</a></center></td>
00042 <td width="0px" bgcolor="000000"></td>
00043 <td valign="top" width="20%" align="left"><b>Namespaces: </b><center><a class="qindex" href="namespaces.html">List</a>, <a class="qindex" href="namespacemembers.html">Members</a></center></td>
00044 <td width="0px" bgcolor="000000"></td>
00045 <td valign="top" width="20%" align="left"><b>Files: </b><center><a class="qindex" href="files.html">List</a>, <a class="qindex" href="dirs.html">Directories</a>, <a class="qindex" href="globals.html">Members</a></center></td>
00046 <td width="0px" bgcolor="000000"></td>
00047 <td valign="top" width="20%" align="center"><hr><span class="searchHL"><u>S</u>earch&nbsp;
00048 <?php
00049 
/**
 * Heading shown above the list of search hits.
 *
 * @return string the heading text
 */
function search_results()
{
  $heading = 'Search Results';
  return $heading;
}
00054 
/**
 * Builds the sentence that summarises how many documents matched.
 *
 * @param int $num number of matching documents
 * @return string HTML fragment; the count is wrapped in <b> when non-zero
 */
function matches_text($num)
{
  // switch compares loosely, exactly like the == tests it stands in for
  switch ($num)
  {
    case 0:
      return "Sorry, no documents matching your query.";
    case 1:
      return "Found <b>1</b> document matching your query.";
    default: // any other count
      return "Found <b>$num</b> documents matching your query. Showing best matches first.";
  }
}
00070 
/**
 * Label printed in front of the per-document list of matched words.
 *
 * @return string the label text, including its trailing space
 */
function report_matches()
{
  $label = 'Matches: ';
  return $label;
}
00075 
/**
 * Reads the next four bytes of the stream as one integer, most
 * significant byte first.
 *
 * @param resource $file open stream positioned at the integer
 * @return int the combined value of the four bytes
 */
function readInt($file)
{
  $value = 0;
  for ($i = 0; $i < 4; $i++)
  {
    // shift what we have so far up one byte and append the next one
    $value = ($value << 8) | ord(fgetc($file));
  }
  return $value;
}
00082 
/**
 * Reads characters from the stream up to (and consuming) the next
 * character whose ordinal is zero.
 *
 * @param resource $file open stream positioned at the start of the string
 * @return string the characters read, without the terminator
 */
function readString($file)
{
  $text = "";
  for (;;)
  {
    $ch = fgetc($file);
    if (!ord($ch)) break; // terminator reached (ordinal 0)
    $text .= $ch;
  }
  return $text;
}
00089 
/**
 * Reads the next four characters of the stream and returns them as
 * one string.
 *
 * @param resource $file open stream
 * @return string the four characters read
 */
function readHeader($file)
{
  $header = "";
  for ($i = 0; $i < 4; $i++)
  {
    $header .= fgetc($file);
  }
  return $header;
}
00096 
/**
 * Maps the first two characters of a word to a slot number.
 *
 * Uses square-bracket string offsets: the curly-brace form ($word{0})
 * is a fatal error on PHP 8.
 *
 * @param string $word the word to look up
 * @return int $hi*256+$lo for the first two characters, or -1 when the
 *             word is shorter than two characters or either of the
 *             first two characters is NUL
 */
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}
00109 
/**
 * Looks up one query word in the index file and appends an entry to
 * $statsList for every indexed word that starts with it.
 *
 * File layout as read here:
 *  - a table of 4-byte offsets after a 4-byte header, one per slot
 *    returned by computeIndex();
 *  - at that offset, a list of NUL-terminated words, each followed by a
 *    4-byte offset to its statistics, ended by an empty word;
 *  - a statistics record: document count, then per document a 4-byte
 *    document offset and a 4-byte value whose lowest bit flags a high
 *    priority document and whose remaining bits are the occurrence count;
 *  - at a document offset: the document name, then its url, both
 *    NUL-terminated.
 *
 * Each appended entry has the keys "word", "match", "index", "full" and
 * "docs"; each doc has "idx", "freq", "rank", "hi", "name" and "url".
 *
 * @param resource $file       open index file
 * @param string   $word       query word
 * @param array    $statsList  result list, extended in place
 * @return array the (extended) $statsList
 */
function search($file,$word,&$statsList)
{
  $slot = computeIndex($word);
  if ($slot==-1) return $statsList; // word cannot be mapped to a slot

  fseek($file,$slot*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset) return $statsList; // no words for these two characters

  // pass 1: collect the indexed words that start with $word
  $start = sizeof($statsList);
  $count = $start;
  $wordLen = strlen($word);
  fseek($file,$listOffset);
  $w = readString($file);
  // compare against "" explicitly: a plain truth test would also stop
  // on the word "0"
  while ($w!=="")
  {
    $statIdx = readInt($file);
    // strict comparison, so numeric-looking strings are not equated
    if (substr($w,0,$wordLen)===$word)
    { // found word that matches (as substring)
      $statsList[$count++]=array(
          "word"=>$word,
          "match"=>$w,
          "index"=>$statIdx,
          "full"=>strlen($w)==$wordLen,
          "docs"=>array()
          );
    }
    $w = readString($file);
  }
  $end = sizeof($statsList);

  // pass 2: read the per-document statistics of every match
  $totalHi=0;
  $totalFreqHi=0;
  $totalFreqLo=0;
  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    fseek($file,$statsList[$count]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      $docInfo[$i]=array("idx"  => $idx,
                         "freq" => $freq>>1,
                         "rank" => 0.0,
                         "hi"   => $freq&1
                        );
      // NOTE(review): the totals accumulate the raw value (flag bit
      // included) while the per-document rank uses the shifted count;
      // kept as in the original -- confirm this is intended.
      if ($freq&1) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi+=$freq*$multiplier;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo+=$freq*$multiplier;
      }
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$count]["docs"]=$docInfo;
  }

  // pass 3: turn the frequencies into ranks relative to the total
  $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
  if ($totalFreq==0) return $statsList; // nothing to divide by; ranks stay 0.0
  for ($count=$start;$count<$end;$count++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$count]["full"] ? 2 : 1;
    $numDocs = sizeof($statsList[$count]["docs"]);
    for ($i=0;$i<$numDocs;$i++)
    {
      // compute frequency rank of the word in each doc
      $weight = $statsList[$count]["docs"][$i]["freq"]*$multiplier;
      if ($statsList[$count]["docs"][$i]["hi"])
      {
        // high priority docs are lifted above all low priority ones
        $weight += $totalFreqLo;
      }
      $statsList[$count]["docs"][$i]["rank"] = (float)$weight/$totalFreq;
    }
  }
  return $statsList;
}
00207 
/**
 * Merges the per-word results into one entry per document, keyed by
 * the document url.
 *
 * A document hit by several words gets the sum of the individual ranks
 * and one "words" item (keys "word", "match", "freq") per hit.
 *
 * @param array $results list of word entries, each with "word", "match"
 *                       and a "docs" list of arrays holding "url",
 *                       "name", "rank" and "freq"
 * @param array $docs    url => document map, extended in place
 * @return array the (extended) $docs
 */
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key=$di["url"];
      $rank=$di["rank"];
      // direct key lookup instead of scanning array_keys() for every hit
      if (array_key_exists($key,$docs))
      {
        $docs[$key]["rank"]+=$rank;
      }
      else
      {
        $docs[$key] = array("url"=>$key,
            "name"=>$di["name"],
            "rank"=>$rank,
            "words"=>array()
            );
      }
      $docs[$key]["words"][] = array(
               "word"=>$wordInfo["word"],
               "match"=>$wordInfo["match"],
               "freq"=>$di["freq"]
               );
    }
  }
  return $docs;
}
00237 
/**
 * Drops the documents that miss a required word or contain a
 * forbidden one.
 *
 * A document is kept when every entry of $requiredWords and no entry of
 * $forbiddenWords appears as the "word" of one of its "words" items.
 * Iterates with foreach (each() no longer exists on PHP 8) and compares
 * strictly, so numeric-looking words such as "0" and "00" stay distinct.
 *
 * @param array $docs            key => document map; each document has a
 *                               "words" list of arrays with a "word" key
 * @param array $requiredWords   words every kept document must contain
 * @param array $forbiddenWords  words no kept document may contain
 * @return array the kept documents, under their original keys
 */
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  foreach ($docs as $key => $doc)
  {
    // the query words this document was found for
    $docWords=array();
    foreach ($doc["words"] as $wordInfo)
    {
      $docWords[]=$wordInfo["word"];
    }
    $copy=true; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      if (!in_array($reqWord,$docWords,true))
      {
        $copy=false; // document lacks at least one required word
        break;
      }
    }
    if ($copy)
    {
      foreach ($forbiddenWords as $badWord)
      {
        if (in_array($badWord,$docWords,true))
        {
          $copy=false; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}
00277 
/**
 * Comparison callback that orders entries by descending "rank".
 *
 * @param array $a entry with a "rank" key
 * @param array $b entry with a "rank" key
 * @return int 0 when the ranks compare equal, -1 when $a ranks higher
 *             than $b, 1 otherwise
 */
function compare_rank($a,$b)
{
  if ($a["rank"] != $b["rank"])
  {
    // the higher rank sorts first
    if ($a["rank"] > $b["rank"])
    {
      return -1;
    }
    return 1;
  }
  return 0;
}
00286 
/**
 * Produces a copy of $docs ordered with compare_rank().
 *
 * usort() re-indexes the array, so the keys of $docs are not kept.
 *
 * @param array $docs    documents to order; each needs a "rank" key
 * @param array $sorted  receives the ordered list
 * @return array the ordered list (same value as $sorted)
 */
function sort_results($docs,&$sorted)
{
  $ordered = $docs; // sort a copy, leave the input as it is
  usort($ordered,"compare_rank");
  $sorted = $ordered;
  return $sorted;
}
00293 
/**
 * Prints the result list as an HTML table.
 *
 * The table starts with a heading row and a summary row. Every document
 * then gets two rows: its number with a link to the document, and the
 * list of matched words with their frequencies.
 *
 * The first of those two rows is now closed with </tr>; it used to be
 * followed by a second <tr>. The matched words come from the query
 * string and are therefore HTML-escaped. Document names and urls are
 * printed as stored.
 *
 * @param array $docs documents to print, in display order; each has
 *                    "url", "name" and a "words" list of arrays with
 *                    "word", "match" and "freq"
 * @return void
 */
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo "  <tr>\n";
  echo "    <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo "  </tr>\n";
  $numDocs = sizeof($docs);
  if ($numDocs==0)
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo "  </tr>\n";
  }
  else
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo "    </td>\n";
    echo "  </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      echo "  <tr>\n";
      echo "    <td align=\"right\">$num.</td>";
      echo     "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      echo "  </tr>\n"; // close the link row before opening the next one
      echo "  <tr>\n";
      echo "    <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        $word = $wordInfo["word"];
        // the part of the indexed word that follows the typed prefix
        $matchRight = substr($wordInfo["match"],strlen($word));
        // the page is served as iso-8859-1, so escape in that charset
        $safeWord = htmlspecialchars($word,ENT_QUOTES,'ISO-8859-1');
        $safeRight = htmlspecialchars((string)$matchRight,ENT_QUOTES,'ISO-8859-1');
        echo "<b>$safeWord</b>$safeRight(".$wordInfo["freq"].") ";
      }
      echo "    </td>\n";
      echo "  </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}
00335 
/**
 * Entry point of the search page.
 *
 * Opens search.idx, finishes the search form that the surrounding HTML
 * started (input field plus closing tags), then looks up every word of
 * the "query" GET parameter, filters and ranks the documents and prints
 * them. A leading "+" marks a word as required, a leading "-" as
 * forbidden.
 *
 * Terminates the script with an error message when the PHP version is
 * too old, the index file cannot be opened, or its header is not "DOXS".
 *
 * @return void
 */
function main()
{
  // version_compare() compares numerically; strcmp() would rate
  // "10.0.0" below "4.1.0"
  if (version_compare(phpversion(),'4.1.0','<'))
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  // ignore non-string input such as query[]=x
  if (array_key_exists("query", $_GET) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  // the query is user input: escape it before putting it into the
  // attribute (the page is served as iso-8859-1)
  $safeQuery=htmlspecialchars($query,ENT_QUOTES,'ISO-8859-1');
  echo "<input class=\"search\" type=\"text\" name=\"query\" value=\"$safeQuery\" size=\"10\" accesskey=\"s\"/>\n";
  echo "</span></td></tr></table>\n";
  echo "</form>\n";
  echo "</div>\n";
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  // compare against false explicitly: a plain truth test would stop
  // at the token "0"
  while ($word!==false) // for each word in the search query
  {
    // strtok() never yields an empty token, so $word[0] exists here
    if ($word[0]=='+')
    {
      $word=(string)substr($word,1);
      if ($word!=="") $requiredWords[]=$word;
    }
    if ($word!=="" && $word[0]=='-')
    {
      $word=(string)substr($word,1);
      if ($word!=="") $forbiddenWords[]=$word;
    }
    // a bare "+" or "-" leaves nothing to search for
    if ($word!=="" && !in_array($word,$foundWords,true))
    {
      $foundWords[]=$word;
      search($file,$word,$results);
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  fclose($file);
}
00387 
00388 main();
00389 
00390 
00391 ?>
00392 </td></tr></tbody></table>
00393 
00394   <br>
00395   <table cellpadding="2" cellspacing="2" border="0" style="text-align: left; width: 100%; color: rgb(0, 0, 0);">
00396     <tbody>
00397       <tr>
00398         <td style="vertical-align: top;"><small>
00399             <b><a href="http://www.cours.polymtl.ca/roboop/">ROBOOP</a> v1.21a<br></b>
00400           </small>
00401         </td>
00402         <td style="vertical-align: top; text-align: right; font-style: italic;">
00403           <small>
00404             Generated Sat Jan 29 02:25:11 2005 by <a href="http://www.doxygen.org/">Doxygen</a> 1.4.0
00405           </small>
00406           <script type="text/javascript" language="javascript">
00407             <!--
00408             s="na";c="na";j="na";f=""+escape(document.referrer)
00409             //-->
00410           </script>
00411           <script type="text/javascript" language="javascript1.2">
00412             <!--
00413             s=screen.width;v=navigator.appName
00414             if (v != "Netscape") {c=screen.colorDepth}
00415             else {c=screen.pixelDepth}
00416             j=navigator.javaEnabled()
00417             //-->
00418           </script>
00419           <script type="text/javascript" language="javascript">
00420             <!--
00421             function pr(n) {document.write(n,"\n");}
00422             NS2Ch=0
00423             if (navigator.appName == "Netscape" &&
00424             navigator.appVersion.charAt(0) == "2") {NS2Ch=1}
00425             if (NS2Ch == 0) {
00426             r="size="+s+"&colors="+c+"&referer="+f+"&java="+j+"&stamp="+(new Date()).getTime()+""
00427             pr("<IMG BORDER=0 width=16 height=16 align=\"middle\" SRC=\"http://aibo2.boltz.cs.cmu.edu/head.gif?"+r+"\">")}
00428             //-->
00429           </script> 
00430           
00431           <noscript>
00432             <img src="http://aibo2.boltz.cs.cmu.edu/head.gif" border="0" width=16 height=16 align="middle">
00433           </noscript>
00434         </td>
00435       </tr>
00436     </tbody>
00437   </table>
00438 </body>
00439 </html>

Tekkotsu v2.3
Generated Sat Jan 29 02:25:23 2005 by Doxygen 1.4.0