Homepage
Demos
Overview
Downloads
Tutorials
Reference
Credits

newmat/search.php

Go to the documentation of this file.
00001 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
00002 <html>
00003   <head>
00004     <meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1">
00005     <title>Search</title>
00006     <link href="doxygen.css" rel="stylesheet" type="text/css">
00007     <link rel="home" href="../index.html">
00008     <link rel="up" href="../index.html">
00009     <link rel="SHORTCUT ICON" href="favicon.ico">
00010   </head>
00011   <body>
00012     <!-- <img src="aibosmall.jpg" width=92 height=75 align=right> -->
00013     <table cellpadding="1" cellspacing="6" border="0"
00014            style="text-align: left; margin-left: auto; margin-right: auto;">
00015       <tbody>
00016         <tr>
00017           <!-- #Homepage# --> <td style="vertical-align: top;"><a target="_top" href="../../index.html">Homepage</a></td>
00018           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00019           <!-- #Demos# --> <td style="vertical-align: top;"><a target="_top" href="../../Samples.html">Demos</a></td>
00020           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00021           <!-- #Overview# --> <td style="vertical-align: top;"><a target="_top" href="../../Overview.html">Overview</a></td>
00022           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00023           <!-- #Downloads# --> <td style="vertical-align: top;"><a target="_top" href="../../VersionHistory.html">Downloads</a></td>
00024           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00025           <!-- #Tutorials# --> <td style="vertical-align: top;"><a target="_top" href="../../Tutorials.html">Tutorials</a></td>
00026           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"></td>
00027           <!-- #Reference# --> <td style="vertical-align: top;"><a target="_top" href="../index.html">Reference</a></td>
00028           <!-- #Bar# --> <td style="vertical-align: top; background-color: rgb(0, 0, 0);"><br></td>
00029           <!-- #Credits# --> <td style="vertical-align: top;"><a target="_top" href="../../Credits.html">Credits</a> </td>
00030         </tr>
00031       </tbody>
00032     </table>
00033 <table style="text-align: left; margin-left: auto; margin-right: auto; width: 700px;" border="0" cellspacing="2" cellpadding="2">
00034   <tbody>
00035     <tr>
00036       <td style="vertical-align: top;">
00037 <!-- Generated by Doxygen 1.4.4 -->
00038 <div class="qindex">  <form class="search" action="http://cvs.tekkotsu.org/search.php" method="get">
00039 <table border=0 class="qindex"><tr><td width="20%" align="center"><a class="qindex" href="main.html">Main&nbsp;Page</a></td>
00040 <td width="0px" bgcolor="000000"></td>
00041 <td valign="top" width="20%" align="left"><b>Classes: </b><center><a class="qindex" href="annotated.html">List</a>, <a class="qindex" href="classes.html">Index</a>, <a class="qindex" href="hierarchy.html">Hierarchy</a>, <a class="qindex" href="functions.html">Members</a></center></td>
00042 <td width="0px" bgcolor="000000"></td>
00043 <td valign="top" width="20%" align="left"><b>Namespaces: </b><center><a class="qindex" href="namespaces.html">List</a>, <a class="qindex" href="namespacemembers.html">Members</a></center></td>
00044 <td width="0px" bgcolor="000000"></td>
00045 <td valign="top" width="20%" align="left"><b>Files: </b><center><a class="qindex" href="files.html">List</a>, <a class="qindex" href="dirs.html">Directories</a>, <a class="qindex" href="globals.html">Members</a></center></td>
00046 <td width="0px" bgcolor="000000"></td>
00047 <td valign="top" width="20%" align="center"><hr><span class="searchHL"><u>S</u>earch&nbsp;
00048 <?php
00049 
// Heading shown at the top of the results table.
// Returns the heading as plain text (the caller wraps it in markup).
function search_results()
{
  $heading = "Search Results";
  return $heading;
}
00054 
// Builds the one-line summary of how many documents matched.
//   $num - number of matching documents.
// Returns an HTML fragment: an apology for zero matches, a singular
// sentence for exactly one, and for every other count a plural sentence
// that also mentions the ordering of the list.
function matches_text($num)
{
  if ($num==0) return "Sorry, no documents matching your query.";
  if ($num==1) return "Found <b>1</b> document matching your query.";
  // any remaining count uses the plural wording
  return "Found <b>".$num."</b> documents matching your query. Showing best matches first.";
}
00070 
// Label printed in front of the matched words of each listed document.
// Returns the label text, including its trailing space.
function report_matches()
{
  $label = "Matches: ";
  return $label;
}
00075 
// Reads four consecutive bytes from $file and combines them into one
// integer, with the first byte read as the most significant.
//   $file - open file handle positioned at the integer.
// Returns the combined value; the handle is left just past the fourth byte.
function readInt($file)
{
  $value = 0;
  for ($n=0; $n<4; $n++)
  {
    // make room for the next byte, then append it at the low end
    $value = ($value<<8) | ord(fgetc($file));
  }
  return $value;
}
00082 
// Reads characters from $file up to a terminating zero byte.
//   $file - open file handle positioned at the start of the string.
// Returns the characters read, without the terminator; reading stops at
// the first character whose ord() is 0.
function readString($file)
{
  $text = "";
  for (;;)
  {
    $ch = fgetc($file);
    if (!ord($ch)) break; // terminator reached
    $text .= $ch;
  }
  return $text;
}
00089 
// Reads the four-character signature at the current position of $file.
//   $file - open file handle.
// Returns the four characters as one string (main() compares it to "DOXS").
function readHeader($file)
{
  $signature = "";
  for ($n=0; $n<4; $n++)
  {
    $signature .= fgetc($file);
  }
  return $signature;
}
00096 
// Maps a word to its slot number in the index's lookup table.
// Simple hashing that allows for substring search: only the first two
// bytes of the word are used, so all words sharing a two-character prefix
// map to the same slot.
//   $word - the word to hash.
// Returns first byte * 256 + second byte, or -1 when the word is shorter
// than two bytes or one of its first two bytes is zero.
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  // (square brackets: the curly-brace offset syntax no longer parses on PHP 8)
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}
00120 
// Looks up every indexed word that starts with $word and appends one entry
// per matching word to $statsList.
//   $file      - open handle on the search index.
//   $word      - the query word (used as a prefix).
//   $statsList - result list, extended in place. Each new entry holds
//                "word" (the query word), "match" (the indexed word),
//                "index" (file offset of its statistics), "full" (true when
//                the indexed word equals the query word) and "docs" (one
//                record per document: idx, freq, rank, hi, name, url).
// Returns $statsList.
function search($file,$word,&$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // word cannot be hashed

  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset) return $statsList; // no words stored under this hash key

  $start = count($statsList);
  $count = $start;
  $wordLen = strlen($word);

  // Walk the word list of this slot; an empty string ends the list.
  fseek($file,$listOffset);
  for ($w=readString($file); $w!==""; $w=readString($file))
  {
    $statIdx = readInt($file);
    // Byte-wise prefix test. A loose == would treat numeric-looking strings
    // such as "10e0" and "10.0" as equal.
    if (strncmp($w,$word,$wordLen)===0)
    { // found word that matches (as substring)
      $statsList[$count++]=array(
          "word"=>$word,
          "match"=>$w,
          "index"=>$statIdx,
          "full"=>strlen($w)==$wordLen,
          "docs"=>array()
          );
    }
  }

  // Pass 1: read the per-document statistics of every new entry and
  // accumulate the totals used to normalise the ranks.
  $totalHi=0;
  $totalFreqHi=0;
  $totalFreqLo=0;
  $end = count($statsList);
  for ($n=$start;$n<$end;$n++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$n]["full"] ? 2 : 1;
    fseek($file,$statsList[$n]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      // lowest bit of the stored value is the priority flag, the rest is
      // the occurrence count
      $docInfo[$i]=array("idx"  => $idx,
                         "freq" => $freq>>1,
                         "rank" => 0.0,
                         "hi"   => $freq&1
                        );
      if ($freq&1) // word occurs in high priority doc
      {
        $totalHi++;
        $totalFreqHi+=$freq*$multiplier;
      }
      else // word occurs in low priority doc
      {
        $totalFreqLo+=$freq*$multiplier;
      }
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$n]["docs"]=$docInfo;
  }

  // Pass 2: turn the frequencies into ranks relative to the totals.
  $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
  for ($n=$start;$n<$end;$n++)
  {
    // whole word matches have a double weight
    $multiplier = $statsList[$n]["full"] ? 2 : 1;
    $numDocs = count($statsList[$n]["docs"]);
    for ($i=0;$i<$numDocs;$i++)
    {
      // compute frequency rank of the word in each doc
      $weighted = $statsList[$n]["docs"][$i]["freq"]*$multiplier;
      if ($statsList[$n]["docs"][$i]["hi"])
      {
        // high priority docs are lifted above all low priority ones
        $weighted += $totalFreqLo;
      }
      // a zero total (all frequencies zero) must not divide by zero
      $statsList[$n]["docs"][$i]["rank"] =
        ($totalFreq!=0) ? (float)$weighted/$totalFreq : 0.0;
    }
  }
  return $statsList;
}
00218 
// Merges the per-word search results into one record per document.
//   $results - list of word entries as produced by search(); each has
//              "word", "match" and a "docs" list whose items carry
//              "url", "name", "rank" and "freq".
//   $docs    - map keyed by document url, extended in place. Each record
//              holds "url", "name", the summed "rank" of all words found in
//              that document, and "words" (list of word/match/freq).
// Returns $docs.
function combine_results($results,&$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key=$di["url"];
      // direct key lookup instead of scanning array_keys() for every
      // document (which was linear per lookup and compared loosely)
      if (isset($docs[$key]))
      {
        $docs[$key]["rank"]+=$di["rank"];
      }
      else
      {
        $docs[$key] = array("url"=>$key,
            "name"=>$di["name"],
            "rank"=>$di["rank"],
            "words"=>array()
            );
      }
      $docs[$key]["words"][] = array(
               "word"=>$wordInfo["word"],
               "match"=>$wordInfo["match"],
               "freq"=>$di["freq"]
               );
    }
  }
  return $docs;
}
00248 
// Drops documents that miss a required word or contain a forbidden one.
//   $docs           - map of document records, each with a "words" list
//                     whose items carry the query word under "word".
//   $requiredWords  - query words every kept document must contain.
//   $forbiddenWords - query words no kept document may contain.
// Returns a new map with the surviving records under their original keys.
function filter_results($docs,&$requiredWords,&$forbiddenWords)
{
  $filteredDocs=array();
  // foreach instead of each(): each() was removed in PHP 8
  foreach ($docs as $key => $doc)
  {
    $words = $doc["words"];
    $copy = true; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      $found = false;
      foreach ($words as $wordInfo)
      {
        // strict comparison so numeric-looking words are not conflated
        if ($wordInfo["word"]===$reqWord)
        {
          $found = true;
          break;
        }
      }
      if (!$found)
      {
        $copy = false; // document lacks at least one required word
        break;
      }
    }
    if ($copy)
    {
      foreach ($words as $wordInfo)
      {
        if (in_array($wordInfo["word"],$forbiddenWords,true))
        {
          $copy = false; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key]=$doc;
  }
  return $filteredDocs;
}
00288 
// Comparison callback that orders document records by descending "rank".
//   $a, $b - records with a "rank" entry.
// Returns 0 for equal ranks, -1 when $a ranks higher, 1 otherwise.
function compare_rank($a,$b)
{
  $left  = $a["rank"];
  $right = $b["rank"];
  if ($left == $right) return 0;
  if ($left > $right) return -1; // higher rank sorts first
  return 1;
}
00297 
// Orders the document records with compare_rank (best rank first).
//   $docs   - records to sort; the argument itself is left unchanged.
//   $sorted - receives the sorted list, re-indexed from 0 by usort().
// Returns the same sorted list.
function sort_results($docs,&$sorted)
{
  $ranked = $docs;
  usort($ranked,"compare_rank");
  $sorted = $ranked;
  return $sorted;
}
00304 
// Prints the result table: a heading, a summary line, and two rows per
// document (numbered link, then the list of matched words with their
// frequencies).
//   $docs - list of document records with "url", "name" and "words"
//           (items carrying "word", "match" and "freq").
// Output is written with echo; nothing is returned.
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo "  <tr>\n";
  echo "    <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo "  </tr>\n";
  $numDocs = count($docs);
  if ($numDocs==0)
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo "  </tr>\n";
  }
  else
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo "    </td>\n";
    echo "  </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      // NOTE(review): url and name come from the index file and are printed
      // as stored; confirm the index already holds HTML-safe text.
      echo "  <tr>\n";
      echo "    <td align=\"right\">$num.</td>";
      echo     "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      echo "  </tr>\n"; // close the link row before opening the matches row
      echo "  <tr>\n";
      echo "    <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        // The word originates from the user's query, so escape it; the
        // remainder of the indexed word is escaped the same way.
        $word = $wordInfo["word"];
        $matchLeft  = htmlspecialchars($word,ENT_QUOTES,'ISO-8859-1');
        $matchRight = htmlspecialchars(
            (string)substr($wordInfo["match"],strlen($word)),
            ENT_QUOTES,'ISO-8859-1');
        echo "<b>$matchLeft</b>$matchRight(".$wordInfo["freq"].") ";
      }
      echo "    </td>\n";
      echo "  </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}
00346 
00347 function main()
00348 {
00349   if(strcmp('4.1.0', phpversion()) > 0) 
00350   {
00351     die("Error: PHP version 4.1.0 or above required!");
00352   }
00353   if (!($file=fopen("search.idx","rb"))) 
00354   {
00355     die("Error: Search index file could NOT be opened!");
00356   }
00357   if (readHeader($file)!="DOXS")
00358   {
00359     die("Error: Header of index file is invalid!");
00360   }
00361   $query="";
00362   if (array_key_exists("query", $_GET))
00363   {
00364     $query=$_GET["query"];
00365   }
00366   echo "<input class=\"search\" type=\"text\" name=\"query\" value=\"$query\" size=\"10\" accesskey=\"s\"/>\n";
00367   echo "</span></td></tr></table>\n";
00368   echo "</form>\n";
00369   echo "</div>\n";
00370   echo "<div class=\"searchresults\">\n";
00371   $results = array();
00372   $requiredWords = array();
00373   $forbiddenWords = array();
00374   $foundWords = array();
00375   $word=strtok($query," ");
00376   while ($word) // for each word in the search query
00377   {
00378     if (($word{0}=='+')) { $word=substr($word,1); $requiredWords[]=$word; }
00379     if (($word{0}=='-')) { $word=substr($word,1); $forbiddenWords[]=$word; }
00380     if (!in_array($word,$foundWords))
00381     {
00382       $foundWords[]=$word;
00383       search($file,$word,$results);
00384     }
00385     $word=strtok(" ");
00386   }
00387   $docs = array();
00388   combine_results($results,$docs);
00389   // filter out documents with forbidden word or that do not contain
00390   // required words
00391   $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
00392   // sort the results based on rank
00393   $sorted = array();
00394   sort_results($filteredDocs,$sorted);
00395   // report results to the user
00396   report_results($sorted);
00397   echo "</div>\n";
00398   fclose($file);
00399 }
00400 
00401 main();
00402 
00403 
00404 ?>
00405 </td></tr></tbody></table>
00406 
00407   <br>
00408   <table cellpadding="2" cellspacing="2" border="0" style="text-align: left; width: 100%; color: rgb(0, 0, 0);">
00409     <tbody>
00410       <tr>
00411         <td style="vertical-align: top;"><small>
00412             <b><a href="http://www.robertnz.net/nm_intro.htm">newmat11b</a><br></b>
00413           </small>
00414         </td>
00415         <td style="vertical-align: top; text-align: right; font-style: italic;">
00416           <small>
00417             Generated Wed Aug 10 11:04:04 2005 by <a href="http://www.doxygen.org/">Doxygen</a> 1.4.4
00418           </small>
00419           <script type="text/javascript" language="javascript">
00420             <!--
00421             s="na";c="na";j="na";f=""+escape(document.referrer)
00422             //-->
00423           </script>
00424           <script type="text/javascript" language="javascript1.2">
00425             <!--
00426             s=screen.width;v=navigator.appName
00427             if (v != "Netscape") {c=screen.colorDepth}
00428             else {c=screen.pixelDepth}
00429             j=navigator.javaEnabled()
00430             //-->
00431           </script>
00432           <script type="text/javascript" language="javascript">
00433             <!--
00434             function pr(n) {document.write(n,"\n");}
00435             NS2Ch=0
00436             if (navigator.appName == "Netscape" &&
00437             navigator.appVersion.charAt(0) == "2") {NS2Ch=1}
00438             if (NS2Ch == 0) {
00439             r="size="+s+"&colors="+c+"&referer="+f+"&java="+j+"&stamp="+(new Date()).getTime()+""
00440             pr("<IMG BORDER=0 width=16 height=16 align=\"middle\" SRC=\"http://aibo2.boltz.cs.cmu.edu/head.gif?"+r+"\">")}
00441             //-->
00442           </script> 
00443           
00444           <noscript>
00445             <img src="http://aibo2.boltz.cs.cmu.edu/head.gif" border="0" width=16 height=16 align="middle">
00446           </noscript>
00447         </td>
00448       </tr>
00449     </tbody>
00450   </table>
00451 </body>
00452 </html>

Tekkotsu v2.4
Generated Wed Aug 10 11:04:39 2005 by Doxygen 1.4.4