Tekkotsu Homepage
Demos
Overview
Downloads
Dev. Resources
Reference
Credits

sift.cc

Go to the documentation of this file.
00001 // file:        sift.cpp
00002 // author:      Andrea Vedaldi
00003 // description: Sift definition
00004 
00005 // AUTORIGHTS
00006 // Copyright (c) 2006 The Regents of the University of California
00007 // All Rights Reserved.
00008 // 
00009 // Created by Andrea Vedaldi (UCLA VisionLab)
00010 // 
00011 // Permission to use, copy, modify, and distribute this software and its
00012 // documentation for educational, research and non-profit purposes,
00013 // without fee, and without a written agreement is hereby granted,
00014 // provided that the above copyright notice, this paragraph and the
00015 // following three paragraphs appear in all copies.
00016 // 
00017 // This software program and documentation are copyrighted by The Regents
00018 // of the University of California. The software program and
00019 // documentation are supplied "as is", without any accompanying services
00020 // from The Regents. The Regents does not warrant that the operation of
00021 // the program will be uninterrupted or error-free. The end-user
00022 // understands that the program was developed for research purposes and
00023 // is advised not to rely exclusively on the program for any reason.
00024 // 
00025 // This software embodies a method for which the following patent has
00026 // been issued: "Method and apparatus for identifying scale invariant
00027 // features in an image and use of same for locating an object in an
00028 // image," David G. Lowe, US Patent 6,711,293 (March 23,
00029 // 2004). Provisional application filed March 8, 1999. Asignee: The
00030 // University of British Columbia.
00031 // 
00032 // IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
00033 // FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
00034 // INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND
00035 // ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN
00036 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF
00037 // CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
00038 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00039 // A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
00040 // BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE
00041 // MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
00042 
00043 #include "sift.hpp"
00044 #include "sift-conv.tpp"
00045 
00046 #include<algorithm>
00047 #include<iostream>
00048 #include<sstream>
00049 #include<cassert>
00050 #include<cstring>
00051 
00052 using namespace VL ;
00053 
00054 // on startup, pre-compute expn(x) = exp(-x)
00055 namespace VL { 
00056 namespace Detail {
00057 
00058 int const         expnTableSize = 256 ;
00059 VL::float_t const expnTableMax  = VL::float_t(25.0) ;
00060 VL::float_t       expnTable [ expnTableSize + 1 ] ;
00061 
00062 struct buildExpnTable
00063 {
00064   buildExpnTable() {
00065     for(int k = 0 ; k < expnTableSize + 1 ; ++k) {
00066     expnTable[k] = std::exp( - VL::float_t(k) / expnTableSize * expnTableMax ) ;
00067     }
00068   }
00069 } _buildExpnTable ;
00070 
00071 } }
00072 
00073 
00074 namespace VL {
00075 namespace Detail {
00076 
00077 /** Comment eater istream manipulator */
00078 class _cmnt {} cmnt ;
00079 
00080 /** @brief Extract a comment from a stream
00081  **
00082  ** The function extracts a block of consecutive comments from an
00083  ** input stream. A comment is a sequence of whitespaces, followed by
00084  ** a `#' character, other characters and terminated at the next line
00085  ** ending. A block of comments is just a sequence of comments.
00086  **/
00087 std::istream& 
00088 operator>>(std::istream& is, _cmnt& manip)
00089 {
00090   char c ;
00091   char b [1024] ; 
00092   is>>c ;
00093   if( c != '#' ) 
00094     return is.putback(c) ;
00095   is.getline(b,1024) ;
00096   return is ;
00097 }
00098 
00099 }
00100 
00101 /** @brief Insert PGM file into stream
00102  **
00103  ** The function iserts into the stream @a os the grayscale image @a
00104  ** im encoded as a PGM file. The immage is assumed to be normalized
00105  ** in the range 0.0 - 1.0.
00106  **
00107  ** @param os output stream.
00108  ** @param im pointer to image data.
00109  ** @param width image width.
00110  ** @param height image height.
00111  ** @return the stream @a os.
00112  **/
00113 std::ostream& 
00114 insertPgm(std::ostream& os, pixel_t const* im, int width, int height)
00115 {
00116   os<< "P5"   << "\n"
00117     << width  << " "
00118     << height << "\n"
00119     << "255"  << "\n" ;
00120   for(int y = 0 ; y < height ; ++y) {
00121     for(int x = 0 ; x < width ; ++x) {
00122       unsigned char v = 
00123         (unsigned char)
00124         (std::max(std::min(*im++, 1.0f),0.f) * 255.0f) ;
00125       os << v ;
00126     }
00127   }
00128   return os ;
00129 }
00130 
00131 /** Start of code added by Xinghao Pan on 14 Apr, 2008 **/
00132 void
00133 createPgmBufferFromArray(int w, int h, pixel_t* d, PgmBuffer& buffer){
00134   buffer.width = w;
00135   buffer.height = h;
00136   buffer.data = new pixel_t[w * h];
00137   
00138   for (int i = 0; i < w*h; i++){
00139     buffer.data[i] = d[i];
00140   }
00141 }
00142 /** End of code added by Xinghao Pan on 14 Apr, 2008 **/
00143 
00144 /** @brief Extract PGM file from stream.
00145  **
00146  ** The function extracts from the stream @a in a grayscale image
00147  ** encoded as a PGM file. The function fills the structure @a buffer,
00148  ** containing the image dimensions and a pointer to the image data.
00149  **
00150  ** The image data is an array of floats and is owned by the caller,
00151  ** which should erase it as in
00152  ** 
00153  ** @code
00154  **   delete [] buffer.data.
00155  ** @endcode
00156  **
00157  ** When the function encouters an error it throws a generic instance
00158  ** of VL::Exception.
00159  **
00160  ** @param in input stream.
00161  ** @param buffer buffer descriptor to be filled.
00162  ** @return the stream @a in.
00163  **/
00164 std::istream& 
00165 extractPgm(std::istream& in, PgmBuffer& buffer)
00166 {
00167   pixel_t* im_pt ;
00168   int      width ;
00169   int      height ;
00170   int      maxval ;
00171 
00172   char c ;
00173   in>>c ;
00174   if( c != 'P') VL_THROW("File is not in PGM format") ;
00175   
00176   bool is_ascii ;
00177   in>>c ;
00178   switch( c ) {
00179   case '2' : is_ascii = true ; break ;
00180   case '5' : is_ascii = false ; break ;
00181   default  : VL_THROW("File is not in PGM format") ;
00182   }
00183   
00184   in >> Detail::cmnt
00185      >> width
00186      >> Detail::cmnt 
00187      >> height
00188      >> Detail::cmnt
00189      >> maxval ;
00190 
00191   // after maxval no more comments, just a whitespace or newline
00192   {char trash ; in.get(trash) ;}
00193 
00194   if(maxval > 255)
00195     VL_THROW("Only <= 8-bit per channel PGM files are supported") ;
00196 
00197   if(! in.good()) 
00198     VL_THROW("PGM header parsing error") ;
00199   
00200   im_pt = new pixel_t [ width*height ];
00201   
00202   try {
00203     if( is_ascii ) {
00204       pixel_t* start = im_pt ;
00205       pixel_t* end   = start + width*height ; 
00206       pixel_t  norm  = pixel_t( maxval ) ;
00207       
00208       while( start != end ) {        
00209         int i ;
00210         in >> i ; 
00211         if( ! in.good() ) VL_THROW
00212                             ("PGM parsing error file (width="<<width
00213                              <<" height="<<height
00214                              <<" maxval="<<maxval
00215                              <<" at pixel="<<start-im_pt<<")") ;    
00216         *start++ = pixel_t( i ) / norm ;        
00217       }
00218     } else {
00219       std::streampos beg = in.tellg() ;
00220       char* newBuffer = new char [width*height] ;
00221       in.read(newBuffer, width*height) ;
00222       if( ! in.good() ) VL_THROW
00223         ("PGM parsing error file (width="<<width
00224          <<" height="<<height
00225          <<" maxval="<<maxval
00226          <<" at pixel="<<in.tellg()-beg<<")") ;
00227       
00228       pixel_t* start = im_pt ;
00229       pixel_t* end   = start + width*height ; 
00230       uint8_t* src = reinterpret_cast<uint8_t*>(newBuffer) ;      
00231       while( start != end ) *start++ = *src++ / 255.0f ;
00232     }       
00233   } catch(...) {
00234     delete [] im_pt ; 
00235     throw ;
00236   }
00237   
00238   buffer.width  = width ;
00239   buffer.height = height ;
00240   buffer.data   = im_pt ;
00241 
00242   return in ;
00243 }
00244 
00245 // ===================================================================
00246 //                                          Low level image operations
00247 // -------------------------------------------------------------------
00248 
00249 namespace Detail {
00250 
00251 /** @brief Copy an image
00252  ** @param dst    output imgage buffer.
00253  ** @param src    input image buffer.
00254  ** @param width  input image width.
00255  ** @param height input image height.
00256  **/
00257 void
00258 copy(pixel_t* dst, pixel_t const* src, int width, int height)
00259 {
00260   memcpy(dst, src, sizeof(pixel_t)*width*height)  ;
00261 }
00262 
00263 /** @brief Copy an image upsampling two times
00264  **
00265  ** The destination buffer must be at least as big as two times the
00266  ** input buffer. Bilinear interpolation is used.
00267  **
00268  ** @param dst     output imgage buffer.
00269  ** @param src     input image buffer.
00270  ** @param width   input image width.
00271  ** @param height  input image height.
00272  **/
00273 void 
00274 copyAndUpsampleRows
00275 (pixel_t* dst, pixel_t const* src, int width, int height)
00276 {
00277   for(int y = 0 ; y < height ; ++y) {
00278     pixel_t b, a ;
00279     b = a = *src++ ;
00280     for(int x = 0 ; x < width-1 ; ++x) {
00281       b = *src++ ;
00282       *dst = a ;         dst += height ;
00283       *dst = (a+b)/2 ;   dst += height ;
00284       a = b ;
00285     }
00286     *dst = b ; dst += height ;
00287     *dst = b ; dst += height ;
00288     dst += 1 - width * 2 * height ;
00289   }  
00290 }
00291 
00292 /** @brief Copy and downasample an image
00293  **
00294  ** The image is downsampled @a d times, i.e. reduced to @c 1/2^d of
00295  ** its original size. The parameters @a width and @a height are the
00296  ** size of the input image. The destination image is assumed to be @c
00297  ** floor(width/2^d) pixels wide and @c floor(height/2^d) pixels high.
00298  **
00299  ** @param dst output imgage buffer.
00300  ** @param src input image buffer.
00301  ** @param width input image width.
00302  ** @param height input image height.
00303  ** @param d downsampling factor.
00304  **/
00305 void 
00306 copyAndDownsample(pixel_t* dst, pixel_t const* src, 
00307                   int width, int height, int d)
00308 {
00309   for(int y = 0 ; y < height ; y+=d) {
00310     pixel_t const * srcrowp = src + y * width ;    
00311     for(int x = 0 ; x < width - (d-1) ; x+=d) {     
00312       *dst++ = *srcrowp ;
00313       srcrowp += d ;
00314     }
00315   }
00316 }
00317 
00318 }
00319 
00320 /** @brief Smooth an image 
00321  **
00322  ** The function convolves the image @a src by a Gaussian kernel of
00323  ** variance @a s and writes the result to @a dst. The function also
00324  ** needs a scratch buffer @a dst of the same size of @a src and @a
00325  ** dst.
00326  **
00327  ** @param dst output image buffer.
00328  ** @param temp scratch image buffer.
00329  ** @param src input image buffer.
00330  ** @param width width of the buffers.
00331  ** @param height height of the buffers.
00332  ** @param s standard deviation of the Gaussian kernel.
00333  **/
00334 void Sift::smooth (pixel_t* dst, pixel_t* newTemp, 
00335        pixel_t const* src, int newWidth, int newHeight, 
00336        VL::float_t s) {
00337   // make sure a buffer larege enough has been allocated
00338   // to hold the filter
00339   int W = int( ceil( VL::float_t(4.0) * s ) ) ;
00340   if( ! filter ) {
00341     filterReserved = 0 ;
00342   }
00343   
00344   if( filterReserved < W ) {
00345     filterReserved = W ;
00346     if( filter ) delete [] filter ;
00347     filter = new pixel_t [ 2* filterReserved + 1 ] ;
00348   }
00349   
00350   // pre-compute filter
00351   for(int j = 0 ; j < 2*W+1 ; ++j) 
00352     filter[j] = VL::pixel_t
00353       (std::exp
00354        (VL::float_t
00355         (-0.5 * (j-W) * (j-W) / (s*s) ))) ;
00356   
00357   // normalize to one
00358   normalize(filter, W) ;
00359   
00360   // convolve
00361   econvolve(newTemp, src, newWidth, newHeight, filter, W) ;
00362   econvolve(dst, newTemp, newHeight, newWidth, filter, W) ;
00363 }
00364 
00365 // ===================================================================
00366 //                                                     Sift(), ~Sift()
00367 // -------------------------------------------------------------------
00368 
00369 /** @brief Initialize Gaussian scale space parameters
00370  **
00371  ** @param _im_pt  Source image data
00372  ** @param _width  Soruce image width
00373  ** @param _height Soruce image height
00374  ** @param _sigman Nominal smoothing value of the input image.
00375  ** @param _sigma0 Base smoothing level.
00376  ** @param _O      Number of octaves.
00377  ** @param __S     Number of levels per octave.
00378  ** @param _omin   First octave.
00379  ** @param _smin   First level in each octave.
00380  ** @param _smax   Last level in each octave.
00381  **/
00382 Sift::Sift(const pixel_t* _im_pt, int _width, int _height,
00383      VL::float_t _sigman,
00384      VL::float_t _sigma0,
00385      int _O, int __S,
00386      int _omin, int _smin, int _smax)
00387   : sigman( _sigman ), 
00388     sigma0( _sigma0 ),
00389     sigmak(),
00390     O( _O ),
00391     S( __S ),
00392     omin( _omin ),
00393     smin( _smin ),
00394     smax( _smax ),
00395     width(),
00396     height(),
00397     magnif( 3.0f ),
00398     normalizeDescriptor( true ),
00399     
00400     temp( NULL ),
00401     tempReserved(),
00402     tempIsGrad(),
00403     tempOctave(),
00404     octaves( NULL ),
00405     filter( NULL ),
00406     filterReserved(),
00407     keypoints()
00408 {
00409   process(_im_pt, _width, _height) ;
00410 }
00411 
00412 /** @b
00413 rief Destroy SIFT filter.
00414  **/
00415 Sift::~Sift()
00416 {
00417   freeBuffers() ;
00418 }
00419 
00420 /** Allocate buffers. Buffer sizes depend on the image size and the
00421  ** value of omin.
00422  **/
00423 void
00424 Sift::
00425 prepareBuffers()
00426 {
00427   // compute buffer size
00428   int w = (omin >= 0) ? (width  >> omin) : (width  << -omin) ;
00429   int h = (omin >= 0) ? (height >> omin) : (height << -omin) ;
00430   int size = w*h* std::max
00431     ((smax - smin), 2*((smax+1) - (smin-2) +1)) ;
00432 
00433   if( temp && tempReserved == size ) return ;
00434   
00435   freeBuffers() ;
00436   
00437   // allocate
00438   temp           = new pixel_t [ size ] ; 
00439   tempReserved   = size ;
00440   tempIsGrad     = false ;
00441   tempOctave     = 0 ;
00442 
00443   octaves = new pixel_t* [ O ] ;
00444   for(int o = 0 ; o < O ; ++o) {
00445     octaves[o] = new pixel_t [ (smax - smin + 1) * w * h ] ;
00446     w >>= 1 ;
00447     h >>= 1 ;
00448   }
00449 }
00450   
00451 /** @brief Free buffers.
00452  **
00453  ** This function releases any buffer allocated by prepareBuffers().
00454  **
00455  ** @sa prepareBuffers().
00456  **/
00457 void
00458 Sift::
00459 freeBuffers()
00460 {
00461   if( filter ) {
00462     delete [] filter ;
00463   }
00464   filter = 0 ;
00465 
00466   if( octaves ) {
00467     for(int o = 0 ; o < O ; ++o) {
00468       delete [] octaves[ o ] ;
00469     }
00470     delete [] octaves ;
00471   }
00472   octaves = 0 ;
00473   
00474   if( temp ) {
00475     delete [] temp ;   
00476   }
00477   temp = 0  ; 
00478 }
00479 
00480 // ===================================================================
00481 //                                                         getKeypoint
00482 // -------------------------------------------------------------------
00483 
00484 /** @brief Get keypoint from position and scale
00485  **
00486  ** The function returns a keypoint with a given position and
00487  ** scale. Note that the keypoint structure contains fields that make
00488  ** sense only in conjunction with a specific scale space. Therefore
00489  ** the keypoint structure should be re-calculated whenever the filter
00490  ** is applied to a new image, even if the parameters @a x, @a y and
00491  ** @a sigma do not change.
00492  **
00493  ** @param x x-coordinate of the center.
00494  ** @param y y-coordinate of the center.
00495  ** @param sigma scale.
00496  ** @return Corresponing keypoint.
00497  **/
00498 Sift::Keypoint
00499 Sift::getKeypoint(VL::float_t x, VL::float_t y, VL::float_t sigma) const
00500 {
00501 
00502   /*
00503     The formula linking the keypoint scale sigma to the octave and
00504     scale index is
00505 
00506     (1) sigma(o,s) = sigma0 2^(o+s/S)
00507 
00508     for which
00509     
00510     (2) o + s/S = log2 sigma/sigma0 == phi.
00511 
00512     In addition to the scale index s (which can be fractional due to
00513     scale interpolation) a keypoint has an integer scale index is too
00514     (which is the index of the scale level where it was detected in
00515     the DoG scale space). We have the constraints:
00516  
00517     - o and is are integer
00518 
00519     - is is in the range [smin+1, smax-2  ]
00520 
00521     - o  is in the range [omin,   omin+O-1]
00522 
00523     - is = rand(s) most of the times (but not always, due to the way s
00524       is obtained by quadratic interpolation of the DoG scale space).
00525 
00526     Depending on the values of smin and smax, often (2) has multiple
00527     solutions is,o that satisfy all constraints.  In this case we
00528     choose the one with biggest index o (this saves a bit of
00529     computation).
00530 
00531     DETERMINING THE OCTAVE INDEX O
00532 
00533     From (2) we have o = phi - s/S and we want to pick the biggest
00534     possible index o in the feasible range. This corresponds to
00535     selecting the smallest possible index s. We write s = is + ds
00536     where in most cases |ds|<.5 (but in general |ds|<1). So we have
00537 
00538        o = phi - s/S,   s = is + ds ,   |ds| < .5 (or |ds| < 1).
00539 
00540     Since is is in the range [smin+1,smax-2], s is in the range
00541     [smin+.5,smax-1.5] (or [smin,smax-1]), the number o is an integer
00542     in the range phi+[-smax+1.5,-smin-.5] (or
00543     phi+[-smax+1,-smin]). Thus the maximum value of o is obtained for
00544     o = floor(phi-smin-.5) (or o = floor(phi-smin)).
00545 
00546     Finally o is clamped to make sure it is contained in the feasible
00547     range.
00548 
00549     DETERMINING THE SCALE INDEXES S AND IS
00550 
00551     Given o we can derive is by writing (2) as
00552 
00553       s = is + ds = S(phi - o).
00554 
00555     We then take is = round(s) and clamp its value to be in the
00556     feasible range.
00557   */
00558 
00559   int o,ix,iy,is ;
00560   VL::float_t s,phi ;
00561 
00562   phi = log2f(sigma/sigma0) ;
00563   o   = fast_floor( phi -  (VL::float_t(smin)+.5f)/S ) ;
00564   o   = std::min(o, omin+O-1) ;
00565   o   = std::max(o, omin    ) ;
00566   s   = S * (phi - o) ;
00567 
00568   is  = int(s + 0.5) ;
00569   is  = std::min(is, smax - 2) ;
00570   is  = std::max(is, smin + 1) ;
00571   
00572   VL::float_t per = getOctaveSamplingPeriod(o) ;
00573   ix = int(x / per + 0.5) ;
00574   iy = int(y / per + 0.5) ;
00575   
00576   Keypoint key ;
00577   key.o  = o ;
00578 
00579   key.ix = ix ;
00580   key.iy = iy ;
00581   key.is = is ;
00582 
00583   key.x = x ;
00584   key.y = y ;
00585   key.s = s ;
00586   
00587   key.sigma = sigma ;
00588   
00589   return key ;
00590 }
00591 
00592 // ===================================================================
00593 //                                                           process()
00594 // -------------------------------------------------------------------
00595 
00596 /** @brief Compute Gaussian Scale Space
00597  **
00598  ** The method computes the Gaussian scale space of the specified
00599  ** image. The scale space data is managed internally and can be
00600  ** accessed by means of getOctave() and getLevel().
00601  **
00602  ** @remark Calling this method will delete the list of keypoints
00603  ** constructed by detectKeypoints().
00604  **
00605  ** @param _im_pt pointer to image data.
00606  ** @param _width image width.
00607  ** @param _height image height .
00608  **/
00609 void
00610 Sift::
00611 process(const pixel_t* _im_pt, int _width, int _height)
00612 {
00613   using namespace Detail ;
00614 
00615   width  = _width ;
00616   height = _height ;
00617     
00618   prepareBuffers() ;
00619   
00620   VL::float_t sigmak1 = powf(2.0f, 1.0f / S) ;
00621   VL::float_t dsigma0 = sigma0 * std::sqrt (1.0f - 1.0f / (sigmak1*sigmak1) ) ;
00622   
00623   // -----------------------------------------------------------------
00624   //                                                 Make pyramid base
00625   // -----------------------------------------------------------------
00626   if( omin < 0 ) {
00627     copyAndUpsampleRows(temp,       _im_pt, width,  height  ) ;
00628     copyAndUpsampleRows(octaves[0], temp,   height, 2*width ) ;      
00629 
00630     for(int o = -1 ; o > omin ; --o) {
00631       copyAndUpsampleRows(temp,       octaves[0], width  << -o,    height << -o) ;
00632       copyAndUpsampleRows(octaves[0], temp,       height << -o, 2*(width  << -o)) ;             }
00633 
00634   } else if( omin > 0 ) {
00635     copyAndDownsample(octaves[0], _im_pt, width, height, 1 << omin) ;
00636   } else {
00637     copy(octaves[0], _im_pt, width, height) ;
00638   }
00639 
00640   {
00641     VL::float_t sa = sigma0 * powf(sigmak1, smin) ; 
00642     VL::float_t sb = sigman / powf(2.0f,   omin) ; // review this
00643     if( sa > sb ) {
00644       VL::float_t sd = std::sqrt ( sa*sa - sb*sb ) ;
00645       smooth( octaves[0], temp, octaves[0], 
00646               getOctaveWidth(omin),
00647               getOctaveHeight(omin), 
00648               sd ) ;
00649     }
00650   }
00651 
00652   // -----------------------------------------------------------------
00653   //                                                      Make octaves
00654   // -----------------------------------------------------------------
00655   for(int o = omin ; o < omin+O ; ++o) {
00656     // Prepare octave base
00657     if( o > omin ) {
00658       int sbest = std::min(smin + S, smax) ;
00659       copyAndDownsample(getLevel(o,   smin ), 
00660         getLevel(o-1, sbest),
00661         getOctaveWidth(o-1),
00662         getOctaveHeight(o-1), 2 ) ;
00663       VL::float_t sa = sigma0 * powf(sigmak1, smin      ) ;
00664       VL::float_t sb = sigma0 * powf(sigmak1, sbest - S ) ;
00665       if(sa > sb ) {
00666         VL::float_t sd = std::sqrt ( sa*sa - sb*sb ) ;
00667         smooth( getLevel(o,0), temp, getLevel(o,0), 
00668                 getOctaveWidth(o), getOctaveHeight(o),
00669                 sd ) ;
00670       }
00671     }
00672 
00673     // Make other levels
00674     for(int s = smin+1 ; s <= smax ; ++s) {
00675       VL::float_t sd = dsigma0 * powf(sigmak1, s) ;
00676       smooth( getLevel(o,s), temp, getLevel(o,s-1),
00677               getOctaveWidth(o), getOctaveHeight(o),
00678               sd ) ;
00679     }
00680   }
00681 }
00682 
00683 /** @brief Sift detector
00684  **
00685  ** The function runs the SIFT detector on the stored Gaussian scale
00686  ** space (see process()). The detector consists in three steps
00687  **
00688  ** - local maxima detection;
00689  ** - subpixel interpolation;
00690  ** - rejection of weak keypoints (@a threhsold);
00691  ** - rejection of keypoints on edge-like structures (@a edgeThreshold).
00692  **
00693  ** As they are found, keypoints are added to an internal list.  This
00694  ** list can be accessed by means of the member functions
00695  ** getKeypointsBegin() and getKeypointsEnd(). The list is ordered by
00696  ** octave, which is usefult to speed-up computeKeypointOrientations()
00697  ** and computeKeypointDescriptor().
00698  **/
00699 void
00700 Sift::detectKeypoints(VL::float_t threshold, VL::float_t edgeThreshold)
00701 {
00702   keypoints.clear() ;
00703 
00704   size_t nValidatedKeypoints = 0 ;
00705 
00706   // Process one octave per time
00707   for(int o = omin ; o < omin + O ; ++o) {
00708         
00709     int const xo = 1 ;
00710     int const yo = getOctaveWidth(o) ;
00711     int const so = getOctaveWidth(o) * getOctaveHeight(o) ;
00712     int const ow = getOctaveWidth(o) ;
00713     int const oh = getOctaveHeight(o) ;
00714 
00715     VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
00716 
00717     // -----------------------------------------------------------------
00718     //                                           Difference of Gaussians
00719     // -----------------------------------------------------------------
00720     pixel_t* dog = temp ;
00721     tempIsGrad = false ;
00722     {
00723       pixel_t* pt = dog ;
00724       for(int s = smin ; s <= smax-1 ; ++s) {
00725         pixel_t* srca = getLevel(o, s  ) ;
00726         pixel_t* srcb = getLevel(o, s+1) ;
00727         pixel_t* enda = srcb ;
00728         while( srca != enda ) {
00729           *pt++ = *srcb++ - *srca++ ;
00730         }
00731       }
00732     }
00733     
00734     // -----------------------------------------------------------------
00735     //                                           Find points of extremum
00736     // -----------------------------------------------------------------
00737     {
00738       pixel_t* pt  = dog + xo + yo + so ;
00739       for(int s = smin+1 ; s <= smax-2 ; ++s) {
00740         for(int y = 1 ; y < oh - 1 ; ++y) {
00741           for(int x = 1 ; x < ow - 1 ; ++x) {          
00742             pixel_t v = *pt ;
00743             
00744             // assert( (pt - x*xo - y*yo - (s-smin)*so) - dog == 0 ) ;
00745             
00746 #define CHECK_NEIGHBORS(CMP,SGN)                    \
00747             ( v CMP ## = SGN 0.8 * threshold &&     \
00748               v CMP *(pt + xo) &&                   \
00749               v CMP *(pt - xo) &&                   \
00750               v CMP *(pt + so) &&                   \
00751               v CMP *(pt - so) &&                   \
00752               v CMP *(pt + yo) &&                   \
00753               v CMP *(pt - yo) &&                   \
00754                                                     \
00755               v CMP *(pt + yo + xo) &&              \
00756               v CMP *(pt + yo - xo) &&              \
00757               v CMP *(pt - yo + xo) &&              \
00758               v CMP *(pt - yo - xo) &&              \
00759                                                     \
00760               v CMP *(pt + xo      + so) &&         \
00761               v CMP *(pt - xo      + so) &&         \
00762               v CMP *(pt + yo      + so) &&         \
00763               v CMP *(pt - yo      + so) &&         \
00764               v CMP *(pt + yo + xo + so) &&         \
00765               v CMP *(pt + yo - xo + so) &&         \
00766               v CMP *(pt - yo + xo + so) &&         \
00767               v CMP *(pt - yo - xo + so) &&         \
00768                                                     \
00769               v CMP *(pt + xo      - so) &&         \
00770               v CMP *(pt - xo      - so) &&         \
00771               v CMP *(pt + yo      - so) &&         \
00772               v CMP *(pt - yo      - so) &&         \
00773               v CMP *(pt + yo + xo - so) &&         \
00774               v CMP *(pt + yo - xo - so) &&         \
00775               v CMP *(pt - yo + xo - so) &&         \
00776               v CMP *(pt - yo - xo - so) )
00777             
00778             if( CHECK_NEIGHBORS(>,+) || CHECK_NEIGHBORS(<,-) ) {
00779               
00780               Keypoint k ;
00781               k.ix = x ;
00782               k.iy = y ;
00783               k.is = s ;
00784               keypoints.push_back(k) ;
00785             }
00786             pt += 1 ;
00787           }
00788           pt += 2 ;
00789         }
00790         pt += 2*yo ;
00791       }
00792     }
00793 
00794     // -----------------------------------------------------------------
00795     //                                               Refine local maxima
00796     // -----------------------------------------------------------------
00797     { // refine
00798       KeypointsIter siter ;
00799       KeypointsIter diter ;
00800       
00801       for(diter = siter = keypointsBegin() + nValidatedKeypoints ; 
00802           siter != keypointsEnd() ; 
00803           ++siter) {
00804        
00805         int x = int( siter->ix ) ;
00806         int y = int( siter->iy ) ;
00807         int s = int( siter->is ) ;
00808                 
00809         VL::float_t Dx=0,Dy=0,Ds=0,Dxx=0,Dyy=0,Dss=0,Dxy=0,Dxs=0,Dys=0 ;
00810         VL::float_t  b [3] ;
00811         pixel_t* pt ;
00812         int dx = 0 ;
00813         int dy = 0 ;
00814 
00815         // must be exec. at least once
00816         for(int iter = 0 ; iter < 5 ; ++iter) {
00817 
00818           VL::float_t A[3*3] ;          
00819 
00820           x += dx ;
00821           y += dy ;
00822 
00823           pt = dog 
00824             + xo * x
00825             + yo * y
00826             + so * (s - smin) ;
00827 
00828 #define at(dx,dy,ds) (*( pt + (dx)*xo + (dy)*yo + (ds)*so))
00829 #define Aat(i,j)     (A[(i)+(j)*3])    
00830           
00831           /* Compute the gradient. */
00832           Dx = 0.5f * (at(+1,0,0) - at(-1,0,0)) ;
00833           Dy = 0.5f * (at(0,+1,0) - at(0,-1,0));
00834           Ds = 0.5f * (at(0,0,+1) - at(0,0,-1)) ;
00835           
00836           /* Compute the Hessian. */
00837           Dxx = (at(+1,0,0) + at(-1,0,0) - 2.0f * at(0,0,0)) ;
00838           Dyy = (at(0,+1,0) + at(0,-1,0) - 2.0f * at(0,0,0)) ;
00839           Dss = (at(0,0,+1) + at(0,0,-1) - 2.0f * at(0,0,0)) ;
00840           
00841           Dxy = 0.25f * ( at(+1,+1,0) + at(-1,-1,0) - at(-1,+1,0) - at(+1,-1,0) ) ;
00842           Dxs = 0.25f * ( at(+1,0,+1) + at(-1,0,-1) - at(-1,0,+1) - at(+1,0,-1) ) ;
00843           Dys = 0.25f * ( at(0,+1,+1) + at(0,-1,-1) - at(0,-1,+1) - at(0,+1,-1) ) ;
00844           
00845           /* Solve linear system. */
00846           Aat(0,0) = Dxx ;
00847           Aat(1,1) = Dyy ;
00848           Aat(2,2) = Dss ;
00849           Aat(0,1) = Aat(1,0) = Dxy ;
00850           Aat(0,2) = Aat(2,0) = Dxs ;
00851           Aat(1,2) = Aat(2,1) = Dys ;
00852           
00853           b[0] = - Dx ;
00854           b[1] = - Dy ;
00855           b[2] = - Ds ;
00856           
00857           // Gauss elimination
00858           for(int j = 0 ; j < 3 ; ++j) {
00859 
00860             // look for leading pivot
00861             VL::float_t maxa = 0 ;
00862             VL::float_t maxabsa = 0 ;
00863             int   maxi = -1 ;
00864             int i ;
00865             for(i = j ; i < 3 ; ++i) {
00866               VL::float_t a    = Aat(i,j) ;
00867               VL::float_t absa = fabsf( a ) ;
00868               if ( absa > maxabsa ) {
00869                 maxa    = a ;
00870                 maxabsa = absa ;
00871                 maxi    = i ;
00872               }
00873             }
00874 
00875             // singular?
00876             if( maxabsa < 1e-10f ) {
00877               b[0] = 0 ;
00878               b[1] = 0 ;
00879               b[2] = 0 ;
00880               break ;
00881             }
00882 
00883             i = maxi ;
00884 
00885             // swap j-th row with i-th row and
00886             // normalize j-th row
00887             for(int jj = j ; jj < 3 ; ++jj) {
00888               std::swap( Aat(j,jj) , Aat(i,jj) ) ;
00889               Aat(j,jj) /= maxa ;
00890             }
00891             std::swap( b[j], b[i] ) ;
00892             b[j] /= maxa ;
00893 
00894             // elimination
00895             for(int ii = j+1 ; ii < 3 ; ++ii) { //x renamed to x1 to eliminate shadowing 
00896               VL::float_t x1 = Aat(ii,j) ;
00897               for(int jj = j ; jj < 3 ; ++jj) {
00898                 Aat(ii,jj) -= x1 * Aat(j,jj) ;                
00899               }
00900               b[ii] -= x1 * b[j] ;
00901             }
00902           }
00903 
00904           // backward substitution
00905           for(int i = 2 ; i > 0 ; --i) { //x renamed to x1 to eliminate shadowing
00906             VL::float_t x1 = b[i] ;
00907             for(int ii = i-1 ; ii >= 0 ; --ii) {
00908               b[ii] -= x1 * Aat(ii,i) ;
00909             }
00910           }
00911 
00912           /* If the translation of the keypoint is big, move the keypoint
00913            * and re-iterate the computation. Otherwise we are all set.
00914            */
00915           dx= ((b[0] >  0.6 && x < ow-2) ?  1 : 0 )
00916             + ((b[0] < -0.6 && x > 1   ) ? -1 : 0 ) ;
00917           
00918           dy= ((b[1] >  0.6 && y < oh-2) ?  1 : 0 )
00919             + ((b[1] < -0.6 && y > 1   ) ? -1 : 0 ) ;
00920 
00921           /*          
00922           std::cout<<x<<","<<y<<"="<<at(0,0,0)
00923                    <<"("
00924                    <<at(0,0,0)+0.5 * (Dx * b[0] + Dy * b[1] + Ds * b[2])<<")"
00925                    <<" "<<std::flush ; 
00926           */
00927 
00928           if( dx == 0 && dy == 0 ) break ;
00929         }
00930         
00931         /* std::cout<<std::endl ; */
00932         
00933         // Accept-reject keypoint
00934         {
00935           VL::float_t val = at(0,0,0) + 0.5f * (Dx * b[0] + Dy * b[1] + Ds * b[2]) ; 
00936           VL::float_t score = (Dxx+Dyy)*(Dxx+Dyy) / (Dxx*Dyy - Dxy*Dxy) ; 
00937           VL::float_t xn = x + b[0] ;
00938           VL::float_t yn = y + b[1] ;
00939           VL::float_t sn = s + b[2] ;
00940           
00941           if(fast_abs(val) > threshold &&
00942              score < (edgeThreshold+1)*(edgeThreshold+1)/edgeThreshold && 
00943              score >= 0 &&
00944              fast_abs(b[0]) < 1.5 &&
00945              fast_abs(b[1]) < 1.5 &&
00946              fast_abs(b[2]) < 1.5 &&
00947              xn >= 0    &&
00948              xn <= ow-1 &&
00949              yn >= 0    &&
00950              yn <= oh-1 &&
00951              sn >= smin &&
00952              sn <= smax ) {
00953             
00954             diter->o  = o ;
00955        
00956             diter->ix = x ;
00957             diter->iy = y ;
00958             diter->is = s ;
00959 
00960             diter->x = xn * xperiod ; 
00961             diter->y = yn * xperiod ; 
00962             diter->s = sn ;
00963 
00964             diter->sigma = getScaleFromIndex(o,sn) ;
00965             
00966 //            std::cout << diter->is << " " << diter->s << " " << o << " " << diter->sigma << ", " << getScaleFromIndex(o,sn) << ", " << sigma0 << ", " << S << std::endl;
00967 
00968             ++diter ;
00969           }
00970         }
00971       } // next candidate keypoint
00972 
00973       // prepare for next octave
00974       keypoints.resize( diter - keypoints.begin() ) ;
00975       nValidatedKeypoints = keypoints.size() ;
00976     } // refine block
00977 
00978   } // next octave
00979 }
00980 
00981 // ===================================================================
00982 //                                       computeKeypointOrientations()
00983 // -------------------------------------------------------------------
00984 
00985 /** @brief Compute modulus and phase of the gradient
00986  **
00987  ** The function computes the modulus and the angle of the gradient of
00988  ** the specified octave @a o. The result is stored in a temporary
00989  ** internal buffer accessed by computeKeypointDescriptor() and
00990  ** computeKeypointOrientations().
00991  **
00992  ** The SIFT detector provides keypoint with scale index s in the
00993  ** range @c smin+1 and @c smax-2. As such, the buffer contains only
00994  ** these levels.
00995  **
00996  ** If called mutliple time on the same data, the function exits
00997  ** immediately.
00998  **
00999  ** @param o octave of interest.
01000  **/
01001 void
01002 Sift::prepareGrad(int o)
01003 { 
01004   int const ow = getOctaveWidth(o) ;
01005   int const oh = getOctaveHeight(o) ;
01006   int const xo = 1 ;
01007   int const yo = ow ;
01008   int const so = oh*ow ;
01009 
01010   if( ! tempIsGrad || tempOctave != o ) {
01011 
01012     // compute dx/dy
01013     for(int s = smin+1 ; s <= smax-2 ; ++s) {
01014       for(int y = 1 ; y < oh-1 ; ++y ) {
01015         pixel_t* src  = getLevel(o, s) + xo + yo*y ;        
01016         pixel_t* end  = src + ow - 1 ;
01017         pixel_t* grad = 2 * (xo + yo*y + (s - smin -1)*so) + temp ;
01018         while(src != end) {
01019           VL::float_t Gx = 0.5f * ( *(src+xo) - *(src-xo) ) ;
01020           VL::float_t Gy = 0.5f * ( *(src+yo) - *(src-yo) ) ;
01021           VL::float_t m = fast_sqrt( Gx*Gx + Gy*Gy ) ;
01022           VL::float_t t = fast_mod_2pi( fast_atan2(Gy, Gx) + VL::float_t(2*M_PI) );
01023           *grad++ = pixel_t( m ) ;
01024           *grad++ = pixel_t( t ) ;
01025           ++src ;
01026         }
01027       }
01028     }
01029   }
01030   
01031   tempIsGrad = true ;
01032   tempOctave = o ;
01033 }
01034 
01035 /** @brief Compute the orientation(s) of a keypoint
01036  **
01037  ** The function computes the orientation of the specified keypoint.
01038  ** The function returns up to four different orientations, obtained
01039  ** as strong peaks of the histogram of gradient orientations (a
01040  ** keypoint can theoretically generate more than four orientations,
01041  ** but this is very unlikely).
01042  **
01043  ** @remark The function needs to compute the gradient modululs and
01044  ** orientation of the Gaussian scale space octave to which the
01045  ** keypoint belongs. The result is cached, but discarded if different
01046  ** octaves are visited. Thereofre it is much quicker to evaluate the
01047  ** keypoints in their natural octave order.
01048  **
01049  ** The keypoint must lie within the scale space. In particular, the
01050  ** scale index is supposed to be in the range @c smin+1 and @c smax-1
01051  ** (this is from the SIFT detector). If this is not the case, the
01052  ** computation is silently aborted and no orientations are returned.
01053  **
01054  ** @param angles buffers to store the resulting angles.
01055  ** @param keypoint keypoint to process.
01056  ** @return number of orientations found.
01057  **/
01058 int
01059 Sift::computeKeypointOrientations(VL::float_t angles [4], Keypoint keypoint)
01060 {
01061   int const   nbins = 36 ;
01062   VL::float_t const winFactor = 1.5f ;
01063   VL::float_t hist [nbins] ;
01064 
01065   // octave
01066   int o = keypoint.o ;
01067   VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
01068 
01069   // offsets to move in the Gaussian scale space octave
01070   const int ow = getOctaveWidth(o) ;
01071   const int oh = getOctaveHeight(o) ;
01072   const int xo = 2 ;
01073   const int yo = xo * ow ;
01074   const int so = yo * oh ;
01075 
01076   // keypoint fractional geometry
01077   VL::float_t x     = keypoint.x / xperiod ;
01078   VL::float_t y     = keypoint.y / xperiod ;
01079   VL::float_t sigma = keypoint.sigma / xperiod ;
01080   
01081   // shall we use keypoints.ix,iy,is here?
01082   int xi = ((int) (x+0.5)) ; 
01083   int yi = ((int) (y+0.5)) ;
01084   int si = keypoint.is ;
01085   
01086   VL::float_t const sigmaw = winFactor * sigma ;
01087   int W = (int) floor(3.0 * sigmaw) ;
01088   
01089   // skip the keypoint if it is out of bounds
01090   if(o  < omin   ||
01091      o  >=omin+O ||
01092      xi < 0      || 
01093      xi > ow-1   || 
01094      yi < 0      || 
01095      yi > oh-1   || 
01096      si < smin+1 || 
01097      si > smax-2 ) {
01098     std::cerr<<"!"<<std::endl ;
01099     return 0 ;
01100   }
01101   
01102   // make sure that the gradient buffer is filled with octave o
01103   prepareGrad(o) ;
01104 
01105   // clear the SIFT histogram
01106   std::fill(hist, hist + nbins, 0) ;
01107 
01108   // fill the SIFT histogram
01109   pixel_t* pt = temp + xi * xo + yi * yo + (si - smin -1) * so ;
01110 
01111 #undef at
01112 #define at(dx,dy) (*(pt + (dx)*xo + (dy)*yo))
01113 
01114   for(int ys = std::max(-W, 1-yi) ; ys <= std::min(+W, oh -2 -yi) ; ++ys) {
01115     for(int xs = std::max(-W, 1-xi) ; xs <= std::min(+W, ow -2 -xi) ; ++xs) {
01116       
01117       VL::float_t dx = xi + xs - x;
01118       VL::float_t dy = yi + ys - y;
01119       VL::float_t r2 = dx*dx + dy*dy ;
01120 
01121       // limit to a circular window
01122       if(r2 >= W*W+0.5) continue ;
01123 
01124       VL::float_t wgt = VL::fast_expn( r2 / (2*sigmaw*sigmaw) ) ;
01125       VL::float_t mod = *(pt + xs*xo + ys*yo) ;
01126       VL::float_t ang = *(pt + xs*xo + ys*yo + 1) ;
01127 
01128       //      int bin = (int) floor( nbins * ang / (2*M_PI) ) ;
01129       int bin = (int) floor( nbins * ang / (2*M_PI) ) ;
01130       hist[bin] += mod * wgt ;        
01131     }
01132   }
01133   
01134   // smooth the histogram
01135 #if defined VL_LOWE_STRICT
01136   // Lowe's version apparently has a little issue with orientations
01137   // around + or - pi, which we reproduce here for compatibility
01138   for (int iter = 0; iter < 6; iter++) {
01139     VL::float_t prev  = hist[nbins/2] ;
01140     for (int i = nbins/2-1; i >= -nbins/2 ; --i) {
01141       int const j  = (i     + nbins) % nbins ;
01142       int const jp = (i - 1 + nbins) % nbins ;
01143       VL::float_t newh = (prev + hist[j] + hist[jp]) / 3.0;
01144       prev = hist[j] ;
01145       hist[j] = newh ;
01146     }
01147   }
01148 #else
01149   // this is slightly more correct
01150   for (int iter = 0; iter < 6; iter++) {
01151     VL::float_t prev  = hist[nbins-1] ;
01152     VL::float_t first = hist[0] ;
01153     int i ;
01154     for (i = 0; i < nbins - 1; i++) {
01155       VL::float_t newh = (prev + hist[i] + hist[(i+1) % nbins]) / 3.0f;
01156       prev = hist[i] ;
01157       hist[i] = newh ;
01158     }
01159     hist[i] = (prev + hist[i] + first)/3.0f ;
01160   }
01161 #endif
01162   
01163   // find the histogram maximum
01164   VL::float_t maxh = * std::max_element(hist, hist + nbins) ;
01165 
01166   // find peaks within 80% from max
01167   int nangles = 0 ;
01168   for(int i = 0 ; i < nbins ; ++i) {
01169     VL::float_t h0 = hist [i] ;
01170     VL::float_t hm = hist [(i-1+nbins) % nbins] ;
01171     VL::float_t hp = hist [(i+1+nbins) % nbins] ;
01172     
01173     // is this a peak?
01174     if( h0 > 0.8*maxh && h0 > hm && h0 > hp ) {
01175       
01176       // quadratic interpolation
01177       //      VL::float_t di = -0.5f * (hp - hm) / (hp+hm-2*h0) ; 
01178       VL::float_t di = -0.5f * (hp - hm) / (hp+hm-2*h0) ; 
01179       VL::float_t th = 2*float(M_PI) * (i+di+0.5f) / nbins ;      
01180       angles [ nangles++ ] = th ;
01181       if( nangles == 4 )
01182         goto enough_angles ;
01183     }
01184   }
01185  enough_angles:
01186   return nangles ;
01187 }
01188 
01189 // ===================================================================
01190 //                                         computeKeypointDescriptor()
01191 // -------------------------------------------------------------------
01192 
01193 namespace Detail {
01194 
01195 /** Normalizes in norm L_2 a descriptor. */
01196 void
01197 normalize_histogram(VL::float_t* L_begin, VL::float_t* L_end)
01198 {
01199   VL::float_t* L_iter ;
01200   VL::float_t norm = 0 ;
01201 
01202   for(L_iter = L_begin; L_iter != L_end ; ++L_iter)
01203     norm += (*L_iter) * (*L_iter) ;
01204 
01205   norm = fast_sqrt(norm) ;
01206 
01207   for(L_iter = L_begin; L_iter != L_end ; ++L_iter)
01208     *L_iter /= (norm + std::numeric_limits<VL::float_t>::epsilon() ) ;
01209 }
01210 
01211 }
01212 
01213 /** @brief SIFT descriptor
01214  **
01215  ** The function computes the descriptor of the keypoint @a keypoint.
01216  ** The function fills the buffer @a descr_pt which must be large
01217  ** enough. The funciton uses @a angle0 as rotation of the keypoint.
01218  ** By calling the function multiple times, different orientations can
01219  ** be evaluated.
01220  **
01221  ** @remark The function needs to compute the gradient modululs and
01222  ** orientation of the Gaussian scale space octave to which the
01223  ** keypoint belongs. The result is cached, but discarded if different
01224  ** octaves are visited. Thereofre it is much quicker to evaluate the
01225  ** keypoints in their natural octave order.
01226  **
01227  ** The function silently abort the computations of keypoints without
01228  ** the scale space boundaries. See also siftComputeOrientations().
01229  **/
01230 void
01231 Sift::computeKeypointDescriptor
01232 (VL::float_t* descr_pt,
01233  Keypoint keypoint, 
01234  VL::float_t angle0)
01235 {
01236 
01237   /* The SIFT descriptor is a  three dimensional histogram of the position
01238    * and orientation of the gradient.  There are NBP bins for each spatial
01239    * dimesions and NBO  bins for the orientation dimesion,  for a total of
01240    * NBP x NBP x NBO bins.
01241    *
01242    * The support  of each  spatial bin  has an extension  of SBP  = 3sigma
01243    * pixels, where sigma is the scale  of the keypoint.  Thus all the bins
01244    * together have a  support SBP x NBP pixels wide  . Since weighting and
01245    * interpolation of  pixel is used, another  half bin is  needed at both
01246    * ends of  the extension. Therefore, we  need a square window  of SBP x
01247    * (NBP + 1) pixels. Finally, since the patch can be arbitrarly rotated,
01248    * we need to consider  a window 2W += sqrt(2) x SBP  x (NBP + 1) pixels
01249    * wide.
01250    */      
01251 
01252   // octave
01253   int o = keypoint.o ;
01254   VL::float_t xperiod = getOctaveSamplingPeriod(o) ;
01255 
01256   // offsets to move in Gaussian scale space octave
01257   const int ow = getOctaveWidth(o) ;
01258   const int oh = getOctaveHeight(o) ;
01259   const int xo = 2 ;
01260   const int yo = xo * ow ;
01261   const int so = yo * oh ;
01262   
01263 //  std::cout << "Keypoint at (" << keypoint.x << "," << keypoint.y << "), " << keypoint.sigma << ", " << angle0 << " ";
01264 
01265   // keypoint fractional geometry
01266   VL::float_t x     = keypoint.x / xperiod;
01267   VL::float_t y     = keypoint.y / xperiod ;
01268   VL::float_t sigma = keypoint.sigma / xperiod ;
01269   
01270 //  std::cout << "transformed to (" << x << "," << y << "," << sigma << ")\n";
01271 
01272   VL::float_t st0   = sinf( angle0 ) ;
01273   VL::float_t ct0   = cosf( angle0 ) ;
01274   
01275   // shall we use keypoints.ix,iy,is here?
01276   int xi = ((int) (x+0.5)) ; 
01277   int yi = ((int) (y+0.5)) ;
01278   int si = keypoint.is ;
01279 
01280   // const VL::float_t magnif = 3.0f ;
01281   const int NBO = 8 ;
01282   const int NBP = 4 ;
01283   const VL::float_t SBP = magnif * sigma ;
01284   const int   W = (int) floor (sqrt(2.0) * SBP * (NBP + 1) / 2.0 + 0.5) ;
01285 //  std::cout << "W = " << W << ", SBP = " << SBP << ", magnif = " << magnif << "\n";
01286   
01287   /* Offsets to move in the descriptor. */
01288   /* Use Lowe's convention. */
01289   const int binto = 1 ;
01290   const int binyo = NBO * NBP ;
01291   const int binxo = NBO ;
01292   // const int bino  = NBO * NBP * NBP ;
01293   
01294   int bin ;
01295   
01296   // check bounds
01297   if(o  < omin   ||
01298      o  >=omin+O ||
01299      xi < 0      || 
01300      xi > ow-1   || 
01301      yi < 0      || 
01302      yi > oh-1   ||
01303      si < smin+1 ||
01304      si > smax-2 )
01305         return ;
01306   
01307   // make sure gradient buffer is up-to-date
01308   prepareGrad(o) ;
01309 
01310   std::fill( descr_pt, descr_pt + NBO*NBP*NBP, 0 ) ;
01311 
01312   /* Center the scale space and the descriptor on the current keypoint. 
01313    * Note that dpt is pointing to the bin of center (SBP/2,SBP/2,0).
01314    */
01315   pixel_t const * pt = temp + xi*xo + yi*yo + (si - smin - 1)*so ;
01316   VL::float_t *  dpt = descr_pt + (NBP/2) * binyo + (NBP/2) * binxo ;
01317      
01318 #define atd(dbinx,dbiny,dbint) *(dpt + (dbint)*binto + (dbiny)*binyo + (dbinx)*binxo)
01319       
01320   /*
01321    * Process pixels in the intersection of the image rectangle
01322    * (1,1)-(M-1,N-1) and the keypoint bounding box.
01323    */
01324   for(int dyi = std::max(-W, 1-yi) ; dyi <= std::min(+W, oh-2-yi) ; ++dyi) {
01325     for(int dxi = std::max(-W, 1-xi) ; dxi <= std::min(+W, ow-2-xi) ; ++dxi) {
01326       
01327       // retrieve 
01328       VL::float_t mod   = *( pt + dxi*xo + dyi*yo + 0 ) ;
01329       VL::float_t angle = *( pt + dxi*xo + dyi*yo + 1 ) ;
01330       VL::float_t theta = fast_mod_2pi(-angle + angle0) ; // lowe compatible ?
01331       
01332 //      std::cout << "\tpixel (" << (xi+dxi) << "," << (yi+dyi) << ") with length " << mod << " and angle " << theta << "\n";
01333       
01334       // fractional displacement
01335       VL::float_t dx = xi + dxi - x;
01336       VL::float_t dy = yi + dyi - y;
01337       
01338       // get the displacement normalized w.r.t. the keypoint
01339       // orientation and extension.
01340       VL::float_t nx = ( ct0 * dx + st0 * dy) / SBP ;
01341       VL::float_t ny = (-st0 * dx + ct0 * dy) / SBP ; 
01342       VL::float_t nt = NBO * theta / float(2*M_PI) ;
01343       
01344 //      std::cout << "\tnormalized to (" << (nx*SBP) << "," << (ny*SBP) << "," << nt << ")\n";
01345       
01346       // Get the gaussian weight of the sample. The gaussian window
01347       // has a standard deviation equal to NBP/2. Note that dx and dy
01348       // are in the normalized frame, so that -NBP/2 <= dx <= NBP/2.
01349       VL::float_t const wsigma = NBP/2 ;
01350       VL::float_t win = VL::fast_expn((nx*nx + ny*ny)/(2.0f * wsigma * wsigma)) ;
01351       
01352       // The sample will be distributed in 8 adjacent bins.
01353       // We start from the ``lower-left'' bin.
01354       int binx = fast_floor( nx - 0.5f ) ;
01355       int biny = fast_floor( ny - 0.5f ) ;
01356       int bint = fast_floor( nt ) ;
01357       VL::float_t rbinx = nx - (binx+0.5f) ;
01358       VL::float_t rbiny = ny - (biny+0.5f) ;
01359       VL::float_t rbint = nt - bint ;
01360       int dbinx ;
01361       int dbiny ;
01362       int dbint ;
01363 
01364       // Distribute the current sample into the 8 adjacent bins
01365       for(dbinx = 0 ; dbinx < 2 ; ++dbinx) {
01366         for(dbiny = 0 ; dbiny < 2 ; ++dbiny) {
01367           for(dbint = 0 ; dbint < 2 ; ++dbint) {
01368             
01369             if( binx+dbinx >= -(NBP/2) &&
01370                 binx+dbinx <   (NBP/2) &&
01371                 biny+dbiny >= -(NBP/2) &&
01372                 biny+dbiny <   (NBP/2) ) {
01373               VL::float_t weight = win 
01374                 * mod 
01375                 * fast_abs (1 - dbinx - rbinx)
01376                 * fast_abs (1 - dbiny - rbiny)
01377                 * fast_abs (1 - dbint - rbint) ;
01378               
01379 //              std::cout << "\t\tcontributes " << weight << " to bin (" << (binx+dbinx) << "," << (biny+dbiny) << "," << ((bint+dbint)%NBO) << ")\n";
01380               
01381               atd(binx+dbinx, biny+dbiny, (bint+dbint) % NBO) += weight ;
01382             }
01383           }            
01384         }
01385       }
01386     }  
01387   }
01388 
01389   /* Standard SIFT descriptors are normalized, truncated and normalized again */
01390   if( normalizeDescriptor ) {
01391 
01392     /* Normalize the histogram to L2 unit length. */        
01393     Detail::normalize_histogram(descr_pt, descr_pt + NBO*NBP*NBP) ;
01394     
01395     /* Truncate at 0.2. */
01396     for(bin = 0; bin < NBO*NBP*NBP ; ++bin) {
01397       if (descr_pt[bin] > 0.2f) descr_pt[bin] = 0.2f;
01398     }
01399     
01400     /* Normalize again. */
01401     Detail::normalize_histogram(descr_pt, descr_pt + NBO*NBP*NBP) ;
01402   }
01403 
01404 }
01405 
01406 // namespace VL
01407 }

Tekkotsu v5.1CVS
Generated Mon May 9 04:58:51 2016 by Doxygen 1.6.3