Tekkotsu Homepage
Demos
Overview
Downloads
Dev. Resources
Reference
Credits

PitchDetector.cc

Go to the documentation of this file.
00001 #include "PitchDetector.h"
00002 #include "Events/EventRouter.h"
00003 #include "Events/EventBase.h"
00004 #include "Events/DataEvent.h"
00005 #include "Events/PitchEvent.h"
00006 #include "Shared/newmat/newmatap.h"
00007 #include "Shared/Config.h"
00008 #include "Shared/debuget.h"
00009 
00010 #include "Shared/ODataFormats.h"
00011 #ifdef PLATFORM_APERIOS
00012 #  include "OPENR/OPENRAPI.h"
00013 #endif
00014 
00015 REGISTER_BEHAVIOR_MENU_OPT(PitchDetector,"Background Behaviors/System Daemons",BEH_NONEXCLUSIVE|BEH_START);
00016 
00017 using namespace std; 
00018 
00019 const unsigned int PitchDetector::fft_frames = 4; // number frames to use for fft
00020 const unsigned int PitchDetector::num_pitches = 60; // 5 octaves
00021 const float PitchDetector::base_pitch = 110.0f; // two octaves below tuning A (440)
00022 const float PitchDetector::half_step = 1.0594630943593f; // twelfth root of two
00023 const float PitchDetector::sqrt_2_pi = 2.506628274631f; // \sqrt{2\pi}
00024 
00025 PitchDetector::~PitchDetector() {
00026   ASSERT(pitch_info==NULL,"pitch_info wasn't deleted before destructor");
00027   ASSERT(pitch_bin==NULL,"pitch_bin wasn't deleted before destructor");
00028 }
00029 
00030 void PitchDetector::doStart() {
00031   EventGeneratorBase::doStart(); // do this first (required)
00032   
00033   ASSERT(pitch_info==NULL,"pitch_info was already allocated?");
00034   ASSERT(pitch_bin==NULL,"pitch_bin was already allocated?");
00035   
00036   pitch_info = new PitchInfo[num_pitches];
00037   for (unsigned int i = 0; i != num_pitches; ++i) {
00038     float freq = base_pitch * powf(half_step, i);
00039     pitch_info[i].freq = freq;
00040     pitch_info[i].sigma = sqrtf((freq * half_step - freq) / 0.5f);
00041     pitch_info[i].duration = 0;
00042   }
00043 
00044   // doEvent will initialize before use
00045   pitch_bin = new float[num_pitches];
00046 
00047   cur_frame = 0;
00048   have_fft = false;
00049   //printf("writing to file..\n");
00050   //fprintf(fft_file, "\n======starting fft collection=======\n");
00051   //printf("done writing to file..\n");
00052 }
00053 
00054 void PitchDetector::doStop() {
00055   //fclose(fft_file);
00056   if(pitch_info!=NULL) {
00057     delete [] pitch_info;
00058     pitch_info=NULL;
00059   }
00060   if(pitch_bin!=NULL) {
00061     delete [] pitch_bin;
00062     pitch_bin=NULL;
00063   }
00064   EventGeneratorBase::doStop(); // do this last (required)
00065 }
00066 
00067 void PitchDetector::doEvent() {
00068   if( event->getGeneratorID() != EventBase::micOSndEGID)
00069     return;
00070   
00071   // Get to the sound buffer
00072   // getData() is not specified for const data
00073   unsigned int i, j;
00074   const DataEvent<const OSoundVectorData*> *de = reinterpret_cast<const DataEvent<const OSoundVectorData*>*>( &event);
00075   
00076   OSoundVectorData *svd = const_cast<OSoundVectorData*>(de->getData());
00077   const short *d = ( const short *)svd->GetData(0);
00078   
00079   if ( ! frame_sz ) { /* we need to initialize _everything_ */
00080     //printf("building vectors for first time..\n");
00081     frame_sz = svd->GetInfo(0)->frameSize;
00082     rate = svd->GetInfo(0)->samplingRate;
00083     //printf("frame_sz %d, rate %d\n",frame_sz,rate);
00084     win_sz = frame_sz * fft_frames;
00085     
00086     left.ReSize(win_sz);
00087     right.ReSize(win_sz);
00088     iml.ReSize(win_sz / 2 + 1);
00089     imr.ReSize(win_sz / 2 + 1);
00090     rel.ReSize(win_sz / 2 + 1);
00091     rer.ReSize(win_sz / 2 + 1);
00092     pol.ReSize(win_sz / 2 + 1);
00093     por.ReSize(win_sz / 2 + 1);
00094     po.ReSize(win_sz / 2 + 1);
00095   }
00096   
00097   //printf("saving audio data to vectors [%u]..\n", cur_frame);
00098   for (i = 0; i != frame_sz; ++i) {
00099     left ((cur_frame * frame_sz) + i + 1) = d[(i<<1)  ];
00100     right((cur_frame * frame_sz) + i + 1) = d[(i<<1)+1];
00101   }
00102   
00103   if (++cur_frame == fft_frames) {
00104     cur_frame = 0;
00105     
00106     hamming(left);
00107     hamming(right);
00108     
00109     //printf("calling fft!\n");
00110     NEWMAT::RealFFT(left, rel, iml);
00111     NEWMAT::RealFFT(right, rer, imr);
00112     for (i = 1; i <= win_sz / 2 + 1; ++i) {
00113       NEWMAT::Real a, b;
00114       a = rel(i);
00115       b = iml(i);
00116       pol(i) = sqrtf(a*a + b*b);
00117       a = rer(i);
00118       b = imr(i);
00119       por(i) = sqrtf(a*a + b*b);
00120       //based on whether stereo info actually used, remove all
00121       //stereo separation altogether or merge even later..
00122       po(i) = (pol(i) + por(i)) / 2;
00123       
00124       //fprintf(fft_file, "[frequency %f] pow[%d] = %f\n", rate * i * 1.0 / win_sz, i, po(i));
00125     }
00126     have_fft = true;
00127     
00128   } else if (cur_frame == 1 && have_fft) { //hack to split processing..
00129     float mean = 0.0f;
00130     unsigned int max = 0; //if we see this value twice.. oops
00131     //turbo slow for now..
00132     //printf("building pitch bins!\n");
00133     local_maxes = 0;
00134     for (i = 0; i != num_pitches; ++i) {
00135       float sigma = pitch_info[i].sigma;
00136       float freq = pitch_info[i].freq;
00137       float bin = 0.0f;
00138       
00139       for (j = 1; j <= win_sz / 2 + 1; ++j)
00140         bin += po(j) * gaussian_pdf(j * rate * 1.f / win_sz, sigma, freq);
00141       mean += (pitch_bin[i] = bin);
00142       
00143       
00144       //prep for global max check
00145       max = (bin > pitch_bin[max]) ? i : max;
00146       
00147       //check if prev a local max
00148       pitch_info[i].local_max = pitch_info[i].global_max = 0.0f;
00149       if (i == 1) {
00150         float prev = pitch_bin[i-1];
00151         if (bin < prev) {
00152           pitch_info[0].local_max = 1.0f - (bin / prev);
00153           ++local_maxes;
00154         }
00155       } else if (i > 1) {
00156         float a = pitch_bin[i-2], b = pitch_bin[i-1], c = pitch_bin[i];
00157         if (b > a && b > c) {
00158           pitch_info[i-1].local_max = 1.0f - (a + c) / (2.f * b);
00159           ++local_maxes;
00160         }
00161       }
00162       if (i == num_pitches - 1) { //intentionally not else-if !
00163         float prev = pitch_bin[i - 1];
00164         if (bin > prev) {
00165           pitch_info[i].local_max = 1.0f - (prev / bin);
00166           ++local_maxes;
00167         }
00168       }
00169     }
00170     mean /= num_pitches;
00171     
00172     pitch_info[max].global_max = 1.0f - mean / pitch_bin[max];
00173     
00174     //final pass through on whether this is a pitch or not.
00175     //compute overtone properties and confidence..
00176     for (i = 0; i < num_pitches; ++i) {
00177       float c, f = 1.0f;
00178       if (i % 4 && is_pitch(confidence(i/4, pitch_bin[i/4])))
00179         f /= 2.0f;
00180       if (i % 3 && is_pitch(confidence(i/3, pitch_bin[i/3])))
00181         f /= 2.0f;
00182       if (i % 3 && is_pitch(confidence(i*2/3, pitch_bin[i*2/3])))
00183         f /= 2.0f;
00184       if (i % 2 && is_pitch(confidence(i/2, pitch_bin[i/2])))
00185         f /= 2.0f;
00186       pitch_info[i].overtone = 1.0f - f;
00187       pitch_info[i].confidence = (c = confidence(i, pitch_bin[i]));
00188       
00189       if (is_pitch(c)) {
00190         //printf("pitch number %u, frequency %f, name %s, confidence %f went on\nstrength %f gmax %f lmax %f otone %f lmaxes %u\n", i, pitch_info[i].freq, pitch_name(i), c, pitch_bin[i],pitch_info[i].global_max, pitch_info[i].local_max, pitch_info[i].overtone, local_maxes);
00191         EventBase::EventTypeID_t type = ( ! pitch_info[i].duration ) ? EventBase::activateETID : EventBase::statusETID;
00192         pitch_info[i].amplitude = (pitch_info[i].amplitude*pitch_info[i].duration + pitch_bin[i]) / (pitch_info[i].duration + 1);
00193         ++pitch_info[i].duration;
00194         erouter->postEvent(PitchEvent(reinterpret_cast<size_t>(this), type, pitch_info[i].freq, pitch_name(i), pitch_bin[i], pitch_info[i].duration*win_sz*1000/rate, c));
00195       } else {
00196         if (pitch_info[i].duration) {
00197           //printf("pitch number %u, frequency %f, name %s, confidence %f, duration %u went off \n",i, pitch_info[i].freq, pitch_name(i), c, pitch_info[i].duration);
00198           erouter->postEvent(PitchEvent(reinterpret_cast<size_t>(this), EventBase::deactivateETID,pitch_info[i].freq, pitch_name(i),pitch_info[i].amplitude,pitch_info[i].duration*win_sz*1000/rate,c));
00199           pitch_info[i].duration = 0;
00200           pitch_info[i].amplitude = 0;
00201         }
00202       }
00203       //fprintf(fft_file, "pitch %d freq %f name %s amp %f dur %u\n\tgmax %f lmax %f otone %f lmaxes %u confidence %f\n",i, pitch_info[i].freq, pitch_name(i),pitch_bin[i], pitch_info[i].duration,pitch_info[i].global_max, pitch_info[i].local_max,pitch_info[i].overtone, local_maxes, c);
00204     }
00205   }
00206   //printf("done with mic event in class Pitch\n");
00207 }
00208 
00209 bool PitchDetector::is_pitch(float conf) {
00210   return (conf >= config->sound.pitchConfidenceThreshold);
00211 }
00212 
00213 
00214 
00215 /*! @file
00216  * @brief Implements PitchDetector, which generates a PitchEvent whenever a notable frequency is detected using FFT
00217  * @author Matus Telgarsky and Jonah Sherman (Creators)
00218  * @author Ethan Tira-Thompson (imported into framework)
00219  *
00220  * Originally written as a part of a final project at Carnegie Mellon (15-494 Cognitive Robotics, Spring 2006)
00221  */

Tekkotsu v5.1CVS
Generated Mon May 9 04:58:46 2016 by Doxygen 1.6.3