Gist
A Real-Time Audio Analysis Library
 All Classes Files Functions Pages
gist.h
Go to the documentation of this file.
1 //=======================================================================
22 //=======================================================================
23 
24 
25 #ifndef __GISTHEADER__
26 #define __GISTHEADER__
27 
28 //=======================================================================
29 // core
32 
33 // onset detection functions
35 
36 // pitch detection
37 #include "pitch/Yin.h"
38 
39 // MFCC
40 #include "mfcc/MFCC.h"
41 
42 // fft
43 #include "fftw3.h"
44 
45 //=======================================================================
47 template <class T>
48 class Gist
49 {
50 public:
51 
56  Gist(int frameSize_,int sampleRate_) :fftConfigured(false), onsetDetectionFunction(frameSize_), yin(sampleRate_), mfcc(frameSize_,sampleRate_)
57  {
58  setAudioFrameSize(frameSize_);
59  }
60 
63  {
64  if (fftConfigured)
65  {
66  freeFFT();
67  }
68  }
69 
73  void setAudioFrameSize(int frameSize_)
74  {
75  frameSize = frameSize_;
76 
77  audioFrame.resize(frameSize);
78  fftReal.resize(frameSize);
79  fftImag.resize(frameSize);
80  magnitudeSpectrum.resize(frameSize/2);
81 
82  configureFFT();
83 
84  onsetDetectionFunction.setFrameSize(frameSize);
85  mfcc.setFrameSize(frameSize);
86  }
87 
91  void processAudioFrame(std::vector<T> audioFrame_)
92  {
93  audioFrame = audioFrame_;
94 
95  performFFT();
96  }
97 
102  void processAudioFrame(T *buffer,unsigned long numSamples)
103  {
104  audioFrame.assign(buffer,buffer + numSamples);
105 
106  performFFT();
107  }
108 
111  std::vector<T> getMagnitudeSpectrum()
112  {
113  return magnitudeSpectrum;
114  }
115 
116  //================= CORE TIME DOMAIN FEATURES =================
117 
122  {
123  return coreTimeDomainFeatures.rootMeanSquare(audioFrame);
124  }
125 
130  {
131  return coreTimeDomainFeatures.peakEnergy(audioFrame);
132  }
133 
138  {
139  return coreTimeDomainFeatures.zeroCrossingRate(audioFrame);
140  }
141 
142  //=============== CORE FREQUENCY DOMAIN FEATURES ==============
143 
148  {
149  return coreFrequencyDomainFeatures.spectralCentroid(magnitudeSpectrum);
150  }
151 
156  {
157  return coreFrequencyDomainFeatures.spectralCrest(magnitudeSpectrum);
158  }
159 
164  {
165  return coreFrequencyDomainFeatures.spectralFlatness(magnitudeSpectrum);
166  }
167 
168  //================= ONSET DETECTION FUNCTIONS =================
169 
174  {
175  return onsetDetectionFunction.energyDifference(audioFrame);
176  }
177 
182  {
183  return onsetDetectionFunction.spectralDifference(magnitudeSpectrum);
184  }
185 
190  {
191  return onsetDetectionFunction.spectralDifferenceHWR(magnitudeSpectrum);
192  }
193 
198  {
199  return onsetDetectionFunction.complexSpectralDifference(fftReal,fftImag);
200  }
201 
206  {
207  return onsetDetectionFunction.highFrequencyContent(magnitudeSpectrum);
208  }
209 
210  //=========================== PITCH ============================
211 
216  {
217  return yin.pitchYin(audioFrame);
218  }
219 
220  //=========================== MFCCs =============================
221 
225  std::vector<T> melFrequencySpectrum()
226  {
227  return mfcc.melFrequencySpectrum(magnitudeSpectrum);
228  }
229 
234  {
235  return mfcc.melFrequencyCepstralCoefficients(magnitudeSpectrum);
236  }
237 
238 
239 private:
240 
241  //=======================================================================
242 
244  void configureFFT()
245  {
246  if (fftConfigured)
247  {
248  freeFFT();
249  }
250 
251  // initialise the fft time and frequency domain audio frame arrays
252  fftIn = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * frameSize); // complex array to hold fft data
253  fftOut = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * frameSize); // complex array to hold fft data
254 
255  // FFT plan initialisation
256  p = fftw_plan_dft_1d(frameSize, fftIn, fftOut, FFTW_FORWARD, FFTW_ESTIMATE);
257 
258  fftConfigured = true;
259  }
260 
262  void freeFFT()
263  {
264  // destroy fft plan
265  fftw_destroy_plan(p);
266 
267  fftw_free(fftIn);
268  fftw_free(fftOut);
269  }
270 
271 
273  void performFFT()
274  {
275  // copy samples from audio frame
276  for (int i = 0;i < frameSize;i++)
277  {
278  fftIn[i][0] = (double) audioFrame[i];
279  fftIn[i][1] = (double) 0.0;
280  }
281 
282  // perform the FFT
283  fftw_execute(p);
284 
285  // store real and imaginary parts of FFT
286  for (int i = 0;i < frameSize;i++)
287  {
288  fftReal[i] = (T) fftOut[i][0];
289  fftImag[i] = (T) fftOut[i][1];
290  }
291 
292  // calculate the magnitude spectrum
293  for (int i = 0;i < frameSize/2;i++)
294  {
295  magnitudeSpectrum[i] = sqrt(pow(fftReal[i],2) + pow(fftImag[i],2));
296  }
297 
298  }
299 
300  //=======================================================================
301 
302  fftw_plan p;
303  fftw_complex *fftIn;
304  fftw_complex *fftOut;
306  int frameSize;
308  std::vector<T> audioFrame;
309  std::vector<T> fftReal;
310  std::vector<T> fftImag;
311  std::vector<T> magnitudeSpectrum;
313  bool fftConfigured;
314 
316  CoreTimeDomainFeatures<T> coreTimeDomainFeatures;
317 
319  CoreFrequencyDomainFeatures<T> coreFrequencyDomainFeatures;
320 
322  OnsetDetectionFunction<T> onsetDetectionFunction;
323 
325  Yin<T> yin;
326 
328  MFCC<T> mfcc;
329 };
330 
331 
332 #endif
T pitchYin()
Calculates monophonic pitch according to the Yin algorithm.
Definition: gist.h:215
std::vector< T > getMagnitudeSpectrum()
Gist automatically calculates the magnitude spectrum when processAudioFrame() is called, this function returns it.
Definition: gist.h:111
Implementations of onset detection functions.
T spectralDifferenceHWR()
Calculates the half-wave rectified spectral difference onset detection function sample for the magnitude spectrum...
Definition: gist.h:189
template class for the pitch detection algorithm Yin.
Definition: Yin.h:34
~Gist()
Destructor.
Definition: gist.h:62
std::vector< T > melFrequencyCepstralCoefficients()
Calculates Mel Frequency Cepstral Coefficients.
Definition: gist.h:233
void setAudioFrameSize(int frameSize_)
Set the audio frame size.
Definition: gist.h:73
T spectralCrest()
Calculates the spectral crest.
Definition: gist.h:155
T zeroCrossingRate()
Calculates the zero crossing rate of the currently stored audio frame.
Definition: gist.h:137
void processAudioFrame(std::vector< T > audioFrame_)
Process an audio frame.
Definition: gist.h:91
T energyDifference()
Calculates the energy difference onset detection function sample for the currently stored audio frame.
Definition: gist.h:173
template class for calculating common time domain audio features.
Definition: CoreTimeDomainFeatures.h:34
Implementation of the YIN pitch detection algorithm (de Cheveigné and Kawahara,2002) ...
T rootMeanSquare()
Calculates the root mean square (RMS) of the currently stored audio frame.
Definition: gist.h:121
T peakEnergy()
Calculates the peak energy of the currently stored audio frame.
Definition: gist.h:129
template class for calculating common frequency domain audio features.
Definition: CoreFrequencyDomainFeatures.h:35
void processAudioFrame(T *buffer, unsigned long numSamples)
Process an audio frame.
Definition: gist.h:102
T spectralCentroid()
Calculates the spectral centroid from the magnitude spectrum.
Definition: gist.h:147
T spectralFlatness()
Calculates the spectral flatness from the magnitude spectrum.
Definition: gist.h:163
Implementations of common time domain audio features.
std::vector< T > melFrequencySpectrum()
Calculates the Mel Frequency Spectrum.
Definition: gist.h:225
T spectralDifference()
Calculates the spectral difference onset detection function sample for the magnitude spectrum frame...
Definition: gist.h:181
Calculates Mel Frequency Cepstral Coefficients.
T complexSpectralDifference()
Calculates the complex spectral difference onset detection function sample from the real and imaginary parts of the FFT of the current frame.
Definition: gist.h:197
T highFrequencyContent()
Calculates the high frequency content onset detection function sample for the magnitude spectrum fram...
Definition: gist.h:205
Class for performing all Gist audio analyses.
Definition: gist.h:48
Implementations of common frequency domain audio features.
Template class for calculating Mel Frequency Cepstral Coefficients. Instantiations of the class should...
Definition: MFCC.h:35
Gist(int frameSize_, int sampleRate_)
Constructor.
Definition: gist.h:56
Template class for calculating onset detection functions. Instantiations of the class should be of eit...
Definition: OnsetDetectionFunction.h:36