Example #1

class PyinMain(object):

    def __init__(self):
        self.m_channels = 0
        self.m_stepSize = 256
        self.m_blockSize = 2048
        self.m_inputSampleRate = 44100
        self.m_fmin = 40
        self.m_fmax = 1600

        self.m_yin = Yin()

        self.m_threshDistr = 2.0
        self.m_outputUnvoiced = 2
        self.m_preciseTime = 0.0
        self.m_lowAmp = 0.1
        self.m_onsetSensitivity = 0.7
        self.m_pruneThresh = 0.1

        self.m_pitchProb = []
        self.m_level = np.array([], dtype=np.float32)

        self.fs = FeatureSet()
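
These fragments use names from the surrounding pYIN package without showing the imports. A minimal sketch of the assumed context (the module paths are hypothetical, inferred from the identifiers; only copy, math.log and numpy are certain):

import copy
from math import log

import numpy as np

from yin import Yin                        # probabilistic YIN front end (hypothetical path)
from yin_util import RMS                   # frame RMS helper (hypothetical path)
from mono_pitch import MonoPitch           # HMM pitch-track decoder (hypothetical path)
from mono_note import MonoNote             # HMM note decoder (hypothetical path)
from features import Feature, FeatureSet   # output containers (hypothetical path)

# With the defaults above: hop = 256 / 44100 ≈ 5.8 ms per step and
# block = 2048 / 44100 ≈ 46.4 ms of audio per analysis frame.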
Example #2
class PyinMain(object):

    def __init__(self):
        self.m_channels = 0
        self.m_stepSize = 256
        self.m_blockSize = 2048
        self.m_inputSampleRate = 44100
        self.m_fmin = 40
        self.m_fmax = 1600

        self.m_yin = Yin()

        self.m_threshDistr = 2.0
        self.m_outputUnvoiced = 2
        self.m_preciseTime = 0.0
        self.m_lowAmp = 0.1
        self.m_onsetSensitivity = 0.7
        self.m_pruneThresh = 0.1

        self.m_pitchProb = []
        self.m_level = np.array([], dtype=np.float32)

        self.fs = FeatureSet()

    def initialise(self, channels=1, inputSampleRate=44100, stepSize=256, blockSize=2048,
                   lowAmp=0.1, onsetSensitivity=0.7, pruneThresh=0.1):

        if channels != 1:
            return False

        self.m_channels = channels
        self.m_inputSampleRate = inputSampleRate
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize

        self.m_lowAmp = lowAmp
        self.m_onsetSensitivity = onsetSensitivity
        self.m_pruneThresh = pruneThresh

        self.reset()

        return True

    def reset(self):

        self.m_yin.setThresholdDistr(self.m_threshDistr)
        self.m_yin.setFrameSize(self.m_blockSize)
        self.m_yin.setFast(not self.m_preciseTime)

        self.m_pitchProb = []  # list of per-frame [MIDI pitch, probability] candidate arrays
        self.m_level = np.array([], dtype=np.float32)

    def process(self, inputBuffers):
        '''
        inputBuffers holds the samples for one frame.
        '''
        # copy into a float64 buffer, zero-padded at the end if the frame is short
        dInputBuffers = np.zeros((self.m_blockSize,), dtype=np.float64)
        n = min(len(inputBuffers), self.m_blockSize)
        dInputBuffers[:n] = inputBuffers[:n]

        rms = RMS(inputBuffers, self.m_blockSize)

        isLowAmplitude = rms < self.m_lowAmp

        yo = self.m_yin.processProbabilisticYin(dInputBuffers)

        self.m_level = np.append(self.m_level, yo.rms)

        '''
        First, get the things out of the way that we don't want to output
        immediately, but instead save for later
        '''
        # collect [MIDI pitch, probability] rows, attenuating the probabilities
        # in low-amplitude frames
        rows = []
        for iCandidate in range(yo.freqProb.shape[0]):
            tempPitch = 12.0 * log(yo.freqProb[iCandidate][0] / 440.0) / log(2.0) + 69.0  # Hz -> MIDI
            prob = yo.freqProb[iCandidate][1]
            if isLowAmplitude:
                prob *= (rms + 0.01 * self.m_lowAmp) / (1.01 * self.m_lowAmp)
            rows.append([tempPitch, prob])
        tempPitchProb = np.array(rows, dtype=np.float64)

        if len(self.m_pitchProb) < 1 and len(tempPitchProb) > 0:
            self.m_pitchProb = [tempPitchProb,]
        elif len(self.m_pitchProb) >= 1:
            self.m_pitchProb = self.m_pitchProb + [tempPitchProb]

        # f0 CANDIDATES
        f = Feature()
        for i in range(yo.freqProb.shape[0]):
            f.values = np.append(f.values, yo.freqProb[i][0])
        self.fs.m_oF0Candidates.append(copy.copy(f))

        f.resetValues()
        voicedProb = 0.0
        for i in range(yo.freqProb.shape[0]):
            f.values = np.append(f.values, yo.freqProb[i][1])
            voicedProb += yo.freqProb[i][1]
        self.fs.m_oF0Probs.append(copy.copy(f))

        f.values = np.append(f.values, voicedProb)
        self.fs.m_oVoicedProb.append(copy.copy(f))

        # SALIENCE -- maybe this should eventually disappear
        f.resetValues()
        salienceSum = 0.0
        for iBin in range(yo.salience.shape[0]):
            f.values = np.append(f.values, yo.salience[iBin])
            salienceSum += yo.salience[iBin]
        self.fs.m_oCandidateSalience.append(copy.copy(f))

        return self.fs
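
    # Note: the candidate conversion in process() is the standard Hz -> MIDI
    # mapping midi = 12 * log2(f / 440) + 69, so 440 Hz -> 69 (A4), 220 Hz -> 57 (A3).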

    def decodePitchTrack(self):
        '''
        Decode the pitch track from the accumulated per-frame pitch
        candidates with Viterbi (MonoPitch HMM).
        '''
        if len(self.m_pitchProb) == 0:
            return self.fs

        # MONO-PITCH STUFF
        mp = MonoPitch()
        mpOut = mp.process(self.m_pitchProb)

        return mpOut

    def setDecodedMonoPitch(self, mpOut):
        '''
        Store the decoded mono pitch track mpOut in self.fs.m_oSmoothedPitchTrack.
        '''
        f = Feature()
        for iFrame in range(len(mpOut)):
            if mpOut[iFrame] < 0 and self.m_outputUnvoiced == 0:  # skip unvoiced frames if they are not wanted
                continue
            f.resetValues()
            if self.m_outputUnvoiced == 1:
                f.values = np.append(f.values, np.fabs(mpOut[iFrame]))  # output the absolute value for unvoiced frames
            else:
                f.values = np.append(f.values, mpOut[iFrame])

            self.fs.m_oSmoothedPitchTrack.append(copy.copy(f))

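    # m_outputUnvoiced controls how unvoiced frames (negative pitch values)
    # appear in the smoothed track: 0 drops them, 1 emits their absolute
    # value, and 2 (the default) keeps the negative values as-is.
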
    def segment_notes(self, pitch_contour, with_bar_positions, bar_position_ts, bar_labels, hop_time, usul_type):
        '''
        Decode note states using the MonoNote probabilistic model.

        Parameters
        ----------------------
        pitch_contour :
            pitch values only (Hz)

        with_bar_positions : bool
            enable metrical-accent-aware detection

        Returns
        -----------------------
        feature set :
            updated with m_oMonoNoteOut, an array of FrameOutput
        MIDI_pitch_contour :
            the contour converted to MIDI pitch
        '''
        
        if len(pitch_contour) == 0:
            return self.fs, np.array([])

        MIDI_pitch_contour_and_prob = np.zeros((len(pitch_contour), 2))
        MIDI_pitch_contour_and_prob[:, 0] = pitch_contour

        ############ convert to the MIDI scale
        mn = MonoNote(STEPS_PER_SEMITONE, NUM_SEMITONES, with_bar_positions, hop_time, usul_type)  # if with_bar_positions is set, use bar-position-dependent annotation

        for iFrame in range(len(pitch_contour)):
            if pitch_contour[iFrame] > 0:  # zero or negative values (silence) keep probability 0 and a negative frequency in Hz
                MIDI_pitch_contour_and_prob[iFrame][0] = 12 * log(pitch_contour[iFrame] / 440.0) / log(2.0) + 69
                MIDI_pitch_contour_and_prob[iFrame][1] = PITCH_PROB  # constant voicing probability = 0.9

        mnOut = mn.process(MIDI_pitch_contour_and_prob, bar_position_ts, bar_labels, hop_time)  # decode note states with Viterbi

        self.fs.m_oMonoNoteOut = mnOut # array of FrameOutput 
        return self.fs, MIDI_pitch_contour_and_prob[:,0] 

    def postprocessPitchTracks(self, MIDI_pitch_contour, mnOut, with_same_pitch_onsets):
        '''
        Post-process the MIDI pitch track:
        1. filter the detected onsets and store them in self.fs.onsetFrames
        2. filter the per-note MIDI pitch tracks (notePitchTracks) and compute their median pitches

        Parameters
        --------------------------
        MIDI_pitch_contour :
            pitch contour in MIDI
        mnOut : array of FrameOutput
            decoded note states
        '''
        f = Feature()
        f.resetValues()
        
        self.fs.onsetFrames = []  # onset frames: where the note state changes from 3 to 1
        isVoiced = 0
        oldIsVoiced = 0
        nFrame = len(MIDI_pitch_contour)
        
        minNoteFrames = (self.m_inputSampleRate*self.m_pruneThresh)/self.m_stepSize # minimum number of frames  per note
        
        notePitchTrack = np.array([], dtype=np.float32)  # collects the pitches of one note at a time

        for iFrame in range(nFrame):

            isVoiced = mnOut[iFrame].noteState < 3 and MIDI_pitch_contour[iFrame] > 0

            is_samepitch_onset = False
            if with_same_pitch_onsets:
                # onset at the same pitch if the frame amplitude ratio exceeds the sensitivity threshold
                is_samepitch_onset = (iFrame >= nFrame - 3) \
                    or (self.m_level[iFrame] / self.m_level[iFrame + 2] > self.m_onsetSensitivity)
                is_samepitch_onset = isVoiced and is_samepitch_onset

            if isVoiced and iFrame != nFrame - 1:  # sanity check
                if oldIsVoiced == 0 or is_samepitch_onset:  # set an onset at the unvoiced-to-voiced transition
                    self.fs.onsetFrames.append(iFrame)

                MIDI_pitch = MIDI_pitch_contour[iFrame]
                notePitchTrack = np.append(notePitchTrack, MIDI_pitch) # add to the note's MIDI_pitch
                
            else: # not currently voiced
                if oldIsVoiced == 1: # end of the note
                    if len(notePitchTrack) >= minNoteFrames:

                        notePitchTrack = np.sort(notePitchTrack)  # sort so the middle element is the median pitch
                        self.fs.m_oNotePitchTracks.append(copy.copy(notePitchTrack)) # store current note pitch track 
                        
                        medianPitch = notePitchTrack[int(len(notePitchTrack)/2)]
                        medianFreq = pow(2, (medianPitch-69)/12)*440
                        f.resetValues()
                        f.values = np.append(f.values, np.double(medianFreq))
                        self.fs.m_oNotes.append(copy.copy(f))  # store the median frequency per note (inefficient, but simple)

                    
                    notePitchTrack = np.array([], dtype=np.float32) # new note starts
            oldIsVoiced = isVoiced

        return self.fs
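
Putting this variant's methods together, a minimal driving sketch; signal (a mono float array at 44100 Hz), the frame loop, and the neutral values passed for the bar/usul parameters are assumptions, not from the source:

pyin = PyinMain()
pyin.initialise(channels=1, inputSampleRate=44100, stepSize=256, blockSize=2048)

for start in range(0, len(signal) - 2048 + 1, 256):   # hypothetical frame loop
    pyin.process(signal[start:start + 2048])

mpOut = pyin.decodePitchTrack()  # Viterbi-decoded pitch per frame (Hz, negative = unvoiced)
pyin.setDecodedMonoPitch(mpOut)

# hop_time and the disabled bar-position arguments are illustrative guesses
fs, midi_contour = pyin.segment_notes(mpOut, False, None, None, 256 / 44100.0, None)
fs = pyin.postprocessPitchTracks(midi_contour, fs.m_oMonoNoteOut, True)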
Example #3
class PyinMain(object):
    def __init__(self):
        self.m_channels = 0
        self.m_stepSize = 256
        self.m_blockSize = 2048
        self.m_inputSampleRate = 44100
        self.m_fmin = 40
        self.m_fmax = 1600

        self.m_yin = Yin()

        self.m_threshDistr = 2.0
        self.m_outputUnvoiced = 2
        self.m_preciseTime = 0.0
        self.m_lowAmp = 0.1
        self.m_onsetSensitivity = 0.7
        self.m_pruneThresh = 0.1

        self.m_pitchProb = []
        self.m_level = np.array([], dtype=np.float32)

        self.fs = FeatureSet()

    def initialise(self,
                   channels=1,
                   inputSampleRate=44100,
                   stepSize=256,
                   blockSize=2048,
                   lowAmp=0.1,
                   onsetSensitivity=0.7,
                   pruneThresh=0.1):

        if channels != 1:
            return False

        self.m_channels = channels
        self.m_inputSampleRate = inputSampleRate
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize

        self.m_lowAmp = lowAmp
        self.m_onsetSensitivity = onsetSensitivity
        self.m_pruneThresh = pruneThresh

        self.reset()

        return True

    def reset(self):

        self.m_yin.setThresholdDistr(self.m_threshDistr)
        self.m_yin.setFrameSize(self.m_blockSize)
        self.m_yin.setFast(not self.m_preciseTime)

        self.m_pitchProb = []  # list of per-frame [MIDI pitch, probability] candidate arrays
        self.m_level = np.array([], dtype=np.float32)

    def process(self, inputBuffers):

        # copy into a float64 buffer, zero-padded at the end if the frame is short
        dInputBuffers = np.zeros((self.m_blockSize,), dtype=np.float64)
        n = min(len(inputBuffers), self.m_blockSize)
        dInputBuffers[:n] = inputBuffers[:n]

        rms = RMS(inputBuffers, self.m_blockSize)

        isLowAmplitude = rms < self.m_lowAmp

        yo = self.m_yin.processProbabilisticYin(dInputBuffers)

        self.m_level = np.append(self.m_level, yo.rms)
        '''
        First, get the things out of the way that we don't want to output
        immediately, but instead save for later
        '''
        # collect [MIDI pitch, probability] rows, attenuating the probabilities
        # in low-amplitude frames
        rows = []
        for iCandidate in range(yo.freqProb.shape[0]):
            tempPitch = 12.0 * log(yo.freqProb[iCandidate][0] / 440.0) / log(2.0) + 69.0  # Hz -> MIDI
            prob = yo.freqProb[iCandidate][1]
            if isLowAmplitude:
                prob *= (rms + 0.01 * self.m_lowAmp) / (1.01 * self.m_lowAmp)
            rows.append([tempPitch, prob])
        tempPitchProb = np.array(rows, dtype=np.float64)

        if len(self.m_pitchProb) < 1 and len(tempPitchProb) > 0:
            self.m_pitchProb = [tempPitchProb]
        elif len(self.m_pitchProb) >= 1:
            self.m_pitchProb = self.m_pitchProb + [tempPitchProb]

        # f0 CANDIDATES
        f = Feature()
        for i in range(yo.freqProb.shape[0]):
            f.values = np.append(f.values, yo.freqProb[i][0])
        self.fs.m_oF0Candidates.append(copy.copy(f))

        f.resetValues()
        voicedProb = 0.0
        for i in range(yo.freqProb.shape[0]):
            f.values = np.append(f.values, yo.freqProb[i][1])
            voicedProb += yo.freqProb[i][1]
        self.fs.m_oF0Probs.append(copy.copy(f))

        f.values = np.append(f.values, voicedProb)
        self.fs.m_oVoicedProb.append(copy.copy(f))

        # SALIENCE -- maybe this should eventually disappear
        f.resetValues()
        salienceSum = 0.0
        for iBin in range(yo.salience.shape[0]):
            f.values = np.append(f.values, yo.salience[iBin])
            salienceSum += yo.salience[iBin]
        self.fs.m_oCandidateSalience.append(copy.copy(f))

        return self.fs

    def getSmoothedPitchTrack(self):
        '''
        Viterbi-decode the accumulated pitch candidates with MonoPitch, store
        the smoothed track in self.fs, and return the per-frame pitch array.
        '''
        f = Feature()

        if len(self.m_pitchProb) == 0:
            return self.fs

        # MONO-PITCH STUFF
        mp = MonoPitch()
        mpOut = mp.process(self.m_pitchProb)
        for iFrame in range(len(mpOut)):
            if mpOut[iFrame] < 0 and self.m_outputUnvoiced == 0:
                continue
            f.resetValues()
            if self.m_outputUnvoiced == 1:
                f.values = np.append(f.values, np.fabs(mpOut[iFrame]))
            else:
                f.values = np.append(f.values, mpOut[iFrame])

            self.fs.m_oSmoothedPitchTrack.append(copy.copy(f))

        return mpOut

    def getRemainingFeatures(self, mpOut):
        '''
        Segment the smoothed pitch track mpOut into notes with MonoNote and
        fill the note-level outputs in self.fs.
        '''
        f = Feature()

        if len(mpOut) == 0:
            return self.fs


        # MONO-NOTE STUFF
        mn = MonoNote()
        smoothedPitch = []
        for iFrame in range(len(mpOut)):
            temp = []
            if mpOut[iFrame] > 0:  # negative value: silence
                tempPitch = 12 * log(mpOut[iFrame] / 440.0) / log(2.0) + 69
                temp += [[tempPitch, 0.9]]
            smoothedPitch += [temp]

        mnOut = mn.process(smoothedPitch)

        self.fs.m_oMonoNoteOut = mnOut

        # turning feature into a note feature

        f.resetValues()

        onsetFrame = 0
        isVoiced = 0
        oldIsVoiced = 0
        nFrame = len(self.m_pitchProb)

        minNoteFrames = (self.m_inputSampleRate *
                         self.m_pruneThresh) / self.m_stepSize

        notePitchTrack = np.array(
            [], dtype=np.float32)  # collects pitches for one note at a time
        for iFrame in range(nFrame):
            isVoiced = mnOut[iFrame].noteState < 3 \
                and len(smoothedPitch[iFrame]) > 0 \
                and (iFrame >= nFrame - 2
                     or (self.m_level[iFrame] / self.m_level[iFrame + 2] > self.m_onsetSensitivity))

            if isVoiced and iFrame != nFrame - 1:
                if oldIsVoiced == 0:  # beginning of the note
                    onsetFrame = iFrame
                pitch = smoothedPitch[iFrame][0][0]
                notePitchTrack = np.append(notePitchTrack,
                                           pitch)  # add to the note's pitch
            else:  # not currently voiced
                if oldIsVoiced == 1:  # end of the note
                    if len(notePitchTrack) >= minNoteFrames:
                        notePitchTrack = np.sort(notePitchTrack)  # sort so the middle element is the median
                        medianPitch = notePitchTrack[int(len(notePitchTrack) / 2)]
                        medianFreq = pow(2, (medianPitch - 69) / 12) * 440
                        f.resetValues()
                        f.values = np.append(f.values, np.double(medianFreq))
                        self.fs.m_oNotes.append(copy.copy(f))
                        self.fs.m_oNotePitchTracks.append(
                            copy.copy(notePitchTrack))
                    notePitchTrack = np.array([], dtype=np.float32)
            oldIsVoiced = isVoiced

        return self.fs
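
The standard variant's call order, again as a sketch under the same assumption that signal is a mono float array at 44100 Hz:

pyin = PyinMain()
pyin.initialise()

for start in range(0, len(signal) - 2048 + 1, 256):
    pyin.process(signal[start:start + 2048])

mpOut = pyin.getSmoothedPitchTrack()   # per-frame pitch in Hz, negative = unvoiced
fs = pyin.getRemainingFeatures(mpOut)  # fills m_oNotes, m_oNotePitchTracks, m_oMonoNoteOut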
Example #4
class PyinMain(object):

    def __init__(self):
        self.m_channels = 0
        self.m_stepSize = 256
        self.m_blockSize = 2048
        self.m_inputSampleRate = 44100
        self.m_fmin = 40
        self.m_fmax = 1600

        self.m_yin = Yin()

        self.m_threshDistr = 2.0
        self.m_outputUnvoiced = 2
        self.m_preciseTime = 0.0
        self.m_lowAmp = 0.1
        self.m_onsetSensitivity = 0.7
        self.m_pruneThresh = 0.1

        self.m_pitchProb = []
        self.m_level = np.array([], dtype=np.float32)

        self.fs = FeatureSet()

    def initialise(self, channels=1, inputSampleRate=44100, stepSize=256, blockSize=2048,
                   lowAmp=0.1, onsetSensitivity=0.7, pruneThresh=0.1):

        if channels != 1:
            return False

        self.m_channels = channels
        self.m_inputSampleRate = inputSampleRate
        self.m_stepSize = stepSize
        self.m_blockSize = blockSize

        self.m_lowAmp = lowAmp
        self.m_onsetSensitivity = onsetSensitivity
        self.m_pruneThresh = pruneThresh

        self.reset()

        return True

    def reset(self):

        self.m_yin.setThresholdDistr(self.m_threshDistr)
        self.m_yin.setFrameSize(self.m_blockSize)
        self.m_yin.setFast(not self.m_preciseTime)

        self.m_pitchProb = []  # list of per-frame [MIDI pitch, probability] candidate arrays
        self.m_level = np.array([], dtype=np.float32)

    def process(self, inputBuffers):

        # copy into a float64 buffer, zero-padded at the end if the frame is short
        dInputBuffers = np.zeros((self.m_blockSize,), dtype=np.float64)
        n = min(len(inputBuffers), self.m_blockSize)
        dInputBuffers[:n] = inputBuffers[:n]

        rms = RMS(inputBuffers, self.m_blockSize)

        isLowAmplitude = rms < self.m_lowAmp

        yo = self.m_yin.processProbabilisticYin(dInputBuffers)

        self.m_level = np.append(self.m_level, yo.rms)

        '''
        First, get the things out of the way that we don't want to output
        immediately, but instead save for later
        '''
        # collect [MIDI pitch, probability] rows, attenuating the probabilities
        # in low-amplitude frames
        rows = []
        for iCandidate in range(yo.freqProb.shape[0]):
            tempPitch = 12.0 * log(yo.freqProb[iCandidate][0] / 440.0) / log(2.0) + 69.0  # Hz -> MIDI
            prob = yo.freqProb[iCandidate][1]
            if isLowAmplitude:
                prob *= (rms + 0.01 * self.m_lowAmp) / (1.01 * self.m_lowAmp)
            rows.append([tempPitch, prob])
        tempPitchProb = np.array(rows, dtype=np.float64)
        if len(self.m_pitchProb) < 1 and len(tempPitchProb) > 0:
            self.m_pitchProb = [tempPitchProb,]
        elif len(self.m_pitchProb) >= 1:
            self.m_pitchProb = self.m_pitchProb + [tempPitchProb]

        # f0 CANDIDATES
        f = Feature()
        for i in range(yo.freqProb.shape[0]):
            f.values = np.append(f.values, yo.freqProb[i][0])
        self.fs.m_oF0Candidates.append(copy.copy(f))

        f.resetValues()
        voicedProb = 0.0
        for i in range(yo.freqProb.shape[0]):
            f.values = np.append(f.values, yo.freqProb[i][1])
            voicedProb += yo.freqProb[i][1]
        self.fs.m_oF0Probs.append(copy.copy(f))

        f.values = np.append(f.values, voicedProb)
        self.fs.m_oVoicedProb.append(copy.copy(f))

        # SALIENCE -- maybe this should eventually disappear
        f.resetValues()
        salienceSum = 0.0
        for iBin in range(yo.salience.shape[0]):
            f.values = np.append(f.values, yo.salience[iBin])
            salienceSum += yo.salience[iBin]
        self.fs.m_oCandidateSalience.append(copy.copy(f))

        return self.fs

    def getSmoothedPitchTrack(self):
        '''
        Viterbi-decode the accumulated pitch candidates with MonoPitch, store
        the smoothed track in self.fs, and return the per-frame pitch array.
        '''
        f = Feature()

        if len(self.m_pitchProb) == 0:
            return self.fs

        # MONO-PITCH STUFF
        mp = MonoPitch()
        mpOut = mp.process(self.m_pitchProb)
        for iFrame in range(len(mpOut)):
            if mpOut[iFrame] < 0 and self.m_outputUnvoiced == 0:
                continue
            f.resetValues()
            if self.m_outputUnvoiced == 1:
                f.values = np.append(f.values, np.fabs(mpOut[iFrame]))
            else:
                f.values = np.append(f.values, mpOut[iFrame])

            self.fs.m_oSmoothedPitchTrack.append(copy.copy(f))

        return mpOut

    def getRemainingFeatures(self, mpOut):
        '''
        Segment the smoothed pitch track mpOut into notes with MonoNote and
        fill the note-level outputs in self.fs.
        '''
        f = Feature()

        if len(mpOut) == 0:
            return self.fs


        # MONO-NOTE STUFF
        mn = MonoNote()
        smoothedPitch = []
        for iFrame in range(len(mpOut)):
            temp = []
            if mpOut[iFrame] > 0:  # negative value: silence
                tempPitch = 12 * log(mpOut[iFrame]/440.0)/log(2.0) + 69
                temp += [[tempPitch, 0.9]]
            smoothedPitch += [temp]

        mnOut = mn.process(smoothedPitch)

        self.fs.m_oMonoNoteOut = mnOut

        # turning feature into a note feature

        f.resetValues()

        onsetFrame = 0
        isVoiced = 0
        oldIsVoiced = 0
        nFrame = len(self.m_pitchProb)

        minNoteFrames = (self.m_inputSampleRate*self.m_pruneThresh)/self.m_stepSize

        notePitchTrack = np.array([], dtype=np.float32) # collects pitches for one note at a time
        for iFrame in range(nFrame):
            isVoiced = mnOut[iFrame].noteState < 3 \
                and len(smoothedPitch[iFrame]) > 0 \
                and (iFrame >= nFrame - 2
                     or (self.m_level[iFrame] / self.m_level[iFrame + 2] > self.m_onsetSensitivity))

            if isVoiced and iFrame != nFrame-1:
                if oldIsVoiced == 0: # beginning of the note
                    onsetFrame = iFrame
                pitch = smoothedPitch[iFrame][0][0]
                notePitchTrack = np.append(notePitchTrack, pitch) # add to the note's pitch
            else: # not currently voiced
                if oldIsVoiced == 1: # end of the note
                    if len(notePitchTrack) >= minNoteFrames:
                        notePitchTrack = np.sort(notePitchTrack)  # sort so the middle element is the median
                        medianPitch = notePitchTrack[int(len(notePitchTrack) / 2)]
                        medianFreq = pow(2, (medianPitch-69)/12)*440
                        f.resetValues()
                        f.values = np.append(f.values, np.double(medianFreq))
                        self.fs.m_oNotes.append(copy.copy(f))
                        self.fs.m_oNotePitchTracks.append(copy.copy(notePitchTrack))
                    notePitchTrack = np.array([], dtype=np.float32)
            oldIsVoiced = isVoiced

        return self.fs
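
Two constants in the note loop above are easy to sanity-check by hand; a self-contained sketch (pure arithmetic, no project code):

from math import log2

# minimum note length: 44100 Hz * 0.1 s prune threshold / 256-sample hop
print(44100 * 0.1 / 256)              # 17.226... frames, so notes under ~100 ms are pruned

# the Hz <-> MIDI mapping used throughout: midi = 12 * log2(f / 440) + 69
print(12 * log2(440.0 / 440.0) + 69)  # 69.0 (A4)
print(2 ** ((69 - 69) / 12.0) * 440)  # 440.0 Hz, the inverse used for medianFreq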