Example #1
def test_norm_amp(base_filenames):
    for f in base_filenames:
        print(f)
        if f.startswith('silence'):
            continue
        wavpath = f + '.wav'
        mfcc = Mfcc(wavpath,
                    min_freq=0,
                    max_freq=8000,
                    num_coeffs=1,
                    win_len=0.025,
                    time_step=0.01,
                    num_filters=20,
                    use_power=True)
        mfcc.norm_amp([(0, 1)])
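
Examples #1, #5 and #6 take a `base_filenames` argument and use `loadmat` and `assert_array_almost_equal` that are defined elsewhere in the test suite. The sketch below shows one way that scaffolding could look; the fixture contents, `TEST_DIR`, and the file names are illustrative assumptions, not part of the original project, and `Mfcc` itself would be imported from the project under test (its import path is not shown in the examples).

# Hypothetical scaffolding assumed by Examples #1, #5 and #6 (not part of
# the original code): a pytest fixture supplying base file names plus the
# imports the assertions need.
import os

import pytest
from scipy.io import loadmat
from numpy.testing import assert_array_almost_equal

TEST_DIR = os.path.dirname(__file__)  # directory holding the .wav/.mat pairs

@pytest.fixture
def base_filenames():
    # Base paths without extension; names starting with 'silence' are
    # skipped by the tests. These names are illustrative only.
    names = ['vowel_a', 'vowel_i', 'silence']
    return [os.path.join(TEST_DIR, n) for n in names]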
Example #2
 def get_features(self, path):
     mfcc = Mfcc(path, self.freq_lims, self._num_coeffs, 0.025,
                 0.01, num_filters=26, use_power=True, deltas=True)
     pitch = Pitch(path, self.time_step, (75, 600))
     pitch.process()
     harmonicity = Harmonicity(path, self.time_step, 75)
     harmonicity.process()
     return mfcc, pitch, harmonicity
Example #3
 def test(self):
     for f in filenames:
         print(f)
         if f.startswith('silence'):
             continue
         wavpath = os.path.join(TEST_DIR, f + '.wav')
         matpath = os.path.join(TEST_DIR, f + '_mfcc.mat')
         if not os.path.exists(matpath):
             continue
         m = loadmat(matpath)
         mfcc = Mfcc(wavpath, self.freq_lims, self.numCC, self.winLen,
                     self.timeStep, num_filters=self.num_filters,
                     use_power=True)
         pspec, aspec = mfcc.process(debug=True)
         #assert_array_almost_equal(m['pspectrum'].T,pspec,decimal=4)
         #assert_array_almost_equal(m['aspectrum'].T,aspec,decimal=4)
         assert_array_almost_equal(m['cepstra'].T, mfcc.to_array())
Example #4
 def test(self):
     for f in filenames:
         print(f)
         if f.startswith('silence'):
             continue
         wavpath = os.path.join(TEST_DIR, f + '.wav')
         matpath = os.path.join(TEST_DIR, f + '_mfcc.mat')
         if not os.path.exists(matpath):
             continue
         m = loadmat(matpath)
         mfcc = Mfcc(wavpath,
                     self.freq_lims,
                     self.numCC,
                     self.winLen,
                     self.timeStep,
                     num_filters=self.num_filters,
                     use_power=True)
         pspec, aspec = mfcc.process(debug=True)
         #assert_array_almost_equal(m['pspectrum'].T,pspec,decimal=4)
         #assert_array_almost_equal(m['aspectrum'].T,aspec,decimal=4)
         assert_array_almost_equal(m['cepstra'].T, mfcc.to_array())
Example #5
def test(base_filenames):
    for f in base_filenames:
        print(f)
        if f.startswith('silence'):
            continue
        wavpath = f + '.wav'
        matpath = f + '_mfcc.mat'
        if not os.path.exists(matpath):
            continue
        m = loadmat(matpath)
        mfcc = Mfcc(wavpath,
                    min_freq=0,
                    max_freq=8000,
                    num_coeffs=13,
                    win_len=0.025,
                    time_step=0.01,
                    num_filters=20,
                    use_power=True)
        mfcc.process()
        #assert_array_almost_equal(m['pspectrum'].T,pspec,decimal=4)
        #assert_array_almost_equal(m['aspectrum'].T,aspec,decimal=4)
        assert_array_almost_equal(m['cepstra'].T, mfcc.to_array())
Example #6
def test_deltas(base_filenames):
    for f in base_filenames:
        print(f)
        if f.startswith('silence'):
            continue
        wavpath = f + '.wav'
        mfcc = Mfcc(wavpath,
                    min_freq=0,
                    max_freq=8000,
                    num_coeffs=13,
                    win_len=0.025,
                    time_step=0.01,
                    num_filters=20,
                    use_power=False,
                    deltas=True)
Example #7
    max_thresh = 0.05
    segs = []

    for interval in word_tier:
        if interval.mark == '':
            continue
        print(interval.mark, interval.minTime, interval.maxTime)
        outpath = os.path.join(temp_wav_dir, interval.mark + '.wav')
        extract_audio(wav_path,
                      outpath,
                      interval.minTime,
                      interval.maxTime,
                      padding=padding)
        rep = Mfcc(outpath,
                   freq_lims=(80, 7800),
                   num_coeffs=12,
                   win_len=0.025,
                   time_step=0.01)
        rep.is_windowed = True
        duration = interval.maxTime - interval.minTime
        thresh = unnorm(norm(duration, min_duration, max_duration), min_thresh,
                        max_thresh)
        rep.segment(threshold=thresh)
        print(sorted(rep._segments.keys()))
        padded_begin = interval.minTime - padding
        if padded_begin < 0:
            padded_begin = 0
        for k in sorted(rep._segments.keys()):
            with open(os.path.join(temp_mfcc_dir, '{}.mfcc'.format(seg_ind)),
                      'wb') as fh:
                pickle.dump(rep[k[0], k[1]], fh)
Example #8
            continue
        durations.append(interval.maxTime - interval.minTime)
    max_duration = max(durations)
    min_duration = min(durations)

    min_thresh = 0.01
    max_thresh = 0.05
    segs = []

    for interval in word_tier:
        if interval.mark == '':
            continue
        print(interval.mark, interval.minTime, interval.maxTime)
        outpath = os.path.join(temp_wav_dir, interval.mark + '.wav')
        extract_audio(wav_path, outpath, interval.minTime, interval.maxTime, padding=padding)
        rep = Mfcc(outpath, freq_lims=(80, 7800), num_coeffs=12, win_len=0.025, time_step=0.01)
        rep.is_windowed = True
        duration = interval.maxTime - interval.minTime
        thresh = unnorm(norm(duration, min_duration, max_duration), min_thresh, max_thresh)
        rep.segment(threshold=thresh)
        print(sorted(rep._segments.keys()))
        padded_begin = interval.minTime - padding
        if padded_begin < 0:
            padded_begin = 0
        for k in sorted(rep._segments.keys()):
            with open(os.path.join(temp_mfcc_dir, '{}.mfcc'.format(seg_ind)), 'wb') as fh:
                pickle.dump(rep[k[0], k[1]], fh)
            with open(os.path.join(temp_mean_dir, '{}.mean'.format(seg_ind)), 'wb') as fh:
                pickle.dump(rep._segments[k], fh)
            segs.append(str(seg_ind))
            seg_ind += 1
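
Examples #7 and #8 call `norm` and `unnorm` without showing them; judging only from the call sites, they behave like plain min-max scaling helpers that map a word's duration onto a segmentation threshold between `min_thresh` and `max_thresh`. A minimal sketch under that assumption (not the project's actual implementation):

def norm(value, min_val, max_val):
    # Scale value from [min_val, max_val] to [0, 1]
    if max_val == min_val:
        return 0.0
    return (value - min_val) / (max_val - min_val)

def unnorm(scaled, min_val, max_val):
    # Map a [0, 1] value back onto [min_val, max_val]
    return min_val + scaled * (max_val - min_val)

# With these definitions the threshold grows linearly with word duration:
# the shortest word gets min_thresh and the longest gets max_thresh.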
Example #9
    def test(self, debug=False, dialect='timit', norm_amp=True, classifier='hmm'):
        print('begin testing')
        # Assumes a TIMIT- or Buckeye-style corpus layout for testing
        if debug:
            # Start each run with a fresh debug log
            with open('debug.txt', 'w') as f:
                pass
        words = []
        phones = []
        wavs = []
        if dialect == 'timit':
            path = TIMIT_DIR
            test_dir = os.path.join(path, 'TEST')
            segment_set = TIMIT_SEGMENT_SET
            wrdExt = '.wrd'
            phnExt = '.phn'
        elif dialect == 'buckeye':
            test_dir = BUCKEYE_DIR
            segment_set = BUCKEYE_SEGMENT_SET
            wrdExt = '.words'
            phnExt = '.phones'
        for root, subdirs, files in os.walk(test_dir):
            for f in files:
                if f.lower().endswith(wrdExt):
                    words.append(os.path.join(root, f))
                elif f.lower().endswith(phnExt):
                    phones.append(os.path.join(root, f))
                elif f.lower().endswith('.wav'):
                    wavs.append(os.path.join(root, f))
        dialogs = align_dialog_info(words, phones, wavs)
        hits = 0
        total = 0
        cats = sorted(self._priors.keys())
        cats2ind = {x: i for i, x in enumerate(cats)}
        conf_matrix = np.zeros((len(cats), len(cats)))

        for (i, (d, info)) in enumerate(dialogs.items()):
            print(i, len(dialogs))
            mfcc = Mfcc(info['wav'], (80, 7800), self._num_coeffs, 0.025,
                        0.01, num_filters=26, use_power=True)
            if norm_amp:
                mfcc.norm_amp(*self._ranges[0])
            phones = read_phones(info['phones'], sr=mfcc.sampling_rate, dialect=dialect)
            if debug:
                with open('debug.txt', 'a') as f:
                    f.write('{}\n{}\n\n'.format(info['wav'], ' '.join([x[0] for x in phones])))
            for p in phones:
                #if p[0] in segment_set['V']:
                #    phone_class = 'V'
                #else:
                #    phone_class = 'NV'
                # Find which class this phone belongs to; the for/else
                # skips phones that are not in any class of segment_set
                for k, v in segment_set.items():
                    if p[0] in v:
                        phone_class = k
                        break
                else:
                    continue
                if debug:
                    with open('debug.txt', 'a') as f:
                        f.write('{}\n\n'.format(p))
                coeffs = mfcc[p[1], p[2]]
                for frame in coeffs:
                    predicted = self.predict(frame)
                    # Confusion matrix: rows are true classes, columns are predictions
                    colInd = cats2ind[predicted]
                    rowInd = cats2ind[phone_class]
                    conf_matrix[rowInd, colInd] += 1
                    if predicted == phone_class:
                        hits += 1
                    total += 1
                    if debug:
                        with open('debug.txt', 'a') as f:
                            f.write('{}\t{}\t{}\n'.format(phone_class, predicted,
                                                          ' '.join(map(str, frame))))
            print('Current accuracy:', hits / total)
        print('Accuracy:', hits / total)
        print(cats)
        print(conf_matrix)
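
Example #9 prints the raw confusion matrix; a small addition (not in the original) that can make it easier to read is row-normalizing it into per-class recall:

import numpy as np

def per_class_recall(conf_matrix):
    # Rows hold the true classes, columns the predictions (as built above),
    # so dividing each row by its total gives the recall for that class.
    row_totals = conf_matrix.sum(axis=1, keepdims=True)
    return np.divide(conf_matrix, row_totals,
                     out=np.zeros_like(conf_matrix, dtype=float),
                     where=row_totals > 0)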