Example #1
0
def extract_feature(path, states):
    """Recursively scan *path* for .wav files and build a dataset of
    acoustic features plus forced-alignment targets.

    Args:
        path: root directory searched recursively for .wav files.
        states: ordered list of state names; each target is stored as
            its index into this list.

    Returns:
        np.ndarray of dicts with keys 'filename', 'lmfcc', 'mspec'
        and 'targets'.
    """
    dataset = []
    for dirpath, _dirs, filenames in os.walk(path):
        for name in filenames:
            if not name.endswith('.wav'):
                continue
            filename = os.path.join(dirpath, name)
            samples, samplingrate = loadAudio(filename)
            print(filename + '... ', end='')

            # Feature extraction (=> inputs for DNN).
            lmfcc = mfcc(samples)
            mspec_feat = mspec(samples)

            # Forced alignment (=> targets for DNN).
            # The word transcription is encoded in the filename.
            wordTrans = list(path2info(filename)[2])
            # Word transcription => phone transcription.
            phoneTrans = words2phones(wordTrans, prondict)
            aligned = forcedAlignment(lmfcc, phoneHMMs, phoneTrans)
            # Store targets as indices into the state list.
            targets = np.array([states.index(t) for t in aligned])

            dataset.append({
                'filename': filename,
                'lmfcc': lmfcc,
                'mspec': mspec_feat,
                'targets': targets
            })
            print('done')
    return np.array(dataset)
def q5_feature_correlation():
    """Plot the feature-dimension correlation matrices of the MFCC and
    mel-spectrum features pooled over every utterance in ``data_dict``."""
    mfcc_frames = [mfcc(utt["samples"]) for utt in data_dict]
    mspec_frames = [mspec(utt["samples"]) for utt in data_dict]

    # Stack all utterances along the time axis: (total_frames, n_dims).
    mfcc_all = np.vstack(mfcc_frames)
    mspec_all = np.vstack(mspec_frames)

    # rowvar=False: correlate feature dimensions (columns), not frames.
    mfcc_cor = np.corrcoef(mfcc_all, rowvar=False)
    mspec_cor = np.corrcoef(mspec_all, rowvar=False)
    plot_p_color_mesh(mfcc_cor, 'MxM Mfcc correlations')
    plot_p_color_mesh(mspec_cor, 'MxM mspec correlations')
Example #3
0
def concat_all_features(data, feature="mfcc"):
    """Extract the requested feature for every utterance and stack the
    results into one (total_frames, n_dims) array.

    Args:
        data: iterable of dicts with keys 'samples' and 'samplingrate'.
        feature: which feature to extract, "mfcc" or "mspec".

    Returns:
        np.ndarray of all frames stacked along axis 0, or None when
        *data* is empty (matches the original behavior).

    Raises:
        ValueError: if *feature* is neither "mfcc" nor "mspec".
    """
    # Raise instead of assert so validation survives `python -O`.
    if feature not in ("mfcc", "mspec"):
        raise ValueError('feature must be "mfcc" or "mspec"')
    extractor = mfcc if feature == "mfcc" else mspec

    # Collect per-utterance arrays and concatenate once at the end:
    # the original concatenated inside the loop, which is O(n^2) in the
    # total number of frames.
    chunks = [extractor(d['samples'], samplingrate=d['samplingrate'])
              for d in data]
    if not chunks:
        return None
    return np.concatenate(chunks, axis=0)
def feature_extraction_and_force_alignment(filepath, nstates, phoneHMMs):
    """Handle one .wav file: extract lmfcc/mspec features and the
    forced-alignment state targets.

    Args:
        filepath: path to a .wav file whose name encodes the word
            transcription (recovered via path2info).
        nstates: dict mapping phone name -> number of HMM states.
        phoneHMMs: dict of phone HMM models used for the alignment.

    Returns:
        (lmfcc_result, mspec_result, targets) where targets is a list
        of per-frame state names such as 'ah_0'.
    """
    # Fixes vs original: consistent 4-space indentation (was 3), dead
    # `targets = []` assignment removed, trailing whitespace dropped.
    samples, samplingrate = loadAudio(filepath)
    wordTrans = list(path2info(filepath)[2])
    phoneTrans = words2phones(wordTrans, prondict)
    # Expand each phone into its per-state labels,
    # e.g. ah -> 'ah_0', 'ah_1', 'ah_2'.
    stateTrans = [phone + '_' + str(stateid)
                  for phone in phoneTrans
                  for stateid in range(nstates[phone])]
    lmfcc_result = mfcc(samples)
    mspec_result = mspec(samples)

    # The Viterbi path holds, per frame, an index into stateTrans.
    _, viterbi_path = forcedAlignment(lmfcc_result, phoneHMMs, phoneTrans)
    targets = [stateTrans[idx] for idx in viterbi_path.astype(np.int16)]

    return lmfcc_result, mspec_result, targets
Example #5
0
    # NOTE(review): this span is the interior of a function whose `def`
    # line lies outside this chunk; the code below is left byte-identical.
    # split a state by the number of states in its HMM.
    # E.g. # of states of ah = 3; ah -> ['ah_0', 'ah_1', 'ah_2']
    stateList = list()
    for ph in phoneHMMs.keys():
        for i in range(nstates[ph]):
            stateList.append('%s_%d' % (ph, i))
    # --------------------------------------------------------------
    # Walk the target folder and build one record per .wav utterance.
    data = list()
    for root, dirs, files in walk(folder_to_extract):
        for f in tqdm(files):
            if not f.endswith('.wav'):
                continue
            # do our work
            filename = os.path.join(root, f)
            sample, srate = loadAudio(filename)
            # Features: mel filterbank (mspec) and liftered MFCCs (lmfcc).
            mspec_x = mspec(sample, samplingrate=srate)
            lmfcc_x = mfcc(sample, samplingrate=srate)
            # The word transcription is encoded in the filename
            # (recovered via path2info).
            wordTrans = list(path2info(filename)[2])
            phoneTrans = words2phones(wordTrans, prondict)
            targets = forcedAlignment(lmfcc_x, phoneHMMs, phoneTrans)
            # convert the targets from str to int
            idx_targets = [stateList.index(t) for t in targets]
            data.append({
                'filename': filename,
                'lmfcc': lmfcc_x,
                'mspec': mspec_x,
                'targets': idx_targets
            })

    # Save the records under a key named after data_type
    # (presumably 'train'/'test' — TODO confirm against the caller).
    kwargs = {data_type: data}
    np.savez(dump_file_name, **kwargs)
# Verify each stage of the MFCC pipeline against the reference `example`
# dict, plotting the stage only when it matches within atol=1e-08.
# NOTE(review): `fft_`, `sampling_rate` and `example` are defined earlier
# in the original script, outside this chunk.
logMel = logMelSpectrum(fft_, sampling_rate, 512)
if (np.allclose(example['mspec'], logMel, atol=1e-08)):
    customPlot(logMel, 'mspec:Mel Filterbank', True)
# Cepstral transform keeping the first 13 coefficients.
mfcc_ = cepstrum(logMel, 13)
if (np.allclose(example['mfcc'], mfcc_, atol=1e-08)):
    customPlot(mfcc_, 'mfcc:MFCCs', True)
# Liftering re-weights the cepstral coefficients.
lmfcc_ = lifter(mfcc_)
if (np.allclose(example['lmfcc'], lmfcc_, atol=1e-08)):
    customPlot(lmfcc_, 'lmfcc:Liftered MFCCs', True)

from lab1_proto import mfcc, mspec
# Load the precomputed utterance data and pool features over all
# utterances. Fixes vs original: np.append inside the loop reallocated
# the whole array each iteration (O(n^2)); collect per-utterance arrays
# and stack once instead. Also give each plot its own figure — the
# second pcolormesh previously drew over the first.
data = np.load('lab1_data.npz', allow_pickle=True)['data']
mfcc_parts = []
mspec_parts = []
for i in range(data.shape[0]):
    samples = data[i]['samples']
    mfcc_parts.append(mfcc(samples))
    mspec_parts.append(mspec(samples))
data_mfcc = np.vstack(mfcc_parts)
data_mspec = np.vstack(mspec_parts)

# np.corrcoef treats rows as variables, so transpose to correlate the
# feature dimensions (columns) across all frames.
plt.figure()
plt.pcolormesh(np.corrcoef(data_mfcc.T))
plt.figure()
plt.pcolormesh(np.corrcoef(data_mspec.T))

# Fit a 4-component Gaussian mixture to the pooled MFCC frames and
# assign each frame to its most likely component.
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=4).fit(data_mfcc)
labels = gmm.predict(data_mfcc)

from sklearn.manifold import TSNE