Exemplo n.º 1
0
def extract_LLD_from_audio(audio, fs):
    """Extract low-level descriptors (LLDs) and their summaries from a signal.

    Args:
        audio: Audio samples; handed to librosa as ``y``.
        fs: Sampling rate of ``audio``; handed to librosa as ``sr``.

    Returns:
        A 6-tuple of NumPy arrays, in this order:

        * ``mfcc`` -- MFCC matrix, transposed relative to librosa's output.
        * ``mfcc_hsf`` -- result of ``extract_HSF`` applied to ``mfcc``.
        * ``lpc`` -- order-16 linear prediction coefficients.
        * ``spect`` -- mel-spectrogram in dB (referenced to its maximum),
          transposed relative to librosa's output.
        * ``spect_hsf`` -- result of ``extract_HSF`` applied to ``spect``.
        * ``[f0, hnr]`` -- first element of ``get_F_0``'s result and the
          value returned by ``get_HNR``.
    """
    # MFCC
    # y/sr are passed by keyword: recent librosa releases made them
    # keyword-only, and the keyword form is accepted by older releases too.
    mfcc = librosa.feature.mfcc(y=audio,
                                sr=fs,
                                n_fft=N_FFT,
                                hop_length=HOP_LENGTH,
                                center=False).transpose()
    mfcc_hsf = extract_HSF(mfcc)

    # LPC (``order`` is keyword-only in recent librosa releases)
    lpc = librosa.lpc(audio, order=16)

    # Mel-Spectrogram, framed with the same n_fft / hop_length / center
    # settings as the MFCC call above.
    spect = librosa.feature.melspectrogram(y=audio,
                                           sr=fs,
                                           n_fft=N_FFT,
                                           hop_length=HOP_LENGTH,
                                           center=False)
    spect = librosa.power_to_db(spect, ref=np.max).transpose()
    spect_hsf = extract_HSF(spect)

    # Other features
    f0 = get_F_0(audio, fs)[0]
    hnr = get_HNR(audio, fs)

    return (np.asarray(mfcc), np.asarray(mfcc_hsf), np.asarray(lpc),
            np.asarray(spect), np.asarray(spect_hsf), np.asarray([f0, hnr]))
Exemplo n.º 2
0
def sa_featurize(audiofile):
    """Compute Signal_Analysis voice features for a single audio file.

    The file is loaded with librosa, then the fundamental frequency,
    harmonics-to-noise ratio, pulses per second and every jitter measure
    returned by ``get_Jitter`` are collected. The duration and the
    label -> value mapping are printed as a side effect.

    Feature definitions follow the Signal_Analysis docs:
    https://brookemosby.github.io/Signal_Analysis/Signal_Analysis.features.html#module-Signal_Analysis.features.signal

    Args:
        audiofile: Path of the audio file to load.

    Returns:
        ``(features, labels)`` -- two parallel lists; ``labels[i]`` names
        ``features[i]``.
    """
    samples, rate = librosa.core.load(audiofile)
    duration = len(samples) / rate
    print(duration)

    fundamental = get_F_0(samples, rate)[0]
    harmonics_to_noise = get_HNR(samples, rate)
    jitter = get_Jitter(samples, rate)
    # Pulse count is normalised by the clip length to give a rate.
    pulses_per_sec = len(get_Pulses(samples, rate)) / duration

    labels = ['FundamentalFrequency', 'HarmonicstoNoiseRatio', 'PulsesPerSec']
    features = [fundamental, harmonics_to_noise, pulses_per_sec]
    # Jitter keys and values are appended in the dict's own iteration order,
    # so labels and features stay aligned.
    labels.extend(jitter)
    features.extend(jitter.values())

    print(dict(zip(labels, features)))

    return features, labels
Exemplo n.º 3
0
def test_get_HNR():
    """Check get_HNR's argument validation and its accuracy against Praat."""
    # Each invalid keyword argument must raise a ValueError carrying exactly
    # the listed message.
    invalid_calls = [
        ({'min_pitch': 0}, "min_pitch has to be greater than zero."),
        ({'silence_threshold': 3}, "silence_threshold isn't in [ 0, 1 ]."),
    ]
    for bad_kwargs, expected_message in invalid_calls:
        with pytest.raises(Exception) as excinfo:
            sig.get_HNR(sig1, r1, **bad_kwargs)
        assert excinfo.typename == 'ValueError'
        assert excinfo.value.args[0] == expected_message

    # Reference values obtained from Praat with its standard settings, plus
    # an all-zero signal that is expected to yield 0.
    reference = [
        (sig1, r1, 13.102),
        (sig2, r2, 9.660),
        (sig3, r3, 17.940),
        (sig4, r4, 16.254),
        (np.zeros(500), 500, 0),
    ]
    for wave, rate, true_val in reference:
        est_val = sig.get_HNR(wave, rate)
        assert abs(est_val - true_val) < .3, 'HNR not accurate'
Exemplo n.º 4
0
def pitch_based_features(signal, rate, win_length, hop_length):
    """Return the harmonics-to-noise ratio reported by ``sig.get_HNR``.

    Args:
        signal: Audio samples forwarded to ``sig.get_HNR``.
        rate: Sampling rate forwarded to ``sig.get_HNR``.
        win_length: Accepted but not used by this function.
        hop_length: Accepted but not used by this function.

    Returns:
        Whatever ``sig.get_HNR(signal, rate)`` returns.
    """
    return sig.get_HNR(signal, rate)
Exemplo n.º 5
0
def _append_summary_stats(stores, values, axis):
    """Append mean, std, skew, kurtosis, max and min of ``values`` to ``stores``.

    ``stores`` is a sequence of six lists, in exactly that statistic order.
    """
    summary = (
        np.mean(values, axis=axis),
        np.std(values, axis=axis),
        stats.skew(values, axis=axis),
        stats.kurtosis(values, axis=axis),
        np.max(values, axis=axis),
        np.min(values, axis=axis),
    )
    for store, stat in zip(stores, summary):
        store.append(stat)


def get_data(inputwave):
    """Extract summary features for one file and return the accumulated matrix.

    The file is read twice: once with ``wav.read`` (sampling rate and samples
    as stored in the file, used for MFCC, f0 and HNR) and once with
    ``lb.load`` (used for zero-crossing rate and RMS energy).

    For MFCC, zero-crossing rate, RMS and HNR, six statistics (mean, std,
    skew, kurtosis, max, min) are appended to accumulator lists that are
    defined outside this function (``mfcc_data``, ``mfccstd``, ...,
    ``hnrmin``). The return value is built from the *whole* content of those
    lists, so it contains one row for every file processed so far, not only
    for ``inputwave``.

    Args:
        inputwave: Path of the wave file to process; also printed.

    Returns:
        2-D NumPy array formed by concatenating, along axis 1, the MFCC,
        zero-crossing-rate, RMS and HNR statistic columns (in that order).
    """
    fs, signal = wav.read(inputwave)
    y, sr = lb.load(inputwave)

    mfcc = speechpy.feature.mfcc(signal,
                                 sampling_frequency=fs,
                                 frame_length=0.020,
                                 frame_stride=0.01,
                                 num_filters=40,
                                 fft_length=512,
                                 low_frequency=0,
                                 high_frequency=None,
                                 num_cepstral=12)

    zcr = lb.feature.zero_crossing_rate(y)

    # NOTE(review): f0 is still estimated, but its statistics are not part of
    # the feature matrix (that part was disabled in the original code).
    f0 = freq_from_autocorr(signal, fs)

    # librosa renamed ``feature.rmse`` to ``feature.rms`` and later removed
    # the old name; prefer the new one and fall back for old installations.
    rms_fn = getattr(lb.feature, 'rms', None)
    if rms_fn is None:
        rms_fn = lb.feature.rmse
    rms = rms_fn(y=y)

    HNR = SA.get_HNR(signal,
                     fs,
                     time_step=0,
                     min_pitch=75,
                     silence_threshold=0.1,
                     periods_per_window=4.5)

    print(inputwave)

    # Accumulator lists (defined outside this function), grouped per feature
    # in the order mean, std, skew, kurtosis, max, min.
    mfcc_stores = (mfcc_data, mfccstd, mfccskew, mfcckurt, mfccmax, mfccmin)
    zcr_stores = (zcrmean, zcrstd, zcrskew, zcrkurt, zcrmax, zcrmin)
    rms_stores = (rmsmean, rmsstd, rmsskew, rmskurt, rmsmax, rmsmin)
    hnr_stores = (hnrmean, hnrstd, hnrskew, hnrkurt, hnrmax, hnrmin)

    # MFCC statistics are taken along axis 0; ZCR and RMS along axis 1.
    _append_summary_stats(mfcc_stores, mfcc, 0)
    _append_summary_stats(zcr_stores, zcr, 1)
    _append_summary_stats(rms_stores, rms, 1)
    # NOTE(review): get_HNR presumably returns a single value per signal, so
    # these statistics would be over one number -- confirm this is intended.
    _append_summary_stats(hnr_stores, HNR, 0)

    columns = [np.asarray(store)
               for store in mfcc_stores + zcr_stores + rms_stores]
    # HNR accumulators are reshaped into single-column matrices so they can
    # be concatenated with the 2-D blocks above.
    columns += [np.reshape(np.asarray(store), (-1, 1)) for store in hnr_stores]

    return np.concatenate(columns, axis=1)
Exemplo n.º 6
0
def get_hnr(y, sr):
    """Compute ``get_HNR`` on consecutive, non-overlapping frames of ``y``.

    Each frame holds ``int(sr / 100)`` samples; trailing samples that do not
    fill a whole frame are ignored.

    Args:
        y: Array of samples (must expose ``shape`` and support slicing).
        sr: Sampling rate, also forwarded to ``get_HNR``.

    Returns:
        NumPy array with one ``get_HNR`` result per frame.
    """
    frame_len = int(sr / 100)
    n_frames = int(y.shape[0] / frame_len)

    per_frame = []
    for idx in range(n_frames):
        start = frame_len * idx
        stop = frame_len * (idx + 1)
        per_frame.append(get_HNR(y[start:stop], sr))
    return np.array(per_frame)
Exemplo n.º 7
0
    mfcc_dir_name = '\\mfcc\\' + speaker_directory
    if not os.path.isdir(corename + mfcc_dir_name):
        os.mkdir(corename + mfcc_dir_name)
    specgram_dir_name = '\\specgrams\\' + speaker_directory
    if not os.path.isdir(corename + specgram_dir_name):
        os.mkdir(corename + specgram_dir_name)

    # iterate through all recordings in a given directory
    for count, wavname in enumerate(sorted(os.listdir(corename + recordings_core + '\\' + speaker_directory))):
        print('--- {} --- {} ---'.format(wavname, sentences.iloc[count]['mod'].upper()))
        # load wave
        rate, signal = read(corename + recordings_core + '\\' + speaker_directory + '\\' + wavname)

        # get f0, Harmonic-to-Noise Ratio (HNR) and jitter values
        recF0mean = get_F_0(signal, rate)
        hnr = get_HNR(signal, rate)
        jttr = get_Jitter(signal, rate)['local']

        # librosa:: preprocessing (conversion to float)
        signal = signal / float(2 ** 15)

        # librosa:: generate specgram and save to relevant dir
        D = librosa.stft(signal)
        figure()
        specshow(librosa.amplitude_to_db(librosa.magphase(D)[0], ref=np.max))
        axis('off')
        savefig(corename + specgram_dir_name + '\\' + wavname[-10:-4] + '.png', dpi=200)
        close()

        # librosa:: calculate MFCC's (n_mfcc=20) and save *.npy file to relevant dir
        recMFCC = mfcc(signal, rate, n_mfcc=20, hop_length=winshift,