Example #1
def process_audio(audio_data, sr):
    """
    Computes the Mel-Frequency Cepstral Coefficients and their first and second order derivatives, concatenates them
    all into a single numpy array, and then swaps the axes from [n_mfcc, n_samples] to [n_samples, n_mfcc].

    :param audio_data: floating point time series of an audio file
    :param sr: the sample rate at which train_data was loaded
    :return: a feature array of dimension [n_samples, n_mfcc] containing the computed MFCCs and their time
             derivatives
    """
    mel_freq_coeff = mfcc(y=audio_data,
                          sr=sr,
                          n_mfcc=13,
                          hop_length=int(.10 * sr),
                          n_fft=int(.20 * sr))
    mel_freq_coeff = mel_freq_coeff[1:, :]

    mel_freq_coeff_delta = delta(mel_freq_coeff, width=7)
    mel_freq_coeff_delta_delta = delta(mel_freq_coeff, width=7, order=2)

    features = concatenate(
        (mel_freq_coeff, mel_freq_coeff_delta, mel_freq_coeff_delta_delta),
        axis=0)
    features = swapaxes(features, 0, 1)
    return features
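A minimal usage sketch for the function above; the imports and the example clip are assumptions about the surrounding module (the snippet itself uses bare mfcc, delta, concatenate, and swapaxes), not part of the original example.

# Hypothetical usage sketch: the imports and example clip below are assumptions.
import librosa
from librosa.feature import mfcc, delta
from numpy import concatenate, swapaxes

audio_data, sr = librosa.load(librosa.example("trumpet"), sr=None)  # any mono signal works
features = process_audio(audio_data, sr)
# 13 MFCCs with c0 dropped -> 12 coefficients, plus deltas and delta-deltas = 36 columns
print(features.shape)  # (n_frames, 36)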
 def process_signal(self, signal):
     ft = np.abs(stft(signal, n_fft=self.window_size, hop_length=self.window_stride, window='hann'))
     mel = melspectrogram(sr=self.sample_rate, S=ft)
     mfccs = mfcc(sr=self.sample_rate, n_mfcc=self.num_mfccs, S=mel)
     deltas = delta(mfccs)
     delta_deltas = delta(mfccs, order=2)
     return mfccs, deltas, delta_deltas
def extract_mfcc_features(data, max_length_sec=10):
    try:
        filename, lbl = data
        #signal, sr = librosa.load(filename)
        sr, signal = read(filename)
        if len(signal) == 0:
            return filename, None, None
        if len(signal.shape) > 1:
            signal = signal[:, 0]
        signal = signal - signal.mean()
        signal = signal[:max_length_sec * sr]
        signal = np.array(remove_silence(list(signal), 0.01))
        if np.sum(signal) == 0.0:
            print("Empty", filename)
            return filename, None, None
        mfcc = librosa.feature.mfcc(signal, n_fft=fft_points, hop_length=fft_overlap, n_mfcc=mfcc_coefficients, fmax=5000)
        delta_mfcc_1 = delta(mfcc, order=1)
        delta_mfcc_2 = delta(mfcc, order=2)
        #print "Took", time.time() - start, "length", original_len, "size", os.path.getsize( filename ), "pre process", preprocess_time, "load", loading_time
        total_features = np.vstack([mfcc, delta_mfcc_1, delta_mfcc_2])
        total_features = np.transpose(total_features)
        total_features = preprocess_mfcc(total_features)
        #total_features = StandardScaler().fit_transform( total_features )
        return filename, lbl, total_features
    except Exception as e:
        print(signal, signal.shape)
        print(e)
        traceback.print_exc(file=sys.stdout)
        print(filename)
        return filename, None, None
Example #4
def create_mels_deltas(waveform, sample_rate):
    one_mel = melspectrogram(waveform.squeeze(0).numpy(),
                             sr=sample_rate,
                             n_fft=2048,
                             hop_length=1024,
                             n_mels=128,
                             fmin=0.0,
                             fmax=sample_rate / 2,
                             htk=True,
                             norm=None)
    one_mel = np.log(one_mel + 1e-8)
    one_mel = (one_mel - np.min(one_mel)) / (np.max(one_mel) - np.min(one_mel))
    one_mel_delta = delta(one_mel)
    one_mel_delta = (one_mel_delta - np.min(one_mel_delta)) / (np.max(one_mel_delta) - np.min(one_mel_delta))
    one_mel_delta_delta = delta(one_mel, order=2)
    one_mel_delta_delta = (one_mel_delta_delta - np.min(one_mel_delta_delta)) / (np.max(one_mel_delta_delta) - np.min(one_mel_delta_delta))
    mel_3d = torch.cat([
        torch.tensor(one_mel).unsqueeze(0),
        torch.tensor(one_mel_delta).unsqueeze(0),
        torch.tensor(one_mel_delta_delta).unsqueeze(0)
    ], dim=0)
    return mel_3d
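A hedged usage sketch for create_mels_deltas, assuming torchaudio is available, the module imports melspectrogram and delta from librosa.feature, and a librosa version that still accepts the positional waveform argument used above; the file path is a placeholder.

# Hypothetical usage sketch; "speech.wav" is a placeholder path and the imports
# below are assumptions about the original module, not part of the example.
import numpy as np
import torch
import torchaudio
from librosa.feature import melspectrogram, delta

waveform, sample_rate = torchaudio.load("speech.wav")      # shape (channels, samples)
mel_3d = create_mels_deltas(waveform[:1], sample_rate)     # pass a single channel
print(mel_3d.shape)  # torch.Size([3, 128, n_frames]): log-mel, delta, delta-delta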
def extract_gmm_feature(data, max_length_sec=10):
    try:
        filename, lbl = data
        sr, signal = read(filename)
        if len(signal.shape) > 1:
            signal = signal[:, 0]

        signal = signal - signal.mean()
        signal = signal[:max_length_sec * sr]
        signal = np.array(remove_silence(signal, 0.005))
        if np.sum(signal) == 0.0:
            print("Empty", filename)
            return filename, None, None

        mfcc = librosa.feature.mfcc(signal, n_fft=gmm_fft_points, hop_length=gmm_fft_overlap, n_mfcc=gmm_mfcc_coefficients, fmax=5000)
        #mfcc = preprocess_mfcc(mfcc)
        delta_mfcc_1 = delta(mfcc, order=1)
        delta_mfcc_2 = delta(mfcc, order=2)
        total_features = np.vstack([mfcc, delta_mfcc_1, delta_mfcc_2])
        total_features = np.transpose(total_features)
        total_features = preprocess_mfcc(total_features)
        #total_features = StandardScaler().fit_transform( total_features )
        gmm = GMM(n_components=1)
        gmm.fit(total_features)
        res_features = np.hstack([gmm.means_[0], gmm.covars_[0]])
        #print gmm.means_.shape
        #result_features = np.vstack( [ gmm. ] )
        return filename, lbl, res_features
    except Exception as e:
        print(e)
        return filename, None, None
Example #6
def get_deltas(melSpecs):
    keys = melSpecs.keys()
    deltas = {}
    deltadeltas = {}

    for key in keys:
        deltas[key] = lf.delta(melSpecs[key], order=1)
        deltadeltas[key] = lf.delta(melSpecs[key], order=2)
    return deltas, deltadeltas
Example #7
File: hac.py Project: naka-lab/Serket
def build_codebooks_from_list_of_wav(wavs, ks, **mfcc_params):
    mfccs = []
    for w in wavs:
        sr, data = wavfile.read(w)
        cur_mfccs = mfcc(data, sr=sr, **complete_mfcc_params(mfcc_params))
        mfccs.append(cur_mfccs)
    cdb_mfcc, _ = kmeans2(np.vstack([m.T for m in mfccs]), ks[0])
    cdb_dmfcc, _ = kmeans2(np.vstack([delta(m).T for m in mfccs]), ks[1])
    cdb_ddmfcc, _ = kmeans2(np.vstack([delta(m, order=2).T for m in mfccs]), ks[2])
    return (cdb_mfcc, cdb_dmfcc, cdb_ddmfcc)
Example #8
def extract_features(file):
    signal, sr = librosa.load(file, sr=sample_rate, mono=False)
    #signal = numpy.asfortranarray(numpy.concatenate([[signal[0]], [signal[1]]]))
    #signal = librosa.to_mono(signal)
    signal = signal[1]
    filter_banks = f_bank(signal)
    d = delta(filter_banks, order=1)
    d2 = delta(d, order=2)
    S = numpy.concatenate([filter_banks, d, d2], axis=1)

    return S
Example #9
	def prep_utterance(self, data):

		if data.shape[2]>self.max_nb_frames:
			ridx = np.random.randint(0, data.shape[2]-self.max_nb_frames)
			data_ = data[:, :, ridx:(ridx+self.max_nb_frames)]
		else:
			mul = int(np.ceil(self.max_nb_frames/data.shape[2]))
			data_ = np.tile(data, (1, 1, mul))
			data_ = data_[:, :, :self.max_nb_frames]

		if self.delta:
			data_ = np.concatenate([data_, delta(data_,width=3,order=1), delta(data_,width=3,order=2)], axis=0)

		return data_
Example #10
def generate_deltas(X):
    new_dim = np.zeros(tuple(np.shape(X)))
    X = np.concatenate((X, new_dim), axis=3)
    del new_dim
    for i in range(len(X)):
        X[i, :, :, 1] = delta(X[i, :, :, 0])
    return X
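A small usage sketch for generate_deltas, assuming the module imports delta from librosa.feature and that X is a batch of single-channel spectrogram patches shaped (n_examples, n_bins, n_frames, 1); the random batch below is only for illustration.

# Hypothetical usage sketch; the batch is random data used only to show shapes.
import numpy as np
from librosa.feature import delta

X = np.random.randn(4, 60, 101, 1)   # (n_examples, n_bins, n_frames, channel)
X_with_deltas = generate_deltas(X)
print(X_with_deltas.shape)  # (4, 60, 101, 2): original patch plus its delta channel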
Example #11
 def piczak_preprocessing(self, wav, sr, shift=0):
     # resampled_wav = librosa.resample(y=wav,orig_sr=sr, target_sr=22050)
     spectrogram = melspectrogram(y=wav, sr=sr, n_mels=60, n_fft=1024)
     spectrogram = np.roll(spectrogram, shift * 20, axis=0)
     logspec = librosa.power_to_db(spectrogram)  # logamplitude was renamed to power_to_db in librosa 0.6
     deltas = delta(logspec)
     return np.stack((logspec, deltas), axis=-1)
Example #12
def build_codebooks_from_list_of_wav(wavs, ks, mode='raw', **mfcc_params):
    """Generates three codebooks of low level units from a list of wav files.

    The three codebooks correspond to a quantization of MFCC vectors
    from the sound files as well as their first and second order time
    derivatives.

    :parameters:
        - ks: triple of int
            Number of elements in each code book.

        - mode: iterative|raw

    :returns:
        triple of codebooks as (k, d) arrays
    """
    mfccs = []
    for w in wavs:
        print("preprocessing {}".format(w))
        sr, data = wavfile.read(w)
        cur_mfccs = mfcc(data, sr=sr, **complete_mfcc_params(mfcc_params))
        mfccs.append(cur_mfccs)
        #mfccs.append(cur_mfccs.T)
        #d_mfccs.append(delta(cur_mfccs).T)
        #dd_mfccs.append(delta(cur_mfccs, order=2).T)
    print("Building codebooks:")
    print("- MFCC...")
    cdb_mfcc = build_codebook(np.vstack([m.T for m in mfccs]),
                              ks[0],
                              mode=mode)
    print("- Delta MFCC...")
    cdb_dmfcc = build_codebook(np.vstack([delta(m).T for m in mfccs]),
                               ks[1],
                               mode=mode)
    print("- Delta Delta MFCC...")
    cdb_ddmfcc = build_codebook(np.vstack([delta(m, order=2).T
                                           for m in mfccs]),
                                ks[2],
                                mode=mode)
    #return (build_codebook(np.vstack(mfccs), ks[0], mode=mode),
    #        build_codebook(np.vstack(d_mfccs), ks[1], mode=mode),
    #        build_codebook(np.vstack(dd_mfccs), ks[2], mode=mode))
    return (cdb_mfcc, cdb_dmfcc, cdb_ddmfcc)
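The docstring above describes quantizing frame-level MFCC, delta, and delta-delta vectors into codebooks. The sketch below illustrates that idea with plain librosa and scipy k-means; it is not the project's build_codebook or complete_mfcc_params, and the example clip and codebook size are arbitrary assumptions.

# Illustrative sketch only: quantizing MFCC frames with k-means to get one codebook.
# It stands in for build_codebook(...) above; the clip and k=32 are assumptions.
import numpy as np
import librosa
from scipy.cluster.vq import kmeans2

y, sr = librosa.load(librosa.example("trumpet"), sr=None)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)        # (n_mfcc, n_frames)
frames = mfccs.T.astype(np.float64)                        # one row per frame
codebook, labels = kmeans2(frames, 32, minit="points")     # codebook: (32, 13)
print(codebook.shape, labels.shape)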
Example #13
def hac(data, sr, codebooks, lags=[5, 2], **mfcc_params):
    """Histogram of acoustic coocurrence (see [VanHamme2008]).

    A vector of counts is returned instead of an actual histogram.

    :parameters:
        - data: time series
        - sr: sample rate
        - codebooks: triple of codebooks
        - lags: a list of lags to use (the corresponding histograms are
            concatenated).
    """
    mfccs = mfcc(data, sr=sr, **complete_mfcc_params(mfcc_params))
    d_mfccs = delta(mfccs)
    dd_mfccs = delta(mfccs, order=2)
    streams = [mfccs.T, d_mfccs.T, dd_mfccs.T]
    return np.hstack([compute_coocurrences(stream, codebook, lags)
                      for (stream, codebook) in zip(streams, codebooks)
                      ])
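compute_coocurrences is project code not shown here; the sketch below is a hypothetical illustration of the lagged co-occurrence counting the docstring refers to, counting how often codeword i is followed by codeword j at a fixed lag.

# Hypothetical helper, not the project's compute_coocurrences: count codeword pairs
# (i at time t, j at time t+lag) and flatten the k x k count matrix into a vector.
import numpy as np

def cooccurrence_counts(codes, k, lag):
    counts = np.zeros((k, k), dtype=np.int64)
    for i, j in zip(codes[:-lag], codes[lag:]):
        counts[i, j] += 1
    return counts.ravel()

codes = np.array([0, 1, 1, 2, 0, 1])           # toy codeword sequence
print(cooccurrence_counts(codes, k=3, lag=1))  # length k*k = 9 count vector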
Example #16
    def calculate_mfcc_deltas(self, mfccs):
        # If max_order is 2, we calculate both the order=1 and order=2 deltas
        n_data = mfccs.shape[0]
        width = self.delta_width
        if n_data < self.delta_width:
            if n_data % 2:  # If data is odd, we can set it to n_data
                width = n_data
            else:
                width = n_data - 1  # Otherwise, we need to make it odd

        delta_feats = np.zeros((n_data, self.n_ccs * self.max_order))
        for order in range(self.max_order):
            delta_feats[:, order * self.n_ccs:(order + 1) *
                        self.n_ccs] = delta(mfccs,
                                            order=order + 1,
                                            axis=0,
                                            width=width)
        return delta_feats
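The width adjustment above works around librosa's requirement that delta's width be an odd integer of at least 3 and, in the default 'interp' mode, no larger than the number of frames along the chosen axis. A quick check of that constraint on toy data:

# Quick check of the width constraint (toy data; 5 frames along axis 0, as above).
import numpy as np
import librosa

mfccs = np.random.randn(5, 20)                                  # (n_frames, n_ccs)
print(librosa.feature.delta(mfccs, width=5, axis=0).shape)      # odd width <= n_frames: OK
try:
    librosa.feature.delta(mfccs, width=4, axis=0)               # even width is rejected
except librosa.ParameterError as err:
    print("rejected:", err)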
Example #17
                           hop_length=int(sr_ms * sliding_ms))
            frames = pad_center(frames,
                                size=zero_crossing_rates.shape[1],
                                axis=1)
            fundamentals = fundamental(frames, sr)
            # We normalize with respect to the maximum and minimum found across the corpus.
            time_series = (time_series - min_max[meta_file][0]) / (
                min_max[meta_file][1] - min_max[meta_file][0])
            mfccs = mfcc(time_series,
                         sr=sr,
                         n_mfcc=12,
                         n_fft=int(frame_ms * sr_ms),
                         hop_length=int(sliding_ms * sr_ms))
            d_mfccs = delta(mfccs, width=3, order=1)

            frames = frame(time_series,
                           frame_length=int(sr_ms * frame_ms),
                           hop_length=int(sr_ms * sliding_ms))
            frames = pad_center(frames, size=mfccs.shape[1], axis=1)
            energies = trapz(frames * frames, dx=frame_ms, axis=0)

            for instant, (f0, zcr, e, frame_mfccs,
                          frame_delta_mfccs) in enumerate(
                              zip(fundamentals, zero_crossing_rates.T,
                                  energies, mfccs.T, d_mfccs.T)):
                cursor.execute(
                    '''WITH fn (label_id) AS (
					SELECT id FROM labels WHERE filepath = %s LIMIT 1)
					INSERT INTO frames (instant, f0, zcr, energy, mfcc1, mfcc2, mfcc3, mfcc4, mfcc5, mfcc6, mfcc7, mfcc8, mfcc9, mfcc10, mfcc11, mfcc12, delta_mfcc1, delta_mfcc2, delta_mfcc3, delta_mfcc4, delta_mfcc5, delta_mfcc6, delta_mfcc7, delta_mfcc8, delta_mfcc9, delta_mfcc10, delta_mfcc11, delta_mfcc12, label_)
Example #18
File: hac.py Project: naka-lab/Serket
def hac(data, sr, codebooks, lags=[5, 2], **mfcc_params):
    mfccs = mfcc(data, sr=sr, **complete_mfcc_params(mfcc_params))
    d_mfccs = delta(mfccs)
    dd_mfccs = delta(mfccs, order=2)
    streams = [mfccs.T, d_mfccs.T, dd_mfccs.T]
    return np.hstack([compute_coocurrences(stream, codebook, lags) for (stream, codebook) in zip(streams, codebooks)])
    def process_signal(self, signal):
        self.filterbank.forward(signal)

        self.envs.raw_env = self.filterbank.raw_envelopes
        self.envs.inh_env = self.filterbank.inhibited_envelopes
        self.envs.amp_env = self.filterbank.amplitude_modulation_envelopes
        self.envs.amp_mod = self.filterbank.amp_mod

        self.efd.spectral_env = self.filterbank.spectral_envelope
        self.efd.effective_roughness = self.filterbank.effective_roughness
        # self.efd.mod_depth = self.filterbank.mod_depth

        #Calculate marginal statistics
        self.efd.inh_stats = marginal_statistics(self.envs.inh_env)
        self.efd.raw_stats = marginal_statistics(self.envs.raw_env)

        m, v_unitless, s, k, var, std_dev = self.efd.raw_stats

        #Calculate modulation features
        self.efd.modulation_power = modulation_powers(self.envs.amp_env, var)
        self.efd.average_amp_mod = np.mean(self.envs.amp_env, axis=2).reshape((
            self.n_bands, -1))

        temp_env = self.filterbank.temporal_envelope
        inst_roughness = self.filterbank.instantaneous_roughness

        #Make temporal env resolution 60 ms
        diff = len(temp_env) % self.samples_twenty_ms_ds
        if diff != 0:
            pad = np.zeros(self.samples_twenty_ms_ds - diff)
            temp_env = np.append(temp_env, pad)
            inst_roughness = np.append(inst_roughness, pad)

        self.efd.temp_env_reduced = np.mean(np.reshape(
            temp_env, (-1, self.samples_twenty_ms_ds)), axis=1)
        self.envs.temp_env = temp_env

        self.efd.inst_roughness = np.mean(np.reshape(
            inst_roughness, (-1, self.samples_twenty_ms_ds)), axis=1)

        # #Also make raw env resolution 60 ms turns out this doesnt improve
        # dct speed by much at all
        # diff2 = len(raw_env[0]) %self.samples_sixtyms_ds
        # if diff2 !=0 :
        #     pad = np.zeros((self.n_bands, self.samples_sixtyms_ds-diff2))
        #     raw_env = np.hstack((raw_env,pad ))
        #     env_inh = np.hstack((env_inh, pad))
        #
        # raw_env_reduced = np.mean(np.reshape(
        #     raw_env,(self.n_bands,-1, self.samples_sixtyms_ds)), axis=2)
        # env_inh_reduced = np.mean(np.reshape(
        #     env_inh, (self.n_bands, -1, self.samples_sixtyms_ds)), axis=2)

        # dct_raw = dct(raw_env_reduced, norm="ortho", axis = 0)
        # dct_inh = dct(env_inh_reduced, norm="ortho", axis = 0)
        #
        # dct_raw = dct(raw_env, norm="ortho", axis = 0)

        #Compute dct on envelopes
        self.dctf.dct_inhibited = dct(self.envs.inh_env, norm="ortho", axis=0)
        self.dctf.dct_delta = delta(self.dctf.dct_inhibited)
        self.dctf.dct_delta_delta = delta(self.dctf.dct_delta)

        self.efd.dct = np.mean(self.dctf.dct_inhibited, axis=1)
        self.efd.dct_delta = np.mean(self.dctf.dct_delta, axis=1)
        self.efd.dct_delta_delta = np.mean(self.dctf.dct_delta_delta, axis=1)
Example #20
 def delta_mean(a):
     return delta(a).mean()
Example #21
        # librosa:: preprocessing (conversion to float)
        signal = signal / float(2 ** 15)

        # librosa:: generate specgram and save to relevant dir
        D = librosa.stft(signal)
        figure()
        specshow(librosa.amplitude_to_db(librosa.magphase(D)[0], ref=np.max))
        axis('off')
        savefig(corename + specgram_dir_name + '\\' + wavname[-10:-4] + '.png', dpi=200)
        close()

        # librosa:: calculate MFCC's (n_mfcc=20) and save *.npy file to relevant dir
        recMFCC = mfcc(signal, rate, n_mfcc=20, hop_length=winshift,
                       win_length=winlen, window=np.hamming(winlen))
        MFCC_feature_vector = np.concatenate((recMFCC,
                                              delta(recMFCC),
                                              delta(recMFCC, order=2)))
        np.save(corename + mfcc_dir_name + '\\' + wavname[-10:-4], MFCC_feature_vector)

        # export data to pandas dataframe
        df = df.append({"id": wavname[:-8],
                        "sex": speaker_sex,
                        "path": recordings_core + '\\' + speaker_directory + '\\' + wavname,
                        "sentence": sentences.iloc[count]['sentence'],
                        "mod": sentences.iloc[count]['mod'][0],
                        "F0_mean": round(recF0mean[0], 5),
                        "HNR": round(hnr, 5),
                        "jitter": round(jttr, 5),
                        "MFCC_fv": mfcc_dir_name + '\\' + wavname[-10:-4] + '.npy',
                        "specgram": specgram_dir_name + '\\' + wavname[-10:-4] + '.png'},
                       ignore_index=True)
Example #22
def Features_Audio(Fenetres,
                   TailleFenetre,
                   EcartSousFenetres,
                   fen_anal=100,
                   center=True):
    # TailleFenetre is given in seconds and corresponds to the size of the texture windows
    # EcartSousFenetres is given as a proportion of fen_anal (0.5 for an overlap of two windows, 1/3 for three windows, 1 for no overlap)
    # fen_anal in ms (size of the analysis window)
    # one row per window
    # one column per feature
    # Retour_X is a list of features per window
    Retour_X = []
    win_l = hz * fen_anal / 1000
    hop_l = int(win_l * EcartSousFenetres)
    win_l = int(win_l)
    for DebutFenetre in Fenetres:
        Fenetre = Signal[int(DebutFenetre * hz):int(DebutFenetre * hz +
                                                    TailleFenetre * hz)]
        D = np.abs(
            librosa.stft(Fenetre,
                         window=window,
                         n_fft=win_l,
                         win_length=win_l,
                         hop_length=hop_l,
                         center=center))**2
        # compute the mel spectrogram
        S = feature.melspectrogram(S=D,
                                   y=Fenetre,
                                   n_mels=n_MEL,
                                   fmin=fmin,
                                   fmax=fmax)
        # compute the 13 MFCC coefficients
        mfcc = feature.mfcc(S=librosa.power_to_db(S), n_mfcc=n_mfcc)
        # compute the first derivative (delta)
        mfcc_delta = feature.delta(mfcc)
        # compute the second derivative (delta-delta)
        mfcc_delta2 = feature.delta(mfcc_delta)
        # Zero crossing rate
        ZCR = feature.zero_crossing_rate(Fenetre,
                                         frame_length=win_l,
                                         hop_length=hop_l,
                                         center=center,
                                         threshold=1e-10)
        # spectral contrast
        SCo = feature.spectral_contrast(S=D,
                                        sr=hz,
                                        n_fft=win_l,
                                        hop_length=512,
                                        fmin=fmin,
                                        quantile=0.02)
        # temporal integration
        mfcc = np.mean(mfcc, axis=1)
        mfcc_delta = np.mean(mfcc_delta, axis=1)
        mfcc_delta2 = np.mean(mfcc_delta2, axis=1)
        ZCR = np.mean(ZCR)
        SCo = np.mean(SCo)
        # concatenate the features
        f = np.hstack((mfcc, mfcc_delta, mfcc_delta2, ZCR, SCo))
        # transpose (one feature per column) and add the rows corresponding to the new windows
        Retour_X.append(f.tolist())
    return np.array(Retour_X)
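Features_Audio reads several module-level globals (Signal, hz, window, n_MEL, fmin, fmax, n_mfcc). The sketch below shows one hypothetical way to set them and call the function; every value here is an assumption for illustration, not the original project's configuration.

# Hypothetical setup sketch: these globals and values are assumptions.
import numpy as np
import librosa
from librosa import feature

Signal, hz = librosa.load(librosa.example("trumpet"), sr=None)
window, n_MEL, n_mfcc, fmin, fmax = "hann", 64, 13, 20.0, hz / 2.0

starts = np.arange(0.0, len(Signal) / hz - 1.0, 1.0)          # 1-second texture windows
X = Features_Audio(starts, TailleFenetre=1.0, EcartSousFenetres=0.5)
print(X.shape)  # (n_windows, n_mfcc * 3 + 2): MFCCs, deltas, delta-deltas, ZCR, contrast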
Example #23
 def delta_delta_mean(a):
     return delta(a, order=2).mean()
Example #24
 def mfcc_features(self, signal):
     signal = signal * 1.0
     S = mfcc(y=signal, sr=self.rate, n_mfcc=self.mfcc_fts, hop_length=64)
     D = delta(data=S, order=1)
     DD = delta(data=S, order=2)
     return [S, D, DD]
Example #25
else:
    print("\n==========\nProcessing dataset from ({0}) directory...\n==========".format(train_dir))
    for i in range(n_classes):
        print("\n==========\nProcessing files for class: ({0})\n==========".format(classes[i]))
        filepath = train_dir+classes[i]+'/'
        train_files = os.listdir(filepath)
        for fname in train_files:
            train_path = filepath+fname
            signal, sample_rate = load(train_path,sr=None)

            signal_for_silence = AudioSegment.from_file(train_path,format='wav')
            silence_indices = detect_silence(signal_for_silence,min_silence_len=min_silence_len,silence_thresh=silence_thresh)
            signal = np.delete(signal, silence_indices)

            mfcc_feats = mfcc(signal=signal, numcep=num_cep, samplerate=sample_rate, winstep=win_step, winfunc=np.hamming, nfft=nfft)
            delta_feats = delta(data=mfcc_feats, order=1)
            delta2_feats = delta(data=mfcc_feats, order=2)

            if mfcc_feats.shape[0] < stack_length:
                print("\n==========\nDEBUG: Excluded file {0} because feature length is too short after silence truncation (length was {1}).\n==========".format(train_path,mfcc_feats.shape[0]))

            else:
                corpus_breakdown[i] += 1
                features = np.zeros((mfcc_feats.shape[0],num_feats,1))
                features[:,0:num_cep,0] = mfcc_feats
                features[:,num_cep:2*num_cep,0] = delta_feats
                features[:,2*num_cep:3*num_cep,0] = delta2_feats

                labels = np.zeros((mfcc_feats.shape[0],n_classes))
                labels[:,i] = 1
Example #26
import numpy as np
from librosa import load
from librosa.feature import mfcc, delta
from scipy.signal.windows import hann  # scipy.signal.hanning was removed; windows.hann is the equivalent
# import matplotlib.pyplot as plt

filename = 'D:\\phd\\DATA\\recordings\\01_ZL\\01_ZL_001.wav'
y, sr = load(filename, sr=None)
winlen = int(0.02 * sr)
winshift = int(0.01 * sr)
mfccs = mfcc(y,
             sr,
             n_mfcc=20,
             hop_length=winshift,
             win_length=winlen,
             window=hann(winlen))
feature_matrix = np.concatenate((mfccs, delta(mfccs), delta(mfccs, order=2)))

# TODO: RASTA-PLP
# print('size = {}'.format(mfccs.shape))

# plt.matshow(mfccs, aspect='auto')
# plt.show()
Example #27
def get_melspectrogram_delta_deltadelta(y, **kwargs):
    """Compute Mel-spectrogram, delta features and delta-delta features."""
    melspec = get_melspectrogram(y, **kwargs)
    delta = rosaf.delta(melspec)
    delta_delta = rosaf.delta(melspec, order=2)
    return np.stack([melspec, delta, delta_delta])
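For reference, a minimal sketch of an equivalent three-channel stack built directly with librosa, assuming get_melspectrogram wraps librosa.feature.melspectrogram and rosaf aliases librosa.feature; the clip and default mel parameters are placeholders.

# Minimal sketch, assuming rosaf above aliases librosa.feature; the example clip
# and default mel parameters are placeholders.
import numpy as np
import librosa

y, sr = librosa.load(librosa.example("trumpet"), sr=None)
melspec = librosa.feature.melspectrogram(y=y, sr=sr)
stacked = np.stack([melspec,
                    librosa.feature.delta(melspec),
                    librosa.feature.delta(melspec, order=2)])
print(stacked.shape)  # (3, n_mels, n_frames)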
def get_feature_from_librosa(wave_name, window):
    #print wave_name
    (rate, sig) = wav.read(wave_name)

    chroma_stft_feat = feature.chroma_stft(sig,
                                           rate,
                                           n_fft=window,
                                           hop_length=window // 2)
    #print chroma_stft_feat.shape
    mfcc_feat = feature.mfcc(y=sig, sr=rate, n_mfcc=13, hop_length=window // 2)
    mfcc_feat = mfcc_feat[1:, :]
    #print mfcc_feat.shape
    d_mfcc_feat = feature.delta(mfcc_feat)
    #print d_mfcc_feat.shape
    d_d_mfcc_feat = feature.delta(d_mfcc_feat)
    #print d_d_mfcc_feat.shape
    zero_crossing_rate_feat = feature.zero_crossing_rate(sig,
                                                         frame_length=window,
                                                         hop_length=window // 2)
    #print zero_crossing_rate_feat.shape

    S = librosa.magphase(
        librosa.stft(sig,
                     hop_length=window // 2,
                     win_length=window,
                     window='hann'))[0]
    rmse_feat = feature.rmse(S=S)
    #print rmse_feat.shape

    centroid_feat = feature.spectral_centroid(sig,
                                              rate,
                                              n_fft=window,
                                              hop_length=window // 2)
    #print centroid_feat.shape

    bandwith_feat = feature.spectral_bandwidth(sig,
                                               rate,
                                               n_fft=window,
                                               hop_length=window // 2)
    #print bandwith_feat.shape

    contrast_feat = feature.spectral_contrast(sig,
                                              rate,
                                              n_fft=window,
                                              hop_length=window // 2)
    #print contrast_feat.shape
    rolloff_feat = feature.spectral_rolloff(sig,
                                            rate,
                                            n_fft=window,
                                            hop_length=window // 2)  # compute the roll-off frequency
    #print rolloff_feat.shape

    poly_feat = feature.poly_features(sig,
                                      rate,
                                      n_fft=window,
                                      hop_length=window // 2)  # fit an nth-order polynomial to the spectrogram columns and use its coefficients
    #print poly_feat.shape
    #==============================================================================
    #     print(chroma_stft_feat.shape)
    #     #print(corr_feat.shape)
    #     print(mfcc_feat.shape)
    #     print(d_mfcc_feat.shape)
    #     print(d_d_mfcc_feat.shape)
    #     print(zero_crossing_rate_feat.shape)
    #     print(rmse_feat.shape)
    #     print(centroid_feat.shape)
    #     print(bandwith_feat.shape)
    #     print(contrast_feat.shape)
    #     print(rolloff_feat.shape)
    #     print(poly_feat.shape)
    #==============================================================================
    feat = numpy.hstack(
        (chroma_stft_feat.T, mfcc_feat.T, d_mfcc_feat.T, d_d_mfcc_feat.T,
         zero_crossing_rate_feat.T, rmse_feat.T, centroid_feat.T,
         bandwith_feat.T, contrast_feat.T, rolloff_feat.T, poly_feat.T))
    feat = feat.T
    return feat  # each row is the feature vector for one frame
Example #29
def mfccs_deltas(mfcc: np.ndarray, N: int, order: int):
    return delta(mfcc, width=N, order=order)