Exemplo n.º 1
0
def train_classifier(dataset, n_classes, Fs=16000):
    """Run ``emHMM_algorithm`` on the MFCC features of each audio file in a folder.

    Files whose name ends in ``.wav`` are loaded with ``read_wave`` and use the
    sample rate that call returns; files ending in ``.raw`` are loaded with
    ``read_raw`` and use ``Fs``. Any other directory entry is ignored. A file
    that cannot be read is reported on stdout and skipped.

    :param dataset: path of the directory holding the audio files
    :param n_classes: currently unused
    :param Fs: sample rate assumed for headerless ``.raw`` files
    :return: None
    """
    for filename in os.listdir(dataset):
        filepath = dataset + '/' + filename

        if filename.endswith('.wav'):
            try:
                x, sample_rate, _n_channels, _n_samples = read_wave(filepath)
            except Exception as e:
                # Best effort: report the unreadable file and move on instead
                # of training on the previous file's samples.
                print(e)
                continue
        elif filename.endswith('.raw'):
            try:
                x = read_raw(filepath)
            except Exception as e:
                print(e)
                continue
            # Raw files carry no header, so fall back to the caller's rate
            # rather than whatever an earlier wav file happened to use.
            sample_rate = Fs
        else:
            continue

        features = get_mfcc(x, sample_rate)
        size = features.shape[0]
        dimension = features.shape[1]

        # NOTE(review): the literal 2 may have been meant to be n_classes --
        # confirm against emHMM_algorithm's signature.
        emHMM_algorithm(features, dimension, 2, size)
Exemplo n.º 2
0
def train_classifier(dataset, n_classes, Fs=16000):
    """Run ``emHMM_algorithm`` on the MFCC features of each audio file in a folder.

    Files whose name ends in ``.wav`` are loaded with ``read_wave`` and use the
    sample rate that call returns; files ending in ``.raw`` are loaded with
    ``read_raw`` and use ``Fs``. Any other directory entry is ignored. A file
    that cannot be read is reported on stdout and skipped.

    :param dataset: path of the directory holding the audio files
    :param n_classes: currently unused
    :param Fs: sample rate assumed for headerless ``.raw`` files
    :return: None
    """
    for filename in os.listdir(dataset):
        filepath = dataset + '/' + filename

        if filename.endswith('.wav'):
            try:
                x, sample_rate, _n_channels, _n_samples = read_wave(filepath)
            except Exception as e:
                # Best effort: report the unreadable file and move on instead
                # of training on the previous file's samples.
                print(e)
                continue
        elif filename.endswith('.raw'):
            try:
                x = read_raw(filepath)
            except Exception as e:
                print(e)
                continue
            # Raw files carry no header, so fall back to the caller's rate
            # rather than whatever an earlier wav file happened to use.
            sample_rate = Fs
        else:
            continue

        features = get_mfcc(x, sample_rate)
        size = features.shape[0]
        dimension = features.shape[1]

        # NOTE(review): the literal 2 may have been meant to be n_classes --
        # confirm against emHMM_algorithm's signature.
        emHMM_algorithm(features, dimension, 2, size)
Exemplo n.º 3
0
    def predict_from_file(self, file_name, norm=False):
        '''
        Predict the user for an audio file stored under self.dirpath
        :param file_name: name of the file, appended as-is to self.dirpath
        :param norm: forwarded to mf.get_mfcc as normalizemean
        :return: a json-formatted string {"user_prediction": <user>}
        '''
        wav_path = self.dirpath + file_name
        coefficients = mf.get_mfcc(wav_path, normalizemean=norm)

        # Keep the first self.trunk rows and drop the first column before
        # handing the features to the wrapped model.
        model_output = self.model.predict(coefficients[:self.trunk, 1:])
        user = self.users_map[int(model_output)]

        return json.dumps({"user_prediction": user})
Exemplo n.º 4
0
    def predict_from_audio(self, audio, recognizer):
        '''
        Predict the user from a captured audio object
        :param audio: audio passed to sprec.recognize
        :param recognizer: recognizer passed to sprec.recognize
        :return: a tuple (user prediction, sentence returned by sprec.recognize)
        '''
        audio, google_pred = sprec.recognize(audio,
                                             recognizer,
                                             return_audio=True)

        # The features are read back from disk, so the audio is saved first.
        sample_path = self.dirpath + "sample_to_recognize.wav"
        sprec.save_audio(audio, sample_path)
        pred_data = mf.get_mfcc(sample_path)[:self.trunk, 1:]

        prediction = self.model.predict(pred_data)
        # Cl.NNClassifier output is used directly as the map key; for any
        # other model the first element of the output is used.
        if isinstance(self.model, Cl.NNClassifier):
            user_key = prediction
        else:
            user_key = prediction[0]

        return self.users_map[user_key], google_pred
def extract_features(path):
    """Write pitch and MFCC features of the recorded clips to a CSV file.

    Every file in ``path + "recorded_audio\\"`` is loaded twice (with
    ``librosa.load`` and ``wav.read``); its pitch and the first row of its
    MFCC output form one CSV row. The result is written to
    ``recorded_audio_features.csv`` in the current working directory. An
    empty directory produces a header-only file.

    :param path: directory prefix; it is concatenated directly with
        ``"recorded_audio\\"``, so it must already end with a separator
    :return: None
    """
    print('Extracting features')

    columns = ['pitch'] + ['mfcc' + str(i + 1) for i in range(110)]

    audio_dir = path + "recorded_audio\\"
    directory = os.listdir(audio_dir)
    print(directory)

    # Collect plain rows and build the frame once: DataFrame.append copied the
    # whole frame on every call and no longer exists in pandas >= 2.0.
    rows = []
    for wav_file in directory:
        wav_path = audio_dir + wav_file
        y, sr = librosa.load(wav_path)
        fs, x = wav.read(wav_path)

        pitch = get_pitch(fs, x)
        mfcc_features = get_mfcc(y, sr)

        rows.append([pitch] + mfcc_features.tolist()[0])

    # The row-by-row append gave every row index 0; keep that so the CSV's
    # first column is unchanged for existing consumers.
    df = pd.DataFrame(rows, columns=columns, index=[0] * len(rows))
    df.to_csv('recorded_audio_features.csv')
Exemplo n.º 6
0
def main(path, gender):
    """Write pitch, MFCC and gender-label features of a folder of clips to CSV.

    Every file in ``path`` is loaded twice (with ``librosa.load`` and
    ``wav.read``); its pitch, the first row of its MFCC output and ``gender``
    form one CSV row. Column names come from the module-level ``col``. The
    result is written to ``<gender>_features.csv`` in the current working
    directory.

    :param path: directory of audio files; file names are appended to it
        directly, so it must already end with a separator
    :param gender: label stored in the last column and used in the file name
    :return: None
    """
    print('Extracting features for ' + gender)

    # Collect plain rows and build the frame once: DataFrame.append copied the
    # whole frame on every call and no longer exists in pandas >= 2.0.
    rows = []
    for wav_file in os.listdir(path):
        wav_path = path + wav_file
        y, sr = librosa.load(wav_path)
        fs, x = wav.read(wav_path)
        print(wav_file)

        pitch = get_pitch(fs, x)
        mfcc_features = get_mfcc(y, sr)

        rows.append([pitch] + mfcc_features.tolist()[0] + [gender])

    # The row-by-row append gave every row index 0; keep that so the CSV's
    # first column is unchanged for existing consumers.
    df = pd.DataFrame(rows, columns=col, index=[0] * len(rows))
    df.to_csv(gender + '_features.csv')
# NOTE(review): this snippet is cut off mid-statement; the matching `except`
# and the rest of the final expression are not visible here.
try:
    # A label line is split on whitespace; the first three fields are read as
    # start, stop and label.
    cur_line = lines[j].split()
    start = int(cur_line[0])
    stop = int(cur_line[1])
    label = cur_line[2]
    # Both stamps are scaled by 1e-7 before subtracting (presumably 100 ns
    # ticks converted to seconds -- TODO confirm).
    length = stop / 10.0**7 - start / 10.0**7
    audio = f.read_frames(freq * (length))
    total_length += length
    if label in label_dic:
        mono_signal = audio  # audio[:,0]
        # energy is computed but not used in the visible code; the
        # normalisation that would consume it is commented out below.
        energy = np.sum(mono_signal**2, 0) / len(mono_signal)
        signal = mono_signal  # mono_signal/math.sqrt(energy)
        samplerate = f.samplerate
        mfcc = get_mfcc(signal,
                        samplerate,
                        winstep=window_step,
                        nfft=2048,
                        highfreq=8000,
                        lowfreq=10)
        # Number of complete groups of N consecutive MFCC rows.
        N_iter = np.floor(len(mfcc) / N)
        # apply context window
        if (length / window_step) > N:
            # Starts with one all-zero row of width 13 * N; every group of N
            # MFCC rows is flattened into one vector and stacked beneath it.
            # (13 is presumably the number of coefficients per row -- verify.)
            mfcc_matrix = np.zeros((1, 13 * N))
            for k in range(int(N_iter)):
                mfcc_vec = []
                for kk in range(N):
                    mfcc_vec = np.concatenate(
                        (mfcc_vec, mfcc[k * N + kk, :]))
                mfcc_matrix = np.concatenate(
                    (mfcc_matrix, mfcc_vec[np.newaxis, :]))
            # get the numeric label corresponding to the literal label
            num_label = label_dic[label] * np.ones(
Exemplo n.º 8
0
    def calibrate(self,
                  user_name,
                  nb_calibrations="5",
                  existing_samples=False,
                  nb_tests="0",
                  noise_red=False,
                  norm=False,
                  downsample=0):
        '''
        Register a new user and add their samples to the train/test sets
        :param user_name: name of the user; lower-cased, it prefixes the
            sample files ("<name>0001.wav", "<name>0002.wav", ...) that are
            looked up under self.dirpath
        :param nb_calibrations: number of calibration samples (int or
            numeric string)
        :param existing_samples: the calibration samples are recorded only
            when this equals the string "0"; any other value, including the
            default False, reuses the files already on disk
        :param nb_tests: number of additional samples, numbered right after
            the calibration ones, appended to the test set (int or numeric
            string)
        :param noise_red: forwarded to mf.get_mfcc as noisereduction
        :param norm: forwarded to mf.get_mfcc as normalizemean
        :param downsample: forwarded to mf.get_mfcc as downsample
        :return: None
        '''

        # update the number of users we have; the new user's label is the
        # zero-based position in that count
        self.nb_users += 1
        user_id = self.nb_users - 1
        self.users_map.update({user_id: user_name})

        nb_calibrations = int(nb_calibrations)
        nb_tests = int(nb_tests)
        trunk = self.trunk

        def sample_path(index):
            # Sample files are numbered from 1 and zero-padded to 4 digits.
            return (self.dirpath + user_name.lower() +
                    "{:0>4}.wav".format(index + 1))

        if existing_samples == "0":
            print("Recording the calibration samples")
            for i in range(nb_calibrations):
                sprec.get_one_sample(sample_path(i))

        if not self.bootstrap:
            # Drop after trunk in the time domain as well as the first mel coef
            user_calibration_data = [
                mf.get_mfcc(sample_path(i),
                            delta=self.delta,
                            noisereduction=noise_red,
                            normalizemean=norm,
                            downsample=downsample)[:trunk, 1:]
                for i in range(nb_calibrations)
            ]
            self.train_data = np.concatenate(
                (self.train_data, np.asarray(user_calibration_data)), axis=0)
            self.train_labels = np.concatenate(
                (self.train_labels, np.tile([user_id], (nb_calibrations, 1))))

        else:
            print("Bootstrapping")
            # Each sample yields several training windows of `trunk` rows,
            # taken every `step` rows.
            # NOTE(review): noise_red is not forwarded on this path, unlike
            # the two others -- confirm that is intentional.
            step = 25
            for i in range(nb_calibrations):
                mfcc = mf.get_mfcc(sample_path(i),
                                   delta=self.delta,
                                   normalizemean=norm,
                                   downsample=downsample)[:, 1:]
                print(mfcc.shape)
                nb_bootstrap = mfcc.shape[0] - trunk
                assert nb_bootstrap > 0
                window_starts = np.arange(0, nb_bootstrap, step)
                cal_data = [mfcc[j:j + trunk] for j in window_starts]
                self.train_data = np.concatenate(
                    (self.train_data, np.asarray(cal_data)), axis=0)
                self.train_labels = np.concatenate(
                    (self.train_labels,
                     np.tile([user_id], (window_starts.shape[0], 1))))

                print(self.train_labels)

        if nb_tests > 0:
            # Drop after trunk in the time domain as well as the first mel coef.
            # The test samples get the same extraction options as the
            # calibration samples (downsample used to be left out here).
            user_testing_data = [
                mf.get_mfcc(sample_path(i),
                            delta=self.delta,
                            noisereduction=noise_red,
                            normalizemean=norm,
                            downsample=downsample)[:trunk, 1:]
                for i in range(nb_calibrations, nb_tests + nb_calibrations)
            ]

            self.test_data = np.concatenate(
                (self.test_data, np.asarray(user_testing_data)), axis=0)
            self.test_labels = np.concatenate(
                (self.test_labels, np.tile([user_id], (nb_tests, 1))))
Exemplo n.º 9
0
def process_file(args):
    """Compute MFCCs with first and second deltas for one audio file.

    ``args`` is a sequence holding, in order: the file name (``.wav`` or
    ``.sph``), frame size, frame step, FFT size, mel filterbank, number of
    MFCCs, a queue carrying the shared processed-file counter, and the
    transcription path.

    A frame contributes a feature only when two frames exist on each side of
    it, so a file with ``n`` frames yields ``max(n - 4, 0)`` features.

    :return: list of ``(mfcc, first_deltas, second_deltas)`` tuples
    :raises ValueError: if the file name has neither supported extension
    """
    from collections import deque

    fname = args[0]
    frame_size = args[1]
    frame_step = args[2]
    fft_n = args[3]
    mel_filterbank = args[4]
    mfcc_num = args[5]
    counter_queue = args[6]
    transcription_path = args[7]

    # Handle two types of files: wave and sph
    # f_raw is numpy array
    if fname.endswith(".wav"):
        sample_rate, f_raw = wavfile.read(fname)

    elif fname.endswith(".sph"):
        sph_obj = sph.read(fname)
        sample_rate = sph_obj.framerate
        f_raw = sph_obj.data

    else:
        raise ValueError("Wrong file format: " + str(fname))

    # split into overlapping frames
    frames = split_into_frames(f_raw, frame_size, frame_step, transcription_path, sample_rate)

    window_size = 5
    # middle of the window
    center = 2
    # A bounded deque drops its oldest entry on append once it is full.
    window = deque(maxlen=window_size)

    features = []

    for frame in frames:
        # Append before processing so that the last complete window is
        # emitted too; processing first skipped it.
        window.append(get_mfcc(frame, fft_n, mel_filterbank, mfcc_num))
        if len(window) < window_size:
            continue

        current_mfcc = window[center]
        prev_mfcc = window[center - 1]
        next_mfcc = window[center + 1]

        # First deltas of the two neighbours, needed for the second delta.
        prev_first_deltas = get_deltas(current_mfcc, window[center - 2])
        next_first_deltas = get_deltas(window[center + 2], current_mfcc)

        first_deltas = get_deltas(next_mfcc, prev_mfcc)
        second_deltas = get_deltas(next_first_deltas, prev_first_deltas)

        features.append((current_mfcc, first_deltas, second_deltas))

    # The counter lives in the queue: take it out, bump it, put it back.
    processed_files = counter_queue.get() + 1
    if processed_files % 5 == 0:
        print("Processed " + str(processed_files) + ' files')
    counter_queue.put(processed_files)

    return features
Exemplo n.º 10
0
import sys
import mfcc
import glob
import os

# Usage: <script> AUDIO_DIR FEATURE_DIR
# Writes one "<sample>.csv" of MFCC rows into FEATURE_DIR for every *.mp3 in
# AUDIO_DIR, skipping samples whose CSV already exists.
audio_dir = sys.argv[1]
feature_dir = sys.argv[2]

audio_files = glob.glob('{}/*.mp3'.format(audio_dir))
print('found {} samples in {}'.format(len(audio_files), audio_dir))

for audio_file in audio_files:
    # basename() strips the directory with the platform's own separators;
    # splitting on '\\' only worked on Windows. The sample name is still
    # everything before the first dot of the file name.
    sample_name = os.path.basename(audio_file).split('.')[0]
    fea_file_name = '{}/{}.csv'.format(feature_dir, sample_name)
    if os.path.isfile(fea_file_name):
        print('feature exist for {}'.format(sample_name))
        continue
    mfcc_feature = mfcc.get_mfcc(audio_file)

    print('writing feature to [ {} ]'.format(fea_file_name))
    with open(fea_file_name, 'w') as f:
        for row in mfcc_feature:
            # Every value is followed by a comma, including the last one, to
            # keep the existing file format.
            f.write(''.join('{},'.format(col) for col in row))
            f.write('\n')

Exemplo n.º 11
0
def main(args):
    """Cluster three CSV data sets and plot MFCC features of an audio file.

    Reads ``../data1.csv``, ``../data2.csv`` and ``../data3.csv``, clusters
    each with ``kmeans.KMeans`` and saves a scatter plot per data set, then
    saves heat maps of the audio file's MFCCs and of two successive
    ``mfcc.calc_delta`` passes. All images go into a timestamped directory
    under ``args.save_path``.

    :param args: object with ``audio_path`` and ``save_path`` attributes
    :return: None
    """
    wave_data, sr = librosa.load(Path(args.audio_path))

    df1, df2, df3 = (pd.read_csv(csv_path)
                     for csv_path in ('../data1.csv',
                                      '../data2.csv',
                                      '../data3.csv'))

    out_dir = Path(args.save_path) / datetime.now().strftime(TIME_TEMPLATE)
    if not out_dir.exists():
        try:
            out_dir.mkdir(parents=True)
        except Exception as err:
            print(err)

    def save_and_reset(filename):
        # Write the current figure, then clear and close it so that the next
        # plot starts from a clean state.
        plt.savefig(out_dir / filename)
        plt.clf()
        plt.close()

    def save_heatmap(matrix, filename):
        plt.imshow(matrix, cmap='rainbow', aspect='auto', origin='lower')
        plt.colorbar()
        save_and_reset(filename)

    # clustering data1 and data2 (2-D scatter plots)
    for frame, n_clusters, filename in ((df1, 4, "data1.png"),
                                        (df2, 2, "data2.png")):
        model = kmeans.KMeans(n_clusters=n_clusters,
                              init='random',
                              max_ite=300,
                              random_state=44)
        frame['pred'] = model.fit_predict(frame.values)
        plt.scatter(frame['x'], frame['y'], c=frame['pred'])
        save_and_reset(filename)

    # clustering data3 (3-D scatter plot)
    model_3 = kmeans.KMeans(n_clusters=4,
                            init='random',
                            max_ite=300,
                            random_state=44)
    df3['pred'] = model_3.fit_predict(df3.values)

    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(df3['x'].values,
               df3['y'].values,
               df3['z'].values,
               c=df3['pred'])
    plt.legend()
    save_and_reset('data3.png')

    # mfcc
    win_size = 2048
    overlap = 0.5
    bank_size = 20

    _mfcc = mfcc.get_mfcc(wave_data, sr, win_size, overlap, bank_size)
    save_heatmap(_mfcc, 'mfcc.png')

    dmfcc = mfcc.calc_delta(_mfcc)
    save_heatmap(dmfcc, 'dmfcc.png')

    ddmfcc = mfcc.calc_delta(dmfcc)
    save_heatmap(ddmfcc, 'ddmfcc.png')
Exemplo n.º 12
0
 # NOTE(review): this snippet starts inside an enclosing block and is cut off
 # after the `elif` below; only the visible statements are described.
 # save_data accumulates the difference between L*10 and data_spacing; once
 # it exceeds 1 the whole part becomes zero padding and the fraction is kept.
 save_data += L*10 - data_spacing
 if save_data > 1:
     zero_padding = np.floor(save_data)
     save_data =  save_data % 1
 # NOTE(review): zero_padding is assigned only inside the `if` above, yet the
 # four concatenations run regardless, so they reuse whatever value an
 # earlier pass left behind. np.floor also returns a float, which np.zeros
 # may reject as a length -- verify.
 shouting_data = np.concatenate((shouting_data, np.zeros(zero_padding, dtype=int)))
 noise_data = np.concatenate((noise_data, np.zeros(zero_padding, dtype=int)))
 label_data = np.concatenate((label_data, np.zeros(zero_padding, dtype=int)))
 error_vector = np.concatenate((error_vector, np.zeros(zero_padding, dtype=int)))
 total_data += L*10
 test_length = len(label_data)
 if label in label_dic:
     mono_signal = audio  # audio[:,0]
     # energy is computed but not used in the visible code; the normalisation
     # that would consume it is commented out below.
     energy = np.sum(mono_signal**2, 0) / len(mono_signal)
     signal = mono_signal  # mono_signal/math.sqrt(energy)
     samplerate = f1.samplerate
     mfcc = get_mfcc(signal, samplerate, winstep=window_step, nfft=2048, highfreq=8000, lowfreq=10)
     if (L / window_step) < N:
         # Segment shorter than N steps: all four vectors are extended with
         # data_spacing zeros.
         shouting_data = np.concatenate((shouting_data, np.zeros(data_spacing, dtype=int)))
         noise_data = np.concatenate((noise_data, np.zeros(data_spacing, dtype=int)))
         label_data = np.concatenate((label_data, np.zeros(data_spacing, dtype=int)))
         error_vector = np.concatenate((error_vector, np.zeros(data_spacing, dtype=int)))
     else:
         zeros_to_add = 0
         if label == "Noise":
             # The error is the element-wise difference between test_labels
             # and predictionfinal over the next data_spacing entries.
             error_vector = np.concatenate((error_vector, (test_labels[index:index + data_spacing] -
                                                           predictionfinal[index:index + data_spacing])))
             shouting_data = np.concatenate((shouting_data, np.zeros(data_spacing, dtype=int)))
             noise_data = np.concatenate((noise_data, np.ones(data_spacing, dtype=int)))
             label_data = np.concatenate((label_data, np.ones(data_spacing, dtype=int)))
             # NOTE(review): index advances by data_spacing - 1 although
             # data_spacing entries were consumed -- confirm the overlap is
             # intended.
             index += data_spacing - 1
         elif label == "Shouting":