def inference_from_microphone():
    """Record voice while 'R' is held down, then classify each recording.

    Runs forever: every press-and-hold of 'R' produces one audio file,
    which is classified; the predicted label is printed and spoken aloud.
    """
    # -- Model setup
    model = setup_classifier(load_weights_from=PATH_TO_WEIGHTS)
    setup_classes_labels(load_classes_from=PATH_TO_CLASSES, model=model)

    # -- Keyboard listener running in its own thread
    recording_state = Value('i', 0)
    board = KeyboardMonitor(recording_state, PRINT=False)
    board.start_listen(run_in_new_thread=True)

    # -- Audio recorder and a rate-limited usage printer
    recorder = AudioRecorder()
    tprinter = TimerPrinter()

    num_recordings = 0
    while True:
        tprinter.print("Usage: keep pressing down 'R' to record audio",
                       T_gap=20)

        board.update_key_state()
        if not board.has_just_pressed():
            time.sleep(0.1)
            continue

        num_recordings += 1
        print("Record {}th voice".format(num_recordings))

        # Record from key press until key release
        recorder.start_record(folder=SAVE_AUDIO_TO)
        while not board.has_just_released():
            board.update_key_state()
            time.sleep(0.001)
        recorder.stop_record()

        # Classify the recorded file
        audio = lib_datasets.AudioClass(filename=recorder.filename)
        predicted_label = model.predict_audio_label(audio)
        print("\nAll word labels: {}".format(model.classes))
        print("\nPredicted label: {}\n".format(predicted_label))

        # Speak the result aloud, e.g. "one is two"
        lib_datasets.shout_out_result(recorder.filename,
                                      predicted_label,
                                      preposition_word="is",
                                      cache_folder="data/examples/")

        # Reset the usage printer so the hint is shown again promptly
        print("\n")
        tprinter.reset()

        time.sleep(0.1)
# ----- Example #2 (scraped-snippet separator; original text "Пример #2", score 0) -----
    def record_audio_and_classifiy(self, is_shout_out_result=False):
        """Record one voice clip, classify it, and refresh the GUI.

        Args:
            is_shout_out_result: if True, speak the final label aloud.

        Returns:
            (predicted_label, max_prob): the raw top prediction and its
            probability (before the confidence threshold is applied).
        """
        model = self._model
        classes = self._CLASSES
        gui = self._gui
        recorder = self._recorder

        # -- Record until the hotkey is released
        gui.enable_img1_self_updating()
        recorder.start_record(folder=self._DST_AUDIO_FOLDER)
        while not gui.is_key_released():
            time.sleep(0.001)
        recorder.stop_record()

        # -- Classify the recorded audio
        audio = lib_datasets.AudioClass(filename=recorder.filename)
        probs = model.predict_audio_label_probabilities(audio)
        best = np.argmax(probs)
        predicted_label = classes[best]
        max_prob = probs[best]
        print("\nAll word labels: {}".format(classes))
        print("\nPredicted label: {}, probability: {}\n".format(
            predicted_label, max_prob))

        # Low-confidence predictions are reported as "none"
        PROB_THRESHOLD = 0.8
        final_label = predicted_label if max_prob > PROB_THRESHOLD else "none"

        # -- Update the GUI
        # Image 1: stop self-updating; zero recording length and intensity
        gui.reset_img1()

        # Image 2: the prediction summary
        gui.set_img2(
            final_label=final_label,
            predicted_label=predicted_label,
            probability=max_prob,
            length=audio.get_len_s(),
            valid_length=audio.get_len_s(),  # TODO: remove the silent voice,
        )

        # Image 3: per-class probabilities
        gui.set_img3(probabilities=probs)

        # -- Optionally speak the result aloud, e.g. "two is one"
        if is_shout_out_result:
            lib_datasets.shout_out_result(
                recorder.filename,  # Raw audio to shout out
                final_label,
                middle_word="is",
                cache_folder="data/examples/")

        return predicted_label, max_prob
def main(args):
    """Load the classifier and print a predicted label for every wav file.

    Args:
        args: parsed CLI arguments with ``path_to_weights``,
            ``path_to_classes`` and ``path_to_data`` attributes.
    """
    # Model and its class labels
    model = setup_classifier(load_weights_from=args.path_to_weights)
    setup_classes_labels(load_classes_from=args.path_to_classes, model=model)

    # Files to classify
    filenames = get_wav_filenames(path_to_data=args.path_to_data)

    print("\nStart predicting audio label:\n")

    for idx, wav_path in enumerate(filenames):
        predicted = model.predict_audio_label(
            lib_datasets.AudioClass(filename=wav_path))
        print("{:03d}th file: Label = {:<10}, Filename = {}".format(
            idx, predicted, wav_path))
# ----- Example #4 (scraped-snippet separator; original text "Пример #4", score 0) -----
def inference_from_microphone():
    """Loop forever: record while 'R' is held down, then classify the clip."""
    # -- Model
    model, classes = lib_rnn.setup_default_RNN_model(
        weight_filepath=SRC_WEIGHT_PATH, classes_txt=SRC_CLASSES_PATH)
    print(f"{len(classes)} classes: {classes}")

    # -- Hotkey listener (runs in a background thread)
    keyboard = KeyboardInputFromTerminal(
        hotkey="R", is_print=False, run_in_new_thread=True)

    # -- Recorder and a rate-limited usage printer
    recorder = AudioRecorder()
    timer_printer = TimerPrinter(print_period=2.0)

    num_recordings = 0
    while True:
        timer_printer.print("Usage: keep pressing down 'R' to record audio")

        if not keyboard.is_key_pressed():
            time.sleep(0.1)
            continue

        num_recordings += 1
        print("\nRecord {}th voice".format(num_recordings))

        # Record from key press until key release
        recorder.start_record(folder=DST_AUDIO_FOLDER)
        while not keyboard.is_key_released():
            time.sleep(0.001)
        recorder.stop_record()

        # Classify the recorded file
        audio = lib_datasets.AudioClass(filename=recorder.filename)
        predicted_label = model.predict_audio_label(audio)
        print("\nAll word labels: {}".format(model.classes))
        print("\nPredicted label: {}\n".format(predicted_label))

        # Speak the result aloud, e.g. "one is two"
        lib_datasets.shout_out_result(recorder.filename, predicted_label,
                                      middle_word="is",
                                      cache_folder="data/examples/")

        time.sleep(0.1)
# ----- Example #5 (scraped-snippet separator; original text "Пример #5", score 0) -----
def main(args):
    """Predict a label for each wav file named by the CLI arguments.

    Args:
        args: parsed CLI arguments with ``weight_path``, ``classes_path``
            and ``data_folder`` attributes.
    """
    # -- Model
    model, classes = lib_rnn.setup_default_RNN_model(
        args.weight_path, args.classes_path)

    # -- Collect filenames: a single file, or every .wav inside a folder
    filenames = lib_datasets.get_wav_filenames(
        data_folder=args.data_folder, suffix=".wav")

    # -- Classify each file
    print("\nStart predicting audio label:\n")

    for idx, wav_path in enumerate(filenames):
        label = model.predict_audio_label(
            lib_datasets.AudioClass(filename=wav_path))
        print("{:03d}th file: Label = {:<10}, Filename = {}".format(
            idx, label, wav_path))
def reset_audio_sample_rate(filename, dst_sample_rate=16000):
    """Resample an audio file in place to ``dst_sample_rate`` Hz.

    Args:
        filename: Path of the audio file to read and overwrite.
        dst_sample_rate: Target sample rate in Hz. If None, no resampling
            is done and the file is rewritten at its original rate.
    """
    data, sample_rate = sf.read(filename)
    if (dst_sample_rate is not None) and (dst_sample_rate != sample_rate):
        data = librosa.core.resample(data, sample_rate, dst_sample_rate)
        sample_rate = dst_sample_rate
    sf.write(filename, data, sample_rate)
    # BUG FIX: the message previously printed a literal placeholder instead
    # of the file's name, and printed `dst_sample_rate` (misleadingly "None"
    # when no target rate is given). Report the rate actually written.
    print(f"Reset sample rate to {sample_rate} for the file: {filename}")


# Demo script: apply random augmentations to every noise recording and
# audition the results.

# Source folder containing the raw noise .wav files.
folder = "./data/noises/"
fnames = get_filenames(folder, file_type="*.wav")

# Augmentation pipeline; each stage draws its parameters from the given
# (min, max) ranges, so every application produces a different variant.
Aug = lib_augment.Augmenter
aug = Aug([
    Aug.Crop(time=(0.6, 2.0)),
    Aug.PadZeros(time=(0, 0.3)),
    Aug.PlaySpeed(rate=(0.7, 1.5), keep_size=False),
    Aug.Amplify(rate=(0.6, 1.5)),
])

# For each source file, write and play 5 augmented copies.
for name in fnames:
    audio0 = lib_datasets.AudioClass(filename=name)
    for i in range(5):
        print(i)
        # Deep-copy so each augmentation starts from the unmodified audio.
        audio = copy.deepcopy(audio0)
        aug(audio)
        # Suffix the index onto the filename and drop the directory part.
        name_new = lib_commons.add_idx_suffix(name, i).split('/')[-1]
        audio.write_to_file("data/data_tmp/" + name_new)
        audio.play_audio()
'''
Test the function `lib_proc_audio.compute_mfcc`,
    which is called by `audio.compute_mfcc()`.
'''

import matplotlib.pyplot as plt

if True:  # Add ROOT and import my libraries.
    import sys
    import os
    # Project root is assumed to be one level above this script's folder.
    ROOT = os.path.dirname(os.path.abspath(__file__)) + \
        "/../"  # Root of the project.
    sys.path.append(ROOT)

    import utils.lib_datasets as lib_datasets

# Load a sample recording, compute its MFCC features, and plot the
# waveform alongside the MFCC spectrogram.
audio = lib_datasets.AudioClass(filename="test_data/audio_front.wav")
audio.compute_mfcc()
audio.plot_audio_and_mfcc()
plt.show()