Exemplo n.º 1
0
    def test_cnn_blstm_fit(self):
        """Smoke test: fit ``models.cnn_blstm`` for one epoch on the sample CSV.

        Uses spectrogram features; passes as long as training runs without
        raising.
        """
        csv_path = "data_dir/sample_librivox-test-clean.csv"
        _, frame = combine_all_wavs_and_trans_from_csvs(csv_path)

        # Generator settings are gathered in one place and splatted in.
        generator_settings = dict(
            feature_type='spectrogram',
            batch_size=6,
            frame_length=320,
            hop_length=160,
            n_mels=40,
            epoch_length=0,
            shuffle=True,
        )
        batches = DataGenerator(frame, **generator_settings)

        # input_dim matches n_mels above.
        network_settings = dict(
            units=256,
            input_dim=40,
            output_dim=29,
            dropout=0.2,
            cudnn=False,
            n_layers=1,
        )
        network = models.cnn_blstm(**network_settings)
        network.compile(loss=self.loss, optimizer=self.optimizer)

        # One silent epoch over the generator.
        network.fit_generator(generator=batches, epochs=1, verbose=0)
Exemplo n.º 2
0
    def test_brnn_fit(self):
        """Smoke test: fit ``models.brnn`` for one epoch on the sample CSV.

        Uses MFCC features; passes as long as training runs without raising.
        """
        csv_path = "data_dir/sample_librivox-test-clean.csv"
        _, frame = combine_all_wavs_and_trans_from_csvs(csv_path)

        # Generator settings are gathered in one place and splatted in.
        generator_settings = dict(
            feature_type='mfcc',
            batch_size=6,
            frame_length=320,
            hop_length=160,
            n_mels=40,
            mfcc_features=26,
            epoch_length=0,
            shuffle=True,
        )
        batches = DataGenerator(frame, **generator_settings)

        # input_dim matches mfcc_features above.
        network_settings = dict(
            units=256,
            input_dim=26,
            output_dim=29,
            dropout=0.2,
            numb_of_dense=3,
        )
        network = models.brnn(**network_settings)
        network.compile(loss=self.loss, optimizer=self.optimizer)

        # One silent epoch over the generator.
        network.fit_generator(generator=batches, epochs=1, verbose=0)
Exemplo n.º 3
0
def main(args):
    """Train a CTC speech model from parsed command-line arguments.

    Steps:
      1. Combine the train/valid CSVs into two dataframes.
      2. Build the training and validation batch generators.
      3. Load a checkpointed model, or build a new one for ``args.model_arch``.
      4. Compile with the selected optimiser and train with ``fit_generator``.
      5. Print the WER/LER logs collected by the report callback.

    Args:
        args: namespace of options. Fields read here: ``train_files``,
            ``valid_files``, ``model_arch``, ``batchsize``,
            ``loadcheckpointpath``, ``fc_size``, ``rnn_size``, ``opt``,
            ``learning_rate``, ``train_steps``, ``valid_steps``, ``memcheck``,
            ``tensorboard``, ``name`` and ``epochs``. ``train_steps`` and
            ``valid_steps`` are overwritten in place when they are 0.

    Raises:
        ValueError: if ``args.model_arch`` or ``args.opt`` is not recognised.
        AssertionError: if ``args.loadcheckpointpath`` is set but is not a
            directory.
    """

    # 1. combine all data into 2 dataframes (train, valid)
    print("Getting data from arguments")
    train_dataprops, df_train = combine_all_wavs_and_trans_from_csvs(
        args.train_files)
    valid_dataprops, df_valid = combine_all_wavs_and_trans_from_csvs(
        args.valid_files)

    # Architectures 2 and 5 are fed spectrograms; every other one uses MFCCs.
    if args.model_arch == 2 or args.model_arch == 5:
        print("Spectrogram required")
        model_input_type = "spectrogram"
    else:
        model_input_type = "mfcc"

    ## 2. init data generators
    print("Creating data batch generators")
    traindata = BatchGenerator(dataframe=df_train,
                               training=True,
                               batch_size=args.batchsize,
                               model_input_type=model_input_type)
    validdata = BatchGenerator(dataframe=df_valid,
                               training=False,
                               batch_size=args.batchsize,
                               model_input_type=model_input_type)

    # Peek at the first batch to learn the per-sample shapes (batch axis
    # dropped); only architecture 3 consumes them.
    inputs, _ = traindata.get_batch(0)
    input_shape = inputs['the_input'].shape[1:]
    output_shape = inputs['the_labels'].shape[1:]

    output_dir = os.path.join('checkpoints/results', 'model')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    ## 3. Load existing or create new model
    if args.loadcheckpointpath:
        # load existing
        print("Loading model")

        cp = args.loadcheckpointpath
        assert (os.path.isdir(cp))

        model_path = os.path.join(cp, "model")

        model = load_model_checkpoint(model_path)

        print("Model loaded")
    else:
        # new model recipes here
        print('New model DS{}'.format(args.model_arch))
        if args.model_arch == 0:
            # DeepSpeech1 with Dropout
            model = ds1_dropout(input_dim=26,
                                fc_size=args.fc_size,
                                rnn_size=args.rnn_size,
                                dropout=[0.1, 0.1, 0.1],
                                output_dim=29)
        elif args.model_arch == 1:
            # DeepSpeech1 - no dropout
            model = ds1(input_dim=26,
                        fc_size=args.fc_size,
                        rnn_size=args.rnn_size,
                        output_dim=29)
        elif args.model_arch == 2:
            # DeepSpeech2 model
            model = ds2_gru_model(input_dim=161,
                                  fc_size=args.fc_size,
                                  rnn_size=args.rnn_size,
                                  output_dim=29)
        elif args.model_arch == 3:
            # own model
            model = ownModel(input_shape,
                             output_shape,
                             fc_size=args.fc_size,
                             rnn_size=args.rnn_size,
                             dropout=[0.1, 0.1, 0.1],
                             output_dim=29)
        elif args.model_arch == 4:
            # graves model
            model = graves(input_dim=26,
                           rnn_size=args.rnn_size,
                           output_dim=29,
                           std=0.5)
        elif args.model_arch == 5:
            # cnn city
            model = cnn_city(input_dim=161,
                             fc_size=args.fc_size,
                             rnn_size=args.rnn_size,
                             output_dim=29)
        elif args.model_arch == 6:
            # constrained model
            model = const(input_dim=26,
                          fc_size=args.fc_size,
                          rnn_size=args.rnn_size,
                          output_dim=29)
        else:
            # Raising a bare string is itself a TypeError; raise a real
            # exception so the message reaches the user.
            raise ValueError("model not found")

        # summary() prints the table itself; wrapping it in print() only
        # added a stray "None" line.
        model.summary(line_length=140)

        # required to save the JSON
        save_model(model, output_dir)

    optimiser_name = args.opt.lower()
    if optimiser_name == 'sgd':
        opt = SGD(lr=args.learning_rate,
                  decay=1e-6,
                  momentum=0.9,
                  nesterov=True,
                  clipnorm=5)
    elif optimiser_name == 'adam':
        opt = Adam(lr=args.learning_rate,
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=1e-8,
                   clipnorm=5)
    elif optimiser_name == 'nadam':
        opt = Nadam(lr=args.learning_rate,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-8,
                    clipnorm=5)
    else:
        # ValueError is a subclass of Exception, so existing handlers that
        # catch Exception keep working.
        raise ValueError("optimiser not recognised")

    model.compile(optimizer=opt, loss=ctc)

    ## 4. train

    # A step count of 0 means "one full pass over the dataframe".
    if args.train_steps == 0:
        args.train_steps = len(df_train.index) // args.batchsize
    if args.valid_steps == 0:
        args.valid_steps = len(df_valid.index) // args.batchsize

    cb_list = [MemoryCallback()] if args.memcheck else []

    if args.tensorboard:
        tb_cb = TensorBoard(log_dir='./tensorboard/{}/'.format(args.name),
                            write_graph=False,
                            write_images=True)
        cb_list.append(tb_cb)

    # Backend function from the model input to the first input of the 'ctc'
    # layer; ReportCallback uses it on the validation generator.
    y_pred = model.get_layer('ctc').input[0]
    input_data = model.get_layer('the_input').input

    report = K.function([input_data, K.learning_phase()], [y_pred])
    report_cb = ReportCallback(report, validdata, model, args.name, save=True)

    cb_list.append(report_cb)

    model.fit_generator(
        generator=traindata.next_batch(),
        steps_per_epoch=args.train_steps,
        epochs=args.epochs,
        callbacks=cb_list,
        validation_data=validdata.next_batch(),
        validation_steps=args.valid_steps,
    )

    ## 5. These are the most important metrics
    print("Mean WER   :", report_cb.mean_wer_log)
    print("Mean LER   :", report_cb.mean_ler_log)
    print("NormMeanLER:", report_cb.norm_mean_ler_log)

    # export to csv?
    K.clear_session()
Exemplo n.º 4
0
def main(args):
    """Evaluate a previously trained model on the test CSVs.

    The function reads ``args.name``, ``args.test_files``,
    ``args.loadcheckpointpath`` and ``args.model_arch`` (the latter selects
    MFCC vs. spectrogram input). It loads the checkpointed model, compiles
    it, and runs one ``ReportCallback.on_epoch_end`` pass over the test
    generator with output forced on.

    Args:
        args: namespace of options (see the fields listed above).

    Raises:
        ValueError: if ``args.loadcheckpointpath`` is empty; this script can
            only evaluate an existing model.
        AssertionError: if ``args.loadcheckpointpath`` is not a directory.
    """

    print("Getting data from arguments")
    test_dataprops, df_test = combine_all_wavs_and_trans_from_csvs(
        args.test_files, sortagrad=False)

    # Architectures 2 and 5 are fed spectrograms; every other one uses MFCCs.
    if args.model_arch == 2 or args.model_arch == 5:
        print("Spectrogram required")
        model_input_type = "spectrogram"
    else:
        model_input_type = "mfcc"

    ## 2. init data generators
    print("Creating data batch generators")
    testdata = BatchGenerator(dataframe=df_test,
                              dataproperties=test_dataprops,
                              training=False,
                              batch_size=1,
                              model_input_type=model_input_type)

    ## 3. Load existing or error
    if args.loadcheckpointpath:
        # load existing
        print("Loading model")

        cp = args.loadcheckpointpath
        assert (os.path.isdir(cp))

        # Manual switch: set to True to load the "TRIMMED_ds_model" files
        # instead of the regular "model" files.
        trimmed = False

        if trimmed:
            model_path = os.path.join(cp, "TRIMMED_ds_model")
        else:
            model_path = os.path.join(cp, "model")

        model = load_model_checkpoint(model_path)
        opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

        print("Model loaded")

    else:
        # Raising a bare string is itself a TypeError; raise a real exception
        # so the message reaches the user.
        raise ValueError("You need to load an existing trained model")

    model.compile(optimizer=opt, loss=ctc)

    ## 4. test

    try:
        y_pred = model.get_layer('ctc').input[0]
    except Exception as e:
        # Fall back to the layer named 'out' when there is no 'ctc' layer.
        print("error", e)
        print(
            "couldn't find ctc layer, possibly a trimmed layer, trying other name"
        )
        y_pred = model.get_layer('out').output

    input_data = model.get_layer('the_input').input

    K.set_learning_phase(0)
    report = K.function([input_data, K.learning_phase()], [y_pred])
    report_cb = ReportCallback(report, testdata, model, args.name, save=False)
    report_cb.force_output = True
    # Invoke the callback by hand: no training happens in this script.
    report_cb.on_epoch_end(0, logs=None)

    K.clear_session()
Exemplo n.º 5
0
def main(args):
    """Train (or resume training) a speech model on the LibriVox CSV splits.

    NOTE: this function uses Python 2 print statements.

    The CSV paths are hard-coded below. Everything else comes from ``args``;
    the fields read are ``batch_size``, ``epoch_len``, ``epochs``, ``lr``,
    ``log_file``, ``num_gpu``, ``feature_type``, ``mfccs``, ``mels``,
    ``model_type``, ``units``, ``dropout``, ``layers``, ``model_save``,
    ``checkpoint``, ``model_load``, ``load_multi``, ``save_best_val``,
    ``shuffle_indexes``, ``reduce_lr`` and ``early_stopping``.

    Any exception raised during model creation, training or saving is caught,
    printed, and NOT re-raised; the Keras session is cleared either way.
    """
    # Paths to .csv files
    path = "data_dir/librivox-train-clean-360.csv"
    path_validation = "data_dir/librivox-dev-clean.csv"
    path_test = "data_dir/librivox-test-clean.csv"

    # Create dataframes
    print "\nReading training data:"
    _, input_dataframe = combine_all_wavs_and_trans_from_csvs(path)
    print "\nReading validation data: "
    _, validation_df = combine_all_wavs_and_trans_from_csvs(path_validation)
    print "\nReading test data: "
    _, test_df = combine_all_wavs_and_trans_from_csvs(path_test)

    # Training params:
    batch_size = args.batch_size
    input_epoch_length = args.epoch_len
    epochs = args.epochs
    learning_rate = args.lr
    log_file = args.log_file

    # Multi GPU or single GPU / CPU training
    num_gpu = args.num_gpu

    # Preprocessing params
    feature_type = args.feature_type
    mfcc_features = args.mfccs
    n_mels = args.mels

    # Model params
    model_type = args.model_type
    units = args.units
    dropout = args.dropout
    n_layers = args.layers

    # Saving and loading params
    model_save = args.model_save
    checkpoint = args.checkpoint
    model_load = args.model_load
    load_multi = args.load_multi

    # Additional settings for training
    save_best = args.save_best_val  # Save model with best val_loss (on path "model_save" + "_best")
    shuffle = args.shuffle_indexes
    reduce_lr = args.reduce_lr  # Reduce learning rate on val_loss plateau
    early_stopping = args.early_stopping  # Stop training early if val_loss stops improving

    frequency = 16  # Sampling rate of data in khz (LibriSpeech is 16khz)
    cudnnlstm = False

    # Data generation parameters
    # frame_length / hop_length evaluate to 320 / 160 here; presumably a
    # 20 ms window and 10 ms hop expressed in samples -- TODO confirm against
    # DataGenerator.
    data_params = {
        'feature_type': feature_type,
        'batch_size': batch_size,
        'frame_length': 20 * frequency,
        'hop_length': 10 * frequency,
        'mfcc_features': mfcc_features,
        'n_mels': n_mels,
        'epoch_length': input_epoch_length,
        'shuffle': shuffle
    }

    # Data generators for training, validation and testing data
    # (all three share the same parameters, including epoch_length and shuffle)
    training_generator = DataGenerator(input_dataframe, **data_params)
    validation_generator = DataGenerator(validation_df, **data_params)
    test_generator = DataGenerator(test_df, **data_params)

    # Model input shape
    if feature_type == 'mfcc':
        input_dim = mfcc_features
    else:
        input_dim = n_mels

    output_dim = 29  # Output dim: features to predict + 1 for the CTC blank prediction

    # Optimization algorithm used to update network weights
    optimizer = Adam(lr=learning_rate, epsilon=1e-8, clipnorm=2.0)

    # Dummy loss-function for compiling model, actual CTC loss-function defined as a lambda layer in model
    loss = {'ctc': lambda y_true, y_pred: y_pred}

    # Print training data at the beginning of training
    calc_epoch_length = training_generator.__len__()
    print "\n\nModel and training parameters: "
    print "Starting time: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print " - epochs: ", epochs, "\n - batch size: ", batch_size, \
        "\n - input epoch length: ", input_epoch_length, "\n - network epoch length: ", calc_epoch_length, \
        "\n - training on ", calc_epoch_length * batch_size, " files", "\n - learning rate: ", learning_rate, \
        "\n - hidden units: ", units, "\n - mfcc features: ", mfcc_features, "\n - dropout: ", dropout, "\n"

    try:
        # Load previous model or create new. With device cpu ensures that the model is created/loaded on the cpu
        if model_load:
            with tf.device('/cpu:0'):
                # When loading custom objects, Keras needs to know where to find them.
                # The CTC lambda is a dummy function
                custom_objects = {
                    'clipped_relu': models.clipped_relu,
                    '<lambda>': lambda y_true, y_pred: y_pred
                }

                # When loading a parallel model saved *while* running on multiple GPUs, use load_multi
                if load_multi:
                    model = models.load_model(model_load,
                                              custom_objects=custom_objects)
                    # Keep only the second-to-last layer of the loaded model;
                    # presumably that is the wrapped single-device model
                    # inside the multi-GPU container -- TODO confirm.
                    model = model.layers[-2]
                    print "Loaded existing model at: ", model_load

                # Load single GPU/CPU model or model saved *after* finished training
                else:
                    model = models.load_model(model_load,
                                              custom_objects=custom_objects)
                    print "Loaded existing model at: ", model_load

        else:
            with tf.device('/cpu:0'):
                # Create new model
                model = models.model(model_type=model_type,
                                     units=units,
                                     input_dim=input_dim,
                                     output_dim=output_dim,
                                     dropout=dropout,
                                     cudnn=cudnnlstm,
                                     n_layers=n_layers)
                print "Creating new model: ", model_type

        # Loss callback parameters
        loss_callback_params = {
            'validation_gen': validation_generator,
            'test_gen': test_generator,
            'checkpoint': checkpoint,
            'path_to_save': model_save,
            'log_file_path': log_file
        }

        # Model training parameters
        # (passed as keyword arguments to fit_generator below)
        model_train_params = {
            'generator': training_generator,
            'epochs': epochs,
            'verbose': 2,
            'validation_data': validation_generator,
            'workers': 1,
            'shuffle': shuffle
        }

        # Optional callbacks for added functionality
        # Reduces learning rate when val_loss stagnates.
        if reduce_lr:
            print "Reducing learning rate on plateau"
            reduce_lr_cb = ReduceLROnPlateau(factor=0.2,
                                             patience=5,
                                             verbose=0,
                                             epsilon=0.1,
                                             min_lr=0.0000001)
            callbacks = [reduce_lr_cb]
        else:
            callbacks = []

        # Stops the model early if the val_loss isn't improving
        if early_stopping:
            es_cb = EarlyStopping(min_delta=0,
                                  patience=5,
                                  verbose=0,
                                  mode='auto')
            callbacks.append(es_cb)

        # Saves the model if val_loss is improved at "model_save" + "_best"
        if save_best:
            # NOTE: save_best is rebound here from the boolean flag to the
            # checkpoint file path.
            save_best = model_save + str('_best')
            mcp_cb = ModelCheckpoint(save_best,
                                     verbose=1,
                                     save_best_only=True,
                                     period=1)
            callbacks.append(mcp_cb)

        # Train with parallel model on 2 or more GPUs, must be even number
        if num_gpu > 1:
            if num_gpu % 2 == 0:
                # Compile parallel model for training on GPUs > 1
                parallel_model = multi_gpu_model(model, gpus=num_gpu)
                parallel_model.compile(loss=loss, optimizer=optimizer)

                # Print model summary
                model.summary()

                # Creates a test function that takes sound input and outputs predictions
                # Used to calculate WER while training the network
                # (built from the base model's layers, not from parallel_model)
                input_data = model.get_layer('the_input').input
                y_pred = model.get_layer('ctc').input[0]
                test_func = K.function([input_data], [y_pred])

                # The loss callback function that calculates WER while training
                loss_cb = LossCallback(test_func=test_func,
                                       model=model,
                                       **loss_callback_params)
                callbacks.append(loss_cb)

                # Run training
                parallel_model.fit_generator(callbacks=callbacks,
                                             **model_train_params)

            else:
                raise ValueError('Number of GPUs must be an even number')

        # Train with CPU or single GPU
        elif num_gpu == 1 or num_gpu == 0:
            # Compile model for training on GPUs < 2
            model.compile(loss=loss, optimizer=optimizer)

            # Print model summary
            model.summary()

            # Creates a test function that takes preprocessed sound input and outputs predictions
            # Used to calculate WER while training the network
            input_data = model.get_layer('the_input').input
            y_pred = model.get_layer('ctc').input[0]
            test_func = K.function([input_data], [y_pred])

            # The loss callback function that calculates WER while training
            loss_cb = LossCallback(test_func=test_func,
                                   model=model,
                                   **loss_callback_params)
            callbacks.append(loss_cb)

            # Run training
            model.fit_generator(callbacks=callbacks, **model_train_params)

        else:
            # Reached for negative num_gpu values.
            raise ValueError('Not a valid number of GPUs: ', num_gpu)

        # In the multi-GPU branch this saves the base model, not
        # parallel_model.
        if args.model_save:
            model.save(model_save)
            print "Model saved: ", model_save

    # NOTE(review): ArithmeticError is already a subclass of Exception, so
    # listing it is redundant. The handler only prints; the error is swallowed.
    except (Exception, ArithmeticError) as e:
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(e).__name__, e.args)
        print message

    finally:
        # Clear memory
        K.clear_session()
    print "Ending time: ", datetime.now().strftime('%Y-%m-%d %H:%M:%S')
Exemplo n.º 6
0
    def setUp(self):
        """Load the sample CSV into ``self.df`` and build ``self.dg`` from it."""
        sample_csv = "data_dir/sample_librivox-test-clean.csv"
        _, frame = combine_all_wavs_and_trans_from_csvs(sample_csv)
        self.df = frame

        # Generator fixture: batches of 10, epoch length 10.
        generator_options = dict(batch_size=10, epoch_length=10)
        self.dg = DataGenerator(self.df, **generator_options)
Exemplo n.º 7
0
def main(args):
    """Load a trained model and print its predictions for one batch.

    Fields read from ``args``: ``model_load``, ``audio_dir``, ``batch_size``,
    ``batch_index``, ``mfccs``, ``mels``, ``load_multi``, ``feature_type``
    and ``calc_wer``.

    Ordinary errors (``Exception`` subclasses) are reported on stdout and not
    re-raised, as a deliberate best-effort. ``KeyboardInterrupt``,
    ``SystemExit`` and ``GeneratorExit`` are no longer swallowed, so Ctrl-C
    and ``sys.exit`` behave normally. The Keras session is always cleared.
    """
    try:
        if not args.model_load:
            raise ValueError("Error within model arguments")
        audio_dir = args.audio_dir

        print("\nReading test data: ")
        _, df = combine_all_wavs_and_trans_from_csvs(audio_dir)

        batch_size = args.batch_size
        batch_index = args.batch_index

        mfcc_features = args.mfccs
        n_mels = args.mels
        # Sampling rate of data in khz (LibriSpeech is 16khz)
        frequency = 16

        # Training data_params:
        model_load = args.model_load
        load_multi = args.load_multi

        # Sets the full dataset in audio_dir to be available through data_generator
        # The data_generator doesn't actually load the audio files until they are requested through __get_item__()
        epoch_length = 0

        # Load trained model
        # When loading custom objects, Keras needs to know where to find them.
        # The CTC lambda is a dummy function
        custom_objects = {
            'clipped_relu': models.clipped_relu,
            '<lambda>': lambda y_true, y_pred: y_pred
        }

        # The load call is the same for both kinds of saved model.
        model = models.load_model(model_load, custom_objects=custom_objects)
        if load_multi:
            # A parallel model saved *while* running on GPU: keep only its
            # second-to-last layer.
            model = model.layers[-2]
        print("\nLoaded existing model: ", model_load)

        # Dummy loss-function to compile model, actual CTC loss-function defined as a lambda layer in model
        loss = {'ctc': lambda y_true, y_pred: y_pred}

        model.compile(loss=loss, optimizer='Adam')

        feature_shape = model.input_shape[0][2]

        # Model feature type: an explicit argument wins; otherwise a feature
        # width of 26 is taken to mean MFCC input.
        if args.feature_type:
            feature_type = args.feature_type
        elif feature_shape == 26:
            feature_type = 'mfcc'
        else:
            feature_type = 'spectrogram'

        print("Feature type: ", feature_type)

        # Data generation parameters
        data_params = {
            'feature_type': feature_type,
            'batch_size': batch_size,
            'frame_length': 20 * frequency,
            'hop_length': 10 * frequency,
            'mfcc_features': mfcc_features,
            'n_mels': n_mels,
            'epoch_length': epoch_length,
            'shuffle': False
        }

        # Data generator over the dataframe read from audio_dir
        data_generator = DataGenerator(df, **data_params)

        # Print(model summary)
        model.summary()

        # Backend function from the model input to the first input of the
        # 'ctc' layer; used for WER calculation and prediction below.
        input_data = model.get_layer('the_input').input
        y_pred = model.get_layer('ctc').input[0]
        test_func = K.function([input_data], [y_pred])

        if args.calc_wer:
            print("\n - Calculation WER on ", audio_dir)
            wer = calc_wer(test_func, data_generator)
            print("Average WER: ", wer[1])

        predictions = predict_on_batch(data_generator, test_func, batch_index)
        print("\n - Predictions from batch index: ", batch_index, "\nFrom: ",
              audio_dir, "\n")
        for i in predictions:
            print("Original: ", i[0])
            print("Predicted: ", i[1], "\n")

    except Exception as e:
        # Catch Exception only: BaseException would also trap
        # KeyboardInterrupt and SystemExit and silently discard them.
        template = "An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(e).__name__, e.args)
        print("e.args: ", e.args)
        print(message)

    finally:
        # Clear memory
        K.clear_session()
        print(
            "couldn't find ctc layer, possibly a trimmed layer, trying other name"
        )
        y_pred = model.get_layer('out').output

    input_data = model.get_layer('the_input').input
    K.set_learning_phase(0)

    #2. record data and put it in live folder LOOP

    while 1:
        startloop(1)

        args.test_files = "./data/live/live.csv"
        print("Getting data from arguments")
        test_dataprops, df_test = combine_all_wavs_and_trans_from_csvs(
            args.test_files, sortagrad=False)
        ## x. init data generators
        print("Creating data batch generators")
        testdata = BatchGenerator(dataframe=df_test,
                                  dataproperties=test_dataprops,
                                  training=False,
                                  batch_size=1,
                                  model_input_type=model_input_type)

        ## RUN TEST
        report = K.function([input_data, K.learning_phase()], [y_pred])
        report_cb = ReportCallback(report,
                                   testdata,
                                   model,
                                   args.name,
                                   save=False)
Exemplo n.º 9
0
def main(args):
    """Load a trained model and print predictions for samples in a dataset.

    Fields read from ``args``: ``model_load``, ``audio_dir``, ``batch_size``,
    ``mfccs``, ``mels``, ``feature_type`` and ``calc_wer``.

    Errors propagate to the caller; the Keras session is cleared in all
    cases.

    Raises:
        ValueError: if ``args.model_load`` is empty.
    """
    try:
        if not args.model_load:
            raise ValueError(
                "args.model_load is required: path to a trained model")
        audio_dir = args.audio_dir

        print("\nReading test data: ")
        _, df = combine_all_wavs_and_trans_from_csvs(audio_dir)

        batch_size = args.batch_size

        mfcc_features = args.mfccs
        n_mels = args.mels
        frequency = 22  # Sampling rate of data in khz (heroico is 22khz)

        # Training data_params:
        model_load = args.model_load

        epoch_length = 0

        # Load trained model
        # Keras needs the custom objects to deserialize the model; the
        # '<lambda>' entry is the dummy CTC loss.
        custom_objects = {
            'clipped_relu': models.clipped_relu,
            '<lambda>': lambda y_true, y_pred: y_pred
        }

        # Load single GPU/CPU model or model saved *after* finished training
        model = mo.load_model(model_load, custom_objects=custom_objects)
        print("\nLoaded existing model: ", model_load)

        # Dummy loss-function to compile model, actual CTC loss-function defined as a lambda layer in model
        loss = {'ctc': lambda y_true, y_pred: y_pred}

        model.compile(loss=loss, optimizer='Adam')

        feature_shape = model.input_shape[0][2]

        # Model feature type: an explicit argument wins; otherwise a feature
        # width of 26 is taken to mean MFCC input.
        if args.feature_type:
            feature_type = args.feature_type
        elif feature_shape == 26:
            feature_type = 'mfcc'
        else:
            feature_type = 'spectrogram'

        print("Feature type: ", feature_type)

        # Data generation parameters
        data_params = {
            'feature_type': feature_type,
            'batch_size': batch_size,
            'frame_length': 20 * frequency,
            'hop_length': 10 * frequency,
            'mfcc_features': mfcc_features,
            'n_mels': n_mels,
            'epoch_length': epoch_length,
            'shuffle': False
        }

        # Data generator over the dataframe read from audio_dir
        data_generator = DataGenerator(df, **data_params)

        # Print model summary
        model.summary()

        # Backend function from the model input to the first input of the
        # 'ctc' layer; used for WER calculation and prediction below.
        input_data = model.get_layer('the_input').input
        y_pred = model.get_layer('ctc').input[0]
        test_func = K.function([input_data], [y_pred])

        if args.calc_wer:
            print("\n - Calculation WER on ", audio_dir)
            wer = calc_wer(test_func, data_generator)
            print("Average WER: ", wer[1])

        predictions = predict_samples(data_generator, test_func)
        for i in predictions:
            print("Original: ", i[0])
            print("Predicted: ", i[1], "\n")

    # No except clause: the old handler only re-raised what it caught, so
    # try/finally gives the same propagation without the extra frame.
    finally:
        # Clear memory
        K.clear_session()