Example #1
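# A hedged sketch of the imports this example needs (Keras 2.x era). The
# project-specific helpers used below (combine_all_wavs_and_trans_from_csvs,
# BatchGenerator, the model builders, ctc, save_model, load_model_checkpoint,
# MemoryCallback, ReportCallback) are assumed to come from this repository's
# own modules.
import os

from keras import backend as K
from keras.callbacks import TensorBoard
from keras.optimizers import SGD, Adam, Nadam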
def main(args):
    '''
    Runs the training pipeline in five steps:
      1. combine all data into two dataframes (train, valid)
      2. initialise the batch generators
      3. load an existing model checkpoint or build a new one
      4. compile and train with CTC loss
      5. report the WER/LER metrics gathered by the ReportCallback
    '''

    # 1. combine all data into 2 dataframes (train, valid)
    print("Getting data from arguments")
    train_dataprops, df_train = combine_all_wavs_and_trans_from_csvs(
        args.train_files)
    valid_dataprops, df_valid = combine_all_wavs_and_trans_from_csvs(
        args.valid_files)

    # check any special model input requirements, e.g. a spectrogram
    if args.model_arch in (2, 5):
        print("Spectrogram required")
        model_input_type = "spectrogram"
    else:
        model_input_type = "mfcc"

    ## 2. init data generators
    print("Creating data batch generators")
    traindata = BatchGenerator(dataframe=df_train,
                               training=True,
                               batch_size=args.batchsize,
                               model_input_type=model_input_type)
    validdata = BatchGenerator(dataframe=df_valid,
                               training=False,
                               batch_size=args.batchsize,
                               model_input_type=model_input_type)
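    # With the Keras CTC training pattern, the generator's "inputs" dict is
    # expected to carry both the features ('the_input') and the label tensors
    # ('the_labels'), so the output shape below is read from that dict.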
    inputs, outputs = traindata.get_batch(0)
    input_shape = inputs['the_input'].shape[1:]
    output_shape = inputs['the_labels'].shape[1:]

    output_dir = os.path.join('checkpoints/results', 'model')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    ## 3. Load existing or create new model
    if args.loadcheckpointpath:
        # load existing
        print("Loading model")

        cp = args.loadcheckpointpath
        assert os.path.isdir(cp)

        model_path = os.path.join(cp, "model")
        # assert(os.path.isfile(model_path))

        model = load_model_checkpoint(model_path)

        print("Model loaded")
    else:
        # new model recipes here
        print('New model DS{}'.format(args.model_arch))
        if args.model_arch == 0:
            # DeepSpeech1 with Dropout
            model = ds1_dropout(input_dim=26,
                                fc_size=args.fc_size,
                                rnn_size=args.rnn_size,
                                dropout=[0.1, 0.1, 0.1],
                                output_dim=29)
        elif args.model_arch == 1:
            # DeepSpeech1 - no dropout
            model = ds1(input_dim=26,
                        fc_size=args.fc_size,
                        rnn_size=args.rnn_size,
                        output_dim=29)
        elif args.model_arch == 2:
            # DeepSpeech2 model
            model = ds2_gru_model(input_dim=161,
                                  fc_size=args.fc_size,
                                  rnn_size=args.rnn_size,
                                  output_dim=29)
        elif args.model_arch == 3:
            # own model
            model = ownModel(input_shape,
                             output_shape,
                             fc_size=args.fc_size,
                             rnn_size=args.rnn_size,
                             dropout=[0.1, 0.1, 0.1],
                             output_dim=29)
        elif args.model_arch == 4:
            # graves model
            model = graves(input_dim=26,
                           rnn_size=args.rnn_size,
                           output_dim=29,
                           std=0.5)
        elif args.model_arch == 5:
            # cnn city
            model = cnn_city(input_dim=161,
                             fc_size=args.fc_size,
                             rnn_size=args.rnn_size,
                             output_dim=29)
        elif args.model_arch == 6:
            # constrained model
            model = const(input_dim=26,
                          fc_size=args.fc_size,
                          rnn_size=args.rnn_size,
                          output_dim=29)
        else:
            raise Exception("model not found")

        model.summary(line_length=140)  # summary() prints itself and returns None

        # required to save the JSON
        save_model(model, output_dir)

    if args.opt.lower() == 'sgd':
        opt = SGD(lr=args.learning_rate,
                  decay=1e-6,
                  momentum=0.9,
                  nesterov=True,
                  clipnorm=5)
    elif args.opt.lower() == 'adam':
        opt = Adam(lr=args.learning_rate,
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=1e-8,
                   clipnorm=5)
    elif args.opt.lower() == 'nadam':
        opt = Nadam(lr=args.learning_rate,
                    beta_1=0.9,
                    beta_2=0.999,
                    epsilon=1e-8,
                    clipnorm=5)
    else:
        raise Exception("optimiser not recognised")

    model.compile(optimizer=opt, loss=ctc)
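    # With this pattern the real CTC loss is computed inside the graph by the
    # layer named 'ctc'; the `ctc` loss function passed here is presumably a
    # pass-through on that layer's output. clipnorm=5 on each optimiser keeps
    # the RNN gradients from exploding.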

    ## 4. train

    if args.train_steps == 0:
        args.train_steps = len(df_train.index) // args.batchsize
        # print(args.train_steps)
    # run over the full validation set at each epoch end
    if args.valid_steps == 0:
        args.valid_steps = len(df_valid.index) // args.batchsize
        # print(args.valid_steps)

    if args.memcheck:
        cb_list = [MemoryCallback()]
    else:
        cb_list = []

    if args.tensorboard:
        tb_cb = TensorBoard(log_dir='./tensorboard/{}/'.format(args.name),
                            write_graph=False,
                            write_images=True)
        cb_list.append(tb_cb)

    y_pred = model.get_layer('ctc').input[0]
    input_data = model.get_layer('the_input').input

    report = K.function([input_data, K.learning_phase()], [y_pred])
    report_cb = ReportCallback(report, validdata, model, args.name, save=True)
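    # ReportCallback presumably runs this K.function over the validation data
    # at each epoch end to decode predictions and accumulate WER/LER.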

    cb_list.append(report_cb)

    model.fit_generator(
        generator=traindata.next_batch(),
        steps_per_epoch=args.train_steps,
        epochs=args.epochs,
        callbacks=cb_list,
        validation_data=validdata.next_batch(),
        validation_steps=args.valid_steps,
    )
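    # fit_generator pulls batches from the Python generators above;
    # steps_per_epoch and validation_steps bound how many batches make up one
    # epoch and one validation pass.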

    ## These are the most important metrics
    print("Mean WER   :", report_cb.mean_wer_log)
    print("Mean LER   :", report_cb.mean_ler_log)
    print("NormMeanLER:", report_cb.norm_mean_ler_log)

    # export to csv?
    K.clear_session()
Example #2
# Assumed imports for this TensorFlow 1.x training-loop snippet:
import time
import tensorflow as tf

# The first line of this snippet was truncated in the source; tr_batch_num
# (the number of batches per epoch) is assumed here to be the training-set
# size divided by the batch size.
tr_batch_num = int(data_train.y.shape[0] / tr_batch_size)  # batches per epoch
display_batch = int(tr_batch_num / display_num)  # print progress once every display_batch batches
saver = tf.train.Saver(max_to_keep=10)  # keep at most 10 model checkpoints
for epoch in range(max_max_epoch):
    _lr = 1e-4
    if epoch > max_epoch:
        _lr = _lr * decay ** (epoch - max_epoch)
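        # exponential decay past max_epoch: for a hypothetical decay of 0.85,
        # epoch max_epoch + 2 would give 1e-4 * 0.85**2 = 7.225e-5
        # (decay and max_epoch are defined outside this snippet)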
    print('EPOCH %d, lr=%g' % (epoch + 1, _lr))
    start_time = time.time()
    _costs = 0.0
    _accs = 0.0
    show_accs = 0.0
    show_costs = 0.0
    for batch in range(tr_batch_num):
        fetches = [accuracy, cost, train_op]
        X_batch, X_tag_batch, y_batch = data_train.next_batch(tr_batch_size)
        feed_dict = {
            X_inputs: X_batch,
            X_tag_inputs: X_tag_batch,
            y_inputs: y_batch,
            lr: _lr,
            batch_size: tr_batch_size,
            keep_prob: 0.5
        }
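        # keep_prob = 0.5 enables dropout during training; an evaluation pass
        # would presumably feed keep_prob = 1.0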
        _acc, _cost, _ = sess.run(
            fetches, feed_dict)  # the cost is the mean cost of one batch
        _accs += _acc
        _costs += _cost
        show_accs += _acc
        show_costs += _cost
        if (batch + 1) % display_batch == 0: