Example #1
File: main.py  Project: amhajavi/Stutter
def get_prediction():

    toolkits.initialize_GPU(args)

    import model
    import generator

    params = {
        'dim': (257, args.spec_len, 1),
        'mp_pooler': toolkits.set_mp(processes=12),
        'nfft': 512,
        'spec_len': args.spec_len,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 2,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': False,
        'normalize': True,
    }

    network = model.stutter_model(input_dim=params['dim'],
                                  num_class=params['n_classes'],
                                  mode='train',
                                  args=args)

    personal_folder = '/home/amh/Projects/Stutter/model/Filler/person_{}/'.format(
        args.person)
    personal_files = sorted(os.listdir(personal_folder))[-1]  # listdir order is arbitrary; sort to pick the latest checkpoint
    best_model = os.path.join(personal_folder, personal_files)
    print(best_model)
    network.load_weights(best_model)
    vallist, vallb = toolkits.get_datalist(
        args,
        path='../meta/Stutter_Leave_One_Fillers/validation_labels_{}.txt'.
        format(args.person))
    vld_gen = generator.DataGenerator(vallist.flatten(), vallb.flatten(),
                                      **params)
    preds = []
    for i in range((len(vallist) // args.batch_size) + 1):
        x_data, _ = vld_gen[i]  # indexing calls DataGenerator.__getitem__
        preds.extend(np.argmax(network.predict(x_data), axis=1))

    from sklearn.metrics import accuracy_score
    preds = preds[:len(vallb)]  # drop padding from the final partial batch, if any
    print(accuracy_score(vallb, preds))  # accuracy_score expects (y_true, y_pred)
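Every example in this file reads a module-level `args` object. A minimal sketch of how it might be built with argparse, covering only the attributes the examples actually access (the flag names and defaults are assumptions, not the projects' real CLIs):

# Hypothetical parser; attribute names mirror the usage above (args.gpu,
# args.batch_size, args.spec_len, args.person, args.resume, ...).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--gpu', default='0', type=str)
parser.add_argument('--batch_size', default=16, type=int)
parser.add_argument('--spec_len', default=250, type=int)
parser.add_argument('--person', default=0, type=int)
parser.add_argument('--resume', default='', type=str)
parser.add_argument('--epochs', default=30, type=int)
parser.add_argument('--multiprocess', default=12, type=int)
parser.add_argument('--loss', default='softmax', type=str)
parser.add_argument('--aggregation_mode', default='gvlad', type=str)
parser.add_argument('--ohem_level', default=0, type=int)
args = parser.parse_args()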
Example #2
File: main.py  Project: hmen97/VGG-Vox
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/vox2_train_wav.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/vox2_val_wav.txt')

    # construct the data generator.
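    # Dimension arithmetic: nfft=512 yields 512/2 + 1 = 257 frequency bins; at a
    # 16 kHz sampling rate, win_length=400 samples is a 25 ms window and
    # hop_length=160 a 10 ms hop, so spec_len=250 frames covers about 2.5 s.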
    params = {
        'dim': (257, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
    }

    # Datasets
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'],
                                      **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=args)

    # ==> load pre-trained model ???
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(args.resume)
            else:
                # with multi_gpu_model, the wrapped single-GPU model sits at layers[mgpu + 1]
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()  # summary() prints itself and returns None
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path,
                                              histogram_freq=0,
                                              write_graph=True,
                                              write_images=False,
                                              update_freq=args.batch_size * 16)
    callbacks = [
        keras.callbacks.ModelCheckpoint(os.path.join(
            model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                        monitor='loss',
                                        mode='min',
                                        save_best_only=True), normal_lr,
        tbcallbacks
    ]

    if args.ohem_level > 1:  # online hard negative mining will be used
        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(
            len(partition['train']) // (args.ohem_level * args.batch_size))

        ohem_generator = generator.OHEM_generator(
            network, trn_gen, candidate_steps, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes'])

        A = ohem_generator.next()  # for some reason, I need to warm up the generator

        network.fit_generator(generator.OHEM_generator(
            network, trn_gen, iters_per_epoch, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        network.fit_generator(trn_gen,
                              steps_per_epoch=int(
                                  len(partition['train']) // args.batch_size),
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)
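The LearningRateScheduler(step_decay) callback assumes a step_decay function defined elsewhere in main.py. A minimal sketch of a classic step schedule (the initial rate, drop factor, and interval are assumptions):

import math

def step_decay(epoch):
    # drop the learning rate by 10x every 10 epochs (values are assumptions)
    initial_lr, drop, epochs_per_drop = 0.001, 0.1, 10
    return initial_lr * math.pow(drop, math.floor(epoch / epochs_per_drop))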
Example #3
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import datetime
    import pandas as pd
    from sklearn.metrics import confusion_matrix
    # ==================================
    #       Get Train/Val.
    # ==================================
    vallist, vallb = toolkits.get_hike_datalist(
        meta_paths=args.test_meta_data_path,
        data_paths=args.test_data_path,
        mode=model_config['loss'])
    _, valscore = toolkits.get_hike_datalist(
        meta_paths=args.test_meta_data_path,
        data_paths=args.test_data_path,
        mode='mse')

    # ==================================
    #       Get Model
    # ==================================
    # construct the data generator.
    num_class = len(score_rule)
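    # at 16 kHz with hop_length=640, the STFT yields 16000/640 = 25 frames per second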
    input_length = int(args.audio_length * 25)
    params = {
        'dim': (513, None, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 1024,
        'spec_len': input_length,
        'win_length': 1024,
        'hop_length': 640,
        'n_classes': num_class,
        'sampling_rate': 16000,
        'normalize': True,
    }

    network_eval = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                                num_class=params['n_classes'],
                                                mode='eval',
                                                args=model_config)
    # ==> load pre-trained model ???
    if args.resume:
        # ==> get real_model from arguments input,
        # load the model if the imag_model == real_model.
        if os.path.isfile(args.resume):
            network_eval.load_weights(os.path.join(args.resume),
                                      by_name=True,
                                      skip_mismatch=True)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            raise IOError("==> no checkpoint found at '{}'".format(
                args.resume))
    else:
        raise IOError('==> please type in the model to load')

    print('==> start testing.')

    v = []
    for ID in vallist:
        val_data = ut.load_data(ID, params['win_length'],
                                params['sampling_rate'], params['hop_length'],
                                params['nfft'], params['spec_len'], 'test',
                                args.data_format)
        info = network_eval.predict(np.expand_dims(val_data, (0, -1)))
        v += info.tolist()
    v = np.array(v)

    print('val data shape {}'.format(v.shape))
    if model_config['loss'] == 'mse':
        # map the normalized outputs and labels back to the original score scale
        v = v.T[0] * 10 + 5
        vallb = vallb * 10 + 5
        metric = np.square(np.subtract(v, vallb)).mean()
        print('mse: ', metric)
        v_test = np.vstack([v, vallb]).astype('float').T
        df = np.hstack([vallist.reshape(-1, 1), v_test])
        df = pd.DataFrame(data=df,
                          columns=['content', 'score_predict', 'score_true'])
    else:
        valscore = valscore * 10 + 5
        v_predict = ((v < 0.5) * 1)[:, 0]  # prob_0 < 0.5 means class 1 is predicted
        metric = sum(v_predict == vallb) / len(vallb)
        print('confusion matrix: ', confusion_matrix(vallb, v_predict))
        print('accuracy ', metric)
        v_test = np.hstack([v,
                            vallb.reshape(-1, 1),
                            valscore.reshape(-1, 1)]).astype('float')
        df = np.hstack([vallist.reshape(-1, 1), v_test])
        df = pd.DataFrame(data=df,
                          columns=[
                              'content', 'prob_0', 'prob_1', 'true_label',
                              'score_true'
                          ])

    date = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
    df.to_csv(
        os.path.join(args.save_dir,
                     '{}_{}_{}.csv'.format(date, model_config['loss'],
                                           metric)))
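A note on np.expand_dims(val_data, (0, -1)) above: passing a tuple of axes requires NumPy >= 1.18 and adds the batch and channel dimensions in one call. A quick check:

import numpy as np

spec = np.zeros((513, 250))            # (freq_bins, frames), the shape ut.load_data presumably returns
batch = np.expand_dims(spec, (0, -1))  # batch axis in front, channel axis at the end
print(batch.shape)                     # (1, 513, 250, 1), ready for network_eval.predict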
Example #4
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras
    import kaldiio

    # ==================================
    #       Get Train/Val.
    # ==================================

    trnlist, trnlb, l2i = toolkits.load_from_kaldi_dir(args,
                                                       "train",
                                                       min_len=300)
    vallist, vallb, _ = toolkits.load_from_kaldi_dir(args,
                                                     "val",
                                                     min_len=300,
                                                     label2idx=l2i)
    if args.cmvn:
        # Kaldi CMVN stats: row 0 holds per-dimension feature sums, with the
        # total frame count in the last column, so offset is the global mean.
        cmvn_stats = kaldiio.load_mat(args.cmvn)
        mean_stats = cmvn_stats[0, :-1]
        count = cmvn_stats[0, -1]
        offset = np.expand_dims(mean_stats, 0) / count
        print("offset", offset)
        CMVN = offset

    else:
        CMVN = None

    if args.post_cmvn:
        cmvn_stats = kaldiio.load_mat(args.post_cmvn)
        mean_stats = cmvn_stats[0, :-1]
        count = cmvn_stats[0, -1]
        offset = np.expand_dims(mean_stats, 0) / count
        print("offset", offset)
        POSTCMVN = offset

    else:
        POSTCMVN = None

    # construct the data generator.
    params = {
        'dim': (args.dim, 300, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 300,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 8,
        'sampling_rate': 16000,
        'tandem': args.tandem,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': False,
        'cmvn': CMVN,
        'postcmvn': POSTCMVN
    }

    # Datasets
    partition = {'train': trnlist, 'val': vallist}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'],
                                      **params)
    val_gen = generator.DataGenerator(partition['val'], labels['val'],
                                      **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=args)
    # ==> load pre-trained model ???
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())

    if args.resume:
        print("Attempting to load", args.resume)
        if os.path.isfile(args.resume):
            if mgpu == 1:
                # by_name=True, skip_mismatch=True
                # https://github.com/WeidiXie/VGG-Speaker-Recognition/issues/46
                network.load_weights(args.resume,
                                     by_name=True,
                                     skip_mismatch=True)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()  # summary() prints itself and returns None
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    with open(os.path.join(model_path, 'label2idx'), 'w') as f:
        for key in l2i.keys():
            f.write(key + ' ' + str(l2i[key]) + '\n')

    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path,
                                              histogram_freq=0,
                                              write_graph=True,
                                              write_images=False,
                                              update_freq=args.batch_size * 16)
    callbacks = [
        keras.callbacks.ModelCheckpoint(os.path.join(
            model_path, 'weights-{epoch:02d}-{val_loss:.3f}.h5'),
                                        monitor='val_loss',
                                        mode='min',
                                        save_best_only=True), normal_lr,
        tbcallbacks
    ]

    if args.ohem_level > 1:  # online hard negative mining will be used
        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(
            len(partition['train']) // (args.ohem_level * args.batch_size))

        ohem_generator = generator.OHEM_generator(
            network, trn_gen, candidate_steps, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes'])

        A = ohem_generator.next()  # for some reason, I need to warm up the generator

        network.fit_generator(generator.OHEM_generator(
            network, trn_gen, iters_per_epoch, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        network.fit_generator(trn_gen,
                              validation_data=val_gen,
                              steps_per_epoch=int(
                                  len(partition['train']) // args.batch_size),
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=True,
                              workers=12,
                              verbose=1)
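In the Kaldi CMVN matrix loaded above, row 0 holds the per-dimension feature sums with the total frame count in the last column, so offset is the global mean. A minimal sketch of how a generator might apply it (the helper name is an assumption; the real logic lives inside generator.DataGenerator):

import numpy as np

def apply_cmvn(feats, cmvn=None, postcmvn=None):
    # feats: (frames, dim) feature matrix; cmvn/postcmvn: (1, dim) mean offsets
    if cmvn is not None:
        feats = feats - cmvn      # subtract the global mean from the raw features
    if postcmvn is not None:
        feats = feats - postcmvn  # optional second normalization stage (e.g. after tandem stacking)
    return feats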
Example #5
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras
    import wandb
    from wandb.keras import WandbCallback

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_hike_datalist(
        meta_paths=args.train_meta_data_path,
        data_paths=args.train_data_path,
        mode=model_config['loss'])
    vallist, vallb = toolkits.get_hike_datalist(
        meta_paths=args.val_meta_data_path,
        data_paths=args.val_data_path,
        mode=model_config['loss'])

    input_length = int(args.audio_length * 25)
    num_class = len(score_rule)
    # construct the data generator.
    params = {
        'dim': (513, input_length, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 1024,
        'spec_len': input_length,
        'win_length': 1024,
        'hop_length': 640,
        'n_classes': num_class,
        'sampling_rate': 16000,
        'batch_size': model_config['batch_size'],
        'shuffle': True,
        'normalize': True,
        'loss': model_config['loss'],
        'data_format': args.data_format
    }

    # Datasets
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    wandb.init(project='vgg_speaker')
    trn_gen = generator.DataGenerator(partition['train'], labels['train'],
                                      **params)
    val_gen = generator.DataGenerator(partition['val'], labels['val'],
                                      **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=model_config)
    # # val data
    # val_data = [params['mp_pooler'].apply_async(ut.load_data,
    #                                 args=(ID, params['win_length'], params['sampling_rate'], params['hop_length'],
    #                                       params['nfft'], params['spec_len'], 'train', args.data_format)) for ID in partition['val']]
    # val_data = np.expand_dims(np.array([p.get() for p in val_data]), -1)

    # ==> load pre-trained model ???
    print(keras.backend.tensorflow_backend._get_available_gpus())

    if args.resume:
        print("Attempting to load", args.resume)
        if os.path.isfile(args.resume):
            network.load_weights(args.resume,
                                 by_name=True,
                                 skip_mismatch=True)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            raise ValueError("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()  # summary() prints itself and returns None
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              model_config['loss'], model_config['aggregation_mode'],
              model_config['ohem_level']))

    model_path, log_path = set_path(args, model_config)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    # tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0, write_graph=True, write_images=False,
    #                                           update_freq=model_config['batch_size'] * 16)
    callbacks = [
        keras.callbacks.ModelCheckpoint(
            os.path.join(model_path, 'weights-{epoch:02d}-{loss:.3f}.h5'),
            monitor='loss',
            mode='min',
            save_best_only=True,
            period=20,
        ), normal_lr,
        WandbCallback()
    ]

    if model_config['ohem_level'] > 1:  # online hard negative mining will be used
        candidate_steps = int(
            len(partition['train']) // model_config['batch_size'])
        iters_per_epoch = int(
            len(partition['train']) //
            (model_config['ohem_level'] * model_config['batch_size']))

        ohem_generator = generator.OHEM_generator(
            network, trn_gen, candidate_steps, model_config['ohem_level'],
            model_config['batch_size'], params['dim'], params['n_classes'])

        A = ohem_generator.next()  # for some reason, I need to warm up the generator

        network.fit_generator(generator.OHEM_generator(
            network, trn_gen, iters_per_epoch, model_config['ohem_level'],
            model_config['batch_size'], params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=model_config['epochs'],
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        # the mse and classification losses share the same training configuration
        network.fit_generator(trn_gen,
                              steps_per_epoch=int(
                                  len(partition['train']) //
                                  model_config['batch_size']),
                              epochs=model_config['epochs'],
                              max_queue_size=10,
                              validation_data=val_gen,
                              validation_freq=1,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)
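This variant reads its hyper-parameters from a module-level model_config dict rather than from args. A hypothetical shape for it, listing only the keys the code above actually reads (all values are placeholders):

model_config = {
    'loss': 'mse',            # or a classification loss; selects label mode and eval metric
    'batch_size': 16,
    'epochs': 100,
    'ohem_level': 0,          # > 1 enables online hard example mining
    'aggregation_mode': 'gvlad',
}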
Example #6
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras
    import random

    # ==================================
    #       Get Train/Val.
    # ==================================
    feats_path = os.path.join(args.kaldi_data_dir, 'feats.scp')
    utt2spk_path = os.path.join(args.kaldi_data_dir, 'utt2spk')
    assert os.path.exists(feats_path), 'Path `{}` does not exist.'.format(feats_path)
    assert os.path.exists(utt2spk_path), 'Path `{}` does not exist.'.format(utt2spk_path)

    utt2ark = {}
    with open(feats_path) as f:
        for line in f:
            key, ark = line.split()
            if args.use_clean_only:
                if not is_clean(key):
                    continue
            ark, position = ark.split(':')
            utt2ark[key] = (key, ark, int(position))

    label2count, utt2label, label2int, label2utts = {}, {}, {}, {}
    with open(utt2spk_path) as f:
        for line in f:
            utt, label = line.split()
            if args.use_clean_only:
                if not is_clean(utt):
                    continue
            if label not in label2int:
                label2int[label] = len(label2int)
            label = label2int[label]
            utt2label[utt] = label
            if label not in label2count:
                label2count[label] = 0
            label2count[label] += 1
            if label not in label2utts:
                label2utts[label] = []
            label2utts[label].append(utt2ark[utt])

    # balancing classes: oversample every class to the size of the largest one
    trnlist, vallist, trnlb, vallb = [], [], [], []
    max_utts = max(label2count.values())
    for label in label2utts:
        validation_thr = label2count[label] * args.validation_ratio
        random.shuffle(label2utts[label])
        utts_array = np.array(label2utts[label])
        # high is exclusive, so use the full count to keep the last utterance reachable
        random_indexes = np.random.randint(low=0, high=label2count[label], size=max_utts)
        trn_indexes = random_indexes[random_indexes > validation_thr]
        val_indexes = random_indexes[random_indexes <= validation_thr]
        trnlist.extend([(x[0], x[1], int(x[2])) for x in utts_array[trn_indexes]])
        trnlb.extend([label] * len(trn_indexes))  # one label per sampled utterance
        vallist.extend([(x[0], x[1], int(x[2])) for x in utts_array[val_indexes]])
        vallb.extend([label] * len(val_indexes))


    # label2val_count, trnlist, vallist, trnlb, vallb = {}, [], [], [], []
    # for utt in all_list:
    #     label = utt2label[utt[0]]
    #     if label not in label2val_count:
    #         label2val_count[label] = 0
    #     if label2val_count[label] <= label2count[label] * args.validation_ratio:
    #         # use for validation
    #         vallist.append(utt)
    #         vallb.append(label)
    #         label2val_count[label] += 1
    #     else:
    #         # use for training
    #         trnlist.append(utt)
    #         trnlb.append(label)

    # trnlb = keras.utils.to_categorical(trnlb)
    # vallb = keras.utils.to_categorical(vallb)

    # construct the data generator.
    params = {
        'dim': (args.num_dim, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=4 * len(args.gpu.split(',')) + 1),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': len(label2count),
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
        'use_clean_only': args.use_clean_only
    }

    # Datasets
    partition = {'train': trnlist, 'val': vallist}
    labels = {'train': np.array(trnlb), 'val': np.array(vallb)}

    # Generators
    trn_gen = generator.DataGenerator(partition['train'], labels['train'], **params)
    val_gen = generator.DataGenerator(partition['val'], labels['val'], **params)
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train', args=args)

    # ==> load pre-trained model ???
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(args.resume)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()  # summary() prints itself and returns None
    print('==> gpu: {}, training {} features, validating {} features, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(args.gpu, len(partition['train']),
                                                             len(partition['val']), np.max(labels['train']),
                                                             args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0, write_graph=True, write_images=False,
                                              update_freq=args.batch_size * 16)
    callbacks = [keras.callbacks.ModelCheckpoint(os.path.join(model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
                                                 monitor='loss',
                                                 mode='min',
                                                 save_best_only=True),
                 normal_lr, tbcallbacks]

    if args.ohem_level > 1:     # online hard negative mining will be used
        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(len(partition['train']) // (args.ohem_level*args.batch_size))

        ohem_generator = generator.OHEM_generator(network,
                                                  trn_gen,
                                                  candidate_steps,
                                                  args.ohem_level,
                                                  args.batch_size,
                                                  params['dim'],
                                                  params['n_classes']
                                                  )

        A = ohem_generator.next()   # for some reason, I need to warm up the generator

        network.fit_generator(generator.OHEM_generator(network, trn_gen, iters_per_epoch,
                                                       args.ohem_level, args.batch_size,
                                                       params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1,
                              validation_data=val_gen,
                              validation_steps=int(len(vallist) // args.batch_size))

    else:
        network.fit_generator(trn_gen,
                              steps_per_epoch=int(len(partition['train'])//args.batch_size),
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1,
                              validation_data=val_gen,
                              validation_steps=int(len(vallist) // args.batch_size))
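Because every class is oversampled to max_utts, the train and validation label lists should come out roughly balanced, with each class appearing about max_utts * (1 - validation_ratio) times in training. A quick sanity check:

from collections import Counter

print(Counter(trnlb))  # expect roughly equal counts per class
print(Counter(vallb))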
Example #7
def main():

    # gpu configuration
    toolkits.initialize_GPU(args)

    import model
    import generator
    import keras

    # ==================================
    #       Get Train/Val.
    # ==================================
    trnlist, trnlb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_train.txt')
    vallist, vallb = toolkits.get_voxceleb2_datalist(
        args, path='../meta/voxlb2_val.txt')

    # construct the data generator.
    params = {
        'dim': (257, 250, 1),
        'mp_pooler': toolkits.set_mp(processes=args.multiprocess),
        'nfft': 512,
        'spec_len': 250,
        'win_length': 400,
        'hop_length': 160,
        'n_classes': 5994,
        'sampling_rate': 16000,
        'batch_size': args.batch_size,
        'shuffle': True,
        'normalize': True,
    }

    # Datasets
    # numpy's flatten() collapses the datalist arrays into 1-D vectors
    partition = {'train': trnlist.flatten(), 'val': vallist.flatten()}
    labels = {'train': trnlb.flatten(), 'val': vallb.flatten()}

    # Generators
    #make data
    trn_gen = generator.DataGenerator(partition['train'], labels['train'],
                                      **params)
    # create model depend on args
    network = model.vggvox_resnet2d_icassp(input_dim=params['dim'],
                                           num_class=params['n_classes'],
                                           mode='train',
                                           args=args)

    # ==> load pre-trained model ???
    mgpu = len(keras.backend.tensorflow_backend._get_available_gpus())
    if args.resume:
        if os.path.isfile(args.resume):
            if mgpu == 1:
                network.load_weights(args.resume)
            else:
                network.layers[mgpu + 1].load_weights(args.resume)
            print('==> successfully loading model {}.'.format(args.resume))
        else:
            print("==> no checkpoint found at '{}'".format(args.resume))

    network.summary()  # summary() prints itself and returns None
    print('==> gpu: {}, training {} images, classes: 0-{}, '
          'loss: {}, aggregation: {}, ohem level: {}'.format(
              args.gpu, len(partition['train']), np.max(labels['train']),
              args.loss, args.aggregation_mode, args.ohem_level))

    model_path, log_path = set_path(args)
    normal_lr = keras.callbacks.LearningRateScheduler(step_decay)
    # tbcallbacks = keras.callbacks.TensorBoard(log_dir=log_path, histogram_freq=0, write_graph=True, write_images=False,
    #   update_freq=args.batch_size * 16)
    # Keras callbacks return information from the training algorithm while training
    # is taking place. A callback is a set of functions applied at given stages of
    # the training procedure; you can use callbacks to get a view of the internal
    # states and statistics of the model during training.
    # callbacks = [keras.callbacks.ModelCheckpoint(os.path.join(model_path, 'weights-{epoch:02d}-{acc:.3f}.h5'),
    #                                              monitor='loss',
    #                                              mode='min',
    #                                              save_best_only=True),
    #              normal_lr, tbcallbacks]

    if args.ohem_level > 1:  # online hard negative mining will be used

        candidate_steps = int(len(partition['train']) // args.batch_size)
        iters_per_epoch = int(
            len(partition['train']) // (args.ohem_level * args.batch_size))

        ohem_generator = generator.OHEM_generator(
            network, trn_gen, candidate_steps, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes'])

        A = ohem_generator.next()  # for some reason, I need to warm up the generator

        network.fit_generator(generator.OHEM_generator(
            network, trn_gen, iters_per_epoch, args.ohem_level,
            args.batch_size, params['dim'], params['n_classes']),
                              steps_per_epoch=iters_per_epoch,
                              epochs=args.epochs,
                              max_queue_size=10,
                              callbacks=callbacks,  # NOTE: 'callbacks' is commented out above; this OHEM path would raise NameError as-is
                              use_multiprocessing=False,
                              workers=1,
                              verbose=1)

    else:
        print("steps_per_epoch=", int(len(partition['train'])), "   ",
              args.batch_size)
        print(trn_gen)
        print(network)
        print("epochs=", args.epochs, "--------")
        print("***************", args.batch_size)
        network.fit_generator(
            trn_gen,
            steps_per_epoch=2,  # debug value; normally int(len(partition['train']) // args.batch_size)
            epochs=args.epochs,
            max_queue_size=2,
            #   callbacks=callbacks,
            use_multiprocessing=True,
            workers=1,
            verbose=1)  #should change to one?
        print("end!")