# Example #1 (score: 0)
def eval_artinv_model(task_cfg, testset):
    """Evaluate the best saved articulatory-inversion model on a test set.

    Loads the checkpointed model for the task, reports the test loss and
    average RMSE, then computes the mean per-dimension Pearson correlation
    between min-max-scaled predictions and labels.

    Args:
        task_cfg: task configuration dict; 'network_type' selects whether
            LSTM outputs must be flattened over the time axis.
        testset: dataset object exposing `.feats`, `.labels` and `__len__`.

    Returns:
        Tuple of (test average RMSE, mean Pearson correlation).
    """
    # PRETRAINED MODEL #
    task_dirs = get_task_dirs(cfg=task_cfg)
    best_model = load_model(task_dirs['model'],
                            custom_objects={'rmse_avg': rmse_avg})

    # EVALUATION #
    # Single batch over the whole test set; evaluation[0] is the loss,
    # evaluation[1] the rmse_avg metric.
    evaluation = best_model.evaluate(x=testset.feats,
                                     y=testset.labels,
                                     batch_size=len(testset))
    print('Test Loss: {}\nTest average RMSE: {}'.format(
        evaluation[0], evaluation[1]))
    pred = best_model.predict(x=testset.feats)
    if task_cfg['network_type'] == 'LSTM':
        # LSTM output is windowed; flatten to 2-D (frames x 12 articulatory
        # dims) so it lines up with the label matrix.
        testset.labels = testset.labels.reshape((-1, 12))
        pred = pred.reshape((-1, 12))

    # Fix: the original reused ONE MinMaxScaler and re-fit it on `pred`
    # after transforming the labels — stale-state reuse that silently
    # applies different scalings. Use an independent scaler per array.
    # (Min-max scaling is affine, so Pearson values are unchanged.)
    lab = MinMaxScaler().fit_transform(testset.labels)
    pr = MinMaxScaler().fit_transform(pred)
    # Per-output-dimension Pearson correlation, averaged across dimensions.
    pearsons = [pearsonr(pr[:, i], lab[:, i])[0] for i in range(pr.shape[1])]
    avg_pearson = np.array(pearsons).mean()
    print('Average Pearson: ' + str(avg_pearson))

    return evaluation[1], avg_pearson
def save_history(cfg, history):
    """Pickle a training history object into the task's log directory.

    The target file is named ``history_<model_string>.pkl`` and placed in
    the task's ``logs`` directory, which is created if missing.
    """
    task_dirs = get_task_dirs(cfg)
    log_dir = task_dirs['logs']
    os.makedirs(log_dir, exist_ok=True)
    file_name = 'history_' + task_dirs['model_string'] + '.pkl'
    target_path = os.path.join(log_dir, file_name)
    with open(target_path, 'wb') as handle:
        pickle.dump(history, handle)
def prepare_reconstructed_articulatory(cfg, dset, augmentation_cfg):
    """Predict articulatory features for a speech dataset and save them
    per utterance in Kaldi ark format.

    Loads the speech dataset, runs the best augmentation model on it
    (DNN/LSTM/CNN, per ``augmentation_cfg['network_type']``), de-normalizes
    the predictions with the cached label scaler, and writes one matrix per
    utterance to ``<dset>_<network_type>_reconstructed_articulatory.ark``.

    Args:
        cfg: speech-task configuration dict; its 'experiment_name' is
            mutated in place (side effect the caller may rely on).
        dset: dataset split name (e.g. 'train', 'test').
        augmentation_cfg: configuration of the articulatory model.

    Raises:
        NotImplementedError: for an unknown 'network_type'.
    """
    # LOAD SPEECH DATASET
    # Temporarily point experiment_name at the speech cache, then switch it
    # for the downstream articulatory-ASR stage (left mutated on purpose —
    # the original code behaves the same way).
    cfg['experiment_name'] = 'asr_per'
    dataset = load_kaldi_dataset(cfg, dset, smooth_first=True, standardize=True)
    cfg['experiment_name'] = 'articulatory_asr'
    task_dirs = get_task_dirs(cfg=augmentation_cfg)

    # LOAD BEST MODEL AND PREDICT ARTICULATORY
    print('Predicting articulatory...')
    model = load_model(task_dirs['model'], custom_objects={'rmse_avg': rmse_avg})
    if augmentation_cfg['network_type'] == 'DNN':
        pred = model.predict(x=dataset.feats)
    elif augmentation_cfg['network_type'] == 'LSTM':
        pred = lstm_regression_results(cfg=augmentation_cfg, feats=dataset.feats, model=model)
    elif augmentation_cfg['network_type'] == 'CNN':
        feats = prepare_cnn_input(cfg=augmentation_cfg, feats=dataset.feats)
        pred = model.predict(x=feats)
    else:
        raise NotImplementedError

    # DENORMALIZE RECONSTRUCTED ARTICULATORY
    # The label scaler was pickled next to the kaldi cache during training;
    # invert it to get articulatory values back in their original units.
    save_dir = os.path.join(WORK_DIR, augmentation_cfg['experiment_name'], augmentation_cfg['dataset'], 'data_cache')
    lab_scaler_name = os.path.join(save_dir, get_kaldi_cache_name(augmentation_cfg, dset=None).split('.')[
        0] + '_label_scaler.pkl')
    with open(lab_scaler_name, 'rb') as f:
        lab_scaler = pickle.load(f)
    rec = lab_scaler.inverse_transform(pred)

    # SAVE RECONSTRUCTED ARTICULATORY TO PICKLE BY UTTERANCE
    print('Saving articulatory to kaldi_format...')
    art_path = os.path.join(WORK_DIR, cfg['experiment_name'], cfg['dataset'], 'data_cache', 'articulatory')
    save_file_path = os.path.join(art_path,
                                  dset + '_' + augmentation_cfg['network_type'] + '_reconstructed_articulatory.ark')
    if not os.path.exists(art_path):
        os.makedirs(art_path, exist_ok=True)
    save_file = kaldi_io.open_or_fd(save_file_path, 'wb')
    try:
        start_index = 0
        # Bump the last end index so the final utterance's slice reaches the
        # end of `rec`. NOTE(review): presumably end_indexes hold exclusive
        # frame boundaries with an off-by-one at the tail — confirm upstream.
        dataset.end_indexes[-1] += 1
        for i, name in enumerate(dataset.uttids):
            out = rec[start_index:dataset.end_indexes[i]]
            start_index = dataset.end_indexes[i]
            kaldi_io.write_mat(save_file, out, name)
    finally:
        # Fix: the file descriptor was never closed in the original code,
        # leaking the handle and risking an unflushed/truncated ark file.
        save_file.close()
def load_augment_from_pickle(speech_cfg, augmentation_cfg, dset, standardize=False):
    """Load a speech dataset and augment its features with model-predicted
    articulatory trajectories.

    The best saved augmentation model (DNN/LSTM/CNN) predicts articulatory
    features from the smoothed acoustic features, and the predictions are
    concatenated column-wise onto ``dataset.feats``.

    NOTE(review): in the LSTM branch the features are truncated to a
    BPTT-compatible length before prediction — presumably `get_bppt_dim`
    keeps the row counts aligned for the final concatenation; confirm.

    Returns:
        The dataset object with augmented ``.feats``.
    """
    dataset = load_kaldi_dataset(cfg=speech_cfg, dset=dset, standardize=standardize)
    task_dirs = get_task_dirs(cfg=augmentation_cfg)
    best_model = load_model(task_dirs['model'], custom_objects={'rmse_avg': rmse_avg})

    net_type = augmentation_cfg['network_type']
    if net_type == 'LSTM':
        timestep = augmentation_cfg['timestep']
        usable_rows = get_bppt_dim(len(dataset.feats), timestep=timestep)
        windows = dataset.feats[:usable_rows, :].reshape(
            (-1, timestep, augmentation_cfg['input_dim']))
        pred = best_model.predict(x=smooth_acoustic(windows)).reshape((-1, 12))
    elif net_type == 'CNN':
        cnn_feats = prepare_cnn_input(cfg=augmentation_cfg, feats=dataset.feats)
        pred = best_model.predict(x=smooth_acoustic(cnn_feats))
    elif net_type == 'DNN':
        pred = best_model.predict(x=smooth_acoustic(dataset.feats))
    else:
        raise NotImplementedError
    dataset.feats = np.concatenate((dataset.feats, pred), axis=1)

    return dataset
# Example #5 (score: 0)
def train_model(task_cfg, trainset, validset):
    """Build and fit a model for the task, checkpointing the best weights.

    Trains with early stopping (patience 5 on validation loss) and saves
    the best-performing weights to the task's model path.

    Args:
        task_cfg: configuration dict with 'input_dim', 'output_dim',
            'batch_size' and 'epochs'.
        trainset: training dataset with `.feats` and `.labels`.
        validset: validation dataset with `.feats` and `.labels`.
    """
    # MODEL #
    print('Creating model with {} input dimension and {} output dimension.'.format(
        task_cfg['input_dim'], task_cfg['output_dim']))
    model = get_model(cfg=task_cfg)

    # CALLBACKS #
    dirs = get_task_dirs(cfg=task_cfg)
    training_callbacks = [
        EarlyStopping(monitor='val_loss', patience=5, verbose=1),
        ModelCheckpoint(filepath=dirs['model'],
                        monitor='val_loss',
                        verbose=1,
                        save_best_only=True),
    ]

    # TRAINING #
    model.fit(x=trainset.feats,
              y=trainset.labels,
              validation_data=(validset.feats, validset.labels),
              batch_size=task_cfg['batch_size'],
              epochs=task_cfg['epochs'],
              verbose=2,
              callbacks=training_callbacks)