Пример #1
0
# Number of entries directly inside ``train_predictions_dir`` whose file
# suffix is exactly '.npy'.
train_has_files = sum(
    1 for entry in train_predictions_dir.iterdir() if entry.suffix == '.npy'
)
model = config_manager.load_model()

# Predictions are (re)computed when ``args.recompute_pred`` is set, or when
# either the validation or the training '.npy' file count is zero.
running_predictions = (
    args.recompute_pred
    or val_has_files == 0
    or train_has_files == 0
)
if running_predictions:
    if args.store_predictions:
        # Warn before any work starts: stored predictions are large.
        print('\nWARNING: storing predictions can take a lot of disk space (~40GB)\n')

    # Both metafiles are looked up inside the training data directory.
    train_meta = config_manager.train_datadir / 'train_metafile.txt'
    test_meta = config_manager.train_datadir / 'test_metafile.txt'

    # The two splits are loaded with the same ``meldir`` and ``num_samples``
    # arguments; only the metafile differs.
    # NOTE(review): the trailing layout comments below disagree with each
    # other although both calls are identical in shape -- confirm which
    # tuple layout load_files actually yields.
    train_samples, _ = load_files(
        metafile=str(train_meta),
        meldir=str(meldir),
        num_samples=config['n_samples'],
    )  # (phonemes, mel)
    val_samples, _ = load_files(
        metafile=str(test_meta),
        meldir=str(meldir),
        num_samples=config['n_samples'],
    )  # (phonemes, text, mel)

    # get model, prepare data for model, create datasets
    data_prep = DataPrepper(
        config=config,
        tokenizer=model.text_pipeline.tokenizer,
    )
    # Use five times the configured batch size: faster parallel computation.
    script_batch_size = 5 * config['batch_size']
    train_dataset = Dataset(samples=train_samples,
                            preprocessor=data_prep,
                            batch_size=script_batch_size,
                            shuffle=False,
                    action='store_true',
                    help="deletes weights under this config's folder.")
# Optional session name; stored on ``args.session_name`` and left as None
# when the flag is not given.
parser.add_argument(
    '--session_name',
    dest='session_name',
    default=None,
)
args = parser.parse_args()

# Build the configuration manager for the 'autoregressive' model kind from
# the parsed command-line options.
config_manager = ConfigManager(
    config_path=args.config,
    model_kind='autoregressive',
    session_name=args.session_name,
)
config = config_manager.config

# Forward the three clear_* options as given, then dump and print the
# configuration.
config_manager.create_remove_dirs(
    clear_dir=args.clear_dir,
    clear_logs=args.clear_logs,
    clear_weights=args.clear_weights,
)
config_manager.dump_config()
config_manager.print_config()

# Load the training and validation samples. Both calls read mels from the
# 'mels' folder of the training data directory and pass the same
# ``num_samples`` value; only the metafile name differs.
# NOTE(review): the trailing layout comments below disagree with each other
# although both calls are identical in shape -- confirm which tuple layout
# load_files actually yields.
train_samples, _ = load_files(
    metafile=str(config_manager.train_datadir / 'train_metafile.txt'),
    meldir=str(config_manager.train_datadir / 'mels'),
    num_samples=config['n_samples'],
)  # (phonemes, mel)
val_samples, _ = load_files(
    metafile=str(config_manager.train_datadir / 'test_metafile.txt'),
    meldir=str(config_manager.train_datadir / 'mels'),
    num_samples=config['n_samples'],
)  # (phonemes, text, mel)

# get model, prepare data for model, create datasets
model = config_manager.get_model()
config_manager.compile_model(model)
data_prep = DataPrepper(
    config=config,
    tokenizer=model.text_pipeline.tokenizer,
)

# Run every validation sample through the preprocessor up front and keep the
# results in a list.
test_list = list(map(data_prep, val_samples))
train_dataset = Dataset(samples=train_samples,
                        preprocessor=data_prep,
                        batch_size=config['batch_size'],