Example #1
0
def setup_loader(ap, r, is_val=False, verbose=False, speaker_mapping=None):
    """Build a DataLoader over the train or eval split.

    Returns None when ``is_val`` is True but evaluation is disabled in the
    global config ``c``; otherwise constructs a ``MyDataset`` and wraps it
    in a DataLoader (with a DistributedSampler when running multi-GPU).
    """
    # Evaluation loader is requested but eval is turned off in the config.
    if is_val and not c.run_eval:
        return None

    # Speaker mapping is only forwarded when external speaker embeddings
    # are in use; otherwise the dataset gets None.
    use_ext_embeddings = (c.use_speaker_embedding
                          and c.use_external_speaker_embedding_file)
    dataset = MyDataset(
        r,
        c.text_cleaner,
        compute_linear_spec=c.model.lower() == 'tacotron',
        meta_data=meta_data_eval if is_val else meta_data_train,
        ap=ap,
        tp=c.characters if 'characters' in c.keys() else None,
        # No length-bucketed shuffling for validation.
        batch_group_size=0 if is_val else c.batch_group_size * c.batch_size,
        min_seq_len=c.min_seq_len,
        max_seq_len=c.max_seq_len,
        phoneme_cache_path=c.phoneme_cache_path,
        use_phonemes=c.use_phonemes,
        phoneme_language=c.phoneme_language,
        enable_eos_bos=c.enable_eos_bos_chars,
        verbose=verbose,
        speaker_mapping=speaker_mapping if use_ext_embeddings else None)

    sampler = DistributedSampler(dataset) if num_gpus > 1 else None
    return DataLoader(
        dataset,
        batch_size=c.eval_batch_size if is_val else c.batch_size,
        shuffle=False,  # ordering is handled by the dataset/sampler
        collate_fn=dataset.collate_fn,
        drop_last=False,
        sampler=sampler,
        num_workers=c.num_val_loader_workers if is_val else c.num_loader_workers,
        pin_memory=False)
Example #2
0
def setup_loader(ap, r, is_val=False, verbose=False, dataset=None):
    """Create (or reuse) a dataset and wrap it in a DataLoader.

    When ``dataset`` is supplied it is used as-is; otherwise a fresh
    ``MyDataset`` is built, phoneme sequences are optionally pre-computed,
    and items are sorted by length. Returns None when ``is_val`` is True
    but evaluation is disabled in the global config ``c``.
    """
    # Evaluation loader requested while eval is disabled — nothing to do.
    if is_val and not c.run_eval:
        return None

    if dataset is None:
        keys = c.keys()
        # Only forward the (module-level) speaker_mapping when external
        # speaker embeddings are enabled.
        use_ext_embeddings = (c.use_speaker_embedding
                              and c.use_external_speaker_embedding_file)
        dataset = MyDataset(
            r,
            c.text_cleaner,
            compute_linear_spec=c.model.lower() == "tacotron",
            meta_data=meta_data_eval if is_val else meta_data_train,
            ap=ap,
            tp=c.characters if "characters" in keys else None,
            add_blank=c["add_blank"] if "add_blank" in keys else False,
            # No length-bucketed shuffling for validation.
            batch_group_size=0 if is_val else c.batch_group_size * c.batch_size,
            min_seq_len=c.min_seq_len,
            max_seq_len=c.max_seq_len,
            phoneme_cache_path=c.phoneme_cache_path,
            use_phonemes=c.use_phonemes,
            phoneme_language=c.phoneme_language,
            enable_eos_bos=c.enable_eos_bos_chars,
            verbose=verbose,
            speaker_mapping=speaker_mapping if use_ext_embeddings else None,
        )

        if c.use_phonemes and c.compute_input_seq_cache:
            # Pre-computing phonemes yields accurate sequence lengths
            # before sorting/bucketing.
            dataset.compute_input_seq(c.num_loader_workers)
        dataset.sort_items()

    sampler = DistributedSampler(dataset) if num_gpus > 1 else None
    return DataLoader(
        dataset,
        batch_size=c.eval_batch_size if is_val else c.batch_size,
        shuffle=False,  # ordering is handled by the dataset/sampler
        collate_fn=dataset.collate_fn,
        drop_last=False,
        sampler=sampler,
        num_workers=c.num_val_loader_workers if is_val else c.num_loader_workers,
        pin_memory=False,
    )
Example #3
0
def setup_loader(ap, r, is_val=False, verbose=False):
    """Build a DataLoader over the train or eval split.

    Returns None when ``is_val`` is True but evaluation is disabled in the
    global ``config``. Phoneme sequences are optionally pre-computed and
    items are sorted by length before the loader is created.
    """
    # Evaluation loader requested while eval is disabled — nothing to do.
    if is_val and not config.run_eval:
        return None

    # Only forward the (module-level) speaker_mapping when external
    # speaker embeddings are enabled.
    use_ext_embeddings = (config.use_speaker_embedding
                          and config.use_external_speaker_embedding_file)
    dataset = MyDataset(
        r,
        config.text_cleaner,
        compute_linear_spec=False,
        meta_data=meta_data_eval if is_val else meta_data_train,
        ap=ap,
        tp=config.characters,
        add_blank=config["add_blank"],
        # No length-bucketed shuffling for validation.
        batch_group_size=0 if is_val else config.batch_group_size * config.batch_size,
        min_seq_len=config.min_seq_len,
        max_seq_len=config.max_seq_len,
        phoneme_cache_path=config.phoneme_cache_path,
        use_phonemes=config.use_phonemes,
        phoneme_language=config.phoneme_language,
        enable_eos_bos=config.enable_eos_bos_chars,
        use_noise_augment=not is_val,  # augment only during training
        verbose=verbose,
        speaker_mapping=speaker_mapping if use_ext_embeddings else None,
    )

    if config.use_phonemes and config.compute_input_seq_cache:
        # Pre-computing phonemes yields accurate sequence lengths
        # before sorting/bucketing.
        dataset.compute_input_seq(config.num_loader_workers)
    dataset.sort_items()

    sampler = DistributedSampler(dataset) if num_gpus > 1 else None
    return DataLoader(
        dataset,
        batch_size=config.eval_batch_size if is_val else config.batch_size,
        shuffle=False,  # ordering is handled by the dataset/sampler
        collate_fn=dataset.collate_fn,
        drop_last=False,
        sampler=sampler,
        num_workers=config.num_val_loader_workers if is_val else config.num_loader_workers,
        pin_memory=False,
    )
def setup_loader(ap, r, verbose=False):
    """Build a single, unshuffled DataLoader over the module-level
    ``meta_data`` — no train/eval split, no batch grouping, no noise
    augmentation, no distributed sampler.
    """
    keys = c.keys()
    # Only forward the (module-level) speaker_mapping when external
    # speaker embeddings are enabled.
    use_ext_embeddings = (c.use_speaker_embedding
                          and c.use_external_speaker_embedding_file)
    dataset = MyDataset(
        r,
        c.text_cleaner,
        compute_linear_spec=False,
        meta_data=meta_data,
        ap=ap,
        tp=c.characters if "characters" in keys else None,
        add_blank=c["add_blank"] if "add_blank" in keys else False,
        batch_group_size=0,
        min_seq_len=c.min_seq_len,
        max_seq_len=c.max_seq_len,
        phoneme_cache_path=c.phoneme_cache_path,
        use_phonemes=c.use_phonemes,
        phoneme_language=c.phoneme_language,
        enable_eos_bos=c.enable_eos_bos_chars,
        use_noise_augment=False,
        verbose=verbose,
        speaker_mapping=speaker_mapping if use_ext_embeddings else None,
    )

    if c.use_phonemes and c.compute_input_seq_cache:
        # Pre-computing phonemes yields accurate sequence lengths
        # before sorting.
        dataset.compute_input_seq(c.num_loader_workers)
    dataset.sort_items()

    return DataLoader(
        dataset,
        batch_size=c.batch_size,
        shuffle=False,  # keep the sorted order produced above
        collate_fn=dataset.collate_fn,
        drop_last=False,
        sampler=None,
        num_workers=c.num_loader_workers,
        pin_memory=False,
    )
Example #5
0
    num_chars = len(phonemes) if C.use_phonemes else len(symbols)
    # TODO: handle multi-speaker
    model = setup_model(num_chars, num_speakers=0, c=C)
    model, _ = load_checkpoint(model, args.model_path, None, args.use_cuda)
    model.eval()

    # data loader
    preprocessor = importlib.import_module('TTS.tts.datasets.preprocess')
    preprocessor = getattr(preprocessor, args.dataset)
    meta_data = preprocessor(args.data_path, args.dataset_metafile)
    dataset = MyDataset(
        model.decoder.r,
        C.text_cleaner,
        compute_linear_spec=False,
        ap=ap,
        meta_data=meta_data,
        tp=C.characters if 'characters' in C.keys() else None,
        add_blank=C['add_blank'] if 'add_blank' in C.keys() else False,
        use_phonemes=C.use_phonemes,
        phoneme_cache_path=C.phoneme_cache_path,
        phoneme_language=C.phoneme_language,
        enable_eos_bos=C.enable_eos_bos_chars)

    dataset.sort_items()
    loader = DataLoader(dataset,
                        batch_size=args.batch_size,
                        num_workers=4,
                        collate_fn=dataset.collate_fn,
                        shuffle=False,
                        drop_last=False)

    # compute attentions