def setup_loader(ap, r, is_val=False, verbose=False, dataset=None):
    if is_val and not c.run_eval:
        # no eval loader when evaluation is disabled in the config
        loader = None
    else:
        if dataset is None:
            dataset = MyDataset(
                r,
                c.text_cleaner,
                compute_linear_spec=c.model.lower() == "tacotron",  # linear specs are only needed by Tacotron
                meta_data=meta_data_eval if is_val else meta_data_train,
                ap=ap,
                tp=c.characters if "characters" in c.keys() else None,
                add_blank=c["add_blank"] if "add_blank" in c.keys() else False,
                batch_group_size=0 if is_val else c.batch_group_size * c.batch_size,
                min_seq_len=c.min_seq_len,
                max_seq_len=c.max_seq_len,
                phoneme_cache_path=c.phoneme_cache_path,
                use_phonemes=c.use_phonemes,
                phoneme_language=c.phoneme_language,
                enable_eos_bos=c.enable_eos_bos_chars,
                verbose=verbose,
                speaker_mapping=(
                    speaker_mapping
                    if (c.use_speaker_embedding and c.use_external_speaker_embedding_file)
                    else None
                ),
            )

            if c.use_phonemes and c.compute_input_seq_cache:
                # precompute phonemes to have a better estimate of sequence lengths.
                dataset.compute_input_seq(c.num_loader_workers)
            dataset.sort_items()

        # shard batches across processes when training on multiple GPUs
        sampler = DistributedSampler(dataset) if num_gpus > 1 else None
        loader = DataLoader(
            dataset,
            batch_size=c.eval_batch_size if is_val else c.batch_size,
            shuffle=False,
            collate_fn=dataset.collate_fn,
            drop_last=False,
            sampler=sampler,
            num_workers=c.num_val_loader_workers if is_val else c.num_loader_workers,
            pin_memory=False,
        )
    return loader
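# A minimal usage sketch (not part of the script above), assuming the module-level
# globals the function relies on (c, ap, model, meta_data_train, meta_data_eval,
# speaker_mapping, num_gpus) have already been initialized elsewhere:
train_loader = setup_loader(ap, model.decoder.r, is_val=False, verbose=True)
eval_loader = setup_loader(ap, model.decoder.r, is_val=True)  # None when c.run_eval is False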
def setup_loader(ap, r, is_val=False, verbose=False):
    if is_val and not config.run_eval:
        loader = None
    else:
        dataset = MyDataset(
            r,
            config.text_cleaner,
            compute_linear_spec=False,
            meta_data=meta_data_eval if is_val else meta_data_train,
            ap=ap,
            tp=config.characters,
            add_blank=config["add_blank"],
            batch_group_size=0 if is_val else config.batch_group_size * config.batch_size,
            min_seq_len=config.min_seq_len,
            max_seq_len=config.max_seq_len,
            phoneme_cache_path=config.phoneme_cache_path,
            use_phonemes=config.use_phonemes,
            phoneme_language=config.phoneme_language,
            enable_eos_bos=config.enable_eos_bos_chars,
            use_noise_augment=not is_val,  # noise augmentation only for the training split
            verbose=verbose,
            speaker_mapping=(
                speaker_mapping
                if config.use_speaker_embedding and config.use_external_speaker_embedding_file
                else None
            ),
        )

        if config.use_phonemes and config.compute_input_seq_cache:
            # precompute phonemes to have a better estimate of sequence lengths.
            dataset.compute_input_seq(config.num_loader_workers)
        dataset.sort_items()

        sampler = DistributedSampler(dataset) if num_gpus > 1 else None
        loader = DataLoader(
            dataset,
            batch_size=config.eval_batch_size if is_val else config.batch_size,
            shuffle=False,
            collate_fn=dataset.collate_fn,
            drop_last=False,
            sampler=sampler,
            num_workers=config.num_val_loader_workers if is_val else config.num_loader_workers,
            pin_memory=False,
        )
    return loader
def setup_loader(ap, r, verbose=False):
    dataset = MyDataset(
        r,
        c.text_cleaner,
        compute_linear_spec=False,
        meta_data=meta_data,
        ap=ap,
        tp=c.characters if "characters" in c.keys() else None,
        add_blank=c["add_blank"] if "add_blank" in c.keys() else False,
        batch_group_size=0,
        min_seq_len=c.min_seq_len,
        max_seq_len=c.max_seq_len,
        phoneme_cache_path=c.phoneme_cache_path,
        use_phonemes=c.use_phonemes,
        phoneme_language=c.phoneme_language,
        enable_eos_bos=c.enable_eos_bos_chars,
        use_noise_augment=False,
        verbose=verbose,
        speaker_mapping=(
            speaker_mapping
            if c.use_speaker_embedding and c.use_external_speaker_embedding_file
            else None
        ),
    )

    if c.use_phonemes and c.compute_input_seq_cache:
        # precompute phonemes to have a better estimate of sequence lengths.
        dataset.compute_input_seq(c.num_loader_workers)
    dataset.sort_items()

    loader = DataLoader(
        dataset,
        batch_size=c.batch_size,
        shuffle=False,
        collate_fn=dataset.collate_fn,
        drop_last=False,
        sampler=None,
        num_workers=c.num_loader_workers,
        pin_memory=False,
    )
    return loader
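# A minimal consumption sketch (not part of the script above), assuming ap, model,
# and the globals c / meta_data / speaker_mapping are already set up; the batch
# indexing mirrors the collate_fn ordering used in the extraction loop below
# (token ids at index 0, token lengths at index 1).
loader = setup_loader(ap, model.decoder.r, verbose=True)
with torch.no_grad():
    for batch in tqdm(loader):
        text_input, text_lengths = batch[0], batch[1]
        # model forward pass / feature extraction would go here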
# resolve the dataset-specific preprocessing function by name and load the metadata
preprocessor = getattr(preprocessor, args.dataset)
meta_data = preprocessor(args.data_path, args.dataset_metafile)

dataset = MyDataset(
    model.decoder.r,
    C.text_cleaner,
    compute_linear_spec=False,
    ap=ap,
    meta_data=meta_data,
    tp=C.characters if 'characters' in C.keys() else None,
    add_blank=C['add_blank'] if 'add_blank' in C.keys() else False,
    use_phonemes=C.use_phonemes,
    phoneme_cache_path=C.phoneme_cache_path,
    phoneme_language=C.phoneme_language,
    enable_eos_bos=C.enable_eos_bos_chars,
)
dataset.sort_items()

loader = DataLoader(
    dataset,
    batch_size=args.batch_size,
    num_workers=4,
    collate_fn=dataset.collate_fn,
    shuffle=False,
    drop_last=False,
)

# compute attentions
file_paths = []
with torch.no_grad():
    for data in tqdm(loader):
        # setup input data
        text_input = data[0]
        text_lengths = data[1]
        linear_input = data[3]