def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    use_gpu: bool,
    style_num: int,
    content_num: int,
):
    """Run style-transfer generation on the eval split.

    For each of the first ``content_num`` content examples, generates outputs
    conditioned on (a) styles encoded from ``style_num`` reference inputs and
    (b) styles mapped from ``style_num`` latent vectors, saving each result as
    a ``.npy`` file under ``output_dir``.
    """
    # Default to the config saved next to the model checkpoints.
    if model_config is None:
        model_config = model_dir / "config.yaml"
    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))
    sampling_length = config.dataset.sampling_length
    padding_length = config.dataset.padding_length

    # Three networks make up the style-transfer generator.
    style_transfer_path, mapping_network_path, style_encoder_path = _get_network_paths(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        style_transfer=style_transfer_path,
        mapping_network=mapping_network_path,
        style_encoder=style_encoder_path,
        use_gpu=use_gpu,
    )

    dataset = create_dataset(config.dataset)["eval"]
    # One batch containing enough examples to serve as both styles and contents.
    batch = concat_examples([dataset[i] for i in range(max(style_num, content_num))])

    # `.split(1)` / `.expand` suggest batch["x"] is a torch tensor — TODO confirm.
    for i_x, x in tqdm(enumerate(batch["x"][:content_num].split(1)), desc="generate"):
        # NOTE(review): references are trimmed by sampling_length but contents by
        # padding_length — confirm this asymmetry is intentional.
        x_ref = batch["x"][:style_num, sampling_length:-sampling_length]
        z = batch["z1"][:style_num]

        x = x[:, padding_length:-padding_length]
        # Duplicate the single content across all style rows.
        x = x.expand(style_num, x.shape[1], x.shape[2])

        # Styles encoded from reference audio.
        s = generator.generate_style(x=x_ref, z=None)
        outputs = generator.generate(x=x, s=s)
        for i_style, output in enumerate(outputs):
            numpy.save(Path(output_dir, f"output-ref{i_style}-{i_x}.npy"), output)

        # Styles mapped from latent vectors.
        s = generator.generate_style(x=None, z=z)
        outputs = generator.generate(x=x, s=s)
        for i_style, output in enumerate(outputs):
            numpy.save(Path(output_dir, f"output-latent{i_style}-{i_x}.npy"), output)

        # NOTE(review): x was already sliced by padding_length above, so this
        # trims it a second time — verify the saved input is meant to be
        # double-trimmed.
        numpy.save(
            Path(output_dir, f"input-{i_x}.npy"),
            x[0][padding_length:-padding_length].numpy(),
        )
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    use_gpu: bool,
):
    """Load a trained predictor and run it over the test split.

    Only the call arguments are written to ``output_dir``; see the NOTE below
    about the generated outputs.
    """
    if model_config is None:
        model_config = model_dir / "config.yaml"
    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))
    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=use_gpu,
    )

    dataset = create_dataset(config.dataset)["test"]
    for data in tqdm(dataset, desc="generate"):
        # NOTE(review): both `target` and `output` are computed but never used
        # or saved — as written this loop only exercises the generator. Confirm
        # whether a save/compare step was lost from this function.
        target = data["target"]
        output = generator.generate(data["feature"])
def convert_f0(
    model_config: Path,
    input_glob: str,
    input_f0_statistics: Path,
    target_f0_statistics: Path,
    output_dir: Path,
):
    """Shift the f0 channel of every matched feature file by the difference of
    the target and input mean f0, saving the results into ``output_dir``.

    The f0 column index is inferred from the feature width: either the
    configured voiced+f0+phoneme layout, or the legacy 42-column layout.
    Raises ValueError for any other width.
    """
    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", convert_f0, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    # Statistics files are pickled dicts with a "mean" entry.
    source_stat = numpy.load(input_f0_statistics, allow_pickle=True).item()
    dest_stat = numpy.load(target_f0_statistics, allow_pickle=True).item()

    paths = [Path(matched) for matched in glob(input_glob)]
    for path in tqdm(paths, desc="convert_f0"):
        sampling_data = SamplingData.load(path)

        width = sampling_data.array.shape[1]
        network = config.network
        if width == network.voiced_feature_size + 1 + network.phoneme_feature_size:
            # Configured layout: f0 sits right after the voiced features.
            f0_column = network.voiced_feature_size
        elif width == (1 + 1 + 40):
            # Legacy fixed layout: voiced(1) + f0(1) + 40 phoneme features.
            f0_column = 1
        else:
            raise ValueError(width)

        # Move the mean f0 from the source speaker to the target speaker.
        sampling_data.array[:, f0_column] += dest_stat["mean"] - source_stat["mean"]
        sampling_data.save(output_dir / (path.stem + ".npy"))
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    time_second: float,
    num_test: int,
    output_dir: Path,
    use_gpu: bool,
):
    """Generate spectrograms for the first ``num_test`` test examples and save
    one ``.npy`` per input, named after its f0 file."""
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    predictor_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=predictor_path,
        use_gpu=use_gpu,
    )

    # Feature frame rate: presumably 24kHz audio with hop size 512 — TODO confirm.
    frame_rate = 24000 / 512
    config.dataset.sampling_length = int(frame_rate * time_second)
    batch_size = config.train.batch_size

    dataset = create_dataset(config.dataset)["test"]
    if isinstance(dataset, ConcatDataset):
        dataset = dataset.datasets[0]

    # Collect the f0 paths that give each output its file name.
    inner = dataset.dataset
    if isinstance(inner, FeatureDataset):
        f0_paths = [item.f0_path for item in inner.inputs[:num_test]]
    elif isinstance(inner, SpeakerFeatureDataset):
        f0_paths = [item.f0_path for item in inner.dataset.inputs[:num_test]]
    else:
        raise ValueError(dataset)

    example_batches = chunked(tqdm(dataset, desc="generate"), batch_size)
    path_batches = chunked(f0_paths, batch_size)
    for examples, paths in zip(example_batches, path_batches):
        batch = concat_examples(examples)
        specs = generator.generate(
            f0=batch["f0"],
            phoneme=batch["phoneme"],
            speaker_id=batch["speaker_id"] if "speaker_id" in batch else None,
        )
        for spec, path in zip(specs, paths):
            numpy.save(output_dir.joinpath(path.stem + ".npy"), spec)
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    num_test: int,
    time_second: float,
    use_gpu: bool,
):
    """Synthesize waveforms for the first ``num_test`` test examples and save
    one ``.wav`` per input, named after its local-feature file."""
    if model_config is None:
        model_config = model_dir / 'config.yaml'

    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / 'arguments.yaml', generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    predictor_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=predictor_path,
        use_gpu=use_gpu,
    )

    batch_size = config.train.batchsize
    # Generate `time_second` seconds of audio per example.
    config.dataset.sampling_length = int(config.dataset.sampling_rate * time_second)

    dataset = create_dataset(config.dataset)['test']
    if isinstance(dataset, SpeakerWavesDataset):
        local_paths = [inp.path_local for inp in dataset.wave_dataset.inputs][:num_test]
    elif isinstance(dataset, WavesDataset):
        local_paths = [inp.path_local for inp in dataset.inputs][:num_test]
    else:
        raise Exception()

    batch_iter = zip(chunked(dataset, batch_size), chunked(local_paths, batch_size))
    for examples, paths in tqdm(batch_iter, desc='generate'):
        batch = convert.concat_examples(examples)
        waves = generator.generate(
            local=batch['local'],
            source=batch['source'],
            speaker_id=batch['speaker_id'] if 'speaker_id' in batch else None,
            local_padding_length=config.dataset.local_padding_length,
        )
        for wave, path in zip(waves, paths):
            wave.save(output_dir / (path.stem + '.wav'))
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    data_par_speaker: int,
    use_gpu: bool,
):
    """Extract up to ``data_par_speaker`` feature vectors per speaker from the
    train split and save each as ``<speaker>-<index>.npy`` in ``output_dir``."""
    if model_config is None:
        model_config = model_dir / 'config.yaml'

    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / 'arguments.yaml', generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))
    predictor_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=predictor_path,
        use_gpu=use_gpu,
    )

    dataset = create_dataset(config.dataset)['train']

    # speaker -> list of generated features, capped at data_par_speaker each.
    collected = defaultdict(list)
    for data in tqdm(dataset, desc='generate'):
        speaker = data['target']
        if len(collected[speaker]) >= data_par_speaker:
            continue  # this speaker already has enough samples
        collected[speaker].append(generator.generate(data['input']))

    for speaker, features in collected.items():
        for index, feature in enumerate(features):
            numpy.save(output_dir / f'{speaker}-{index}.npy', feature)
def generate_all(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    transpose: bool,
    use_gpu: bool,
):
    """Generate a spectrogram for every input in the full train split (test
    split disabled) and save one ``.npy`` per input, named after its f0 file.

    ``transpose`` saves the spectrogram transposed.
    """
    if model_config is None:
        model_config = model_dir / "config.yaml"
    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", generate_all, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))
    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=use_gpu,
    )

    # Fold everything into the train split so no input is skipped.
    config.dataset.test_num = 0
    dataset = create_dataset(config.dataset)["train"]
    if isinstance(dataset.dataset, FeatureDataset):
        inputs = dataset.dataset.inputs
        speaker_ids = [None] * len(inputs)
    elif isinstance(dataset.dataset, SpeakerFeatureDataset):
        inputs = dataset.dataset.dataset.inputs
        speaker_ids = dataset.dataset.speaker_ids
    else:
        raise ValueError(dataset)

    for input, speaker_id in tqdm(zip(inputs, speaker_ids), total=len(inputs), desc="generate_all"):
        input_data = input.generate()
        # Build model inputs over the full length of the spectrogram,
        # bypassing the dataset's usual random cropping/masking
        # (time_mask_max_second=0).
        data = FeatureDataset.extract_input(
            sampling_length=len(input_data.spec.array),
            f0_data=input_data.f0,
            phoneme_data=input_data.phoneme,
            spec_data=input_data.spec,
            silence_data=input_data.silence,
            phoneme_list_data=input_data.phoneme_list,
            f0_process_mode=F0ProcessMode(config.dataset.f0_process_mode),
            time_mask_max_second=0,
        )
        # Run a batch of one; [0] strips the batch dimension back off.
        spec = generator.generate(
            f0=data["f0"][numpy.newaxis],
            phoneme=data["phoneme"][numpy.newaxis],
            speaker_id=(numpy.array(speaker_id)[numpy.newaxis] if speaker_id is not None else None),
        )[0]
        if transpose:
            spec = spec.T

        name = input.f0_path.stem
        numpy.save(output_dir.joinpath(name + ".npy"), spec)
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    to_voiced_scaler: bool,
    to_f0_scaler: bool,
    to_phoneme_onehot: bool,
    batch_size: Optional[int],
    num_test: int,
    target_glob: Optional[str],
    use_gpu: bool,
):
    """Convert waveforms to downsampled feature files.

    Loads the predictor plus optional voiced/f0/phoneme post-networks, runs the
    first ``num_test`` test waves (plus any ``target_glob`` matches) through
    the generator, and saves one ``.npy`` SamplingData per input wave.
    """
    if model_config is None:
        model_config = model_dir / "config.yaml"
    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    generator = Generator(
        config=config,
        predictor=_get_model_path(
            model_dir=model_dir,
            iteration=model_iteration,
            prefix="predictor_",
        ),
        # Each optional network is loaded only when its output flag is set.
        voiced_network=(
            None
            if not to_voiced_scaler
            else _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="voiced_network_",
            )
        ),
        f0_network=(
            None
            if not to_f0_scaler
            else _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="f0_network_",
            )
        ),
        phoneme_network=(
            None
            if not to_phoneme_onehot
            else _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="phoneme_network_",
            )
        ),
        use_gpu=use_gpu,
    )

    dataset = create_dataset(config.dataset)["test"]
    # Overall downsampling factor of the network (product of its scale stages).
    scale = numpy.prod(config.network.scale_list)

    if batch_size is None:
        batch_size = config.train.batch_size

    if isinstance(dataset, SpeakerWavesDataset):
        wave_paths = [data.path_wave for data in dataset.wave_dataset.inputs[:num_test]]
    elif isinstance(dataset, WavesDataset):
        wave_paths = [data.path_wave for data in dataset.inputs[:num_test]]
    else:
        raise Exception()

    # Optionally process extra waves outside the dataset.
    if target_glob is not None:
        wave_paths += list(map(Path, glob(target_glob)))

    for wps in tqdm(chunked(wave_paths, batch_size), desc="generate"):
        waves = [Wave.load(p) for p in wps]
        arrays = [w.wave for w in waves]

        # Each wave's own length rounded up to a multiple of `scale`; all waves
        # in the batch are then zero-padded to the longest rounded length so
        # they can be stacked.
        pad_lengths = [int(numpy.ceil(len(w) / scale) * scale) for w in arrays]
        arrays = [numpy.r_[w, numpy.zeros(max(pad_lengths) - len(w))] for w in arrays]

        tensors = [torch.from_numpy(array.astype(numpy.float32)) for array in arrays]
        output = generator.generate(
            wave=concat_examples(tensors),
            to_voiced_scaler=to_voiced_scaler,
            to_f0_scaler=to_f0_scaler,
            to_phoneme_onehot=to_phoneme_onehot,
        )

        for feature, p, w, l in zip(output, wps, waves, pad_lengths):
            # Trim the batch padding back to this wave's own frame count.
            feature = feature.T[: l // scale]
            data = SamplingData(array=feature, rate=w.sampling_rate // scale)
            data.save(output_dir / (p.stem + ".npy"))
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    time_second: float,
    num_test: int,
    num_train: int,
    output_dir: Path,
    use_gpu: bool,
):
    """Generate f0 contours for the first examples of the test and/or train
    splits, saving one ``.npy`` per input named after its phoneme file."""
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))
    predictor_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=predictor_path,
        use_gpu=use_gpu,
    )

    # Feature frames per second.
    frame_rate = 200
    config.dataset.sampling_length = int(frame_rate * time_second)
    batch_size = config.train.batch_size

    def run_split(dataset_type: str, num_data: int):
        # One pass over a split, saving an f0 file per input.
        dataset = create_dataset(config.dataset)[dataset_type]
        if isinstance(dataset, ConcatDataset):
            dataset = dataset.datasets[0]

        inner = dataset.dataset
        if isinstance(inner, FeatureDataset):
            phoneme_paths = [item.phoneme_path for item in inner.inputs[:num_data]]
        elif isinstance(inner, SpeakerFeatureDataset):
            phoneme_paths = [
                item.phoneme_path for item in inner.dataset.inputs[:num_data]
            ]
        else:
            raise ValueError(dataset)

        example_batches = chunked(tqdm(dataset, desc="generate"), batch_size)
        path_batches = chunked(phoneme_paths, batch_size)
        for examples, paths in zip(example_batches, path_batches):
            batch = concat_examples(examples)
            f0s = generator.generate(
                phoneme=batch["phoneme"],
                start_accent=batch["start_accent"] if "start_accent" in batch else None,
                end_accent=batch["end_accent"] if "end_accent" in batch else None,
                speaker_id=batch["speaker_id"] if "speaker_id" in batch else None,
            )
            for f0, path in zip(f0s, paths):
                numpy.save(output_dir.joinpath(path.stem + ".npy"), f0)

    if num_test > 0:
        run_split("test", num_test)
    if num_train > 0:
        run_split("train", num_train)
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    batch_size: int,
    num_test: int,
    from_train_data: bool,
    time_second: float,
    val_local_glob: str,
    val_speaker_id: Optional[int],
    noise_schedule_start: float,
    noise_schedule_stop: float,
    noise_schedule_num: int,
    use_gpu: bool,
):
    """Synthesize waveforms with a diffusion-style generator.

    First runs the first ``num_test`` dataset examples (test split, or train
    split when ``from_train_data``), then optionally runs extra local-feature
    files matched by ``val_local_glob`` with ``val_speaker_id``. One ``.wav``
    is saved per input.
    """
    output_dir.mkdir(exist_ok=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    if model_config is None:
        model_config = model_dir / "config.yaml"
    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    print("model path: ", model_path)

    generator = Generator(
        config=config,
        noise_schedule_config=NoiseScheduleModelConfig(
            start=noise_schedule_start, stop=noise_schedule_stop, num=noise_schedule_num),
        predictor=model_path,
        sampling_rate=config.dataset.sampling_rate,
        use_gpu=use_gpu,
    )

    # One second of conditioning-feature padding on each side.
    local_padding_second = 1
    local_padding_length = config.dataset.sampling_rate * local_padding_second

    config.dataset.sampling_length = int(config.dataset.sampling_rate * time_second)
    config.dataset.local_padding_length = local_padding_length

    dataset = create_dataset(
        config.dataset)["test" if not from_train_data else "train"]
    if isinstance(dataset, SpeakerWavesDataset):
        wave_paths = [
            input.path_wave for input in dataset.wave_dataset.inputs[:num_test]
        ]
    elif isinstance(dataset, WavesDataset):
        wave_paths = [input.path_wave for input in dataset.inputs[:num_test]]
    else:
        raise Exception()

    for data, wave_path in tqdm(
        zip(chunked(dataset, batch_size), chunked(wave_paths, batch_size)),
        desc="generate",
    ):
        data = concat_examples(data)
        output = generator.generate(
            local=data["local"],
            local_padding_length=local_padding_length,
            speaker_id=data["speaker_id"] if "speaker_id" in data else None,
        )
        for wave, p in zip(output, wave_path):
            wave.save(output_dir / (p.stem + ".wav"))

    # validation
    if val_local_glob is not None:
        local_paths = sorted([Path(p) for p in glob(val_local_glob)])
        speaker_ids = [val_speaker_id] * len(local_paths)
        for local_path, speaker_id in zip(chunked(local_paths, batch_size),
                                          chunked(speaker_ids, batch_size)):
            datas = [SamplingData.load(p) for p in local_path]
            # Frames needed to cover the output length plus padding on both
            # sides; assumes all files in the chunk share datas[0].rate —
            # TODO confirm.
            size = int(
                (time_second + local_padding_second * 2) * datas[0].rate)
            # Truncate long files; extend short ones by repeating the edge
            # frames (numpy.pad mode="edge"). Transposed to (channels, time).
            local = numpy.stack([
                (data.array[:size].T
                 if len(data.array) >= size else numpy.pad(
                     data.array,
                     ((0, size - len(data.array)), (0, 0)),
                     mode="edge",
                 ).T)
                for data in datas
            ])
            output = generator.generate(
                local=local,
                local_padding_length=local_padding_length,
                speaker_id=(numpy.stack(speaker_id)
                            if speaker_id[0] is not None else None),
            )
            for wave, p in zip(output, local_path):
                wave.save(output_dir / (p.stem + ".wav"))
def generate(
    model_dir: Path,
    model_iteration: int,
    model_config: Path,
    time_length: float,
    input_batchsize: Optional[int],
    num_test: int,
    sampling_policy: str,
    disable_fast_inference: bool,
    val_local_glob: str,
    val_speaker_num: Optional[int],
    output_dir: Path,
):
    """Run waveform generation over the first ``num_test`` test inputs, then
    optionally over extra local-feature files matched by ``val_local_glob``
    using ``val_speaker_num``."""
    output_dir.mkdir(exist_ok=True, parents=True)
    # Record the call arguments for reproducibility.
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    if model_config is None:
        model_config = model_dir / "config.yaml"
    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    print("model path: ", model_path)

    if input_batchsize is not None:
        batchsize = input_batchsize
    else:
        batchsize = config.train.batchsize

    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=True,
        max_batch_size=batchsize,
        use_fast_inference=not disable_fast_inference,
    )

    dataset = create_dataset(config.dataset)["test"]
    if isinstance(dataset, TensorWrapperDataset):
        dataset = dataset.dataset

    if isinstance(dataset, WavesDataset):
        local_paths = [inp.path_local for inp in dataset.inputs[:num_test]]
        speaker_nums = [None] * num_test
    elif isinstance(dataset, SpeakerWavesDataset):
        local_paths = [inp.path_local for inp in dataset.wave_dataset.inputs[:num_test]]
        speaker_nums = dataset.speaker_nums[:num_test]
    else:
        raise ValueError(dataset)

    # random
    for paths, nums in zip(
        chunked(local_paths, batchsize), chunked(speaker_nums, batchsize)
    ):
        process(
            generator=generator,
            local_paths=paths,
            local_sampling_rate=config.dataset.local_sampling_rate,
            time_length=time_length,
            speaker_nums=nums if nums[0] is not None else None,
            sampling_policy=SamplingPolicy(sampling_policy),
            output_dir=output_dir,
        )

    # validation
    if val_local_glob is not None:
        local_paths = sorted(Path(p) for p in glob.glob(val_local_glob))
        speaker_nums = [val_speaker_num] * len(local_paths)
        for paths, nums in zip(
            chunked(local_paths, batchsize), chunked(speaker_nums, batchsize)
        ):
            process(
                generator=generator,
                local_paths=paths,
                local_sampling_rate=config.dataset.local_sampling_rate,
                time_length=time_length,
                speaker_nums=nums if nums[0] is not None else None,
                sampling_policy=SamplingPolicy(sampling_policy),
                output_dir=output_dir,
            )