Example #1
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    use_gpu: bool,
    style_num: int,
    content_num: int,
):
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / "arguments.yaml", generate, locals())

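    # Load the training-time config to recover the trim lengths used below.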
    config = Config.from_dict(yaml.safe_load(model_config.open()))
    sampling_length = config.dataset.sampling_length
    padding_length = config.dataset.padding_length

    style_transfer_path, mapping_network_path, style_encoder_path = _get_network_paths(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        style_transfer=style_transfer_path,
        mapping_network=mapping_network_path,
        style_encoder=style_encoder_path,
        use_gpu=use_gpu,
    )

    dataset = create_dataset(config.dataset)["eval"]

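    # Batch enough eval examples to cover both style references and content inputs.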
    batch = concat_examples(
        [dataset[i] for i in range(max(style_num, content_num))])

    for i_x, x in tqdm(enumerate(batch["x"][:content_num].split(1)),
                       desc="generate"):
        x_ref = batch["x"][:style_num, sampling_length:-sampling_length]
        z = batch["z1"][:style_num]

        x = x[:, padding_length:-padding_length]
        x = x.expand(style_num, x.shape[1], x.shape[2])

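        # Style vector encoded from the reference features x_ref.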
        s = generator.generate_style(x=x_ref, z=None)
        outputs = generator.generate(x=x, s=s)
        for i_style, output in enumerate(outputs):
            numpy.save(Path(output_dir, f"output-ref{i_style}-{i_x}.npy"),
                       output)

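        # Style vector produced from the latent code z instead of a reference.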
        s = generator.generate_style(x=None, z=z)
        outputs = generator.generate(x=x, s=s)
        for i_style, output in enumerate(outputs):
            numpy.save(Path(output_dir, f"output-latent{i_style}-{i_x}.npy"),
                       output)

        numpy.save(
            Path(output_dir, f"input-{i_x}.npy"),
            x[0][padding_length:-padding_length].numpy(),
        )
Example #2
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    use_gpu: bool,
):
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=use_gpu,
    )

    dataset = create_dataset(config.dataset)["test"]
    for data in tqdm(dataset, desc="generate"):
        target = data["target"]
        output = generator.generate(data["feature"])
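        # The snippet ends here; presumably `output` is saved or compared
        # against `target` in the code omitted from this example.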
Example #3
def convert_f0(
    model_config: Path,
    input_glob: str,
    input_f0_statistics: Path,
    target_f0_statistics: Path,
    output_dir: Path,
):
    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / "arguments.yaml", convert_f0, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

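    # Each statistics file stores a dict with (at least) a "mean" entry.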
    input_stat = numpy.load(input_f0_statistics, allow_pickle=True).item()
    target_stat = numpy.load(target_f0_statistics, allow_pickle=True).item()

    paths = list(map(Path, glob(input_glob)))

    for p in tqdm(paths, desc="convert_f0"):
        data = SamplingData.load(p)

        if data.array.shape[1] == (config.network.voiced_feature_size + 1 +
                                   config.network.phoneme_feature_size):
            f0_index = config.network.voiced_feature_size
        elif data.array.shape[1] == (1 + 1 + 40):
            f0_index = 1
        else:
            raise ValueError(data.array.shape[1])

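        # Shift the f0 channel so its mean matches the target speaker's.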
        data.array[:, f0_index] += target_stat["mean"] - input_stat["mean"]
        data.save(output_dir / (p.stem + ".npy"))
Example #4
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    time_second: float,
    num_test: int,
    output_dir: Path,
    use_gpu: bool,
):
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=use_gpu,
    )

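    # Spectrogram frame rate: 24 kHz audio with a 512-sample hop.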
    sampling_rate = 24000 / 512
    config.dataset.sampling_length = int(sampling_rate * time_second)

    batch_size = config.train.batch_size

    dataset = create_dataset(config.dataset)["test"]
    if isinstance(dataset, ConcatDataset):
        dataset = dataset.datasets[0]

    if isinstance(dataset.dataset, FeatureDataset):
        f0_paths = [inp.f0_path for inp in dataset.dataset.inputs[:num_test]]
    elif isinstance(dataset.dataset, SpeakerFeatureDataset):
        f0_paths = [
            inp.f0_path for inp in dataset.dataset.dataset.inputs[:num_test]
        ]
    else:
        raise ValueError(dataset)

    for data, f0_path in zip(
            chunked(tqdm(dataset, desc="generate"), batch_size),
            chunked(f0_paths, batch_size),
    ):
        data = concat_examples(data)
        specs = generator.generate(
            f0=data["f0"],
            phoneme=data["phoneme"],
            speaker_id=data["speaker_id"] if "speaker_id" in data else None,
        )

        for spec, p in zip(specs, f0_path):
            numpy.save(output_dir.joinpath(p.stem + ".npy"), spec)
Example #5
def generate(
        model_dir: Path,
        model_iteration: Optional[int],
        model_config: Optional[Path],
        output_dir: Path,
        num_test: int,
        time_second: float,
        use_gpu: bool,
):
    if model_config is None:
        model_config = model_dir / 'config.yaml'

    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / 'arguments.yaml', generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=use_gpu,
    )

    batch_size = config.train.batchsize

    config.dataset.sampling_length = int(config.dataset.sampling_rate * time_second)
    dataset = create_dataset(config.dataset)['test']

    if isinstance(dataset, SpeakerWavesDataset):
        local_paths = [input.path_local for input in dataset.wave_dataset.inputs][:num_test]
    elif isinstance(dataset, WavesDataset):
        local_paths = [input.path_local for input in dataset.inputs][:num_test]
    else:
        raise ValueError(dataset)

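    # Generate waveforms in minibatches; outputs are named after the local-feature files.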
    for data, local_path in tqdm(zip(chunked(dataset, batch_size), chunked(local_paths, batch_size)), desc='generate'):
        data = convert.concat_examples(data)
        output = generator.generate(
            local=data['local'],
            source=data['source'],
            speaker_id=data['speaker_id'] if 'speaker_id' in data else None,
            local_padding_length=config.dataset.local_padding_length,
        )

        for wave, p in zip(output, local_path):
            wave.save(output_dir / (p.stem + '.wav'))
Example #6
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    data_par_speaker: int,
    use_gpu: bool,
):
    if model_config is None:
        model_config = model_dir / 'config.yaml'

    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / 'arguments.yaml', generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=use_gpu,
    )

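    # Collect up to data_par_speaker generated features for every speaker in the train set.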
    dataset = create_dataset(config.dataset)['train']
    features_dict = defaultdict(list)
    for data in tqdm(dataset, desc='generate'):
        speaker_num = data['target']
        if len(features_dict[speaker_num]) >= data_par_speaker:
            continue

        feature = generator.generate(data['input'])
        features_dict[speaker_num].append(feature)

    for speaker_num, features in features_dict.items():
        for i, feature in enumerate(features):
            path = output_dir / f'{speaker_num}-{i}.npy'
            numpy.save(path, feature)
Example #7
def generate_all(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    transpose: bool,
    use_gpu: bool,
):
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / "arguments.yaml", generate_all, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=use_gpu,
    )

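    # Use the whole corpus: disable the test split and take the train side.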
    config.dataset.test_num = 0
    dataset = create_dataset(config.dataset)["train"]

    if isinstance(dataset.dataset, FeatureDataset):
        inputs = dataset.dataset.inputs
        speaker_ids = [None] * len(inputs)
    elif isinstance(dataset.dataset, SpeakerFeatureDataset):
        inputs = dataset.dataset.dataset.inputs
        speaker_ids = dataset.dataset.speaker_ids
    else:
        raise ValueError(dataset)

    for input, speaker_id in tqdm(zip(inputs, speaker_ids),
                                  total=len(inputs),
                                  desc="generate_all"):
        input_data = input.generate()
        data = FeatureDataset.extract_input(
            sampling_length=len(input_data.spec.array),
            f0_data=input_data.f0,
            phoneme_data=input_data.phoneme,
            spec_data=input_data.spec,
            silence_data=input_data.silence,
            phoneme_list_data=input_data.phoneme_list,
            f0_process_mode=F0ProcessMode(config.dataset.f0_process_mode),
            time_mask_max_second=0,
        )

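        # Run the predictor on a batch of one (hence the added leading axes).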
        spec = generator.generate(
            f0=data["f0"][numpy.newaxis],
            phoneme=data["phoneme"][numpy.newaxis],
            speaker_id=(numpy.array(speaker_id)[numpy.newaxis]
                        if speaker_id is not None else None),
        )[0]

        if transpose:
            spec = spec.T

        name = input.f0_path.stem
        numpy.save(output_dir.joinpath(name + ".npy"), spec)
Example #8
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    to_voiced_scaler: bool,
    to_f0_scaler: bool,
    to_phoneme_onehot: bool,
    batch_size: Optional[int],
    num_test: int,
    target_glob: Optional[str],
    use_gpu: bool,
):
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    generator = Generator(
        config=config,
        predictor=_get_model_path(
            model_dir=model_dir,
            iteration=model_iteration,
            prefix="predictor_",
        ),
        voiced_network=(
            None
            if not to_voiced_scaler
            else _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="voiced_network_",
            )
        ),
        f0_network=(
            None
            if not to_f0_scaler
            else _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="f0_network_",
            )
        ),
        phoneme_network=(
            None
            if not to_phoneme_onehot
            else _get_model_path(
                model_dir=model_dir,
                iteration=model_iteration,
                prefix="phoneme_network_",
            )
        ),
        use_gpu=use_gpu,
    )

    dataset = create_dataset(config.dataset)["test"]
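    # Total downsampling factor of the network; feature rate is wave rate / scale.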
    scale = numpy.prod(config.network.scale_list)

    if batch_size is None:
        batch_size = config.train.batch_size

    if isinstance(dataset, SpeakerWavesDataset):
        wave_paths = [data.path_wave for data in dataset.wave_dataset.inputs[:num_test]]
    elif isinstance(dataset, WavesDataset):
        wave_paths = [data.path_wave for data in dataset.inputs[:num_test]]
    else:
        raise Exception()

    if target_glob is not None:
        wave_paths += list(map(Path, glob(target_glob)))

    for wps in tqdm(chunked(wave_paths, batch_size), desc="generate"):
        waves = [Wave.load(p) for p in wps]
        arrays = [w.wave for w in waves]

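        # Zero-pad every wave in the batch to the longest length, rounded up to a multiple of scale.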
        pad_lengths = [int(numpy.ceil(len(w) / scale) * scale) for w in arrays]
        arrays = [numpy.r_[w, numpy.zeros(max(pad_lengths) - len(w))] for w in arrays]

        tensors = [torch.from_numpy(array.astype(numpy.float32)) for array in arrays]
        output = generator.generate(
            wave=concat_examples(tensors),
            to_voiced_scaler=to_voiced_scaler,
            to_f0_scaler=to_f0_scaler,
            to_phoneme_onehot=to_phoneme_onehot,
        )

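        # Trim each output back to its own padded frame count before saving.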
        for feature, p, w, l in zip(output, wps, waves, pad_lengths):
            feature = feature.T[: l // scale]
            data = SamplingData(array=feature, rate=w.sampling_rate // scale)
            data.save(output_dir / (p.stem + ".npy"))
Example #9
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    time_second: float,
    num_test: int,
    num_train: int,
    output_dir: Path,
    use_gpu: bool,
):
    if model_config is None:
        model_config = model_dir / "config.yaml"

    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=use_gpu,
    )

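    # f0 features are sampled at 200 frames per second.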
    sampling_rate = 200
    config.dataset.sampling_length = int(sampling_rate * time_second)

    batch_size = config.train.batch_size

    def generate_wrapper(dataset_type: str, num_data: int):
        dataset = create_dataset(config.dataset)[dataset_type]
        if isinstance(dataset, ConcatDataset):
            dataset = dataset.datasets[0]

        if isinstance(dataset.dataset, FeatureDataset):
            phoneme_paths = [
                inp.phoneme_path for inp in dataset.dataset.inputs[:num_data]
            ]
        elif isinstance(dataset.dataset, SpeakerFeatureDataset):
            phoneme_paths = [
                inp.phoneme_path
                for inp in dataset.dataset.dataset.inputs[:num_data]
            ]
        else:
            raise ValueError(dataset)

        for data, phoneme_path in zip(
                chunked(tqdm(dataset, desc="generate"), batch_size),
                chunked(phoneme_paths, batch_size),
        ):
            data = concat_examples(data)
            f0s = generator.generate(
                phoneme=data["phoneme"],
                start_accent=data["start_accent"]
                if "start_accent" in data else None,
                end_accent=data["end_accent"]
                if "end_accent" in data else None,
                speaker_id=data["speaker_id"]
                if "speaker_id" in data else None,
            )

            for f0, p in zip(f0s, phoneme_path):
                numpy.save(output_dir.joinpath(p.stem + ".npy"), f0)

    if num_test > 0:
        generate_wrapper("test", num_test)

    if num_train > 0:
        generate_wrapper("train", num_train)
Example #10
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    output_dir: Path,
    batch_size: int,
    num_test: int,
    from_train_data: bool,
    time_second: float,
    val_local_glob: Optional[str],
    val_speaker_id: Optional[int],
    noise_schedule_start: float,
    noise_schedule_stop: float,
    noise_schedule_num: int,
    use_gpu: bool,
):
    output_dir.mkdir(exist_ok=True)
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    if model_config is None:
        model_config = model_dir / "config.yaml"
    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    print("model path: ", model_path)
    generator = Generator(
        config=config,
        noise_schedule_config=NoiseScheduleModelConfig(
            start=noise_schedule_start,
            stop=noise_schedule_stop,
            num=noise_schedule_num),
        predictor=model_path,
        sampling_rate=config.dataset.sampling_rate,
        use_gpu=use_gpu,
    )

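    # Pad the local conditioning features by one second on each side.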
    local_padding_second = 1
    local_padding_length = config.dataset.sampling_rate * local_padding_second

    config.dataset.sampling_length = int(config.dataset.sampling_rate *
                                         time_second)
    config.dataset.local_padding_length = local_padding_length
    dataset = create_dataset(
        config.dataset)["test" if not from_train_data else "train"]

    if isinstance(dataset, SpeakerWavesDataset):
        wave_paths = [
            input.path_wave for input in dataset.wave_dataset.inputs[:num_test]
        ]
    elif isinstance(dataset, WavesDataset):
        wave_paths = [input.path_wave for input in dataset.inputs[:num_test]]
    else:
        raise ValueError(dataset)

    for data, wave_path in tqdm(
            zip(chunked(dataset, batch_size), chunked(wave_paths, batch_size)),
            desc="generate",
    ):
        data = concat_examples(data)
        output = generator.generate(
            local=data["local"],
            local_padding_length=local_padding_length,
            speaker_id=data["speaker_id"] if "speaker_id" in data else None,
        )

        for wave, p in zip(output, wave_path):
            wave.save(output_dir / (p.stem + ".wav"))

    # validation
    if val_local_glob is not None:
        local_paths = sorted([Path(p) for p in glob(val_local_glob)])
        speaker_ids = [val_speaker_id] * len(local_paths)
        for local_path, speaker_id in zip(chunked(local_paths, batch_size),
                                          chunked(speaker_ids, batch_size)):
            datas = [SamplingData.load(p) for p in local_path]
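            # Crop, or edge-pad, each local feature to a fixed frame count
            # covering time_second plus the padding on both sides.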
            size = int(
                (time_second + local_padding_second * 2) * datas[0].rate)
            local = numpy.stack([
                (data.array[:size].T if len(data.array) >= size else numpy.pad(
                    data.array,
                    ((0, size - len(data.array)), (0, 0)),
                    mode="edge",
                ).T) for data in datas
            ])

            output = generator.generate(
                local=local,
                local_padding_length=local_padding_length,
                speaker_id=(numpy.stack(speaker_id)
                            if speaker_id[0] is not None else None),
            )

            for wave, p in zip(output, local_path):
                wave.save(output_dir / (p.stem + ".wav"))
Example #11
def generate(
    model_dir: Path,
    model_iteration: Optional[int],
    model_config: Optional[Path],
    time_length: float,
    input_batchsize: Optional[int],
    num_test: int,
    sampling_policy: str,
    disable_fast_inference: bool,
    val_local_glob: Optional[str],
    val_speaker_num: Optional[int],
    output_dir: Path,
):
    output_dir.mkdir(exist_ok=True, parents=True)
    save_arguments(output_dir / "arguments.yaml", generate, locals())

    if model_config is None:
        model_config = model_dir / "config.yaml"
    config = Config.from_dict(yaml.safe_load(model_config.open()))

    model_path = _get_predictor_model_path(
        model_dir=model_dir,
        iteration=model_iteration,
    )
    print("model path: ", model_path)

    batchsize = (input_batchsize
                 if input_batchsize is not None else config.train.batchsize)

    generator = Generator(
        config=config,
        predictor=model_path,
        use_gpu=True,
        max_batch_size=batchsize,
        use_fast_inference=not disable_fast_inference,
    )

    dataset = create_dataset(config.dataset)["test"]
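    # Unwrap dataset wrappers to reach the raw inputs and speaker numbers.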
    if isinstance(dataset, TensorWrapperDataset):
        dataset = dataset.dataset

    if isinstance(dataset, WavesDataset):
        inputs = dataset.inputs
        local_paths = [input.path_local for input in inputs[:num_test]]
        speaker_nums = [None] * num_test
    elif isinstance(dataset, SpeakerWavesDataset):
        inputs = dataset.wave_dataset.inputs
        local_paths = [input.path_local for input in inputs[:num_test]]
        speaker_nums = dataset.speaker_nums[:num_test]
    else:
        raise ValueError(dataset)

    # random
    for local_path, speaker_num in zip(chunked(local_paths, batchsize),
                                       chunked(speaker_nums, batchsize)):
        process(
            generator=generator,
            local_paths=local_path,
            local_sampling_rate=config.dataset.local_sampling_rate,
            time_length=time_length,
            speaker_nums=speaker_num if speaker_num[0] is not None else None,
            sampling_policy=SamplingPolicy(sampling_policy),
            output_dir=output_dir,
        )

    # validation
    if val_local_glob is not None:
        local_paths = sorted([Path(p) for p in glob.glob(val_local_glob)])
        speaker_nums = [val_speaker_num] * len(local_paths)
        for local_path, speaker_num in zip(chunked(local_paths, batchsize),
                                           chunked(speaker_nums, batchsize)):
            process(
                generator=generator,
                local_paths=local_path,
                local_sampling_rate=config.dataset.local_sampling_rate,
                time_length=time_length,
                speaker_nums=speaker_num
                if speaker_num[0] is not None else None,
                sampling_policy=SamplingPolicy(sampling_policy),
                output_dir=output_dir,
            )