Exemplo n.º 1
0
    def __init__(self, n_loops, n_layers, residual_channels, condition_dim,
                 filter_size):
        """Build a ParallelWaveNet, an inverse autoregressive flow model made of several flows (WaveNets).

        One WaveNet is created for each ``(n_loop, n_layer)`` pair obtained
        by zipping ``n_loops`` with ``n_layers``; surplus entries of the
        longer list are therefore ignored. The remaining hyper-parameters
        are shared by all flows.

        Args:
            n_loops (List[int]): `n_loop` of each flow.
            n_layers (List[int]): `n_layer` of each flow.
            residual_channels (int): `residual_channels` shared by every flow.
            condition_dim (int): `condition_dim` shared by every flow.
            filter_size (int): `filter_size` shared by every flow.
        """
        super(ParallelWaveNet, self).__init__()

        # Fixed positional arguments handed to every WaveNet flow.
        flow_output_dim = 3
        flow_loss_type = "mog"
        # The original note says log_scale_min does not matter here,
        # so -100 is only a dummy value.
        dummy_log_scale_min = -100.0

        self.flows = dg.LayerList()
        for loops_in_flow, layers_in_flow in zip(n_loops, n_layers):
            flow = WaveNet(loops_in_flow, layers_in_flow, residual_channels,
                           flow_output_dim, condition_dim, filter_size,
                           flow_loss_type, dummy_log_scale_min)
            self.flows.append(flow)
Exemplo n.º 2
0
        place = fluid.CPUPlace()
    else:
        place = fluid.CUDAPlace(args.device)

    with dg.guard(place):
        model_config = config["model"]
        upsampling_factors = model_config["upsampling_factors"]
        encoder = UpsampleNet(upsampling_factors)

        n_loop = model_config["n_loop"]
        n_layer = model_config["n_layer"]
        residual_channels = model_config["residual_channels"]
        output_dim = model_config["output_dim"]
        loss_type = model_config["loss_type"]
        log_scale_min = model_config["log_scale_min"]
        decoder = WaveNet(n_loop, n_layer, residual_channels, output_dim,
                          n_mels, filter_size, loss_type, log_scale_min)

        model = ConditionalWavenet(encoder, decoder)
        summary(model)

        # load model parameters
        checkpoint_dir = os.path.join(args.output, "checkpoints")
        if args.checkpoint:
            iteration = io.load_parameters(model,
                                           checkpoint_path=args.checkpoint)
        else:
            iteration = io.load_parameters(model,
                                           checkpoint_dir=checkpoint_dir,
                                           iteration=args.iteration)
        assert iteration > 0, "A trained model is needed."
Exemplo n.º 3
0
def synthesis_with_clarinet(config_path, checkpoint, mel_spectrogram, place):
    """Synthesize a waveform from a mel spectrogram with a Clarinet model.

    The YAML config is read, then the conditioner (UpsampleNet), the teacher
    WaveNet, the student ParallelWaveNet and the STFT module are built in
    that order and wrapped in a Clarinet model. Its parameters are loaded
    from ``checkpoint`` and ``model.synthesis`` is run on the rescaled mel
    spectrogram.

    NOTE: besides its parameters, this function reads a module-level
    ``args`` object and creates the directory ``args.output`` when it does
    not exist yet.

    Args:
        config_path (str): path of the YAML config file. The top-level keys
            "data", "teacher", "conditioner", "student", "stft" and "loss"
            are read from it.
        checkpoint: forwarded to ``io.load_parameters`` as
            ``checkpoint_path``.
        mel_spectrogram: numeric array. Presumably a linear-magnitude mel
            spectrogram laid out as (batch, n_mels, time) -- TODO confirm
            against the caller.
        place: device place handed to ``dg.guard``.

    Returns:
        Element 0 of the numpy conversion of ``model.synthesis(...)``.

    Raises:
        AssertionError: if the teacher config does not have
            ``loss_type == "mog"`` and ``output_dim == 3``.
    """
    with open(config_path, 'rt') as config_file:
        config = yaml.safe_load(config_file)

    n_mels = config["data"]["n_mels"]

    teacher_cfg = config["teacher"]
    n_loop = teacher_cfg["n_loop"]
    n_layer = teacher_cfg["n_layer"]
    filter_size = teacher_cfg["filter_size"]

    # Original note: only batch=1 for validation is enabled.

    with dg.guard(place):
        # Conditioner (upsampling net).
        upsample_net = UpsampleNet(
            upscale_factors=config["conditioner"]["upsampling_factors"])
        freeze(upsample_net)

        # Teacher WaveNet; the config must describe a "mog" loss with an
        # output dimension of 3.
        residual_channels = teacher_cfg["residual_channels"]
        loss_type = teacher_cfg["loss_type"]
        output_dim = teacher_cfg["output_dim"]
        log_scale_min = teacher_cfg["log_scale_min"]
        assert (loss_type == "mog" and output_dim == 3), \
            "the teacher wavenet should be a wavenet with single gaussian output"

        teacher = WaveNet(n_loop, n_layer, residual_channels, output_dim,
                          n_mels, filter_size, loss_type, log_scale_min)
        freeze(teacher)

        # Student ParallelWaveNet, conditioned on the same n_mels.
        student_cfg = config["student"]
        flow_loops = student_cfg["n_loops"]
        flow_layers = student_cfg["n_layers"]
        flow_channels = student_cfg["residual_channels"]
        flow_filter_size = student_cfg["filter_size"]
        student_log_scale_min = student_cfg["log_scale_min"]
        student = ParallelWaveNet(flow_loops, flow_layers, flow_channels,
                                  n_mels, flow_filter_size)

        stft_cfg = config["stft"]
        stft_kwargs = {
            key: stft_cfg[key]
            for key in ("n_fft", "hop_length", "win_length")
        }
        stft = STFT(**stft_kwargs)

        lmd = config["loss"]["lmd"]
        model = Clarinet(upsample_net, teacher, student, stft,
                         student_log_scale_min, lmd)
        io.load_parameters(model=model, checkpoint_path=checkpoint)

        # `args` is not a parameter: it is looked up at module level.
        if not os.path.exists(args.output):
            os.makedirs(args.output)
        model.eval()

        # Rescale the mel spectrogram: floor at min_level, convert with
        # 20 * log10, subtract ref_level, then map [-100, 0] onto [0, 1]
        # with clipping. The constants are hard-coded.
        min_level, ref_level = 1e-5, 20
        floored = np.maximum(min_level, mel_spectrogram)
        level_db = 20 * np.log10(floored) - ref_level
        normalized = np.clip((level_db + 100) / 100, 0, 1)

        # Swap the last two axes before handing the input to the model.
        mel_var = fluid.layers.transpose(dg.to_variable(normalized),
                                         [0, 2, 1])

        return model.synthesis(mel_var).numpy()[0]