Example #1
    def forward(self, audio, mel, audio_start, clip_kl=True):
        """Compute loss of Clarinet model.

        Args:
            audio (Variable): shape(B, T_audio), dtype float32, ground truth waveform.
            mel (Variable): shape(B, F, T_mel), dtype float32, the condition (mel spectrogram here).
            audio_start (Variable): shape(B, ), dtype int64, audio start positions.
            clip_kl (bool, optional): whether to clip the KL divergence into the range [-100, 10]. Defaults to True.

        Returns:
            Dict(str, Variable)
            loss (Variable): shape(1, ), dtype float32, total loss.
            kl_divergence (Variable): shape(1, ), dtype float32, KL divergence between the teacher's output distribution and the student's output distribution.
            regularization (Variable): shape(1, ), dtype float32, a regularization term of the KL divergence.
            stft_loss (Variable): shape(1, ), dtype float32, spectrogram frame loss, the mean squared error between the magnitude spectrograms of the ground truth waveform and the synthesized waveform.
        """
        batch_size, audio_length = audio.shape  # audio clip's length

        z = F.gaussian_random(audio.shape)
        condition = self.encoder(mel)  # (B, C, T)
        condition_slice = crop(condition, audio_start, audio_length)

        x, s_means, s_scales = self.student(z, condition_slice)  # all [0: T]
        s_means = s_means[:, 1:]  # (B, T-1), time steps [1: T]
        s_scales = s_scales[:, 1:]  # (B, T-1), time steps [1: T]
        s_clipped_scales = F.clip(s_scales, self.min_log_scale, 100.)

        # teacher outputs single gaussian
        y = self.teacher(x[:, :-1], condition_slice[:, :, 1:])
        _, t_means, t_scales = F.split(y, 3, -1)  # time steps [1: T]
        t_means = F.squeeze(t_means, [-1])  # (B, T-1), time steps [1: T]
        t_scales = F.squeeze(t_scales, [-1])  # (B, T-1), time steps [1: T]
        t_clipped_scales = F.clip(t_scales, self.min_log_scale, 100.)

        s_distribution = D.Normal(s_means, F.exp(s_clipped_scales))
        t_distribution = D.Normal(t_means, F.exp(t_clipped_scales))

        # the KL divergence between two Gaussians has a closed form, so no Monte Carlo sampling is needed
        kl = s_distribution.kl_divergence(t_distribution)
        if clip_kl:
            kl = F.clip(kl, -100., 10.)
        # drop the first context_size time steps
        kl = F.reduce_mean(kl[:, self.teacher.context_size:])
        # regularization that keeps the student's log-scales close to the teacher's
        regularization = F.mse_loss(t_scales[:, self.teacher.context_size:],
                                    s_scales[:, self.teacher.context_size:])

        # introduce information from real target
        spectrogram_frame_loss = F.mse_loss(self.stft.magnitude(audio),
                                            self.stft.magnitude(x))
        loss = kl + self.lmd * regularization + spectrogram_frame_loss
        loss_dict = {
            "loss": loss,
            "kl_divergence": kl,
            "regularization": regularization,
            "stft_loss": spectrogram_frame_loss
        }
        return loss_dict
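For reference, the closed-form KL divergence between two Gaussians that `kl_divergence` computes above (the reason no Monte Carlo sampling is needed) can be checked with a small NumPy sketch; `gaussian_kl` is an illustrative helper, not part of the model.

import numpy as np

def gaussian_kl(mean_p, std_p, mean_q, std_q):
    """Closed-form KL(N(mean_p, std_p) || N(mean_q, std_q)), elementwise."""
    return (np.log(std_q / std_p)
            + (std_p ** 2 + (mean_p - mean_q) ** 2) / (2 * std_q ** 2)
            - 0.5)

# e.g. a slightly-off student distribution against a unit-variance teacher
print(gaussian_kl(mean_p=0.1, std_p=0.9, mean_q=0.0, std_q=1.0))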
Example #2
    def synthesis(self, mel):
        """Synthesize waveform using the encoder and the student network.

        Args:
            mel (Variable): shape(B, F, T_mel), the condition (mel spectrogram here).

        Returns:
            Variable: shape(B, T_audio), the synthesized waveform. (T_audio = T_mel * upscale_factor, where upscale_factor is the `upscale_factor` of the encoder.)
        """
        condition = self.encoder(mel)
        samples_shape = (condition.shape[0], condition.shape[-1])
        z = F.gaussian_random(samples_shape)
        x, s_means, s_scales = self.student(z, condition)
        return x
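The shape bookkeeping in `synthesis` follows the docstring: the encoder upsamples the mel condition along time, so the noise `z` is drawn with one sample per output audio step. A minimal sanity-check sketch, where the batch size, T_mel, and `upscale_factor` values are assumed examples only:

import numpy as np

batch_size, t_mel, upscale_factor = 4, 100, 256  # assumed example values
t_audio = t_mel * upscale_factor  # T_audio = T_mel * upscale_factor

# z matches the upsampled condition: one Gaussian sample per audio step
z = np.random.randn(batch_size, t_audio).astype('float32')
assert z.shape == (batch_size, 25600)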
Example #3
    def init_projection_matrix(self, x):
        # Whether to use a projection matrix
        self.params.use_projection_matrix = getattr(self.params,
                                                    'use_projection_matrix',
                                                    True)

        if self.params.use_projection_matrix:
            self.compressed_dim = self.fparams.attribute(
                'compressed_dim', None)

            proj_init_method = getattr(self.params, 'proj_init_method', 'pca')
            if proj_init_method == 'pca':
                raise NotImplementedError
            elif proj_init_method == 'randn':
                with fluid.dygraph.guard():
                    self.projection_matrix = TensorList([
                        None if cdim is None else layers.gaussian_random(
                            (cdim, ex.shape[1], 1, 1), 0.0, 1 /
                            math.sqrt(ex.shape[1])).numpy()
                        for ex, cdim in zip(x, self.compressed_dim)
                    ])
            elif proj_init_method == 'np_randn':
                rng = np.random.RandomState(0)
                self.projection_matrix = TensorList([
                    None if cdim is None else rng.normal(
                        size=(cdim, ex.shape[1], 1, 1),
                        loc=0.0,
                        scale=1 / math.sqrt(ex.shape[1])).astype('float32')
                    for ex, cdim in zip(x, self.compressed_dim)
                ])
            elif proj_init_method == 'ones':
                self.projection_matrix = TensorList([
                    None if cdim is None else np.ones(
                        (cdim, ex.shape[1], 1, 1), 'float32') /
                    math.sqrt(ex.shape[1])
                    for ex, cdim in zip(x, self.compressed_dim)
                ])
        else:
            self.compressed_dim = x.size(1)
            self.projection_matrix = TensorList([None] * len(x))
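The 'randn' and 'np_randn' branches above are meant to produce the same kind of initialization, once with `layers.gaussian_random` under a dygraph guard and once with NumPy. A minimal side-by-side sketch, assuming the `fluid`/`layers` imports used throughout these examples and made-up dimensions:

import math
import numpy as np
import paddle.fluid as fluid
from paddle.fluid import layers

cdim, feat_dim = 16, 64  # assumed example dimensions

# dygraph variant: mean 0.0, std 1/sqrt(feat_dim), pulled back to NumPy
with fluid.dygraph.guard():
    w_paddle = layers.gaussian_random(
        (cdim, feat_dim, 1, 1), 0.0, 1 / math.sqrt(feat_dim)).numpy()

# NumPy variant with the same moments
rng = np.random.RandomState(0)
w_numpy = rng.normal(
    size=(cdim, feat_dim, 1, 1), loc=0.0,
    scale=1 / math.sqrt(feat_dim)).astype('float32')

assert w_paddle.shape == w_numpy.shape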
Example #4
    def test_gaussian_random(self):
        program = Program()
        with program_guard(program):
            out = layers.gaussian_random(shape=[20, 30])
            self.assertIsNotNone(out)
        print(str(program))
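The unit test above only builds the program and checks that the output variable exists. To actually draw values, the program can be run with an executor; a minimal sketch in the same static-graph fluid style, using fully qualified names instead of the test's direct imports:

import paddle.fluid as fluid
from paddle.fluid import layers

main_program = fluid.Program()
with fluid.program_guard(main_program):
    out = layers.gaussian_random(shape=[20, 30], mean=0.0, std=1.0)

exe = fluid.Executor(fluid.CPUPlace())
noise, = exe.run(main_program, fetch_list=[out])
print(noise.shape)  # (20, 30)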
Example #5
    def init_optimization(self, train_x, init_y):
        # Initialize filter
        filter_init_method = getattr(self.params, 'filter_init_method',
                                     'zeros')
        self.filter = TensorList([
            np.zeros([1, cdim, sz[0], sz[1]], 'float32') for x, cdim, sz in
            zip(train_x, self.compressed_dim, self.kernel_size)
        ])
        if filter_init_method == 'zeros':
            pass
        elif filter_init_method == 'ones':
            for idx, f in enumerate(self.filter):
                self.filter[idx] = np.ones(f.shape, 'float32') / np.prod(
                    f.shape)
        elif filter_init_method == 'np_randn':
            rng = np.random.RandomState(0)
            for idx, f in enumerate(self.filter):
                self.filter[idx] = rng.normal(
                    size=f.shape, loc=0,
                    scale=1 / np.prod(f.shape)).astype('float32')
        elif filter_init_method == 'randn':
            for idx, f in enumerate(self.filter):
                with fluid.dygraph.guard():
                    self.filter[idx] = layers.gaussian_random(
                        f.shape, std=1 / np.prod(f.shape)).numpy()
        else:
            raise ValueError('Unknown "filter_init_method"')

        # Get parameters
        self.params.update_projection_matrix = getattr(
            self.params, 'update_projection_matrix',
            True) and self.params.use_projection_matrix
        optimizer = getattr(self.params, 'optimizer', 'GaussNewtonCG')

        # Setup factorized joint optimization
        if self.params.update_projection_matrix:
            self.joint_problem = FactorizedConvProblem(
                self.init_training_samples, init_y, self.filter_reg,
                self.fparams.attribute('projection_reg'), self.params,
                self.init_sample_weights, self.projection_activation,
                self.response_activation)

            # Variable containing both filter and projection matrix
            joint_var = self.filter.concat(self.projection_matrix)

            # Initialize optimizer
            analyze_convergence = getattr(self.params, 'analyze_convergence',
                                          False)
            if optimizer == 'GaussNewtonCG':
                self.joint_optimizer = GaussNewtonCG(
                    self.joint_problem,
                    joint_var,
                    plotting=(self.params.debug >= 3),
                    analyze=True,
                    fig_num=(12, 13, 14))
            elif optimizer == 'GradientDescentL2':
                self.joint_optimizer = GradientDescentL2(
                    self.joint_problem,
                    joint_var,
                    self.params.optimizer_step_length,
                    self.params.optimizer_momentum,
                    plotting=(self.params.debug >= 3),
                    debug=analyze_convergence,
                    fig_num=(12, 13))

            # Do joint optimization
            if isinstance(self.params.init_CG_iter, (list, tuple)):
                self.joint_optimizer.run(self.params.init_CG_iter)
            else:
                self.joint_optimizer.run(
                    self.params.init_CG_iter // self.params.init_GN_iter,
                    self.params.init_GN_iter)

            # Get back filter and optimizer
            len_x = len(self.joint_optimizer.x)
            self.filter = self.joint_optimizer.x[:len_x // 2]  # w2 in paper
            self.projection_matrix = self.joint_optimizer.x[len_x //
                                                            2:]  # w1 in paper

            if analyze_convergence:
                opt_name = 'CG' if getattr(self.params, 'CG_optimizer',
                                           True) else 'GD'
                for val_name, values in zip(['loss', 'gradient'], [
                        self.joint_optimizer.losses,
                        self.joint_optimizer.gradient_mags
                ]):
                    val_str = ' '.join(
                        ['{:.8e}'.format(v.item()) for v in values])
                    file_name = '{}_{}.txt'.format(opt_name, val_name)
                    with open(file_name, 'a') as f:
                        f.write(val_str + '\n')
                raise RuntimeError('Exiting')

        # Re-project samples with the new projection matrix
        compressed_samples = self.project_sample(self.init_training_samples,
                                                 self.projection_matrix)
        for train_samp, init_samp in zip(self.training_samples,
                                         compressed_samples):
            for idx in range(init_samp.shape[0]):
                train_samp[idx] = init_samp[idx]

        self.hinge_mask = None

        # Initialize optimizer
        self.conv_problem = ConvProblem(self.training_samples, self.y,
                                        self.filter_reg, self.sample_weights,
                                        self.response_activation)

        if optimizer == 'GaussNewtonCG':
            self.filter_optimizer = ConjugateGradient(
                self.conv_problem,
                self.filter,
                fletcher_reeves=self.params.fletcher_reeves,
                direction_forget_factor=self.params.direction_forget_factor,
                debug=(self.params.debug >= 3),
                fig_num=(12, 13))
        elif optimizer == 'GradientDescentL2':
            self.filter_optimizer = GradientDescentL2(
                self.conv_problem,
                self.filter,
                self.params.optimizer_step_length,
                self.params.optimizer_momentum,
                debug=(self.params.debug >= 3),
                fig_num=12)

        # Transfer losses from previous optimization
        if self.params.update_projection_matrix:
            self.filter_optimizer.residuals = self.joint_optimizer.residuals
            self.filter_optimizer.losses = self.joint_optimizer.losses

        if not self.params.update_projection_matrix:
            self.filter_optimizer.run(self.params.init_CG_iter)

        # Post optimization
        self.filter_optimizer.run(self.params.post_init_CG_iter)
        self.filter = self.filter_optimizer.x

        # Free memory
        del self.init_training_samples
        if self.params.use_projection_matrix:
            del self.joint_problem, self.joint_optimizer
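The joint optimization above concatenates the filter and the projection matrix into one variable and later splits it back at `len_x // 2` (the w2/w1 comments). A toy sketch of that bookkeeping with plain Python lists, assuming only that TensorList concatenation preserves order:

# toy stand-ins for the two parameter groups (one entry per feature block)
filter_vars = ['filter_block0', 'filter_block1']
projection_vars = ['proj_block0', 'proj_block1']

# concat -> optimize jointly -> split back in the same order
joint_var = filter_vars + projection_vars
len_x = len(joint_var)
new_filter = joint_var[:len_x // 2]      # w2 in the paper's notation
new_projection = joint_var[len_x // 2:]  # w1 in the paper's notation

assert new_filter == filter_vars and new_projection == projection_vars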
Example #6
    def point_network_decoder(p_vec, q_vec, decoder_size):
        random_attn = layers.gaussian_random(shape=[1, decoder_size])
        random_attn = layers.sequence_expand(x=random_attn, y=q_vec)
        random_attn = layers.fc(input=random_attn, size=decoder_size, act=None)
        U = layers.fc(input=q_vec,
                      size=decoder_size,
                      act=None) + random_attn
        U = layers.tanh(U)

        logits = layers.fc(input=U,
                           size=1,
                           act=None)
        scores = layers.sequence_softmax(input=logits)
        pooled_vec = layers.elementwise_mul(x=q_vec, y=scores, axis=0)
        pooled_vec = layers.sequence_pool(input=pooled_vec, pool_type='sum')

        init_state = layers.fc(input=pooled_vec,
                               size=decoder_size,
                               act=None)

        def custom_dynamic_rnn(p_vec, init_state, decoder_size):
            context = layers.fc(input=p_vec,
                                size=decoder_size,
                                act=None)

            drnn = layers.DynamicRNN()
            with drnn.block():
                H_s = drnn.step_input(p_vec)
                ctx = drnn.static_input(context)

                c_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev = drnn.memory(init=init_state, need_reorder=True)
                m_prev1 = layers.fc(input=m_prev, size=decoder_size, act=None)
                m_prev1 = layers.sequence_expand(x=m_prev1, y=ctx)

                Fk = ctx + m_prev1
                Fk = layers.fc(input=Fk, size=decoder_size, act='tanh')
                logits = layers.fc(input=Fk, size=1, act=None)

                scores = layers.sequence_softmax(input=logits)
                attn_ctx = layers.elementwise_mul(x=ctx, y=scores, axis=0)
                attn_ctx = layers.sequence_pool(input=attn_ctx, pool_type='sum')
                hidden_t, cell_t = lstm_step(
                    attn_ctx,
                    hidden_t_prev=m_prev1,
                    cell_t_prev=c_prev,
                    size=decoder_size)

                drnn.update_memory(ex_mem=m_prev, new_mem=hidden_t)
                drnn.update_memory(ex_mem=c_prev, new_mem=cell_t)

                drnn.output(scores)
            beta = drnn()
            return beta

        fw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size) 
        bw_outputs = custom_dynamic_rnn(p_vec, init_state, decoder_size)
       
        def sequence_slice(x, index):
            #offset = layers.fill_constant(shape=[1, args.batch_size], value=index, dtype='float32')
            #length = layers.fill_constant(shape=[1, args.batch_size], value=1, dtype='float32')
            #return layers.sequence_slice(x, offset, length)
            idx = layers.fill_constant(shape=[1], value=index, dtype='int32')
            idx.stop_gradient = True
            from paddle.fluid.layers.control_flow import lod_rank_table 
            from paddle.fluid.layers.control_flow import lod_tensor_to_array 
            from paddle.fluid.layers.control_flow import array_read 
            from paddle.fluid.layers.control_flow import array_to_lod_tensor 
            table = lod_rank_table(x, level=0)
            table.stop_gradient = True
            array = lod_tensor_to_array(x, table)
            slice_array = array_read(array=array, i=idx)
            return array_to_lod_tensor(slice_array, table)
        
        start_prob = layers.elementwise_mul(
            x=sequence_slice(fw_outputs, 0),
            y=sequence_slice(bw_outputs, 1),
            axis=0) / 2
        end_prob = layers.elementwise_mul(
            x=sequence_slice(fw_outputs, 1),
            y=sequence_slice(bw_outputs, 0),
            axis=0) / 2
        return start_prob, end_prob
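Both the query summary (`pooled_vec`) and the per-step attention inside the DynamicRNN use the same pattern: softmax the per-step logits, weight the sequence, then sum-pool. A small NumPy sketch of that attention pooling, detached from the LoD machinery (names are illustrative):

import numpy as np

def attention_pool(states, logits):
    """Softmax the per-step logits, then return the weighted sum of the states."""
    scores = np.exp(logits - logits.max())
    scores = scores / scores.sum()                   # (T,) attention weights
    return (states * scores[:, None]).sum(axis=0)    # (H,) pooled vector

states = np.random.randn(5, 8).astype('float32')  # T=5 steps, hidden size H=8
logits = np.random.randn(5).astype('float32')
print(attention_pool(states, logits).shape)  # (8,)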