def decoder_step(gru_unit,
                 cue_gru_unit,
                 step_in,
                 hidden,
                 input_size,
                 hidden_size,
                 memory,
                 memory_mask,
                 knowledge,
                 mask=None):
    """Run one decoder step that mixes a word GRU with a knowledge-cue GRU.

    The first slice of ``hidden`` along axis 0 is used as the attention query
    over ``memory``. The attended vector is concatenated with the step input
    (fed to ``gru_unit``) and with ``knowledge`` (fed to ``cue_gru_unit``).
    Both resulting hidden states go through a tanh projection and are blended
    by a sigmoid gate ``k`` as ``k * h_y + (1 - k) * h_cue``.

    Args:
        gru_unit: callable invoked as ``gru_unit(input, hidden, mask)`` and
            returning ``(output, last_hidden)``.
        cue_gru_unit: callable with the same calling convention, fed with the
            knowledge-based input.
        step_in: input of the current step; an axis is inserted at position 1
            before it is concatenated with the attention result.
        hidden: hidden state carried over from the previous step.
        input_size: not used inside this function.
        hidden_size: width used for the ``dec_fc1``/``dec_fc2``/``dec_fc3``
            projections.
        memory: attention memory passed to ``dot_attention``.
        memory_mask: mask passed to ``dot_attention``.
        knowledge: knowledge representation; reshaped to the attention
            output's shape when that shape has a dynamic (-1) first dimension.
        mask: optional step mask. When given, the hidden state only changes
            where the mask is non-zero. ``None`` disables masking.

    Returns:
        tuple: ``(real_out, new_hidden)`` where ``real_out`` is the attention
        result concatenated (axis 2) with the transposed new hidden state.
    """
    # Attention query: first slice of the hidden state along axis 0, with the
    # sliced axis removed and a new axis inserted at position 1.
    top_hidden = layers.slice(hidden, axes=[0], starts=[0], ends=[1])
    top_hidden = layers.squeeze(top_hidden, axes=[0])
    top_hidden = layers.unsqueeze(top_hidden, axes=[1])

    weight_memory, attn = dot_attention(top_hidden, memory, memory_mask)

    step_in = layers.unsqueeze(step_in, axes=[1])
    rnn_input_list = [step_in, weight_memory]
    if weight_memory.shape[0] == -1:
        # Dynamic batch dimension: align knowledge with the attention output.
        knowledge_1 = layers.reshape(knowledge, shape=weight_memory.shape)
    else:
        knowledge_1 = knowledge
    cue_input_list = [knowledge_1, weight_memory]
    output_list = [weight_memory]

    rnn_input = layers.concat(rnn_input_list, axis=2)

    rnn_input = layers.squeeze(rnn_input, axes=[1])
    rnn_output, rnn_last_hidden = gru_unit(rnn_input, hidden, mask)

    cue_input = layers.concat(cue_input_list, axis=2)
    cue_input = layers.squeeze(cue_input, axes=[1])
    cue_rnn_out, cue_rnn_last_hidden = cue_gru_unit(cue_input, hidden, mask)

    h_y = layers.tanh(
        fc(rnn_last_hidden, hidden_size, hidden_size, name="dec_fc1"))
    h_cue = layers.tanh(
        fc(cue_rnn_last_hidden, hidden_size, hidden_size, name="dec_fc2"))

    concate_y_cue = layers.concat([h_y, h_cue], axis=2)
    k = layers.sigmoid(fc(concate_y_cue, hidden_size * 2, 1, name='dec_fc3'))

    # Equivalent to k * h_y + (1 - k) * h_cue.
    new_hidden = h_y * k - h_cue * (k - 1.0)

    new_hidden_tmp = layers.transpose(new_hidden, perm=[1, 0, 2])
    output_list.append(new_hidden_tmp)

    real_out = layers.concat(output_list, axis=2)

    # Compare against None explicitly: taking the truth value of a tensor is
    # ambiguous (and can raise for multi-element tensors in imperative mode).
    if mask is not None:
        mask_tmp = layers.unsqueeze(mask, axes=[0])
        # Keep the previous hidden state wherever the mask is zero.
        new_hidden = layers.elementwise_mul((new_hidden - hidden),
                                            mask_tmp,
                                            axis=0)
        new_hidden += hidden

    return real_out, new_hidden
Пример #2
0
    def forward(self, audio, mel, audio_start, clip_kl=True):
        """Compute loss of Clarinet model.

        Args:
            audio (Variable): shape(B, T_audio), dtype float32, ground truth waveform.
            mel (Variable): shape(B, F, T_mel), dtype float32, condition(mel spectrogram here).
            audio_start (Variable): shape(B, ), dtype int64, audio starts positions.
            clip_kl (bool, optional): whether to clip the element-wise KL
                divergence into [-100, 10] before averaging. Defaults to True.

        Returns:
            Dict(str, Variable) with the following keys:
            "loss": total loss, ``kl + self.lmd * regularization + stft_loss``.
            "kl_divergence": mean KL divergence between the student's and the
                teacher's output distributions, with the first
                ``self.teacher.context_size`` time steps dropped.
            "regularization": mean squared error between the teacher's and the
                student's (unclipped) log scales over the same time steps.
            "stft_loss": mean squared error between the STFT magnitudes of the
                ground truth waveform and the synthesized waveform.
        """
        # Unpacking also enforces that audio is rank 2; only the clip length
        # is needed below.
        _, audio_length = audio.shape

        z = F.gaussian_random(audio.shape)
        condition = self.encoder(mel)  # (B, C, T)
        condition_slice = crop(condition, audio_start, audio_length)

        x, s_means, s_scales = self.student(z, condition_slice)  # all [0: T]
        s_means = s_means[:, 1:]  # (B, T-1), time steps [1: T]
        s_scales = s_scales[:, 1:]  # (B, T-1), time steps [1: T]
        s_clipped_scales = F.clip(s_scales, self.min_log_scale, 100.)

        # teacher outputs single gaussian
        y = self.teacher(x[:, :-1], condition_slice[:, :, 1:])
        _, t_means, t_scales = F.split(y, 3, -1)  # time steps [1: T]
        t_means = F.squeeze(t_means, [-1])  # (B, T-1), time steps [1: T]
        t_scales = F.squeeze(t_scales, [-1])  # (B, T-1), time steps [1: T]
        t_clipped_scales = F.clip(t_scales, self.min_log_scale, 100.)

        # The clipped values are log scales; exponentiate to get the scale.
        s_distribution = D.Normal(s_means, F.exp(s_clipped_scales))
        t_distribution = D.Normal(t_means, F.exp(t_clipped_scales))

        # The KL divergence is computed directly from the two distributions,
        # not estimated from samples.
        kl = s_distribution.kl_divergence(t_distribution)
        if clip_kl:
            kl = F.clip(kl, -100., 10.)
        # context size dropped
        kl = F.reduce_mean(kl[:, self.teacher.context_size:])
        # Regularization uses the unclipped log scales.
        regularization = F.mse_loss(t_scales[:, self.teacher.context_size:],
                                    s_scales[:, self.teacher.context_size:])

        # introduce information from real target
        spectrogram_frame_loss = F.mse_loss(self.stft.magnitude(audio),
                                            self.stft.magnitude(x))
        loss = kl + self.lmd * regularization + spectrogram_frame_loss
        loss_dict = {
            "loss": loss,
            "kl_divergence": kl,
            "regularization": regularization,
            "stft_loss": spectrogram_frame_loss
        }
        return loss_dict
Пример #3
0
    def forward(self, q, k, v, lengths, speaker_embed, start_index, 
                force_monotonic=False, prev_coeffs=None, window=None):
        """Compute attention contexts from position-augmented queries and keys.

        Args:
            q: decoder queries; ``q.shape`` is unpacked as (batch, T_dec, _)
                in the monotonic branch.
            k: encoder keys; ``k.shape[1]`` is used as the encoder length.
            v: encoder values.
            lengths: valid encoder lengths, used for the padding mask while
                training and for scaling the contexts.
            speaker_embed: speaker embedding, only read when ``self.has_bias``
                is set.
            start_index: start position passed to ``positional_encoding`` for
                the queries.
            force_monotonic: outside training, restrict attention to a window
                around the previous attention peak.
            prev_coeffs: previous attention weights; their argmax over the last
                axis is the window centre. ``None`` centres the window at 0.
            window: ``(backward_step, forward_step)`` pair; must not be None
                when the monotonic branch runs.

        Returns:
            tuple: ``(contexts, coefficients)``; the coefficients are returned
            after dropout has been applied.
        """
        # Position rates: all-ones based for a single speaker, derived from
        # the speaker embedding otherwise.
        if not self.has_bias:
            n_batch = q.shape[0]
            omega_q = F.ones((n_batch, ), dtype="float32")
            omega_k = F.ones((n_batch, ), dtype="float32") * self.omega_default
        else:
            omega_q = 2 * F.sigmoid(
                F.squeeze(self.q_pos_affine(speaker_embed), axes=[-1]))
            omega_k = 2 * self.omega_initial * F.sigmoid(F.squeeze(
                self.k_pos_affine(speaker_embed), axes=[-1]))

        # Add position encoding as an inductive bias.
        q += self.position_encoding_weight * positional_encoding(q, start_index, omega_q)
        k += self.position_encoding_weight * positional_encoding(k, 0, omega_k)

        q = self.q_affine(q)
        k = self.k_affine(k)
        v = self.v_affine(v)

        scores = F.matmul(q, k, transpose_y=True)
        scores /= np.sqrt(self.attention_dim)

        if self.training:
            # Push the scores of <pad> encoder positions down.
            pad_mask = F.sequence_mask(lengths, dtype="float32")
            scores += F.unsqueeze(F.scale(1. - pad_mask, -1000), [1])
        elif force_monotonic:
            assert window is not None
            backward_step, forward_step = window
            T_enc = k.shape[1]
            batch_size, T_dec, _ = q.shape

            # actually T_dec = 1 here
            if prev_coeffs is None:
                alpha = F.fill_constant((batch_size, T_dec), value=0, dtype="int64")
            else:
                alpha = F.argmax(prev_coeffs, axis=-1)

            # XOR of the two prefix masks keeps only the positions between
            # alpha - backward_step and alpha + forward_step.
            lower = F.sequence_mask(alpha - backward_step, maxlen=T_enc, dtype="bool")
            upper = F.sequence_mask(alpha + forward_step, maxlen=T_enc, dtype="bool")
            window_mask = F.cast(F.logical_xor(lower, upper), "float32")
            scores += F.scale(1. - window_mask, -1000)

        coefficients = F.softmax(scores, axis=-1)
        coefficients = F.dropout(coefficients, 1. - self.keep_prob,
                                 dropout_implementation='upscale_in_train')

        # Context vectors, scaled by sqrt(encoder length).
        contexts = F.matmul(coefficients, v)
        enc_lengths = F.cast(F.unsqueeze(lengths, axes=[1, 2]), "float32")
        contexts *= F.sqrt(enc_lengths)

        return self.out_affine(contexts), coefficients
Пример #4
0
 def forward(self, img, label, mask=None, return_loss=True):
     """Classify an image, returning losses (training) or scores (inference).

     Args:
         img: input passed to ``self.backbone``.
         label: ground-truth labels, forwarded to ``self.get_losses``.
         mask: optional mask, forwarded to ``self.get_losses``.
         return_loss: when True return the loss dict; otherwise return
             column 0 of the softmax output as a numpy array.
     """
     feats = self.backbone(img)
     pooled = self.avgpool(feats[-1])

     if not return_loss:
         logits = self.fc(L.squeeze(pooled, axes=[2, 3]))
         return L.softmax(logits).numpy()[:, 0]

     # Dropout is only applied on the loss path.
     pooled = L.dropout(pooled, dropout_prob=self.dropout, is_test=False)
     logits = self.fc(L.squeeze(pooled, axes=[2, 3]))
     return self.get_losses(feats, logits, mask, label)
Пример #5
0
    def forward(self, encoder_output):
        """Predict the duration of each character.

        Two identical stages (conv -> layer norm -> relu -> dropout) are
        followed by a relu-activated linear layer whose last axis is squeezed.

        Args:
            encoder_output (Variable): shape(B, T, C), dtype float32, the encoder output.

        Returns:
            out (Variable): the duration predictor output with the last axis
            squeezed away (presumably shape(B, T) - confirm against the
            output size of ``self.linear``).
        """
        out = encoder_output
        stages = ((self.conv1, self.layer_norm1),
                  (self.conv2, self.layer_norm2))
        for conv, norm in stages:
            # The conv runs on the (0, 2, 1)-transposed tensor, which is
            # transposed back before normalization.
            out = layers.transpose(out, [0, 2, 1])
            out = conv(out)
            out = layers.transpose(out, [0, 2, 1])
            out = layers.dropout(layers.relu(norm(out)),
                                 self.dropout,
                                 dropout_implementation='upscale_in_train')

        projected = layers.relu(self.linear(out))
        return layers.squeeze(projected, axes=[-1])
Пример #6
0
 def test_squeeze(self):
     """Build a squeeze op in a fresh program and check it yields an output."""
     prog = Program()
     with program_guard(prog):
         data = layers.data(name='x', shape=[1, 1, 4], dtype='float32')
         squeezed = layers.squeeze(input=data, axes=[2])
         self.assertIsNotNone(squeezed)
     # Dump the program description for manual inspection.
     print(str(prog))
Пример #7
0
def epoch_predict(env, args, model, loader):
    """Predict arcs, relations and (optionally) arc probabilities for one epoch.

    Args:
        env: environment object; ``env.REL.vocab`` maps relation ids back to
            relation labels.
        args: configuration; ``args.pad_index`` and ``args.prob`` are read.
        model: the parser; called as ``model(words, feats)`` after being put
            into eval mode.
        loader: callable returning an iterable of ``(words, feats)`` batches.

    Returns:
        tuple: ``(arcs, rels, probs)`` - one list per sentence. ``probs`` is
        empty unless ``args.prob`` is set; its values are rounded to 3 digits.
    """
    model.eval()

    arcs, rels, probs = [], [], []
    for words, feats in loader():
        # ignore the first token of each sentence
        tmp_words = layers.pad(words[:, 1:],
                               paddings=[0, 0, 1, 0],
                               pad_value=args.pad_index)
        mask = tmp_words != args.pad_index
        lens = nn.reduce_sum(mask, -1)
        s_arc, s_rel = model(words, feats)
        arc_preds, rel_preds = decode(args, s_arc, s_rel, mask)

        # Convert the sentence lengths to a Python list once per batch instead
        # of repeating the tensor -> numpy -> list conversion for every split.
        sections = lens.numpy().tolist()

        arcs.extend(
            layers.split(nn.masked_select(arc_preds, mask), sections))
        rels.extend(
            layers.split(nn.masked_select(rel_preds, mask), sections))
        if args.prob:
            # Probability assigned to each predicted arc.
            arc_probs = nn.index_sample(layers.softmax(s_arc, -1),
                                        layers.unsqueeze(arc_preds, -1))
            probs.extend(
                layers.split(
                    nn.masked_select(layers.squeeze(arc_probs, axes=[-1]),
                                     mask), sections))
    arcs = [seq.numpy().tolist() for seq in arcs]
    rels = [env.REL.vocab[seq.numpy().tolist()] for seq in rels]
    probs = [[round(p, 3) for p in seq.numpy().tolist()] for seq in probs]

    return arcs, rels, probs
Пример #8
0
 def forward(self, seq):
     """Apply ``self.conv2d`` to a sequence by adding a trailing dummy axis.

     The input is transposed with perm [0, 2, 1], given an extra last axis,
     convolved, then squeezed and transposed back.
     """
     x = layers.unsqueeze(layers.transpose(seq, [0, 2, 1]), -1)
     x = layers.squeeze(self.conv2d(x), [-1])
     return layers.transpose(x, [0, 2, 1])
Пример #9
0
    def create_loss_op(self, predict, label, epsilon=1e-7):
        """Build the scalar loss from softmax outputs and labels.

        Args:
            predict: model output tensor activated by softmax.
            label: a non-sparse tensor.
            epsilon: lower clip bound applied to ``1 - predict`` before the
                log is taken.

        Returns:
            loss: mean of the per-sample cost. When ``self.loss_type`` is
            "nl" and ``self.model_type`` is "train" the cost is
            ``-log(1 - p)`` at the labelled class; otherwise it is the
            ordinary cross entropy.
        """
        nl_training = self.loss_type == "nl" and self.model_type == "train"

        if not nl_training:
            # PL or evaluation
            cost = FL.cross_entropy(predict, label)
        else:
            label_one_hot = fluid.one_hot(label, depth=predict.shape[-1])
            label_one_hot = FL.squeeze(label_one_hot, axes=[-2])
            # log(1 - p), clipped away from log(0)
            complement = 1 - predict
            log_complement = FL.log(
                fluid.layers.clip(complement, min=epsilon, max=1.))
            per_class = -1 * log_complement * label_one_hot
            cost = FL.reduce_sum(per_class, dim=-1, keep_dim=True)

        return FL.mean(cost)
Пример #10
0
    def forward(self, *args, **kwargs):
        """Run the encoder and the token classifier, optionally with a loss.

        Args:
            labels (optional, `Variable` of shape [batch_size, seq_len]):
                ground truth label id for each token; a trailing axis is
                added when the labels are rank 2.
            loss_weights (optional, `Variable` of shape [batch_size, seq_len]):
                weights of loss for each token.
            ignore_index (optional, int, default -100):
                when label == `ignore_index`, this token will not contribute
                to loss.
            Remaining positional and keyword arguments are forwarded to the
            parent class' ``forward``.
        Returns:
            loss (`Variable` of shape []):
                mean cross entropy loss, or None if `labels` is not set.
            logits (`Variable`):
                output of the classifier for every token.
        """
        ignore_index = kwargs.pop('ignore_index', -100)
        labels = kwargs.pop('labels', None)
        loss_weights = kwargs.pop('loss_weights', None)

        # Only the per-token encoding is needed here.
        _, encoded = super(ErnieModelForTokenClassification, self).forward(*args, **kwargs)
        logits = self.classifier(self.dropout(encoded))

        if labels is None:
            return None, logits

        if len(labels.shape) == 2:
            labels = L.unsqueeze(labels, axes=[-1])
        token_loss = L.softmax_with_cross_entropy(logits, labels, ignore_index=ignore_index)
        if loss_weights is not None:
            token_loss = L.squeeze(token_loss, [-1]) * loss_weights
        return L.reduce_mean(token_loss), logits
Пример #11
0
    def get_metrics(self, inputs, outputs):
        """Compute classification loss, accuracy and binary confusion counts.

        Args:
            inputs: dict holding the ground truth under "label".
            outputs: dict holding the encoder output under "enc_out".

        Returns:
            dict with "loss", "cls_loss" (the same mean loss) and "cls_acc";
            when ``self.num_classes == 2`` it additionally holds "stat_tp",
            "stat_fp", "stat_tn" and "stat_fn".
        """
        pooled_out = self._get_pooled_output(outputs["enc_out"])
        cls_logits = self._get_classifier_output(pooled_out,
                                                 num_classes=self.num_classes,
                                                 name="cls")
        cls_loss, cls_softmax = layers.softmax_with_cross_entropy(
            logits=cls_logits, label=inputs["label"], return_softmax=True)

        cls_acc = layers.accuracy(cls_softmax, inputs["label"])
        mean_cls_loss = layers.mean(cls_loss)

        metrics = {
            "loss": mean_cls_loss,
            "cls_loss": mean_cls_loss,
            "cls_acc": cls_acc,
        }

        # statistics for recall & precision & f1
        if self.num_classes == 2:
            pred = layers.argmax(cls_softmax, axis=1)
            label = layers.squeeze(inputs["label"], axes=[1])
            # (metric key, predicted class, true class)
            confusion_cells = (
                ("stat_tp", 1, 1),
                ("stat_fp", 1, 0),
                ("stat_tn", 0, 0),
                ("stat_fn", 0, 1),
            )
            for key, pred_cls, true_cls in confusion_cells:
                hits = layers.logical_and(pred == pred_cls, label == true_cls)
                metrics[key] = layers.reduce_sum(hits.astype("float32"))
        return metrics
Пример #12
0
 def attention(self, hidden, encoder_output, encoder_output_proj,
               encoder_padding_mask):
     """Compute the attention context c_i (Bahdanau-style additive scoring).

     Args:
         hidden: decoder state for the current step.
         encoder_output: encoder outputs that are weighted and summed.
         encoder_output_proj: projected encoder outputs used for scoring.
         encoder_padding_mask: optional additive mask applied to the scores
             before the softmax; skipped when None.

     Returns:
         The context: encoder outputs weighted by the attention weights and
         summed over dim 1.
     """
     # Project the decoder state and insert an axis at position 1.
     projected_state = layers.fc(hidden, size=self.hidden_size, bias_attr=False)
     decoder_state_proj = layers.unsqueeze(projected_state, [1])

     # Combine the single decoder vector with all encoder outputs.
     expanded_state = layers.expand(
         decoder_state_proj, [1, layers.shape(decoder_state_proj)[1], 1])
     mixed_state = fluid.layers.elementwise_add(encoder_output_proj,
                                                expanded_state)

     # A fully-connected layer of size 1 turns the combined state into scores.
     raw_scores = layers.fc(input=mixed_state,
                            size=1,
                            num_flatten_dims=2,
                            bias_attr=False)
     attn_scores = layers.squeeze(raw_scores, [2])
     if encoder_padding_mask is not None:
         attn_scores = layers.elementwise_add(attn_scores,
                                              encoder_padding_mask)

     # Softmax converts the scores into attention weights.
     attn_scores = layers.softmax(attn_scores)

     # The weighted sum of the encoder outputs is the context vector.
     weighted = layers.elementwise_mul(encoder_output, attn_scores, axis=0)
     return layers.reduce_sum(weighted, dim=1)
Пример #13
0
    def _get_pooled_output(self, enc_out, idx=None, name="pooled"):
        """Pool one embedding per sample and pass it through a tanh FC layer.

        Args:
            enc_out: the output embeddings of Transformer, shape is [batch_size, max_seq_len, hidden_size]
            idx (optional): the selected indices in pooling operator, shape is [batch_size, 1] or [batch_size, 2].
                When None, position 0 along axis 1 is used.
            name: a string, the name prefix of the pooling layer's parameters.

        Returns:
            pooled_out: the pooled output embedding, shape is [batch_size, hidden_size].

        Raises:
            ValueError: if ``idx`` is not rank 2 with a second dimension of 1 or 2.
        """
        if idx is None:
            feat = enc_out[:, 0]
        else:
            # Width of the index tensor decides which gather variant applies.
            idx_width = idx.shape[1] if len(idx.shape) == 2 else None
            if idx_width == 1:
                enc_out = layers.squeeze(enc_out, [1])
                feat = layers.gather(input=enc_out, index=idx)
            elif idx_width == 2:
                feat = layers.gather_nd(input=enc_out, index=idx)
            else:
                raise ValueError(f"Invalid indices shape {idx.shape} is used")

        weight_attr = fluid.ParamAttr(name=f"{name}_fc.w_0",
                                      initializer=self.param_initializer)
        return layers.fc(input=feat,
                         size=self.hidden_size,
                         act="tanh",
                         param_attr=weight_attr,
                         bias_attr=f"{name}_fc.b_0")
        def decoder_step(currrent_in, pre_feed, pre_hidden_array,
                         pre_cell_array, enc_memory):
            """Run one step of the stacked decoder followed by attention.

            ``self``, ``dec_unit_list``, ``src_mask``, ``memory_weight`` and
            ``attention_weight`` are taken from the enclosing scope.

            Args:
                currrent_in: input of the current step.
                pre_feed: previous feed, concatenated with the input on axis 1.
                pre_hidden_array: previous hidden state of every layer.
                pre_cell_array: previous cell state of every layer.
                enc_memory: encoder memory attended over.

            Returns:
                tuple: (attention output, new hidden states, new cell states).
            """
            layer_input = layers.concat([currrent_in, pre_feed], 1)

            new_hidden_array, new_cell_array = [], []
            for layer_idx in range(self.num_layers):
                prev_hidden = pre_hidden_array[layer_idx]
                prev_cell = pre_cell_array[layer_idx]
                unit = dec_unit_list[layer_idx]

                new_hidden, new_cell = unit(layer_input, prev_hidden,
                                            prev_cell)
                new_hidden_array.append(new_hidden)
                new_cell_array.append(new_cell)

                # Each layer's hidden state feeds the next layer.
                layer_input = new_hidden

            # NOTE(review): memory_mask is computed but never passed to
            # dot_attention below - confirm whether the mask should be used.
            memory_mask = src_mask - 1.0
            projected_memory = layers.matmul(enc_memory, memory_weight)
            query = layers.unsqueeze(layer_input, [1])
            attended, _ = dot_attention(query, projected_memory)
            attended = layers.squeeze(attended, [1])
            merged = layers.concat([attended, layer_input], 1)
            merged = layers.matmul(merged, attention_weight)

            return merged, new_hidden_array, new_cell_array
Пример #15
0
 def forward(self, cue, label, return_loss=True):
     """Classify a cue map, returning a loss dict or inference scores.

     Args:
         cue: input passed through conv1, norm1, maxpool, conv2, norm2 and
             avgpool in that order.
         label: ground-truth labels for the cross entropy loss.
         return_loss: when True return ``dict(loss_cls, loss)``; otherwise
             return column 0 of the softmax output as a numpy array.
     """
     feat = cue
     pipeline = (self.conv1, self.norm1, self.maxpool, self.conv2,
                 self.norm2, self.avgpool)
     for layer in pipeline:
         feat = layer(feat)

     if not return_loss:
         scores = self.fc(L.squeeze(feat, axes=[2, 3]))
         return L.softmax(scores).numpy()[:, 0]

     # Dropout is only applied on the loss path.
     dropped = L.dropout(feat, dropout_prob=0.5, is_test=False)
     scores = self.fc(L.squeeze(dropped, axes=[2, 3]))
     loss_cls = L.mean(L.cross_entropy(scores, label))
     return dict(loss_cls=loss_cls, loss=loss_cls)
Пример #16
0
def encoder_1(x_emb,
              vocab_size,
              emb_size,
              init_hidden=None,
              init_cell=None,
              para_name='',
              args=None):
    """Run a two-layer ``lstmp_encoder`` stack and return the last layer's output.

    ``hidden_size`` and ``test_mode`` are not parameters; they are read from
    the enclosing scope.

    Args:
        x_emb: input embeddings fed to the encoder layers.
        vocab_size: not used inside this function.
        emb_size: forwarded to ``lstmp_encoder``.
        init_hidden: optional initial hidden states; slice ``i`` along axis 0
            is used for layer ``i``.
        init_cell: optional initial cell states, sliced the same way.
        para_name: prefix for the per-layer parameter names
            (``<para_name>layer1``, ``<para_name>layer2``).
        args: forwarded to ``lstmp_encoder``.

    Returns:
        tuple: ``(last_layer_output, rnn_outs_ori)`` where ``rnn_outs_ori`` is
        the list of every layer's output before the residual addition.
    """
    rnn_input = x_emb
    rnn_outs = []
    rnn_outs_ori = []
    num_layers = 2

    # Compare against None explicitly: taking the truth value of a tensor is
    # ambiguous (and can raise for multi-element tensors in imperative mode).
    has_init_state = init_hidden is not None and init_cell is not None

    for i in range(num_layers):
        if has_init_state:
            h0 = layers.squeeze(layers.slice(init_hidden,
                                             axes=[0],
                                             starts=[i],
                                             ends=[i + 1]),
                                axes=[0])
            c0 = layers.squeeze(layers.slice(init_cell,
                                             axes=[0],
                                             starts=[i],
                                             ends=[i + 1]),
                                axes=[0])
        else:
            h0 = c0 = None
        rnn_out, _, _ = lstmp_encoder(
            rnn_input, hidden_size, h0, c0,
            para_name + 'layer{}'.format(i + 1), emb_size, test_mode, args)
        rnn_out_ori = rnn_out
        if i > 0:
            # NOTE(review): rnn_input is never reassigned, so every layer is
            # fed x_emb and the residual adds x_emb - confirm this is intended.
            rnn_out = rnn_out + rnn_input
        # For layer 0 rnn_out and rnn_out_ori are the same object, so this
        # flag applies to both.
        rnn_out.stop_gradient = True
        rnn_outs.append(rnn_out)
        rnn_outs_ori.append(rnn_out_ori)
    return rnn_outs[-1], rnn_outs_ori
Пример #17
0
 def get_losses(self, out, cls_out, mask, gt_labels):
     """Combine the classification loss with per-stage triplet losses.

     Args:
         out: sequence of feature maps; all but the last contribute a
             triplet loss after average pooling.
         cls_out: classifier output used for the cross entropy loss.
         mask: not used inside this method.
         gt_labels: ground-truth labels.

     Returns:
         dict with ``loss_cls``, ``loss_tir`` and their sum ``loss``. The
         weights come from ``self.train_cfg['w_cls']`` / ``['w_tri']``.
     """
     cls_term = L.mean(L.cross_entropy(cls_out, gt_labels))
     loss_cls = cls_term * self.train_cfg['w_cls']

     loss_tir = 0
     for stage_feat in out[:-1]:
         pooled = L.squeeze(self.avgpool(stage_feat), axes=[2, 3])
         loss_tir += self.triple_loss(pooled,
                                      gt_labels) * self.train_cfg['w_tri']

     return dict(loss_cls=loss_cls,
                 loss_tir=loss_tir,
                 loss=loss_cls + loss_tir)
def rnn_decoder(gru_unit,
                cue_gru_unit,
                input,
                input_size,
                hidden_size,
                num_layers,
                memory,
                memory_mask,
                knowledge,
                output_size,
                init_hidden=None,
                mask=None,
                dropout=0.0,
                batch_first=True,
                name="decoder"):
    """Unroll ``decoder_step`` over a sequence and project to a softmax.

    Args:
        gru_unit: GRU unit forwarded to ``decoder_step``.
        cue_gru_unit: knowledge-cue GRU unit forwarded to ``decoder_step``.
        input: token ids, embedded with ``get_embedding``.
        input_size: forwarded to ``get_embedding`` and ``decoder_step``.
        hidden_size: hidden width; the output projection expects
            ``2 * hidden_size`` input features.
        num_layers: not used inside this function.
        memory: attention memory forwarded to ``decoder_step``.
        memory_mask: attention mask forwarded to ``decoder_step``.
        knowledge: knowledge representation forwarded to ``decoder_step``.
        output_size: size of the final projection (and of the embedding call).
        init_hidden: initial hidden state of the recurrent memory.
        mask: optional sequence mask. When None, no step masking and no
            output masking is applied.
        dropout: dropout probability applied before the output projection.
        batch_first: when True the embedded input (and the mask) are
            transposed with their first two axes swapped before the recurrence.
        name: not used inside this function.

    Returns:
        Softmax over the projected decoder outputs.
    """
    # Compare against None explicitly: taking the truth value of a tensor is
    # ambiguous (and can raise for multi-element tensors in imperative mode).
    has_mask = mask is not None

    input_emb = get_embedding(input, input_size, output_size)
    if batch_first:
        input_emb = layers.transpose(input_emb, perm=[1, 0, 2])
        if has_mask:
            trans_mask = layers.transpose(mask, perm=[1, 0])

    rnn = PaddingRNN()
    with rnn.step():
        step_in = rnn.step_input(input_emb)
        step_mask = None

        if has_mask:
            # NOTE(review): trans_mask is only defined when batch_first is
            # True; a mask combined with batch_first=False still fails here,
            # as in the original - the intended layout needs confirming.
            step_mask = rnn.step_input(trans_mask)

        pre_hidden = rnn.memory(init=init_hidden)
        real_out, last_hidden = \
            decoder_step(gru_unit, cue_gru_unit, step_in, pre_hidden, input_size,
                         hidden_size, memory, memory_mask, knowledge, mask=step_mask)

        rnn.update_memory(pre_hidden, last_hidden)

        step_in = layers.squeeze(real_out, axes=[1])
        rnn.step_output(step_in)

    rnnout = rnn()
    rnnout = layers.transpose(rnnout, perm=[1, 0, 2])
    # mask defaults to None, so the output masking must be conditional;
    # previously this multiplication ran unconditionally.
    if has_mask:
        rnnout = layers.elementwise_mul(rnnout, mask, axis=0)

    output_in_size = hidden_size + hidden_size
    rnnout = layers.dropout(rnnout, dropout_prob=dropout)
    rnnout = fc(rnnout, output_in_size, hidden_size, name='dec_out_fc1')
    rnnout = fc(rnnout, hidden_size, output_size, name='dec_out_fc2')

    softmax_out = layers.softmax(rnnout)

    return softmax_out
Пример #19
0
    def _calc_bow_logits(self, enc_out, bow_idx):
        """Compute the logits of the BoW task.

        The output projection may share its weight with the token embeddings
        (``self.weight_sharing``).

        Args:
            enc_out: the output embeddings of Transformer, shape is [batch_size, max_seq_len, hidden_dim]
            bow_idx: the indices of prediction tokens, shape is [num_predictions, 1] or [num_predictions, 2].

        Returns:
            logits: the logits of prediction task, shape is [num_predictions, vocab_size].

        Raises:
            ValueError: if ``bow_idx`` is not rank 2 with a second dimension of 1 or 2.
        """
        # Width of the index tensor decides which gather variant applies.
        idx_width = bow_idx.shape[1] if len(bow_idx.shape) == 2 else None
        if idx_width == 1:
            enc_out = layers.squeeze(enc_out, [1])
            bow_feat = layers.gather(input=enc_out, index=bow_idx, overwrite=False)
        elif idx_width == 2:
            bow_feat = layers.gather_nd(input=enc_out, index=bow_idx)
        else:
            raise ValueError(f"Invalid indices shape {bow_idx.shape} is used")

        trans_weight_attr = fluid.ParamAttr(
            name="bow_trans_fc.w_0", initializer=self.param_initializer)
        bow_trans_feat = layers.fc(input=bow_feat,
                                   size=self.emb_size,
                                   act=self.hidden_act,
                                   param_attr=trans_weight_attr,
                                   bias_attr="bow_trans_fc.b_0")
        bow_trans_feat = pre_process_layer(
            bow_trans_feat, self.post_cls_cmd, name="bow_trans")

        if not self.weight_sharing:
            # Dedicated output projection; the bias is optional.
            out_bias_attr = "bow_out_fc.b_0" if self.cls_bias else False
            out_weight_attr = fluid.ParamAttr(
                name="bow_out_fc.w_0", initializer=self.param_initializer)
            return layers.fc(input=bow_trans_feat,
                             size=self.vocab_size,
                             param_attr=out_weight_attr,
                             bias_attr=out_bias_attr)

        # Shared weights: multiply by the transposed token embedding table.
        token_emb = fluid.default_main_program().global_block().var(
            self.token_emb_name)
        bow_logits = layers.matmul(x=bow_trans_feat,
                                   y=token_emb,
                                   transpose_y=True)
        if self.cls_bias:
            bow_logits += layers.create_parameter(
                shape=[self.vocab_size],
                dtype=self.dtype,
                attr=fluid.ParamAttr(name="bow_out_fc.b_0"),
                is_bias=True)
        return bow_logits
Пример #20
0
def encoder_wrapper(x_emb,
                    vocab_size,
                    emb_size,
                    init_hidden=None,
                    init_cell=None,
                    para_name='',
                    args=None):
    """Run a two-layer ``lstmp_encoder`` stack and return every layer's output.

    ``hidden_size`` is not a parameter; it is read from the enclosing scope.

    Args:
        x_emb: input embeddings fed to the encoder layers.
        vocab_size: not used inside this function.
        emb_size: forwarded to ``lstmp_encoder``.
        init_hidden: optional initial hidden states; slice ``i`` along axis 0
            is used for layer ``i``.
        init_cell: optional initial cell states, sliced the same way.
        para_name: prefix for the per-layer parameter names
            (``<para_name>layer1``, ``<para_name>layer2``).
        args: forwarded to ``lstmp_encoder``.

    Returns:
        tuple: ``(rnn_outs, rnn_outs_ori)`` - the per-layer outputs after and
        before the residual addition.
    """
    rnn_input = x_emb
    rnn_outs = []
    rnn_outs_ori = []
    num_layers = 2

    # Compare against None explicitly: taking the truth value of a tensor is
    # ambiguous (and can raise for multi-element tensors in imperative mode).
    has_init_state = init_hidden is not None and init_cell is not None

    for i in range(num_layers):
        if has_init_state:
            h0 = layers.squeeze(layers.slice(init_hidden,
                                             axes=[0],
                                             starts=[i],
                                             ends=[i + 1]),
                                axes=[0])
            c0 = layers.squeeze(layers.slice(init_cell,
                                             axes=[0],
                                             starts=[i],
                                             ends=[i + 1]),
                                axes=[0])
        else:
            h0 = c0 = None
        rnn_out, _, _ = lstmp_encoder(
            rnn_input, hidden_size, h0, c0,
            para_name + 'layer{}'.format(i + 1), emb_size, args)
        rnn_out_ori = rnn_out
        if i > 0:
            # NOTE(review): rnn_input is never reassigned, so every layer is
            # fed x_emb and the residual adds x_emb - confirm this is intended.
            rnn_out = rnn_out + rnn_input
        # For layer 0 rnn_out and rnn_out_ori are the same object, so this
        # flag applies to both.
        rnn_out.stop_gradient = True
        rnn_outs.append(rnn_out)
        rnn_outs_ori.append(rnn_out_ori)
    return rnn_outs, rnn_outs_ori
Пример #21
0
    def forward(self, x):
        """Run the parent transposed convolution on a 1D input.

        A dummy axis is inserted at position 2 before the parent's forward
        and removed again afterwards.

        Args:
            x (Variable): shape(B, C_in, T_in), dtype float32, input of Conv1DTranspose.

        Returns:
            Variable: shape(B, C_out, T_out), dtype float32, output of Conv1DTranspose.
        """
        expanded = F.unsqueeze(x, [2])
        # maybe risky here
        convolved = super(Conv1DTranspose, self).forward(expanded)
        return F.squeeze(convolved, [2])
Пример #22
0
    def expand(self, batch, predicted, alpha):
        """Repeat every time step of ``batch`` according to its fertility.

        Args:
            batch: input whose axis 0 is squeezed away; ``batch.shape[1]`` is
                the number of time steps.
            predicted: fertilities; row 0 of its numpy value gives the repeat
                count per time step. Steps with a count of 0 are skipped.
            alpha: not used inside this method.

        Returns:
            The repeated rows concatenated along axis 0.
        """
        time_steps = batch.shape[1]
        fertilities = predicted.numpy()
        batch = layers.squeeze(batch, [0])

        repeated = [
            layers.expand(batch[step:step + 1, :],
                          [int(fertilities[0, step]), 1])
            for step in range(time_steps)
            if fertilities[0, step] != 0
        ]
        return layers.concat(repeated, axis=0)
Пример #23
0
def siamLSTM(tok_ids1, tok_ids2, len1, len2, conf):
    """Score a pair of token sequences with a weight-sharing LSTM encoder.

    Both sequences use the same embedding table ('embedding') and the same
    LSTM parameters ('lstm_w' / 'lstm_b'). The second output of each
    ``basic_lstm`` call is squeezed on axis 0; the element-wise product of
    the two results is fed to a 2-way FC layer.

    Args:
        tok_ids1: token ids of the first sequence.
        tok_ids2: token ids of the second sequence.
        len1: sequence lengths of the first input.
        len2: sequence lengths of the second input.
        conf: dict providing 'vocab_size' and 'hidden_size'.

    Returns:
        The output of the final FC layer of size 2.
    """
    emb_attr = fluid.ParamAttr('embedding',
                               initializer=fluid.initializer.UniformInitializer(
                                   -0.1, 0.1))

    def embed(tok_ids):
        # Same ParamAttr for both inputs, so the embedding table is shared.
        return layers.embedding(tok_ids,
                                size=[conf['vocab_size'], conf['hidden_size']],
                                dtype='float32',
                                is_sparse=False,
                                param_attr=emb_attr)

    emb1 = embed(tok_ids1)
    emb2 = embed(tok_ids2)

    lstm_w = fluid.ParamAttr('lstm_w')
    lstm_b = fluid.ParamAttr('lstm_b')

    def encode(emb, seq_len):
        # Only the second return value of basic_lstm is used.
        _, second_out, _ = fluid.contrib.layers.basic_lstm(
            emb,
            None,
            None,
            conf['hidden_size'],
            sequence_length=seq_len,
            param_attr=lstm_w,
            bias_attr=lstm_b)
        return second_out

    enc_out1 = encode(emb1, len1)
    enc_out2 = encode(emb2, len2)

    enc_out1 = layers.squeeze(enc_out1, [0])
    enc_out2 = layers.squeeze(enc_out2, [0])

    return layers.fc(enc_out1 * enc_out2, 2)
Пример #24
0
    def forward(self):
        """Build the skipgram model and store its loss in ``self.loss``.

        Source nodes are looked up in the 'content' table; positive and
        negative targets share the 'weight' table. The loss is the mean of the
        positive and negative log-sigmoid terms, divided by ``self.neg_num``
        and by 2.
        """
        embed_dim = self.config['embed_dim']
        initrange = 1.0 / embed_dim
        embed_init = fluid.initializer.UniformInitializer(low=-initrange,
                                                          high=initrange)
        weight_init = fluid.initializer.TruncatedNormal(
            scale=1.0 / math.sqrt(embed_dim))

        def lookup(ids, table_name, initializer):
            # One embedding lookup into the named parameter table.
            return fl.embedding(
                input=ids,
                size=[self.num_nodes, self.config['embed_dim']],
                param_attr=fluid.ParamAttr(name=table_name,
                                           initializer=initializer))

        embed_src = lookup(self.train_inputs, 'content', embed_init)
        weight_pos = lookup(self.train_labels, 'weight', weight_init)
        weight_negs = lookup(self.train_negs, 'weight', weight_init)

        # Positive term, weighted by the number of negatives.
        pos_logits = fl.matmul(embed_src, weight_pos,
                               transpose_y=True)  # [batch_size, 1, 1]
        pos_score = fl.clip(fl.squeeze(pos_logits, axes=[1]), min=-10, max=10)
        pos_score = -self.neg_num * fl.logsigmoid(pos_score)

        # Negative term, summed over the negatives.
        neg_logits = fl.matmul(embed_src, weight_negs,
                               transpose_y=True)  # [batch_size, 1, neg_num]
        neg_score = fl.clip(fl.squeeze(neg_logits, axes=[1]), min=-10, max=10)
        neg_score = -1.0 * fl.logsigmoid(-1.0 * neg_score)
        neg_score = fl.reduce_sum(neg_score, dim=1, keep_dim=True)

        self.loss = fl.reduce_mean(pos_score + neg_score) / self.neg_num / 2
Пример #25
0
def sag_pool(gw, feature, ratio, graph_id, dataset, name, activation=L.tanh):
    """Self-attention graph pooling (SAGPool).

    Based on the paper "Self-Attention Graph Pooling"
    (https://arxiv.org/pdf/1904.08082.pdf).  A one-channel graph convolution
    scores every node, ``topk_pool`` picks the nodes to keep, and the features
    of the remaining nodes are zeroed out.  Kept features are additionally
    scaled by ``activation(score)``.

    Args:
        gw: Graph wrapper object; ``gw.node_feat["norm"]`` is passed to the
            scoring convolution.

        feature: A tensor with shape (num_nodes, feature_size).

        ratio: The pooling ratio of nodes we want to select.

        graph_id: The graphs that the nodes belong to.

        dataset: Dataset name. "FRANKENSTEIN" scores with ``gcn``; any other
            value scores with ``norm_gcn``.

        name: The name of the SAGPool layer (forwarded to the scoring
            convolution).

        activation: The activation function applied to the node scores.

    Return:
        new_feature: A tensor with shape (num_nodes, feature_size) in which
                     the features of unselected nodes are masked by zero.

        ratio_length: The selected node numbers of each graph.

    """
    scorer = gcn if dataset == "FRANKENSTEIN" else norm_gcn

    node_score = scorer(
        gw=gw,
        feature=feature,
        hidden_size=1,
        activation=None,
        norm=gw.node_feat["norm"],
        name=name)
    # hidden_size=1 gives one score per node; squeeze away the unit dims.
    node_score = L.squeeze(node_score, axes=[])
    perm, ratio_length = topk_pool(gw, node_score, graph_id, ratio)

    # Float mask: zero everywhere, one at the indices listed in `perm`.
    keep_mask = L.cast(L.zeros_like(node_score), dtype="float32")
    ones_at_perm = L.cast(L.ones_like(perm), dtype="float32")
    keep_mask = L.scatter(keep_mask, perm, ones_at_perm)

    kept_feature = L.elementwise_mul(feature, keep_mask, axis=0)
    gate = activation(node_score)
    return L.elementwise_mul(kept_feature, gate, axis=0), ratio_length
Пример #26
0
    def create_rnn_op(self):
        """Build a StaticRNN whose step runs dot-product attention.

        Four float32 data variables are declared (``x``, ``emb``, ``w1``,
        ``w2``), all with gradients enabled.  ``emb @ w1`` is computed once
        outside the loop and used as the attention memory; inside each step
        the step input is concatenated with the previous state, projected
        through ``w2`` and then replaced by its attention read-out.

        Returns:
            The output of calling the StaticRNN (its collected step outputs).
        """

        def dot_attention(query, memory):
            # Softmax over query/memory dot products, then a weighted sum.
            scores = layers.matmul(query, memory, transpose_y=True)
            probs = layers.softmax(scores)
            read_out = layers.matmul(probs, memory)
            return read_out, probs

        def trainable_input(var_name, var_shape):
            # Declare a fixed-shape float32 input that receives gradients.
            var = layers.data(name=var_name,
                              shape=var_shape,
                              dtype='float32',
                              append_batch_size=False)
            var.stop_gradient = False
            return var

        x = trainable_input(
            'x', [self.sent_len, self.batch_size, self.input_dim])
        emb = trainable_input(
            'emb', [self.sent_len, self.batch_size, self.input_dim])
        w1 = trainable_input('w1', [self.input_dim, self.input_dim])
        w2 = trainable_input('w2', [self.input_dim * 2, self.input_dim])

        rnn = layers.StaticRNN()

        # Attention memory shared by every step.
        y = layers.matmul(emb, w1)
        with rnn.step():
            prev_state = rnn.memory(shape=(self.sent_len, self.input_dim),
                                    batch_ref=x,
                                    init_value=0.0)
            cur_input = rnn.step_input(x)
            state = layers.matmul(
                layers.concat([cur_input, prev_state], 1), w2)
            # Add a length-1 axis for the attention call, then drop it again.
            state = layers.unsqueeze(state, [1])
            state, _ = dot_attention(state, y)
            state = layers.squeeze(state, [1])

            rnn.update_memory(prev_state, state)
            rnn.step_output(state)

        return rnn()
Пример #27
0
    def forward(self, x):
        """Upsample the condition through the stacked transposed convolutions.

        Args:
            x (Variable): shape(B, F, T), dtype float32, the condition (mel spectrogram here; F is the number of frequency bands). Inside the Conv2DTransposes the frequency dimension is treated as the `height` dimension rather than `in_channels`.

        Returns:
            Variable: shape(B, F, T * upscale_factor), dtype float32, the upsampled condition.
        """
        # Insert a singleton axis at position 1 for the conv stack.
        out = F.unsqueeze(x, axes=[1])
        for conv in self.upsample_convs:
            out = conv(out)
            out = F.leaky_relu(out, alpha=.4)
        # Remove the singleton axis again before returning.
        return F.squeeze(out, [1])
Пример #28
0
 def forward(self, hidden, encoder_output, encoder_padding_mask):
     """Attend over the projected encoder output with ``hidden`` as query.

     The encoder output is passed through ``self.input_proj``; the raw
     ``hidden`` is used as the query (it is not projected).  When
     ``encoder_padding_mask`` is not None it is added to the scores before
     the softmax.  The attention read-out is concatenated with ``hidden``
     along axis 1 and passed through ``self.output_proj``.

     Returns:
         The output of ``self.output_proj``.
     """
     keys = self.input_proj(encoder_output)
     # Give the query a length-1 axis so it can be matmul'ed with the keys.
     query = layers.unsqueeze(hidden, [1])
     scores = layers.matmul(query, keys, transpose_y=True)
     if encoder_padding_mask is not None:
         scores = layers.elementwise_add(scores, encoder_padding_mask)
     weights = layers.softmax(scores)
     context = layers.matmul(weights, keys)
     context = layers.squeeze(context, [1])
     merged = layers.concat([context, hidden], 1)
     return self.output_proj(merged)
Пример #29
0
    def pop(cls, stack_data, mask=True, in_place=True):
        """Pop the top element from every stack in ``stack_data``.

        Args:
            stack_data (StackData): (data, pos) with shape ([batch_size, stack_len], [batch_size, 1])
            mask (bool): Whether to mask (multiply by zero) the value returned
                         for stacks that were empty. Defaults to True.
            in_place (bool): Whether to update ``stack_data`` itself instead of
                             building a new StackData. Defaults to True.

        Returns: (Variable1, Variable2, StackData)
            Variable1: The popped values.
                       dtype=stack_data.data.dtype
                       shape=[-1]
            Variable2: Whether the value at each position is valid. Stacks
                       that were already empty on entry yield False here.
                       dtype=bool
                       shape=[-1]
            StackData: ``stack_data`` itself when ``in_place`` is true,
                       otherwise a new StackData built from the updated data
                       and position.
        Raises: NULL
        """
        stack_buf = stack_data.data
        stack_top = stack_data.pos

        # Only non-empty stacks may pop; the position moves back by one for
        # those and stays put for empty ones.
        can_pop = layers.logical_not(cls.empty(stack_data))
        step = layers.cast(can_pop, dtype=stack_top.dtype)
        popped_top = layers.elementwise_sub(stack_top, step)

        # shape = [batch_size]
        popped = nn_utils.batch_gather(stack_buf, popped_top)
        if mask:
            # Mask the value returned for empty stacks.
            # shape = [batch_size, 1]; cast only when the dtypes differ.
            if stack_buf.dtype != stack_top.dtype:
                gate = layers.cast(step, dtype=stack_buf.dtype)
            else:
                gate = step
            gate = layers.squeeze(gate, [1])
            popped = layers.elementwise_mul(popped, gate)

        # After popping, reset the vacated slot to 0.
        cleared = nn_utils.batch_scatter(stack_buf,
                                         popped_top,
                                         layers.zeros_like(popped),
                                         overwrite=True,
                                         in_place=in_place)

        if not in_place:
            return popped, can_pop, StackData(cleared, popped_top)

        # In-place mode: write the new position back into the caller's stack.
        layers.assign(popped_top, stack_top)
        return popped, can_pop, stack_data
Пример #30
0
def decode(args, s_arc, s_rel, mask):
    """Decode arc and relation predictions from their score tensors.

    Arcs are first taken as the argmax over the last axis of ``s_arc``.  When
    ``args.tree`` is set, every sequence for which ``utils.istree`` rejects
    the greedy result is re-decoded with ``utils.eisner``.  Relations are the
    argmax of ``s_rel`` gathered at the predicted arcs.

    Returns:
        (arc_preds, rel_preds)
    """
    mask_np = mask.numpy()
    seq_lens = np.sum(mask_np, -1)

    # Greedy head choice per position.
    heads = layers.argmax(s_arc, -1).numpy()
    # Flag the sequences whose leading (length + 1) heads fail the tree check.
    not_tree = [
        not utils.istree(row[:length + 1])
        for length, row in zip(seq_lens, heads)
    ]
    if args.tree and any(not_tree):
        heads[not_tree] = utils.eisner(s_arc.numpy()[not_tree],
                                       mask_np[not_tree])
    arc_preds = dygraph.to_variable(heads, zero_copy=False)

    # Pick, for each position, the relation at its predicted arc.
    best_rel = layers.argmax(s_rel, axis=-1)
    gather_index = layers.unsqueeze(arc_preds, -1)
    rel_preds = nn.index_sample(best_rel, gather_index)
    rel_preds = layers.squeeze(rel_preds, axes=[-1])
    return arc_preds, rel_preds