Example #1
    def sample_from_mog(self, y):
        """Sample from the output distribution where the output distribution is a mixture of Gaussians.
        Args:
            y (Variable): shape(B, T, C_output), dtype float32, the parameters of the output distribution. It is the concatenation of 3 parts: the logits of every distribution, the mean of each distribution, and the log standard deviation of each distribution. Each part's shape is (B, T, n_mixture), where `n_mixture` is the number of Gaussians in the mixture.

        Returns:
            Variable: shape(B, T), waveform sampled from the output distribution.
        """
        batch_size, time_steps, output_dim = y.shape
        n_mixture = output_dim // 3

        w, mu, log_std = F.split(y, 3, dim=-1)

        reshaped_w = F.reshape(w, (batch_size * time_steps, n_mixture))
        prob_ids = F.sampling_id(F.softmax(reshaped_w))
        prob_ids = F.reshape(prob_ids, (batch_size, time_steps))
        prob_ids = prob_ids.numpy()

        index = np.array([[[b, t, prob_ids[b, t]] for t in range(time_steps)]
                          for b in range(batch_size)]).astype("int32")
        index_var = dg.to_variable(index)

        mu_ = F.gather_nd(mu, index_var)
        log_std_ = F.gather_nd(log_std, index_var)

        dist = D.Normal(mu_, F.exp(log_std_))
        samples = dist.sample(shape=[])
        samples = F.clip(samples, min=-1., max=1.)
        return samples
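The index tensor built above pairs every (batch, time) position with the id of its sampled mixture component, so that gather_nd can pull out one mean and one log-std per position. A minimal NumPy sketch of the same selection (toy shapes, illustration only, not part of the module):

import numpy as np

batch_size, time_steps, n_mixture = 2, 3, 4
mu = np.random.randn(batch_size, time_steps, n_mixture).astype("float32")
# one sampled component id per (batch, time) position
prob_ids = np.random.randint(0, n_mixture, size=(batch_size, time_steps))

# the (b, t, component) triples fed to gather_nd above
index = np.array([[[b, t, prob_ids[b, t]] for t in range(time_steps)]
                  for b in range(batch_size)])

gathered = mu[index[..., 0], index[..., 1], index[..., 2]]  # shape (B, T)
assert np.allclose(gathered,
                   np.take_along_axis(mu, prob_ids[..., None], -1).squeeze(-1))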
Example #2
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    _, vocab_size = logits.shape

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = L.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # on the first step, only consider beam 0
    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat([L.where(idx != -1)[:, :1],
                           L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape
    )  # gather the new beam state according to the new beam ids
    #log.debug(gather_idx.numpy())
    #log.debug(state.finished.numpy())
    #log.debug(next_finished.numpy())

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    #log.debug(next_word_id.numpy())
    #log.debug(next_beam_id.numpy())
    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
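Because topk runs over scores flattened to [beam_width * vocab_size], each flat index has to be decomposed back into a beam id and a word id, which is what the // and % above do. A small NumPy illustration of that decomposition (toy numbers, for intuition only):

import numpy as np

beam_width, vocab_size = 2, 5
allscore = np.random.randn(1, beam_width * vocab_size)

idx = np.argsort(-allscore, axis=-1)[:, :beam_width]  # flat top-k indices
next_beam_id = idx // vocab_size  # which beam each hypothesis extends
next_word_id = idx % vocab_size   # which token extends it
assert np.all(next_beam_id * vocab_size + next_word_id == idx)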
Example #3
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    beam_size, vocab_size = logits.shape  # batch size is 1 in this hub module, so the first dim (bsz * beam_size) equals beam_size
    logits_np = logits.numpy()
    for i in range(beam_size):
        logits_np[i][17963] = 0  # zero the [UNK] logit (token id 17963) to suppress it
    logits = D.to_variable(logits_np)

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = L.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # on the first step, only consider beam 0
    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat([L.where(idx != -1)[:, :1],
                           L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape
    )  # gather the new beam state according to the new beam ids

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
Example #4
    def fast_nms(self, boxes, scores, masks, max_num_detections=100):
        iou_threshold = self.nms_thresh
        top_k = self.top_k

        # sort boxes of the same class by score in descending order
        scores, idx = P.argsort(scores, axis=1, descending=True)

        idx = idx[:, :top_k]
        scores = scores[:, :top_k]

        num_classes, num_dets = P.shape(idx)[0], P.shape(idx)[1]

        idx = P.reshape(idx, (-1, ))
        boxes = P.gather(boxes, idx)
        boxes = P.reshape(boxes, (num_classes, num_dets, 4))
        masks = P.gather(masks, idx)
        masks = P.reshape(masks, (num_classes, num_dets, -1))

        # compute a c×n×n IoU matrix, where each n×n slice holds the pairwise IoU between the n candidate boxes of that class
        iou = jaccard(boxes, boxes)
        # since IoU(A, A) = 1 and IoU(A, B) = IoU(B, A), post-process the IoU matrix
        # from the previous step by zeroing the diagonal and lower triangle of each channel
        rows = P.range(0, num_dets, 1, 'int32')
        cols = P.range(0, num_dets, 1, 'int32')
        rows = P.expand(P.reshape(rows, (1, -1)), [num_dets, 1])
        cols = P.expand(P.reshape(cols, (-1, 1)), [1, num_dets])
        tri_mask = P.cast(rows > cols, 'float32')
        tri_mask = P.expand(P.reshape(tri_mask, (1, num_dets, num_dets)),
                            [num_classes, 1, 1])
        iou = tri_mask * iou
        iou_max = P.reduce_max(iou, dim=1)

        # Now just filter out the ones higher than the threshold
        keep = P.where(iou_max <= iou_threshold)

        # Assign each kept detection to its corresponding class
        classes = P.range(0, num_classes, 1, 'int32')
        classes = P.expand(P.reshape(classes, (-1, 1)), [1, num_dets])
        classes = P.gather_nd(classes, keep)

        boxes = P.gather_nd(boxes, keep)
        masks = P.gather_nd(masks, keep)
        scores = P.gather_nd(scores, keep)

        # Only keep the top cfg.max_num_detections highest scores across all classes
        scores, idx = P.argsort(scores, axis=0, descending=True)
        idx = idx[:max_num_detections]
        scores = scores[:max_num_detections]

        classes = P.gather(classes, idx)
        boxes = P.gather(boxes, idx)
        masks = P.gather(masks, idx)

        return boxes, masks, classes, scores
Example #5
def fast_nms(boxes, scores, conf_thresh, nms_thresh, keep_top_k, nms_top_k):
    '''
    :param boxes:    [?, 4]
    :param scores:   [80, ?]
    '''

    # sort boxes of the same class by score in descending order
    scores, idx = P.argsort(scores, axis=1, descending=True)

    idx = idx[:, :keep_top_k]
    scores = scores[:, :keep_top_k]

    num_classes, num_dets = P.shape(idx)[0], P.shape(idx)[1]

    idx = P.reshape(idx, (-1, ))
    boxes = P.gather(boxes, idx)
    boxes = P.reshape(boxes, (num_classes, num_dets, 4))

    # compute a c×n×n IoU matrix, where each n×n slice holds the pairwise IoU between the n candidate boxes of that class
    iou = _iou(boxes, boxes)

    # since IoU(A, A) = 1 and IoU(A, B) = IoU(B, A), post-process the IoU matrix
    # from the previous step by zeroing the diagonal and lower triangle of each channel
    rows = P.range(0, num_dets, 1, 'int32')
    cols = P.range(0, num_dets, 1, 'int32')
    rows = P.expand(P.reshape(rows, (1, -1)), [num_dets, 1])
    cols = P.expand(P.reshape(cols, (-1, 1)), [1, num_dets])
    tri_mask = P.cast(rows > cols, 'float32')
    tri_mask = P.expand(P.reshape(tri_mask, (1, num_dets, num_dets)),
                        [num_classes, 1, 1])
    iou = tri_mask * iou
    iou_max = P.reduce_max(iou, dim=1)

    # within a class, a box is dropped if its highest IoU with any higher-scoring box exceeds nms_thresh; the box at index 0 is always kept
    keep = P.where(iou_max <= nms_thresh)

    # Assign each kept detection to its corresponding class
    classes = P.range(0, num_classes, 1, 'int32')
    classes = P.expand(P.reshape(classes, (-1, 1)), [1, num_dets])
    classes = P.gather_nd(classes, keep)

    boxes = P.gather_nd(boxes, keep)
    scores = P.gather_nd(scores, keep)

    # Only keep the top nms_top_k highest scores across all classes
    scores, idx = P.argsort(scores, axis=0, descending=True)
    idx = idx[:nms_top_k]
    scores = scores[:nms_top_k]

    classes = P.gather(classes, idx)
    boxes = P.gather(boxes, idx)

    return boxes, scores, classes
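The upper-triangle mask is what makes fast NMS (Examples 4 and 5) loop-free: after sorting by score, entry (i, j) of the masked IoU matrix is nonzero only when box i scored higher than box j, so a per-column max gives each box its worst overlap with any better-scoring box. A NumPy sketch of that filtering step (toy data, illustration only):

import numpy as np

num_dets, nms_thresh = 4, 0.5
iou = np.random.rand(num_dets, num_dets)
iou = (iou + iou.T) / 2  # symmetric, like a real IoU matrix

# keep only entries (i, j) with j > i: IoU of box j against higher-scoring box i
tri_mask = np.triu(np.ones((num_dets, num_dets)), k=1)
iou_max = (tri_mask * iou).max(axis=0)  # per box: worst overlap with a better box

keep = iou_max <= nms_thresh  # box 0 always survives (its iou_max is 0)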
Example #6
    def _get_pooled_output(self, enc_out, idx=None, name="pooled"):
        """Get pooled output of the last output embedding in Transformer.

        Args:
            enc_out: the output embeddings of Transformer, shape is [batch_size, max_seq_len, hidden_size]
            idx (optional): the selected indices in pooling operator, shape is [batch_size, 1] or [batch_size, 2].
            name: a string, the name of the pooling layer.

        Returns:
            pooled_out: the pooled output embedding, shape is [batch_size, hidden_size].
        """
        if idx is None:
            feat = enc_out[:, 0]
        elif len(idx.shape) == 2 and idx.shape[1] == 1:
            enc_out = layers.squeeze(enc_out, [1])
            feat = layers.gather(input=enc_out, index=idx)
        elif len(idx.shape) == 2 and idx.shape[1] == 2:
            feat = layers.gather_nd(input=enc_out, index=idx)
        else:
            raise ValueError(f"Invalid indices shape {idx.shape} is used")

        pooled_out = layers.fc(
            input=feat,
            size=self.hidden_size,
            act="tanh",
            param_attr=fluid.ParamAttr(name=f"{name}_fc.w_0", initializer=self.param_initializer),
            bias_attr=f"{name}_fc.b_0")
        return pooled_out
Example #7
def no_nms(bboxes,
           scores,
           score_threshold,
           keep_top_k):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if keep_top_k > 0 and len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
Example #8
def masked_select(input, mask):
    """Select the input value according to the mask
    
    Args:
        input: input matrix
        mask: mask matrix

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> mask
    [
        [True, True, False],
        [True, False, False]
    ]
    >>> masked_select(input, mask)
    [1, 2, 4]
    """
    select = layers.where(mask)
    output = layers.gather_nd(input, select)
    return output
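Combining where (which returns the coordinates of the true entries) with gather_nd reproduces boolean mask indexing. In NumPy the same operation is a one-liner, which makes the equivalence easy to check:

import numpy as np

x = np.array([[1, 2, 3],
              [4, 5, 6]])
mask = np.array([[True, True, False],
                 [True, False, False]])

select = np.argwhere(mask)           # what layers.where returns: [[0 0], [0 1], [1 0]]
out = x[select[:, 0], select[:, 1]]  # what layers.gather_nd computes
assert np.array_equal(out, x[mask])  # -> [1, 2, 4]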
Example #9
 def forward(self, src_ids, *args, **kwargs):
     tgt_labels = kwargs.pop('tgt_labels', None)
     tgt_pos = kwargs.pop('tgt_pos', None)
     encode_only = kwargs.pop('encode_only', False)
     _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs)
     #log.debug('hidden_-1 %r'% L.reduce_mean(info['hiddens'][0]).numpy())
     #log.debug('hidden_0 %r'% L.reduce_mean(info['hiddens'][1]).numpy())
     if encode_only:
         return None, None, info
     elif tgt_labels is None:
         encoded = self.mlm(encoded)
         encoded = self.mlm_ln(encoded)
         logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias
         output_ids = L.argmax(logits, -1)
         return output_ids, logits, info
     else:
         encoded_2d = L.gather_nd(encoded, tgt_pos)
         #log.debug('input shape %s' % repr(src_ids.shape))
         #log.debug(L.gather_nd(src_ids, tgt_pos).numpy())
         encoded_2d = self.mlm(encoded_2d)
         encoded_2d = self.mlm_ln(encoded_2d)
         logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
         if len(tgt_labels.shape) == 1:
             tgt_labels = L.reshape(tgt_labels, [-1, 1])
         
         loss = L.reduce_mean(
                 L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))
                 )
         return loss, logits_2d, info
Example #10
def batch_scatter(ref, indices, updates, in_place=False, overwrite=False):
    """Scatter updates to ref, according to corrensponding index in indices
    in each batch. Currently, it only support 2d Tensor.

    Args:
        ref (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, 1]
        updates (Variable): with shape [batch_size]
        in_place (bool): if True, the scatter result will be assigned to ref; otherwise,
                         a new Tensor will be returned. Default is False.
        overwrite (bool): if True, scatter will overwrite the corresponding elements.
                          Default is False.

    Returns: Variable with the same shape as ref

    Raises: NULL

    Examples:
        ref
            [[1, 1, 1],
             [1, 1, 1]]
        indices
            [[2], [1]]
        updates
            [2, 3]

        return
            [[1, 1, 2],
             [1, 3, 1]]

    """
    ref_dtype = ref.dtype
    if ref_dtype not in PaddleVarType.floats:
        ref_in = layers.cast(ref, dtype='float32')
    else:
        ref_in = ref

    if updates.dtype != ref_in.dtype:
        updates = layers.cast(updates, dtype=ref_in.dtype)

    batch_size = layers.cast(layers.shape(ref_in)[0], dtype=indices.dtype)
    zero = layers.fill_constant(shape=[1], dtype=indices.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=indices.dtype, value=1)
    batch_indices = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=indices.dtype), [1])
    coord = layers.concat([batch_indices, indices], axis=1)
    if overwrite:
        mask = layers.gather_nd(ref_in, coord)
        mask = layers.elementwise_sub(layers.zeros_like(mask), mask)
        ref_in = layers.scatter_nd_add(ref_in, coord, mask)

    output = layers.scatter_nd_add(ref_in, coord, updates)
    if ref_dtype not in PaddleVarType.floats:
        output = layers.cast(output, dtype=ref_dtype)
    if in_place:
        layers.assign(output, ref)
        return ref
    else:
        return output
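scatter_nd_add can only accumulate, so the overwrite branch above emulates assignment by first adding the negation of the current values at the target coordinates (zeroing them) before adding the updates. The same trick in NumPy terms (toy data mirroring the docstring example):

import numpy as np

ref = np.array([[1., 1., 1.],
                [1., 1., 1.]])
coord = np.array([[0, 2], [1, 1]])  # (batch index, target column) per row
updates = np.array([2., 3.])

# overwrite: add the negated current values, then add the updates
current = ref[coord[:, 0], coord[:, 1]]
np.add.at(ref, (coord[:, 0], coord[:, 1]), -current)
np.add.at(ref, (coord[:, 0], coord[:, 1]), updates)
# ref is now [[1, 1, 2], [1, 3, 1]], matching the docstring example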
Example #11
 def forward(self, src_ids, *args, **kwargs):
     pooled, encoded = ErnieModel.forward(self, src_ids, *args, **kwargs)
     encoded_2d = L.gather_nd(encoded, L.where(src_ids == mask_id))
     encoded_2d = self.mlm(encoded_2d)
     encoded_2d = self.mlm_ln(encoded_2d)
     logits_2d = L.matmul(
         encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
     return logits_2d
Example #12
def seq_gather(seq, idxs):
    """seq是[None, seq_len, s_size]的格式,
    idxs是[None, 1]的格式,
    在seq的第i个序列中选出第idxs[i]个向量,
    最终输出[None, s_size]的向量。
    """
    idxs = layers.cast(idxs, dtype="int32")
    batch_idxs = layers.arange(0, seq.shape[0], dtype="int32")
    batch_idxs = layers.unsqueeze(batch_idxs, 1)
    idxs = layers.concat([batch_idxs, idxs], 1)
    return layers.gather_nd(seq, idxs)
Example #13
    def _calc_logits(self, enc_out, tgt_idx=None, name=""):
        """Get the logits of generation task.

        The network may share weight with token embeddings.

        Args:
            enc_out: the output embeddings of Transformer, shape is [batch_size, max_seq_len, hidden_size]
            tgt_idx (optional): the indices of prediction tokens, shape is [num_predictions, 2].

        Returns:
            logits: the logits of prediction task, shape is [num_predictions, vocab_size].
        """
        if tgt_idx is None:
            seq_feat = layers.reshape(x=enc_out, shape=[-1, self.hidden_size])
        elif len(tgt_idx.shape) == 2 and tgt_idx.shape[1] == 2:
            seq_feat = layers.gather_nd(input=enc_out, index=tgt_idx)
        else:
            raise ValueError(f"Invalid indices shape {tgt_idx.shape} is used")

        seq_trans_feat = layers.fc(
            input=seq_feat,
            size=self.emb_size,
            act=self.hidden_act,
            param_attr=fluid.ParamAttr(
                name="mask_lm_trans_fc.w_0",
                initializer=self.param_initializer),
            bias_attr="mask_lm_trans_fc.b_0")

        seq_trans_feat = pre_process_layer(
            seq_trans_feat, self.post_cls_cmd, name="mask_lm_trans")

        if self.weight_sharing:
            logits = layers.matmul(
                x=seq_trans_feat,
                y=fluid.default_main_program().global_block().var(
                    name + self.token_emb_name),
                transpose_y=True)
            if self.cls_bias:
                logits += layers.create_parameter(
                    shape=[self.vocab_size],
                    dtype=self.dtype,
                    attr=fluid.ParamAttr(name="mask_lm_out_fc.b_0"),
                    is_bias=True)
        else:
            seq_out_bias_attr = "mask_lm_out_fc.b_0" if self.cls_bias else False
            logits = layers.fc(
                input=seq_trans_feat,
                size=self.vocab_size,
                param_attr=fluid.ParamAttr(
                    name="mask_lm_out_fc.w_0",
                    initializer=self.param_initializer),
                bias_attr=seq_out_bias_attr)
        return logits
Example #14
    def _calc_bow_logits(self, enc_out, bow_idx):
        """Get the logits of BoW task.

        The network may share weight with token embeddings.

        Args:
            enc_out: the output embeddings of Transformer, shape is [batch_size, max_seq_len, hidden_dim]
            bow_idx: the indices of prediction tokens, shape is [num_predictions, 1] or [num_predictions, 2].

        Returns:
            logits: the logits of prediction task, shape is [num_predictions, vocab_size].
        """
        if len(bow_idx.shape) == 2 and bow_idx.shape[1] == 1:
            enc_out = layers.squeeze(enc_out, [1])
            bow_feat = layers.gather(input=enc_out, index=bow_idx, overwrite=False)
        elif len(bow_idx.shape) == 2 and bow_idx.shape[1] == 2:
            bow_feat = layers.gather_nd(input=enc_out, index=bow_idx)
        else:
            raise ValueError(f"Invalid indices shape {bow_idx.shape} is used")

        bow_trans_feat = layers.fc(
            input=bow_feat,
            size=self.emb_size,
            act=self.hidden_act,
            param_attr=fluid.ParamAttr(
                name="bow_trans_fc.w_0",
                initializer=self.param_initializer),
            bias_attr="bow_trans_fc.b_0")

        bow_trans_feat = pre_process_layer(
            bow_trans_feat, self.post_cls_cmd, name="bow_trans")

        if self.weight_sharing:
            bow_logits = layers.matmul(
                x=bow_trans_feat,
                y=fluid.default_main_program().global_block().var(
                    self.token_emb_name),
                transpose_y=True)
            if self.cls_bias:
                bow_logits += layers.create_parameter(
                    shape=[self.vocab_size],
                    dtype=self.dtype,
                    attr=fluid.ParamAttr(name="bow_out_fc.b_0"),
                    is_bias=True)
        else:
            bow_out_bias_attr = "bow_out_fc.b_0" if self.cls_bias else False
            bow_logits = layers.fc(input=bow_trans_feat,
                                   size=self.vocab_size,
                                   param_attr=fluid.ParamAttr(
                                       name="bow_out_fc.w_0",
                                       initializer=self.param_initializer),
                                   bias_attr=bow_out_bias_attr)
        return bow_logits
Example #15
    def _birnn_encoder(self, inputs, input_len, name_lens, name_pos,
                       name_tok_len):
        """forward

        Args:
            inputs (Variable): shape=[batch_size, max_seq_len, hidden_size]
            input_len (Variable): shape=[batch_size]
            name_lens (Variable): shape=[batch_size]
            name_pos (Variable): shape=[batch_size, max_name_len, max_tokens]
            name_tok_len (Variable): shape=[batch_size, max_name_len]

        Returns: tuple of (name_repr, None); name_repr has shape [batch_size, max_name_len, hidden_size] (doubled if bidirectional)

        Raises: NULL

        """
        rnn_output, rnn_final_state = self._rnn_encoder.forward(
            inputs, input_len)

        max_name_len = name_pos.shape[1]
        name_begin = name_pos[:, :, 0]

        name_repr_mask = layers.sequence_mask(name_lens,
                                              max_name_len,
                                              dtype=name_tok_len.dtype)
        len_delta = layers.elementwise_mul(name_tok_len - 1,
                                           name_repr_mask,
                                           axis=0)
        name_end = name_begin + len_delta

        if self._bidirectional:
            name_fwd_repr_gathered = nn_utils.batch_gather_2d(
                rnn_output, name_end)[:, :, :self._hidden_size]
            name_bwd_repr_gathered = nn_utils.batch_gather_2d(
                rnn_output, name_begin)[:, :, self._hidden_size:]
            name_repr_gathered = layers.concat(
                input=[name_fwd_repr_gathered, name_bwd_repr_gathered],
                axis=-1)
            new_hidden_size = self._hidden_size * 2
        else:
            name_repr_gathered = layers.gather_nd(rnn_output, name_end)
            new_hidden_size = self._hidden_size

        name_repr_tmp = layers.reshape(
            name_repr_gathered, shape=[-1, max_name_len, new_hidden_size])
        name_repr_mask = layers.cast(name_repr_mask, dtype=name_repr_tmp.dtype)
        name_repr = layers.elementwise_mul(name_repr_tmp,
                                           name_repr_mask,
                                           axis=0)

        return name_repr, None
Example #16
def matrix_nms(bboxes,
               scores,
               score_threshold,
               post_threshold,
               nms_top_k,
               keep_top_k,
               use_gaussian=False,
               gaussian_sigma=2.):
    scores = L.transpose(scores, [1, 0])
    inds = L.where(scores > score_threshold)
    if len(inds) == 0:
        return L.zeros((0, 6), 'float32') - 1.0

    cate_scores = L.gather_nd(scores, inds)
    cate_labels = inds[:, 1]
    bboxes = L.gather(bboxes, inds[:, 0])

    # sort and keep top nms_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if nms_top_k > 0 and len(sort_inds) > nms_top_k:
        sort_inds = sort_inds[:nms_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    # Matrix NMS
    kernel = 'gaussian' if use_gaussian else 'linear'
    cate_scores = _matrix_nms(bboxes, cate_labels, cate_scores, kernel=kernel, sigma=gaussian_sigma)

    # filter.
    keep = L.where(cate_scores >= post_threshold)
    if len(keep) == 0:
        return L.zeros((0, 6), 'float32') - 1.0
    bboxes = L.gather(bboxes, keep)
    cate_scores = L.gather(cate_scores, keep)
    cate_labels = L.gather(cate_labels, keep)

    # sort and keep keep_top_k
    _, sort_inds = L.argsort(cate_scores, descending=True)
    if len(sort_inds) > keep_top_k:
        sort_inds = sort_inds[:keep_top_k]
    bboxes = L.gather(bboxes, sort_inds)
    cate_scores = L.gather(cate_scores, sort_inds)
    cate_labels = L.gather(cate_labels, sort_inds)

    cate_scores = L.unsqueeze(cate_scores, 1)
    cate_labels = L.unsqueeze(cate_labels, 1)
    cate_labels = L.cast(cate_labels, 'float32')
    pred = L.concat([cate_labels, cate_scores, bboxes], 1)

    return pred
Example #17
def batch_gather(var, indices):
    """Gather slices from var in each batch, according to corrensponding
    index in indices. Currently, it only support 2d Tensor.

    Args:
        var (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, 1] or [batch_size]

    Returns: Variable with shape [batch_size]

    Raises: NULL

    Examples:
        var
            [[1, 2, 3],
             [4, 5, 6]]
        indices
            [[2], [1]]

        return
            [3, 5]

    """
    if len(indices.shape) >= 2 and indices.shape[-1] != 1:
        raise ValueError(
            'shape of indices error. it should be a 1-D Tensor, or a 2-D Tensor whose '
            'second dimension is 1, but got shape = %s' %
            (str(indices.shape), ))

    if len(indices.shape) == 1:
        indices = layers.reshape(indices, shape=[-1, 1])

    reshape_input = len(var.shape) == 1
    if reshape_input:
        var = PaddleFluidWrapper.reshape(var, shape=[-1, 1])

    batch_size = layers.cast(layers.shape(indices)[0], dtype=indices.dtype)
    zero = layers.fill_constant(shape=[1], dtype=indices.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=indices.dtype, value=1)
    batch_indices = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=indices.dtype), [1])

    coord = layers.concat([batch_indices, indices], axis=1)
    coord.stop_gradient = True
    output = layers.gather_nd(var, coord)
    if reshape_input:
        output = PaddleFluidWrapper.reshape(output, shape=[-1])
    return output
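seq_gather (Example 12) and batch_gather here rely on the same idiom: prepend a running batch index to the per-batch indices so gather_nd receives full (batch, position) coordinates. In NumPy:

import numpy as np

var = np.array([[1, 2, 3],
                [4, 5, 6]])
indices = np.array([[2], [1]])

batch_indices = np.arange(var.shape[0])[:, None]     # [[0], [1]]
coord = np.concatenate([batch_indices, indices], 1)  # [[0, 2], [1, 1]]
out = var[coord[:, 0], coord[:, 1]]                  # what gather_nd returns
assert np.array_equal(out, np.array([3, 5]))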
Example #18
def index_sample(x, index):
    """Select input value according to index
    
    Args:
        input: input matrix
        index: index matrix

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_s = x.shape
    dim = len(index.shape) - 1
    assert x_s[:dim] == index.shape[:dim]
    r_x = layers.reshape(x, shape=(-1, *x_s[dim:]))
    index = layers.reshape(index, shape=(index.shape[0], index.shape[1], 1))
    # generate a batch (arange) index with the same leading shape as index
    batch_size = layers.cast(layers.shape(index)[0], dtype=index.dtype)
    zero = layers.fill_constant(shape=[1], dtype=index.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=index.dtype, value=1)
    arr_index = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=index.dtype), [1, 2])

    arr_index = layers.expand_as(arr_index, index)
    # generate the new (batch, position) index pairs
    new_index = layers.concat([arr_index, index], -1)
    new_index = layers.reshape(new_index, (-1, 2))
    # get output
    out = layers.gather_nd(r_x, new_index)
    out = layers.reshape(out, (*x_s[:dim], -1))  # restore index's leading dims, as in the docstring example
    return out
Example #19
def batch_gather_2d(var, indices):
    """Gather slices from var in each batch, according to corrensponding
    index in indices. Currently, it only support 2d Tensor.

    Args:
        var (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, max_len]

    Returns: Variable with shape [batch_size, max_len, var.shape[-1]]

    Raises: NULL

    Examples:
        var
            [[1, 2, 3],
             [4, 5, 6]]
        indices
            [[2, 0], [1, 2]]

        return
            [[3, 1], [5, 6]]

    """
    if len(indices.shape) != 2:
        raise ValueError('shape of indices error. it should be a 2-D Tensor, '
                         'but got shape = %s' % (str(indices.shape), ))

    batch_size = layers.shape(indices)[0]

    zero = layers.fill_constant(shape=[1], dtype=indices.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=indices.dtype, value=1)
    end = layers.cast(batch_size, dtype=indices.dtype)
    batch_indices_1d = layers.unsqueeze(
        layers.range(zero, end, one, dtype=indices.dtype), [1])

    seq_len = indices.shape[1]
    batch_indices = layers.expand(batch_indices_1d, [1, seq_len])

    coord_2d = layers.concat(
        [layers.unsqueeze(batch_indices, [2]),
         layers.unsqueeze(indices, [2])],
        axis=2)
    coord_2d.stop_gradient = True
    coord_1d = layers.reshape(coord_2d, shape=[-1, 2])
    output_1d = layers.gather_nd(var, coord_1d)
    output_2d = layers.reshape(output_1d, [batch_size, seq_len, var.shape[-1]])
    return output_2d
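batch_gather_2d extends that idiom to several indices per batch row: the batch index is tiled across the sequence dimension, zipped with indices into (batch, position) pairs, and the flat gather_nd result is reshaped back to [batch_size, max_len, hidden]. A NumPy equivalent (toy shapes):

import numpy as np

batch_size, seq_len, hidden = 2, 3, 4
var = np.arange(batch_size * seq_len * hidden,
                dtype="float32").reshape(batch_size, seq_len, hidden)
indices = np.array([[2, 0], [1, 2]])  # two positions per batch row

batch_idx = np.repeat(np.arange(batch_size)[:, None], indices.shape[1], axis=1)
out = var[batch_idx, indices]  # shape (batch_size, 2, hidden)
assert out.shape == (batch_size, 2, hidden)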
Example #20
    def forward(self, *args, **kwargs):
        """
        Args
            tgt_labels(`Variable` of shape [batch_size, seqlen] or [batch, seqlen, vocab_size]):
                ground truth target sequence ids (hard label) or distribution (soft label)
            tgt_pos(`Variable` of shape [n_targets, 2]):
                index of tgt_labels in `src_ids`, can be obtained from `fluid.layers.where(src_ids==mask_id)`
            encode_only(bool):
                if set, loss and logits_2d will not be returned
        Returns:
            loss(`Variable` of shape []):
                cross entropy loss mean over every target label. if `encode_only`, returns None.
            logits(`Variable` of shape [n_targets, vocab_size]):
                logits for every targets. if `encode_only`, returns None.
            info(Dictionary): see `ErnieModel`
        """
        tgt_labels = kwargs.pop('tgt_labels', None)
        tgt_pos = kwargs.pop('tgt_pos', None)
        encode_only = kwargs.pop('encode_only', False)
        _, encoded, info = ErnieModel.forward(self, *args, **kwargs)
        if encode_only:
            return None, None, info
        elif tgt_labels is None or tgt_pos is None:
            encoded = self.mlm(encoded)
            encoded = self.mlm_ln(encoded)
            logits = L.matmul(encoded, self.word_emb.weight,
                              transpose_y=True) + self.mlm_bias
            output_ids = L.argmax(logits, -1)
            return output_ids, logits, info
        else:
            encoded_2d = L.gather_nd(encoded, tgt_pos)
            encoded_2d = self.mlm(encoded_2d)
            encoded_2d = self.mlm_ln(encoded_2d)
            logits_2d = L.matmul(encoded_2d,
                                 self.word_emb.weight,
                                 transpose_y=True) + self.mlm_bias
            if len(tgt_labels.shape) == 1:
                tgt_labels = L.reshape(tgt_labels, [-1, 1])

            loss = L.reduce_mean(
                L.softmax_with_cross_entropy(
                    logits_2d,
                    tgt_labels,
                    soft_label=(tgt_labels.shape[-1] != 1)))
            return loss, logits_2d, info
Example #21
def index_sample(x, index):
    """Select input value according to index
    
    Args:
        input: input matrix
        index: index matrix

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> index
    [
        [1, 2],
        [0, 1]
    ]
    >>> index_sample(input, index)
    [
        [2, 3],
        [4, 5]
    ]
    """
    x_s = x.shape
    dim = len(index.shape) - 1
    assert x_s[:dim] == index.shape[:dim]
    r_x = layers.reshape(x, shape=(-1, *x_s[dim:]))
    index = layers.reshape(index, shape=(len(r_x), -1, 1))
    # generate arange index, shape like index
    arr_index = layers.arange(start=0, end=len(index), dtype=index.dtype)
    arr_index = layers.unsqueeze(arr_index, axes=[1, 2])
    arr_index = layers.expand_as(arr_index, index)
    # generate the new (batch, position) index pairs
    new_index = layers.concat((arr_index, index), -1)
    new_index = layers.reshape(new_index, (-1, 2))
    # get output
    out = layers.gather_nd(r_x, new_index)
    out = layers.reshape(out, (*x_s[:dim], -1))
    return out
Example #22
def build_and_run_program(place, batch_size, beam_size, stop_gradient=False):
    fluid.default_startup_program().random_seed = 1
    fluid.default_main_program().random_seed = 1
    np.random.seed(2)

    x = layers.assign(
        np.random.rand(batch_size, beam_size, 32).astype("float32"))
    indices = fluid.data(shape=[None, beam_size], dtype="int64", name="indices")
    step_idx = layers.fill_constant(
        shape=[1], dtype="int64", value=0, force_cpu=True)
    max_len = layers.fill_constant(
        shape=[1], dtype="int64", value=10, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    scores = layers.array_write(x, step_idx)
    with while_op.block():
        bs = layers.cast(layers.shape(x)[0], "int64")
        for _ in range(20):
            bs = layers.cast(bs, 'int64')
        bs.stop_gradient = stop_gradient
        batch_pos = layers.expand(
            layers.unsqueeze(
                layers.range(
                    0, bs, 1, dtype=bs.dtype), [1]), [1, beam_size])
        topk_coordinates = layers.stack([batch_pos, indices], axis=2)
        topk_coordinates.stop_gradient = stop_gradient
        score = layers.gather_nd(x, topk_coordinates)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(score, i=step_idx, array=scores)
        length_cond = layers.less_than(x=step_idx, y=max_len)
        layers.assign(length_cond, cond)

    out = layers.tensor_array_to_tensor(scores, axis=0, use_stack=True)[0]
    loss = layers.reduce_mean(out)
    opt = fluid.optimizer.Adam(0.01)
    opt.minimize(loss)
    exe = fluid.Executor(place)
    data = np.random.random_integers(
        low=0, high=beam_size - 1, size=(batch_size, beam_size)).astype("int64")
    loss_val, = exe.run(feed={"indices": data}, fetch_list=[loss])

    return loss_val
Example #23
    def forward(self, indices, speaker_position_rate=None):
        """
        Args:
            indices (Variable): shape (B, T), dtype: int64, position
                indices, where B means the batch size, T means the time steps.
            speaker_position_rate (Variable | float, optional): position
                rate. It can be a floating point number or a Variable with
                shape (1,), in which case this speaker_position_rate is used
                for every example. It can also be a Variable with shape (B,),
                which contains a speaker position rate for each utterance.
        Returns:
            out (Variable): shape(B, T, C_pos), dtype float32, position embedding, where C_pos 
                means position embedding size.
        """
        batch_size, time_steps = indices.shape

        # convert speaker_position_rate to a Variable with shape(B, )
        if isinstance(speaker_position_rate, float):
            speaker_position_rate = dg.to_variable(
                np.array([speaker_position_rate]).astype("float32"))
            speaker_position_rate = F.expand(speaker_position_rate,
                                             [batch_size])
        elif isinstance(speaker_position_rate, fluid.framework.Variable) \
            and list(speaker_position_rate.shape) == [1]:
            speaker_position_rate = F.expand(speaker_position_rate,
                                             [batch_size])
        assert len(speaker_position_rate.shape) == 1 and \
            list(speaker_position_rate.shape) == [batch_size]

        weight = compute_position_embedding(self.weight,
                                            speaker_position_rate)  # (B, V, C)
        # make indices for gather_nd
        batch_id = F.expand(
            F.unsqueeze(
                F.range(
                    0, batch_size, 1, dtype="int64"), [1]), [1, time_steps])
        # (B, T, 2)
        gather_nd_id = F.stack([batch_id, indices], -1)

        out = F.gather_nd(weight, gather_nd_id)
        return out
Example #24
    def forward(self, *args, **kwargs):
        """
        Args:
            nsp_labels (optional, `Variable` of shape [batch_size]): 
                labels for `next sentence prediction` tasks
            mlm_pos (optional, `Variable` of shape [n_mask, 2]): 
                index of mask_id in `src_ids`, can be obtained from `fluid.layers.where(src_ids==mask_id)`
            labels (optional, `Variable` of shape [n_mask]): 
                labels for `mask language model` tasks, the original token indices in masked position in `src_ids`
        Returns:
            loss (`Variable` of shape []):
                total_loss of `next sentence prediction` and `masked language model`
            mlm_loss (`Variable` of shape []):
                loss for `masked language model` task
            nsp_loss (`Variable` of shape []):
                loss for `next sentence prediction` task
        """

        mlm_labels = kwargs.pop('labels')
        mlm_pos = kwargs.pop('mlm_pos')
        nsp_labels = kwargs.pop('nsp_labels')
        pooled, encoded = super(ErnieModelForPretraining,
                                self).forward(*args, **kwargs)
        if len(mlm_labels.shape) == 1:
            mlm_labels = L.reshape(mlm_labels, [-1, 1])
        if len(nsp_labels.shape) == 1:
            nsp_labels = L.reshape(nsp_labels, [-1, 1])

        nsp_loss = self.pooler_heads[0](pooled, nsp_labels)

        encoded_2d = L.gather_nd(encoded, mlm_pos)
        encoded_2d = self.mlm(encoded_2d)
        encoded_2d = self.mlm_ln(encoded_2d)
        logits_2d = L.matmul(
            encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias
        mlm_loss = L.reduce_mean(
            L.softmax_with_cross_entropy(logits_2d, mlm_labels))
        total_loss = mlm_loss + nsp_loss
        return total_loss, mlm_loss, nsp_loss
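mlm_pos holds the [n_mask, 2] coordinates produced by fluid.layers.where(src_ids == mask_id), so gather_nd flattens exactly the masked positions out of the [batch, seqlen, hidden] encoding. The NumPy analogue (toy data, illustration only):

import numpy as np

mask_id = 3
src_ids = np.array([[5, 3, 7],
                    [3, 9, 3]])
encoded = np.random.randn(2, 3, 8)  # [batch, seqlen, hidden]

mlm_pos = np.argwhere(src_ids == mask_id)           # [[0 1], [1 0], [1 2]]
encoded_2d = encoded[mlm_pos[:, 0], mlm_pos[:, 1]]  # [n_mask, hidden]
assert encoded_2d.shape == (3, 8)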
Example #25
    def net(self, inputs, is_infer=False):
        if is_infer:
            bs = self.evaluate_batch_size
        else:
            bs = self.train_batch_size

        stdv = 1.0 / math.sqrt(self.hidden_size)

        def embedding_layer(input,
                            table_name,
                            emb_dim,
                            initializer_instance=None):
            emb = fluid.embedding(
                input=input,
                size=[self.dict_size, emb_dim],
                param_attr=fluid.ParamAttr(
                    name=table_name, initializer=initializer_instance))
            return emb

        sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
        items_emb = embedding_layer(inputs[0], "emb", self.hidden_size,
                                    sparse_initializer)
        pre_state = items_emb
        for i in range(self.step):
            pre_state = layers.reshape(
                x=pre_state, shape=[bs, -1, self.hidden_size])
            state_in = layers.fc(
                input=pre_state,
                name="state_in",
                size=self.hidden_size,
                act=None,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]
            state_out = layers.fc(
                input=pre_state,
                name="state_out",
                size=self.hidden_size,
                act=None,
                num_flatten_dims=2,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]

            state_adj_in = layers.matmul(inputs[3],
                                         state_in)  # [batch_size, uniq_max, h]
            state_adj_out = layers.matmul(
                inputs[4], state_out)  # [batch_size, uniq_max, h]

            gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

            gru_input = layers.reshape(
                x=gru_input, shape=[-1, self.hidden_size * 2])
            gru_fc = layers.fc(input=gru_input,
                               name="gru_fc",
                               size=3 * self.hidden_size,
                               bias_attr=False)
            pre_state, _, _ = fluid.layers.gru_unit(
                input=gru_fc,
                hidden=layers.reshape(
                    x=pre_state, shape=[-1, self.hidden_size]),
                size=3 * self.hidden_size)

        final_state = layers.reshape(
            pre_state, shape=[bs, -1, self.hidden_size])
        seq = layers.gather_nd(final_state, inputs[1])
        last = layers.gather_nd(final_state, inputs[2])

        seq_fc = layers.fc(
            input=seq,
            name="seq_fc",
            size=self.hidden_size,
            bias_attr=False,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
        last_fc = layers.fc(input=last,
                            name="last_fc",
                            size=self.hidden_size,
                            bias_attr=False,
                            act=None,
                            num_flatten_dims=1,
                            param_attr=fluid.ParamAttr(
                                initializer=fluid.initializer.Uniform(
                                    low=-stdv, high=stdv)))  # [batch_size, h]

        seq_fc_t = layers.transpose(
            seq_fc, perm=[1, 0, 2])  # [seq_max, batch_size, h]
        add = layers.elementwise_add(seq_fc_t,
                                     last_fc)  # [seq_max, batch_size, h]
        b = layers.create_parameter(
            shape=[self.hidden_size],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(value=0.0))  # [h]
        add = layers.elementwise_add(add, b)  # [seq_max, batch_size, h]

        add_sigmoid = layers.sigmoid(add)  # [seq_max, batch_size, h]
        add_sigmoid = layers.transpose(
            add_sigmoid, perm=[1, 0, 2])  # [batch_size, seq_max, h]

        weight = layers.fc(
            input=add_sigmoid,
            name="weight_fc",
            size=1,
            act=None,
            num_flatten_dims=2,
            bias_attr=False,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
        weight *= inputs[5]
        weight_mask = layers.elementwise_mul(
            seq, weight, axis=0)  # [batch_size, seq_max, h]
        global_attention = layers.reduce_sum(
            weight_mask, dim=1)  # [batch_size, h]

        final_attention = layers.concat(
            [global_attention, last], axis=1)  # [batch_size, 2*h]
        final_attention_fc = layers.fc(
            input=final_attention,
            name="final_attention_fc",
            size=self.hidden_size,
            bias_attr=False,
            act=None,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, h]

        # all_vocab = layers.create_global_var(
        #     shape=[items_num - 1],
        #     value=0,
        #     dtype="int64",
        #     persistable=True,
        #     name="all_vocab")
        all_vocab = np.arange(1, self.dict_size).reshape((-1)).astype('int32')
        all_vocab = fluid.layers.cast(
            x=fluid.layers.assign(all_vocab), dtype='int64')

        all_emb = fluid.embedding(
            input=all_vocab,
            param_attr=fluid.ParamAttr(
                name="emb",
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
            size=[self.dict_size, self.hidden_size])  # [all_vocab, h]

        logits = layers.matmul(
            x=final_attention_fc, y=all_emb,
            transpose_y=True)  # [batch_size, all_vocab]
        softmax = layers.softmax_with_cross_entropy(
            logits=logits, label=inputs[6])  # [batch_size, 1]
        self.loss = layers.reduce_mean(softmax)  # [1]
        self.acc = layers.accuracy(input=logits, label=inputs[6], k=20)

        self._cost = self.loss
        if is_infer:
            self._infer_results['acc'] = self.acc
            self._infer_results['loss'] = self.loss
            return

        self._metrics["LOSS"] = self.loss
        self._metrics["train_acc"] = self.acc
Example #26
def network(items_num, hidden_size, step, bs):
    stdv = 1.0 / math.sqrt(hidden_size)

    items = fluid.data(name="items", shape=[bs, -1],
                       dtype="int64")  #[batch_size, uniq_max]
    seq_index = fluid.data(name="seq_index", shape=[bs, -1, 2],
                           dtype="int32")  #[batch_size, seq_max, 2]
    last_index = fluid.data(name="last_index", shape=[bs, 2],
                            dtype="int32")  #[batch_size, 2]
    adj_in = fluid.data(name="adj_in", shape=[bs, -1, -1],
                        dtype="float32")  #[batch_size, seq_max, seq_max]
    adj_out = fluid.data(name="adj_out", shape=[bs, -1, -1],
                         dtype="float32")  #[batch_size, seq_max, seq_max]
    mask = fluid.data(name="mask", shape=[bs, -1, 1],
                      dtype="float32")  #[batch_size, seq_max, 1]
    label = fluid.data(name="label", shape=[bs, 1],
                       dtype="int64")  #[batch_size, 1]

    datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
    py_reader = fluid.io.DataLoader.from_generator(capacity=256,
                                                   feed_list=datas,
                                                   iterable=False)
    feed_datas = datas

    items_emb = fluid.embedding(
        input=items,
        param_attr=fluid.ParamAttr(name="emb",
                                   initializer=fluid.initializer.Uniform(
                                       low=-stdv, high=stdv)),
        size=[items_num, hidden_size])  #[batch_size, uniq_max, h]

    pre_state = items_emb
    for i in range(step):
        pre_state = layers.reshape(x=pre_state, shape=[bs, -1, hidden_size])
        state_in = layers.fc(
            input=pre_state,
            name="state_in",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]
        state_out = layers.fc(
            input=pre_state,
            name="state_out",
            size=hidden_size,
            act=None,
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-stdv, high=stdv)),
            bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  #[batch_size, uniq_max, h]

        state_adj_in = layers.matmul(adj_in,
                                     state_in)  #[batch_size, uniq_max, h]
        state_adj_out = layers.matmul(adj_out,
                                      state_out)  #[batch_size, uniq_max, h]

        gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

        gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
        gru_fc = layers.fc(input=gru_input,
                           name="gru_fc",
                           size=3 * hidden_size,
                           bias_attr=False)
        pre_state, _, _ = fluid.layers.gru_unit(input=gru_fc,
                                                hidden=layers.reshape(
                                                    x=pre_state,
                                                    shape=[-1, hidden_size]),
                                                size=3 * hidden_size)

    final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
    seq = layers.gather_nd(final_state, seq_index)
    last = layers.gather_nd(final_state, last_index)

    seq_fc = layers.fc(
        input=seq,
        name="seq_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, seq_max, h]
    last_fc = layers.fc(
        input=last,
        name="last_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        num_flatten_dims=1,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, h]

    seq_fc_t = layers.transpose(seq_fc, perm=[1, 0,
                                              2])  #[seq_max, batch_size, h]
    add = layers.elementwise_add(seq_fc_t, last_fc)  #[seq_max, batch_size, h]
    b = layers.create_parameter(
        shape=[hidden_size],
        dtype='float32',
        default_initializer=fluid.initializer.Constant(value=0.0))  #[h]
    add = layers.elementwise_add(add, b)  #[seq_max, batch_size, h]

    add_sigmoid = layers.sigmoid(add)  #[seq_max, batch_size, h]
    add_sigmoid = layers.transpose(add_sigmoid,
                                   perm=[1, 0, 2])  #[batch_size, seq_max, h]

    weight = layers.fc(
        input=add_sigmoid,
        name="weight_fc",
        size=1,
        act=None,
        num_flatten_dims=2,
        bias_attr=False,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, seq_max, 1]
    weight *= mask
    weight_mask = layers.elementwise_mul(seq, weight,
                                         axis=0)  #[batch_size, seq_max, h]
    global_attention = layers.reduce_sum(weight_mask, dim=1)  #[batch_size, h]

    final_attention = layers.concat([global_attention, last],
                                    axis=1)  #[batch_size, 2*h]
    final_attention_fc = layers.fc(
        input=final_attention,
        name="final_attention_fc",
        size=hidden_size,
        bias_attr=False,
        act=None,
        param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
            low=-stdv, high=stdv)))  #[batch_size, h]

    all_vocab = layers.create_global_var(shape=[items_num - 1],
                                         value=0,
                                         dtype="int64",
                                         persistable=True,
                                         name="all_vocab")

    all_emb = fluid.embedding(input=all_vocab,
                              param_attr=fluid.ParamAttr(
                                  name="emb",
                                  initializer=fluid.initializer.Uniform(
                                      low=-stdv, high=stdv)),
                              size=[items_num, hidden_size])  #[all_vocab, h]

    logits = layers.matmul(x=final_attention_fc, y=all_emb,
                           transpose_y=True)  #[batch_size, all_vocab]
    softmax = layers.softmax_with_cross_entropy(logits=logits,
                                                label=label)  #[batch_size, 1]
    loss = layers.reduce_mean(softmax)  # [1]
    acc = layers.accuracy(input=logits, label=label, k=50)
    return loss, acc, py_reader, feed_datas, logits
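seq_index above has shape [batch_size, seq_max, 2], so gather_nd reads its last axis as (batch, node) coordinates and returns the per-node GRU states re-ordered into sequence order, shape [batch_size, seq_max, h]. In NumPy (toy shapes, illustration only):

import numpy as np

bs, uniq_max, seq_max, h = 2, 3, 5, 4
final_state = np.random.randn(bs, uniq_max, h)
seq_index = np.stack([
    np.repeat(np.arange(bs)[:, None], seq_max, axis=1),  # batch coordinate
    np.random.randint(0, uniq_max, size=(bs, seq_max)),  # node coordinate
], axis=-1)                                              # [bs, seq_max, 2]

seq = final_state[seq_index[..., 0], seq_index[..., 1]]  # [bs, seq_max, h]
assert seq.shape == (bs, seq_max, h)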
Example #27
 def to_2d(t_3d):
     t_2d = L.gather_nd(t_3d, pad_idx)
     return t_2d
Example #28
 def gather(x, indices, batch_pos):
     topk_coordinates = fluid.layers.stack([batch_pos, indices], axis=2)
     return layers.gather_nd(x, topk_coordinates)
Example #29
    def __call__(self, kernel_preds, cls_preds, mask_protos,
                 batch_gt_objs_tensors, batch_gt_clss_tensors,
                 batch_gt_masks_tensors, batch_gt_pos_idx_tensors):
        '''
        :param kernel_preds:  each element has shape [N, 256, seg_num_grid, seg_num_grid], the predicted convolution kernel of each grid cell. Ordered from small to large receptive field.
        :param cls_preds:     each element has shape [N,  80, seg_num_grid, seg_num_grid], the predicted class scores of each grid cell, before sigmoid() activation. From small to large receptive field.
        :param mask_protos:   [bs, 256, s4, s4]   mask prototypes
        :param batch_gt_objs_tensors:   each element has shape [N, seg_num_grid, seg_num_grid, 1], the objectness of each grid cell. From small to large receptive field.
        :param batch_gt_clss_tensors:   each element has shape [N, seg_num_grid, seg_num_grid, 80], the one-hot ground-truth class of each grid cell. From small to large receptive field.
        :param batch_gt_masks_tensors:     each element has shape [N, -1, s4, s4], the ground-truth masks. From small to large receptive field.
        :param batch_gt_pos_idx_tensors:   each element has shape [N, -1, 3], the indices of positive samples. From small to large receptive field.
        :return:
        '''

        batch_size = self.batch_size
        num_layers = len(kernel_preds)

        # ================= compute the losses =================
        num_ins = 0.  # number of positive samples in this batch
        loss_clss, loss_masks = [], []
        for bid in range(batch_size):
            for lid in range(num_layers):
                # ================ mask loss ======================
                mask_proto = mask_protos[bid]  # [256, s4, s4]   mask prototypes produced for this image
                kernel_pred = kernel_preds[lid][
                    bid]  # [256, seg_num_grid, seg_num_grid]   per-grid predicted kernels (the "mask coefficients" in YOLACT)
                kernel_pred = L.transpose(
                    kernel_pred, perm=[1, 2, 0]
                )  # [seg_num_grid, seg_num_grid, 256]   per-grid predicted kernels (the "mask coefficients" in YOLACT)

                gt_objs = batch_gt_objs_tensors[lid][
                    bid]  # [seg_num_grid, seg_num_grid, 1]
                gt_masks = batch_gt_masks_tensors[lid][bid]  # [-1, s4, s4]
                pmidx = batch_gt_pos_idx_tensors[lid][bid]  # [-1, 3]
                gt_objs.stop_gradient = True
                gt_masks.stop_gradient = True
                pmidx.stop_gradient = True

                idx_sum = L.reduce_sum(pmidx, dim=1)
                keep = L.where(idx_sum > -1)
                keep = L.reshape(keep, (-1, ))
                keep.stop_gradient = True
                pmidx = L.gather(pmidx, keep)  # [M, 3]

                yx_idx = pmidx[:, :2]  # [M, 2]
                m_idx = pmidx[:, 2]  # [M, ]
                yx_idx.stop_gradient = True
                m_idx.stop_gradient = True

                # gather them out
                gt_obj = L.gather_nd(gt_objs,
                                     yx_idx)  # [M, 1]        whether each sample is a true positive
                pos_krn = L.gather_nd(kernel_pred,
                                      yx_idx)  # [M, 256]      kernels (mask coefficients) of the positive samples
                gt_mask = L.gather(gt_masks, m_idx)  # [M, s4, s4]   ground-truth masks

                # number of positive samples
                num_ins += L.reduce_sum(gt_obj)

                # generate the predicted masks
                mask_proto = L.transpose(mask_proto, perm=[1, 2,
                                                           0])  # [s4, s4, 256]
                masks = L.matmul(mask_proto, pos_krn,
                                 transpose_y=True)  # [s4, s4, M]
                masks = L.sigmoid(masks)  # [s4, s4, M]
                masks = L.transpose(masks, perm=[2, 0, 1])  # [M, s4, s4]
                loss_mask = self.dice_loss(masks, gt_mask, gt_obj)
                loss_masks.append(loss_mask)

                # ================ Classification loss: sigmoid focal loss ======================
                gamma = self.loss_gamma
                alpha = self.loss_alpha
                pred_conf = cls_preds[lid][bid]  # [80, seg_num_grid, seg_num_grid]    logits, before sigmoid()
                pred_conf = L.transpose(pred_conf, perm=[1, 2, 0])  # [seg_num_grid, seg_num_grid, 80]
                pred_conf = L.sigmoid(pred_conf)  # [seg_num_grid, seg_num_grid, 80]    after sigmoid()
                gt_clss = batch_gt_clss_tensors[lid][bid]  # [seg_num_grid, seg_num_grid, 80]    ground-truth one-hot classes
                gt_clss.stop_gradient = True
                pos_loss = gt_clss * (0 - L.log(pred_conf + 1e-9)) * L.pow(
                    1 - pred_conf, gamma) * alpha
                neg_loss = (
                    1.0 - gt_clss) * (0 - L.log(1 - pred_conf + 1e-9)) * L.pow(
                        pred_conf, gamma) * (1 - alpha)
                focal_loss = pos_loss + neg_loss
                focal_loss = L.reduce_sum(focal_loss, dim=[0, 1])
                loss_clss.append(focal_loss)
        loss_masks = L.concat(loss_masks, axis=0)
        loss_masks = L.reduce_sum(loss_masks) * self.ins_loss_weight
        loss_masks = loss_masks / L.elementwise_max(
            L.ones((1, ), dtype='float32'), num_ins)

        loss_clss = L.concat(loss_clss, axis=0)
        loss_clss = L.reduce_sum(loss_clss) * self.clss_loss_weight
        loss_clss = loss_clss / L.elementwise_max(
            L.ones((1, ), dtype='float32'), num_ins)

        loss_all = {"loss_masks": loss_masks, "loss_clss": loss_clss}
        return loss_all
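`self.dice_loss` is called above but not shown in this example. A minimal sketch of a SOLO-style soft dice loss, masked by `gt_obj` so that non-positive rows contribute zero, might look like the following; this is an assumption about the implementation, not the source's code.

    # Hypothetical dice_loss sketch (SOLO-style soft dice); not the source's implementation.
    def dice_loss(self, pred_masks, gt_masks, gt_obj):
        # pred_masks, gt_masks: [M, s4, s4]; gt_obj: [M, 1], 1.0 for true positives.
        a = L.reduce_sum(pred_masks * gt_masks, dim=[1, 2])             # [M, ]
        b = L.reduce_sum(pred_masks * pred_masks, dim=[1, 2]) + 0.001
        c = L.reduce_sum(gt_masks * gt_masks, dim=[1, 2]) + 0.001
        dice = (2.0 * a) / (b + c)                                      # soft dice coefficient, [M, ]
        return (1.0 - dice) * L.reshape(gt_obj, (-1, ))                 # zero out non-positives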
Exemplo n.º 30
0
    def __compute_graph_bias(q, graph_attn_mask, pos_win):
        """
        :param q: (batch_size, n_heads, query_len, dim_per_head)
        :param graph_attn_mask: (batch_size, n_head, key_s_len, key_s_len)
        :param pos_win: window size; the Gaussian re-weighting below uses a standard deviation of pos_win / 2.
        :return:
        """
        # (batch_size, n_heads, query_len, dim_per_head)
        pos_v = layers.fc(input=q,
                          size=d_value,
                          num_flatten_dims=3,
                          param_attr=fluid.ParamAttr(
                              name=name + '_pos_fc.w_0',
                              initializer=param_initializer),
                          bias_attr=name + '_pos_fc.b_0')

        # (batch_size, n_heads, query_len, 1)
        pos_s = layers.fc(input=layers.tanh(pos_v),
                          size=1,
                          num_flatten_dims=3,
                          param_attr=fluid.ParamAttr(
                              name=name + '_pos_score_fc.w_0',
                              initializer=param_initializer),
                          bias_attr=False)

        # (batch_size, n_heads, query_len, 1): predicted focus position in [0, key_s_len - 1]
        pos = layers.sigmoid(pos_s) * (key_s_len - 1)

        # (batch_size, n_heads, query_len, 1)
        pos_up = layers.cast(layers.ceil(pos), dtype='int64')
        # print("pos_up.shape = %s" % str(pos_up.shape))
        pos_down = layers.cast(layers.floor(pos), dtype='int64')
        # print("pos_down.shape = %s" % str(pos_down.shape))

        batch_ind = layers.range(0, layers.cast(batch_size, dtype='int64'), 1,
                                 'int64')
        # print("batch_ind.shape = %s" % str(batch_ind.shape))
        batch_ind = layers.unsqueeze(batch_ind,
                                     axes=[1, 2, 3])  # (batch_size, 1, 1, 1)
        batch_ind = layers.expand(
            batch_ind, expand_times=[1, n_head, query_len,
                                     1])  # (batch_size, n_heads, query_len, 1)
        # print("batch_ind.shape = %s" % str(batch_ind.shape))

        head_ind = layers.range(0, n_head, 1, 'int64')
        # print("head_ind.shape = %s" % str(head_ind.shape))
        head_ind = layers.unsqueeze(head_ind, axes=[0, 2,
                                                    3])  # (1, n_heads, 1, 1)
        head_ind = layers.expand(head_ind,
                                 expand_times=[batch_size, 1, query_len, 1])
        # print("head_ind.shape = %s" % str(head_ind.shape))

        query_ind = layers.range(0, layers.cast(query_len, dtype='int64'), 1,
                                 'int64')
        # print("query_ind.shape = %s" % str(query_ind.shape))
        query_ind = layers.unsqueeze(query_ind,
                                     axes=[0, 1, 3])  # (1, 1, query_len, 1)
        query_ind = layers.expand(query_ind,
                                  expand_times=[batch_size, n_head, 1, 1])
        # print("query_ind.shape = %s" % str(query_ind.shape))

        # (batch_size, n_heads, query_len, 4)
        pos_up_ind = layers.concat(
            input=[batch_ind, head_ind, query_ind, pos_up], axis=-1)
        # print("pos_up_ind.shape = %s" % str(pos_up_ind.shape))
        pos_up_ind.stop_gradient = True
        pos_down_ind = layers.concat(
            input=[batch_ind, head_ind, query_ind, pos_down], axis=-1)
        # print("pos_down_ind.shape = %s" % str(pos_down_ind.shape))
        pos_down_ind.stop_gradient = True

        # (batch_size, n_heads, query_len, key_s_len, key_s_len)
        graph_attn_mask = layers.unsqueeze(graph_attn_mask, axes=[2])
        # print("graph_attn_mask.shape = %s" % str(graph_attn_mask.shape))
        graph_attn_mask = layers.expand(graph_attn_mask,
                                        expand_times=[1, 1, query_len, 1, 1])
        # print("graph_attn_mask.shape = %s" % str(graph_attn_mask.shape))

        # (batch_size, n_heads, query_len, key_s_len)
        graph_attn_mask_up = layers.gather_nd(input=graph_attn_mask,
                                              index=pos_up_ind)
        graph_attn_mask_down = layers.gather_nd(input=graph_attn_mask,
                                                index=pos_down_ind)

        # print("graph_attn_mask_up.shape = %s" % str(graph_attn_mask_up.shape))
        # print("graph_attn_mask_down.shape = %s" % str(graph_attn_mask_down.shape))
        # print("pos_up.shape = %s" % str(pos_up.shape))
        # print("pos_down.shape = %s" % str(pos_down.shape))

        # linearly combine up and down (batch_size, n_heads, query_len, key_s_len)
        graph_attn_mask_select = graph_attn_mask_up * (1.0 - (layers.cast(pos_up, dtype='float32') - pos)) + \
                                 graph_attn_mask_down * (1.0 - (pos - layers.cast(pos_down, dtype='float32')))
        # print("graph_attn_mask_select.shape = %s" % str(graph_attn_mask_select.shape))
        # re-weight the attention score with gaussian weights
        gaussian_w = (
            -0.5 * graph_attn_mask_select * graph_attn_mask_select) / (
                (0.5 * pos_win)**2)  # [batch, n_heads, query_len, key_s_len]
        # print("gaussian_w.shape = %s" % str(gaussian_w.shape))

        return gaussian_w
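For context, a sketch of how the returned bias is typically consumed (the caller shown here is an assumption; `k`, `v`, and `d_key` are illustrative names): `gaussian_w` is added to the scaled dot-product logits before the softmax, so keys near the predicted focus position receive higher attention weight.

        # Hypothetical caller; `k`, `v` and `d_key` are illustrative assumptions.
        graph_bias = __compute_graph_bias(q, graph_attn_mask, pos_win)
        product = layers.matmul(x=q, y=k, transpose_y=True,
                                alpha=d_key**-0.5)        # [B, n_heads, q_len, k_len]
        weights = layers.softmax(product + graph_bias)    # Gaussian bias shifts attention
        out = layers.matmul(weights, v)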