def concat_coord(x):
    ins_feat = x  # [N, c, h, w]

    batch_size = L.shape(x)[0]
    h = L.shape(x)[2]
    w = L.shape(x)[3]
    float_h = L.cast(h, 'float32')
    float_w = L.cast(w, 'float32')

    y_range = L.range(0., float_h, 1., dtype='float32')  # [h, ]
    y_range = 2.0 * y_range / (float_h - 1.0) - 1.0
    x_range = L.range(0., float_w, 1., dtype='float32')  # [w, ]
    x_range = 2.0 * x_range / (float_w - 1.0) - 1.0
    x_range = L.reshape(x_range, (1, -1))  # [1, w]
    y_range = L.reshape(y_range, (-1, 1))  # [h, 1]
    x = L.expand(x_range, [h, 1])  # [h, w]
    y = L.expand(y_range, [1, w])  # [h, w]

    x = L.reshape(x, (1, 1, h, w))  # [1, 1, h, w]
    y = L.reshape(y, (1, 1, h, w))  # [1, 1, h, w]
    x = L.expand(x, [batch_size, 1, 1, 1])  # [N, 1, h, w]
    y = L.expand(y, [batch_size, 1, 1, 1])  # [N, 1, h, w]

    ins_kernel_feat = L.concat([ins_feat, x, y], axis=1)  # [N, c+2, h, w]

    return ins_kernel_feat
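
For reference, a minimal NumPy sketch of the same CoordConv-style coordinate channels (both axes normalized to [-1, 1]); the function and variable names below are illustrative, not part of the original snippet:

import numpy as np

def concat_coord_np(x):
    # x: [N, c, h, w]; append one x-coordinate and one y-coordinate channel in [-1, 1]
    n, _, h, w = x.shape
    ys = np.linspace(-1.0, 1.0, h, dtype=np.float32)      # [h]
    xs = np.linspace(-1.0, 1.0, w, dtype=np.float32)      # [w]
    grid_y, grid_x = np.meshgrid(ys, xs, indexing='ij')   # both [h, w]
    grid_x = np.broadcast_to(grid_x, (n, 1, h, w))
    grid_y = np.broadcast_to(grid_y, (n, 1, h, w))
    return np.concatenate([x, grid_x, grid_y], axis=1)    # [N, c+2, h, w]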
    def is_finished(self, step_idx, source_length, alive_log_probs, finished_scores, finished_in_finished):
        """
            is_finished
        """
        base_1 = layers.cast(source_length, 'float32') + 55.0
        base_1 /= 6.0
        max_length_penalty = layers.pow(base_1, self.alpha)

        flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
        lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs, [self.get_alive_index])
        
        lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty
        
        lowest_score_of_finished_in_finish = layers.reduce_min(finished_scores * finished_in_finished, dim=1)

        finished_in_finished = layers.cast(finished_in_finished, 'bool')
        lowest_score_of_finished_in_finish += \
                        ((1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1), 'float32')) * -INF)
        
        #print lowest_score_of_finished_in_finish
        bound_is_met = layers.reduce_all(layers.greater_than(lowest_score_of_finished_in_finish, 
                                                             lower_bound_alive_scores))

        decode_length = source_length + 50
        length_cond = layers.less_than(x=step_idx, y=decode_length)

        return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)
def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
                           name=''):
    """
    Add residual connection, layer normalization and dropout to the out tensor
    optionally according to the value of process_cmd.
    This will be used before or after multi-head attention and position-wise
    feed-forward networks.
    """
    for cmd in process_cmd:
        if cmd == "a":  # add residual connection
            out = out + prev_out if prev_out else out
        elif cmd == "n":  # add layer normalization
            out_dtype = out.dtype
            if out_dtype == fluid.core.VarDesc.VarType.FP16:
                out = layers.cast(x=out, dtype="float32")
            out = layers.layer_norm(
                out,
                begin_norm_axis=len(out.shape) - 1,
                param_attr=fluid.ParamAttr(
                    name=name + '_layer_norm_scale',
                    initializer=fluid.initializer.Constant(1.)),
                bias_attr=fluid.ParamAttr(
                    name=name + '_layer_norm_bias',
                    initializer=fluid.initializer.Constant(0.)))
            if out_dtype == fluid.core.VarDesc.VarType.FP16:
                out = layers.cast(x=out, dtype="float16")
        elif cmd == "d":  # add dropout
            if dropout_rate:
                out = layers.dropout(
                    out,
                    dropout_prob=dropout_rate,
                    dropout_implementation="upscale_in_train",
                    is_test=False)
    return out
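
In Paddle's transformer-style models this helper is usually bound into pre-/post-process wrappers with functools.partial; a hedged usage sketch (the wrapper names follow that common convention and are not part of the snippet above):

from functools import partial

# "n"  -> layer_norm before a sub-layer; "da" -> dropout, then residual add after it.
pre_process_layer = partial(pre_post_process_layer, None)   # no residual input
post_process_layer = pre_post_process_layer

# e.g.
# normed = pre_process_layer(x, "n", name="pre_att")
# out = post_process_layer(x, attn_out, "da", dropout_rate=0.1, name="post_att")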
Example #4
def pre_post_process_layer(prev_out,
                           out,
                           process_cmd,
                           dropout_rate=0.,
                           name='',
                           is_test=False):

    for cmd in process_cmd:
        if cmd == "a":  # 两个输入相加
            out = out + prev_out if prev_out else out
        elif cmd == "n":  # 进行normalization
            out_type = out.dtype
            if out_type == fluid.core.VarDesc.VarType.FP16:
                out = layers.cast(x=out, dtype="float32")
            out = layers.layer_norm(
                out,
                begin_norm_axis=len(out.shape) - 1,
                param_attr=fluid.ParamAttr(
                    name=name + '_layer_norm_scale',
                    initializer=fluid.initializer.Constant(1.)),
                bias_attr=fluid.ParamAttr(
                    name=name + '_layer_norm_bias',
                    initializer=fluid.initializer.Constant(0.)))
            if out_type == fluid.core.VarDesc.VarType.FP16:
                out = layers.cast(x=out, dtype="float16")
        elif cmd == "d":  # 进行dropout
            if dropout_rate:
                out = layers.dropout(out,
                                     dropout_prob=dropout_rate,
                                     dropout_implementation="upscale_in_train",
                                     is_test=is_test)
    return out
Example #5
def input_true(x, condition, reverse=False):
    """input instances in x, while corrensponding condition is true

    Args:
        x (Variable): shape = [batch_size, ...]
        condition (Variable): shape = [batch_size, 1]
        reverse (Variable): Default is False

    Returns: TODO

    Raises: NULL
    """
    x_dtype = x.dtype
    if x_dtype == PaddleVarType.bool:
        x = layers.cast(x, dtype='int32')

    if condition.dtype != x.dtype:
        condition = layers.cast(condition, dtype=x.dtype)

    if reverse:
        condition = 1.0 - condition

    output = layers.elementwise_mul(x, condition, axis=0)

    if x_dtype == PaddleVarType.bool:
        output = layers.cast(output, dtype=x_dtype)

    return output
Example #6
        def __call__(self, msg):
            alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
            if attn_drop:
                old_h = alpha
                dropout = F.data(name='attn_drop', shape=[1], dtype="int64")
                u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'),
                                     min=0.,
                                     max=1.)
                keeped = L.cast(u > dropout, dtype="float32")
                self_attn_mask = L.scale(x=keeped,
                                         scale=10000.0,
                                         bias=-1.0,
                                         bias_after_scale=False)
                n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads,
                                                axis=1)
                n_head_self_attn_mask.stop_gradient = True
                alpha = n_head_self_attn_mask + alpha
                alpha = L.lod_reset(alpha, old_h)

            h = msg["v"]
            alpha = paddle_helper.sequence_softmax(alpha)

            self.alpha = alpha
            old_h = h
            h = h * alpha
            h = L.lod_reset(h, old_h)
            h = L.sequence_pool(h, "sum")

            if concat:
                h = L.reshape(h, [-1, num_heads * hidden_size])
            else:
                h = L.reduce_mean(h, dim=1)
            return h
Example #7
    def elementwise_op_wrapper(cls, op, x, y, *args, force=False, axis=-1, act=None, name=None):
        """wrapper of elementwise op

        Args:
            op (TYPE): NULL
            x (TYPE): NULL
            y (TYPE): NULL
            *args (TYPE): NULL
            force (TYPE): Default is False
            axis (TYPE): Default is -1
            act (TYPE): Default is None
            name (TYPE): Default is None

        Returns: TODO

        Raises: NULL
        """
        x_dtype = x.dtype
        if x_dtype == PaddleVarType.bool:
            x = layers.cast(x, dtype='int32')
        tmp = x
        extras = [y] + list(args)
        for var in extras:
            if var.dtype != tmp.dtype and force:
                var = layers.cast(var, dtype=x.dtype)
            elif var.dtype == PaddleVarType.bool and x_dtype == PaddleVarType.bool:
                var = layers.cast(var, dtype=x.dtype)
            tmp = op(x=tmp, y=var, axis=axis, act=act, name=name)
        if x_dtype == PaddleVarType.bool:
            tmp = layers.cast(tmp, dtype=x_dtype)
        return tmp
Example #8
def mask_fill(input, mask, value):
    """Fill value to input according to mask
    
    Args:
        input: input matrix
        mask: mask matrix
        value: Fill value

    Returns:
        output

    >>> input
    [
        [1, 2, 3],
        [4, 5, 6]
    ]
    >>> mask
    [
        [True, True, False],
        [True, False, False]
    ]
    >>> mask_fill(input, mask, 0)
    [
        [0, 0, 3],
        [0, 5, 6]
    ]
    """
    return input * layers.cast(layers.logical_not(
        mask), input.dtype) + layers.cast(mask, input.dtype) * value
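
The same arithmetic in plain NumPy, as a quick check that positions where mask is True receive the fill value (values taken from the docstring example):

import numpy as np

inp = np.array([[1, 2, 3], [4, 5, 6]])
mask = np.array([[True, True, False], [True, False, False]])
out = inp * (~mask).astype(inp.dtype) + mask.astype(inp.dtype) * 0
# out -> [[0, 0, 3],
#         [0, 5, 6]]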
Example #9
    def _build_position_ids(self, src_ids):
        src_shape = L.shape(src_ids)
        src_seqlen = src_shape[1]
        src_batch = src_shape[0]

        slot_seqlen = self.slot_seqlen

        num_b = (src_seqlen / slot_seqlen) - 1
        a_position_ids = L.reshape(L.range(0, slot_seqlen, 1, dtype='int32'),
                                   [1, slot_seqlen, 1],
                                   inplace=True)  # [1, slot_seqlen, 1]
        a_position_ids = L.expand(
            a_position_ids, [src_batch, 1, 1])  # [B, slot_seqlen, 1]

        zero = L.fill_constant([1], dtype='int64', value=0)
        input_mask = L.cast(L.equal(src_ids[:, :slot_seqlen], zero),
                            "int32")  # assume pad id == 0 [B, slot_seqlen, 1]
        a_pad_len = L.reduce_sum(input_mask, 1)  # [B, 1, 1]

        b_position_ids = L.reshape(L.range(slot_seqlen,
                                           2 * slot_seqlen,
                                           1,
                                           dtype='int32'), [1, slot_seqlen, 1],
                                   inplace=True)  # [1, slot_seqlen, 1]
        b_position_ids = L.expand(
            b_position_ids,
            [src_batch, num_b, 1])  # [B, slot_seqlen * num_b, 1]
        b_position_ids = b_position_ids - a_pad_len  # [B, slot_seqlen * num_b, 1]

        position_ids = L.concat([a_position_ids, b_position_ids], 1)
        position_ids = L.cast(position_ids, 'int64')
        position_ids.stop_gradient = True
        return position_ids
    def build_program(self, dtype):
        with fluid.program_guard(self.main_program, self.startup_program):
            self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 2)
            self.feed_vars.append(
                fluid.data(
                    name="data2", shape=[128, 128], dtype=dtype))

            # subgraph with 2 op nodes
            tmp_0 = self.feed_vars[0] * self.feed_vars[1]
            tmp_1 = layers.cast(tmp_0, dtype="float16")
            zero = layers.fill_constant(shape=[128], dtype="float16", value=0)
            # TODO(xreki): fix precision problem when using softmax of float16.
            # tmp_2 = layers.softmax(tmp_1)
            tmp_2 = layers.elementwise_add(tmp_1, zero)
            tmp_3 = layers.mul(tmp_0, self.feed_vars[2])
            # subgraph with 4 op nodes
            tmp_3 = layers.cast(tmp_2, dtype="float16")
            tmp_4 = layers.relu(tmp_1 + tmp_3)
            tmp_5 = layers.cast(tmp_4, dtype=dtype)
            tmp_3 = layers.cast(tmp_2, dtype=dtype)

        self.append_gradients(tmp_5)

        self.num_fused_ops = 4
        self.fetch_list = [tmp_5, self.grad(tmp_0)]
Example #11
        def build_position_ids(src_ids, dst_ids):
            src_shape = L.shape(src_ids)
            src_batch = src_shape[0]
            src_seqlen = src_shape[1]
            dst_seqlen = src_seqlen - 1 # without cls

            src_position_ids = L.reshape(
                L.range(
                    0, src_seqlen, 1, dtype='int32'), [1, src_seqlen, 1],
                inplace=True) # [1, src_seqlen, 1]
            src_position_ids = L.expand(src_position_ids, [src_batch, 1, 1]) # [B, src_seqlen, 1]
            zero = L.fill_constant([1], dtype='int64', value=0)
            input_mask = L.cast(L.equal(src_ids, zero), "int32")  # assume pad id == 0; [B, src_seqlen, 1]
            src_pad_len = L.reduce_sum(input_mask, 1, keep_dim=True) # [B, 1, 1]

            dst_position_ids = L.reshape(
                L.range(
                    src_seqlen, src_seqlen+dst_seqlen, 1, dtype='int32'), [1, dst_seqlen, 1],
                inplace=True) # [1, dst_seqlen, 1]
            dst_position_ids = L.expand(dst_position_ids, [src_batch, 1, 1]) # [B, dst_seqlen, 1]
            dst_position_ids = dst_position_ids - src_pad_len # [B, dst_seqlen, 1]

            position_ids = L.concat([src_position_ids, dst_position_ids], 1)
            position_ids = L.cast(position_ids, 'int64')
            position_ids.stop_gradient = True
            return position_ids
            def grow_top_k(step_idx, alive_seq, alive_log_prob, parant_idx):
                pre_ids = alive_seq

                dec_step_emb = layers.embedding(
                    input=pre_ids,
                    size=[self.tar_vocab_size, self.hidden_size],
                    dtype='float32',
                    is_sparse=False,
                    param_attr=fluid.ParamAttr(
                        name='target_embedding',
                        initializer=fluid.initializer.UniformInitializer(
                            low=-self.init_scale, high=self.init_scale)))

                dec_att_out, new_hidden_array, new_cell_array = decoder_step(
                    dec_step_emb, pre_feed, pre_hidden_array, pre_cell_array,
                    enc_memory)

                projection = layers.matmul(dec_att_out, softmax_weight)

                logits = layers.softmax(projection)
                current_log = layers.elementwise_add(x=layers.log(logits),
                                                     y=alive_log_prob,
                                                     axis=0)
                base_1 = layers.cast(step_idx, 'float32') + 6.0
                base_1 /= 6.0
                length_penalty = layers.pow(base_1, alpha)

                len_pen = layers.pow(
                    ((5. + layers.cast(step_idx + 1, 'float32')) / 6.), alpha)

                current_log = layers.reshape(current_log, shape=[1, -1])

                current_log = current_log / length_penalty
                topk_scores, topk_indices = layers.topk(input=current_log,
                                                        k=beam_size)

                topk_scores = layers.reshape(topk_scores, shape=[-1])

                topk_log_probs = topk_scores * length_penalty

                generate_id = layers.reshape(topk_indices,
                                             shape=[-1]) % self.tar_vocab_size

                selected_beam = layers.reshape(
                    topk_indices, shape=[-1]) // self.tar_vocab_size

                topk_finished = layers.equal(generate_id, eos_ids)

                topk_finished = layers.cast(topk_finished, 'float32')

                generate_id = layers.reshape(generate_id, shape=[-1, 1])

                pre_tokens_list = layers.gather(tokens, selected_beam)

                full_tokens_list = layers.concat(
                    [pre_tokens_list, generate_id], axis=1)


                return full_tokens_list, topk_log_probs, topk_scores, topk_finished, selected_beam, generate_id, \
                        dec_att_out, new_hidden_array, new_cell_array
def batch_scatter(ref, indices, updates, in_place=False, overwrite=False):
    """Scatter updates to ref, according to corrensponding index in indices
    in each batch. Currently, it only support 2d Tensor.

    Args:
        ref (Variable): with shape [batch_size, ...]
        indices (Variable): with shape [batch_size, 1]
        updates (Variable): with shape [batch_size]
        in_place (bool): if True, scatter result will be assign to ref. otherwise,
                         a new Tensor will be returned. Default is False.
        overwrite (bool): if True, scatter will over write corrensponding elements.
                          Default is False.

    Returns: TODO

    Raises: NULL

    Examples:
        ref
            [[1, 1, 1],
             [1, 1, 1]]
        indices
            [[2], [1]]
        updates
            [2, 3]

        return
            [[1, 1, 2],
             [1, 3, 1]]

    """
    ref_dtype = ref.dtype
    if ref_dtype not in PaddleVarType.floats:
        ref_in = layers.cast(ref, dtype='float32')
    else:
        ref_in = ref

    if updates.dtype != ref_in.dtype:
        updates = layers.cast(updates, dtype=ref_in.dtype)

    batch_size = layers.cast(layers.shape(ref_in)[0], dtype=indices.dtype)
    zero = layers.fill_constant(shape=[1], dtype=indices.dtype, value=0)
    one = layers.fill_constant(shape=[1], dtype=indices.dtype, value=1)
    batch_indices = layers.unsqueeze(
        layers.range(zero, batch_size, one, dtype=indices.dtype), [1])
    coord = layers.concat([batch_indices, indices], axis=1)
    if overwrite:
        mask = layers.gather_nd(ref_in, coord)
        mask = layers.elementwise_sub(layers.zeros_like(mask), mask)
        ref_in = layers.scatter_nd_add(ref_in, coord, mask)

    output = layers.scatter_nd_add(ref_in, coord, updates)
    if ref_dtype not in PaddleVarType.floats:
        output = layers.cast(output, dtype=ref_dtype)
    if in_place:
        layers.assign(output, ref)
        return ref
    else:
        return output
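
A NumPy sketch of the per-batch scatter semantics, with overwrite=True reproducing the docstring example (the function name and shapes are illustrative):

import numpy as np

def batch_scatter_np(ref, indices, updates, overwrite=False):
    # ref: [batch_size, n], indices: [batch_size, 1], updates: [batch_size]
    out = ref.copy()
    rows = np.arange(ref.shape[0])
    cols = indices[:, 0]
    if overwrite:
        out[rows, cols] = updates      # replace the addressed elements
    else:
        out[rows, cols] += updates     # accumulate into the addressed elements
    return out

ref = np.ones((2, 3), dtype=np.int64)
print(batch_scatter_np(ref, np.array([[2], [1]]), np.array([2, 3]), overwrite=True))
# [[1 1 2]
#  [1 3 1]]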
Example #14
    def _debug_summary(self, input_mask):
        # histogram
        seqlen_before_pad = L.cast(L.reduce_sum(input_mask, dim=1),
                                   dtype='float32')
        seqlen_after_pad = L.reduce_sum(
            L.cast(L.zeros_like(input_mask), dtype='float32') + 1.0, dim=1)
        pad_num = seqlen_after_pad - seqlen_before_pad
        pad_rate = pad_num / seqlen_after_pad
Example #15
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    _, vocab_size = logits.shape

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = L.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # the first step only considers beam 0
    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat([L.where(idx != -1)[:, :1],
                           L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape
    )  #[gather new beam state according to new beam id]
    #log.debug(gather_idx.numpy())
    #log.debug(state.finished.numpy())
    #log.debug(next_finished.numpy())

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    #log.debug(next_word_id.numpy())
    #log.debug(next_beam_id.numpy())
    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
Example #16
def unsqueeze(input, axes):
    """Increase the number of axes of input"""
    input_dtype = input.dtype
    if input_dtype == VarDesc.VarType.BOOL:
        input = layers.cast(input, 'int32')
    output = layers.unsqueeze(input, axes=axes)
    if input_dtype == VarDesc.VarType.BOOL:
        output = layers.cast(output, 'bool')
    return output
Example #17
def get_enc_bias(source_inputs):
    """
        get_enc_bias
    """
    source_inputs = layers.cast(source_inputs, 'float32')
    emb_sum = layers.reduce_sum(layers.abs(source_inputs), dim=-1)
    zero = layers.fill_constant([1], 'float32', value=0) 
    bias = layers.cast(layers.equal(emb_sum, zero), 'float32') * -1e9
    return layers.unsqueeze(layers.unsqueeze(bias, axes=[1]), axes=[1])
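
A NumPy sketch of the resulting attention bias: encoder positions whose embedding vector sums to zero (i.e. padding) receive -1e9, all others 0 (shapes and names are illustrative):

import numpy as np

def get_enc_bias_np(source_inputs):
    # source_inputs: [batch, src_len, emb_dim]; pad positions are all-zero vectors
    emb_sum = np.abs(source_inputs.astype(np.float32)).sum(axis=-1)   # [batch, src_len]
    bias = (emb_sum == 0).astype(np.float32) * -1e9                   # [batch, src_len]
    return bias[:, None, None, :]                                     # [batch, 1, 1, src_len]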
Example #18
def matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian', sigma=2.0, sum_masks=None):
    """Matrix NMS for multi-class masks.

    Args:
        seg_masks (Tensor): shape (n, h, w), binary masks (0/1)
        cate_labels (Tensor): shape (n), mask labels in descending order of score
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str):  'linear' or 'gaussian'
        sigma (float): std in the gaussian method
        sum_masks (Tensor):  shape (n, ), area of each of the n masks

    Returns:
        Tensor: cate_scores_update, tensors of shape (n)
    """
    n_samples = L.shape(cate_labels)[0]   # number of objects
    seg_masks = L.reshape(seg_masks, (n_samples, -1))   # [n, h*w]
    # inter.
    inter_matrix = L.matmul(seg_masks, seg_masks, transpose_y=True)   # [n, n] masks times their own transpose: pairwise intersection areas
    # union.
    sum_masks_x = L.expand(L.reshape(sum_masks, (1, -1)), [n_samples, 1])     # [n, n]  sum_masks repeated over n rows
    # iou.
    iou_matrix = inter_matrix / (sum_masks_x + L.transpose(sum_masks_x, [1, 0]) - inter_matrix)
    rows = L.range(0, n_samples, 1, 'int32')
    cols = L.range(0, n_samples, 1, 'int32')
    rows = L.expand(L.reshape(rows, (1, -1)), [n_samples, 1])
    cols = L.expand(L.reshape(cols, (-1, 1)), [1, n_samples])
    tri_mask = L.cast(rows > cols, 'float32')
    iou_matrix = tri_mask * iou_matrix   # [n, n]   keep only the strict upper triangle

    # label_specific matrix.
    cate_labels_x = L.expand(L.reshape(cate_labels, (1, -1)), [n_samples, 1])     # [n, n]  cate_labels repeated over n rows
    label_matrix = L.cast(L.equal(cate_labels_x, L.transpose(cate_labels_x, [1, 0])), 'float32')
    label_matrix = tri_mask * label_matrix   # [n, n]   keep only the strict upper triangle

    # IoU compensation
    compensate_iou = L.reduce_max(iou_matrix * label_matrix, dim=0)
    compensate_iou = L.expand(L.reshape(compensate_iou, (1, -1)), [n_samples, 1])     # [n, n]
    compensate_iou = L.transpose(compensate_iou, [1, 0])      # [n, n]

    # IoU decay
    decay_iou = iou_matrix * label_matrix

    # # matrix nms
    if kernel == 'gaussian':
        decay_matrix = L.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = L.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient = L.reduce_min((decay_matrix / compensate_matrix), dim=0)
    elif kernel == 'linear':
        decay_matrix = (1-decay_iou)/(1-compensate_iou)
        decay_coefficient = L.reduce_min(decay_matrix, dim=0)
    else:
        raise NotImplementedError

    # update the score.
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update
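
For readers less familiar with the fluid API, a compact NumPy sketch of the same Matrix NMS decay with the gaussian kernel, assuming masks are already sorted by descending score (names are illustrative):

import numpy as np

def matrix_nms_np(seg_masks, cate_labels, cate_scores, sigma=2.0):
    n = seg_masks.shape[0]
    flat = seg_masks.reshape(n, -1).astype(np.float32)            # [n, h*w]
    areas = flat.sum(axis=1)
    inter = flat @ flat.T                                         # pairwise intersection areas
    iou = inter / (areas[None, :] + areas[:, None] - inter)
    upper = np.triu(np.ones((n, n), dtype=np.float32), k=1)       # strict upper triangle
    same_label = (cate_labels[None, :] == cate_labels[:, None]).astype(np.float32)
    decay_iou = iou * upper * same_label                          # IoU with higher-scored same-class masks
    comp = decay_iou.max(axis=0)                                  # comp[i]: highest IoU of mask i with any higher-scored same-class mask
    decay = np.exp(-sigma * decay_iou ** 2) / np.exp(-sigma * comp[:, None] ** 2)
    return cate_scores * decay.min(axis=0)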
Example #19
def beam_search_step(state, logits, eos_id, beam_width, is_first_step,
                     length_penalty):
    """logits.shape == [B*W, V]"""
    beam_size, vocab_size = logits.shape  # batch size is 1 in this hub module, so the first dim (bsz * beam_width) equals beam_size
    logits_np = logits.numpy()
    for i in range(beam_size):
        logits_np[i][17963] = 0  # suppress the [UNK] token (id 17963) by zeroing its logit
    logits = D.to_variable(logits_np)

    bsz, beam_width = state.log_probs.shape
    onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size),
                        'int64')  #[1, V]

    probs = L.log(L.softmax(logits))  #[B*W, V]
    probs = mask_prob(probs, onehot_eos, state.finished)  #[B*W, V]
    allprobs = L.reshape(state.log_probs, [-1, 1]) + probs  #[B*W, V]

    not_finished = 1 - L.reshape(state.finished, [-1, 1])  #[B*W,1]
    not_eos = 1 - onehot_eos
    length_to_add = not_finished * not_eos  #[B*W,V]
    alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add

    allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size])
    alllen = L.reshape(alllen, [-1, beam_width * vocab_size])
    allscore = hyp_score(allprobs, alllen, length_penalty)
    if is_first_step:
        allscore = L.reshape(
            allscore,
            [bsz, beam_width, -1])[:, 0, :]  # the first step only considers beam 0
    scores, idx = L.topk(allscore, k=beam_width)  #[B, W]
    next_beam_id = idx // vocab_size  #[B, W]
    next_word_id = idx % vocab_size

    gather_idx = L.concat([L.where(idx != -1)[:, :1],
                           L.reshape(idx, [-1, 1])], 1)
    next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape)
    next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape)

    gather_idx = L.concat(
        [L.where(next_beam_id != -1)[:, :1],
         L.reshape(next_beam_id, [-1, 1])], 1)
    next_finished = L.reshape(
        L.gather_nd(state.finished, gather_idx), state.finished.shape
    )  #[gather new beam state according to new beam id]

    next_finished += L.cast(next_word_id == eos_id, 'int64')
    next_finished = L.cast(next_finished > 0, 'int64')

    next_state = BeamSearchState(log_probs=next_probs,
                                 lengths=next_len,
                                 finished=next_finished)
    output = BeamSearchOutput(scores=scores,
                              predicted_ids=next_word_id,
                              beam_parent_ids=next_beam_id)

    return output, next_state
Example #20
def norm(param, dim, power):
    powered = F.pow(param, power)
    in_dtype = powered.dtype
    if in_dtype == fluid.core.VarDesc.VarType.FP16:
        powered = F.cast(powered, "float32")
    powered_norm = F.reduce_sum(powered, dim=dim, keep_dim=False)
    norm_ = F.pow(powered_norm, 1. / power)
    if in_dtype == fluid.core.VarDesc.VarType.FP16:
        norm_ = F.cast(norm_, "float16")
    return norm_
Example #21
    def build_model(self):
        node_features = self.graph_wrapper.node_feat["feat"]

        output = self.gcn(gw=self.graph_wrapper,
                          feature=node_features,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_1")
        output1 = output
        output = self.gcn(gw=self.graph_wrapper,
                          feature=output,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_2")
        output2 = output
        output = self.gcn(gw=self.graph_wrapper,
                          feature=output,
                          hidden_size=self.hidden_size,
                          activation="relu",
                          norm=self.graph_wrapper.node_feat["norm"],
                          name="gcn_layer_3")

        output = L.concat(input=[output1, output2, output], axis=-1)

        output, ratio_length = sag_pool(gw=self.graph_wrapper,
                                        feature=output,
                                        ratio=self.pooling_ratio,
                                        graph_id=self.graph_id,
                                        dataset=self.args.dataset_name,
                                        name="sag_pool_1")
        output = L.lod_reset(output, self.graph_wrapper.graph_lod)
        cat1 = L.sequence_pool(output, "sum")
        ratio_length = L.cast(ratio_length, dtype="float32")
        cat1 = L.elementwise_div(cat1, ratio_length, axis=-1)
        cat2 = L.sequence_pool(output, "max")
        output = L.concat(input=[cat2, cat1], axis=-1)

        output = L.fc(output, size=self.hidden_size, act="relu")
        output = L.dropout(output, dropout_prob=self.dropout_ratio)
        output = L.fc(output, size=self.hidden_size // 2, act="relu")
        output = L.fc(output,
                      size=self.num_classes,
                      act=None,
                      param_attr=fluid.ParamAttr(name="final_fc"))

        self.labels = L.cast(self.labels, dtype="float32")
        loss = L.sigmoid_cross_entropy_with_logits(x=output, label=self.labels)
        self.loss = L.mean(loss)
        pred = L.sigmoid(output)
        self.pred = L.argmax(x=pred, axis=-1)
        correct = L.equal(self.pred, self.labels_1dim)
        correct = L.cast(correct, dtype="int32")
        self.correct = L.reduce_sum(correct)
Example #22
def fluid_get_offset(seq_len):
    """
    args:
        seq_len: (-1)
    return:
        offset: the same shape as seq_len,
            cumsum(seq_len) - seq_len 
    """
    assert len(seq_len.shape) == 1
    csum = layers.cumsum(layers.cast(seq_len, 'float32'), exclusive=True)
    return layers.cast(csum, 'int64')
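
Equivalently, in NumPy the offset of each sequence is the exclusive cumulative sum of the lengths:

import numpy as np

seq_len = np.array([3, 2, 4])
offset = np.cumsum(seq_len) - seq_len   # exclusive cumsum -> array([0, 3, 5])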
Example #23
def uniq_edges(src, dst, num_nodes):
    sorted_dst = L.cast(dst, dtype="int64")
    sorted_src = L.cast(src, dtype="int64")
    num_nodes = L.cast(num_nodes, dtype="int64")
    edge_hash = sorted_dst * num_nodes + sorted_src
    edge_hash, _ = L.argsort(edge_hash)
    edge_hash, _ = L.unique(edge_hash, dtype="int64")
    sorted_src = L.elementwise_mod(edge_hash, num_nodes)
    sorted_dst = L.elementwise_div(edge_hash, num_nodes)
    sorted_src = L.cast(sorted_src, dtype="int32")
    sorted_dst = L.cast(sorted_dst, dtype="int32")
    return sorted_src, sorted_dst
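
A NumPy sketch of the same dedup-by-hashing idea: encode each (src, dst) pair as dst * num_nodes + src, take the unique hashes, then decode them again (the function name is illustrative):

import numpy as np

def uniq_edges_np(src, dst, num_nodes):
    edge_hash = dst.astype(np.int64) * num_nodes + src.astype(np.int64)
    edge_hash = np.unique(edge_hash)               # sorts and removes duplicate edges
    uniq_src = (edge_hash % num_nodes).astype(np.int32)
    uniq_dst = (edge_hash // num_nodes).astype(np.int32)
    return uniq_src, uniq_dst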
Example #24
    def forward(self, features):
        src_ids, sent_ids = features
        dtype = 'float16' if self.hparam['fp16'] else 'float32'
        zero = L.fill_constant([1], dtype='int64', value=0)
        input_mask = L.cast(L.logical_not(L.equal(src_ids, zero)), dtype) # assume pad id == 0
        #input_mask = L.unsqueeze(input_mask, axes=[2])
        d_shape = L.shape(src_ids)
        seqlen = d_shape[1]
        batch_size = d_shape[0]
        pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
        pos_ids = L.expand(pos_ids, [batch_size, 1])
        pos_ids = L.unsqueeze(pos_ids, axes=[2])
        pos_ids = L.cast(pos_ids, 'int64')
        pos_ids.stop_gradient = True
        input_mask.stop_gradient = True
        task_ids = L.zeros_like(src_ids) + self.hparam.task_id  # task ids are not used at the moment
        task_ids.stop_gradient = True

        bert = ErnieModel(
            src_ids=src_ids,
            position_ids=pos_ids,
            sentence_ids=sent_ids,
            task_ids=task_ids,
            input_mask=input_mask,
            config=self.hparam,
            use_fp16=self.hparam['fp16']
        )

        cls_feats = bert.get_pooled_output()

        cls_feats = L.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train"
        )

        logits = L.fc(
            input=cls_feats,
            size=self.hparam['num_label'],
            param_attr=F.ParamAttr(
                name="cls_out_w",
                initializer=F.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=F.ParamAttr(
                name="cls_out_b", initializer=F.initializer.Constant(0.))
        )

        propeller.summary.histogram('pred', logits)

        if self.mode is propeller.RunMode.PREDICT:
            probs = L.softmax(logits)
            return probs
        else:
            return logits
    def build_program(self, dtype):
        with fluid.program_guard(self.main_program, self.startup_program):
            self.feed_vars = self._prepare_feed_vars([2, 2], dtype, 2)

            tmp_0 = layers.elementwise_add(self.feed_vars[0], self.feed_vars[1])
            tmp_1 = layers.cast(tmp_0, dtype="float64")
            tmp_2 = layers.cast(tmp_1, dtype="float32")

        self.append_gradients(tmp_2)

        self.num_fused_ops = 2
        self.fetch_list = [tmp_2, self.grad(tmp_0)]
Example #26
    def forward(self, q, k, v, lengths, speaker_embed, start_index, 
                force_monotonic=False, prev_coeffs=None, window=None):
        # add position encoding as an inductive bias 
        if self.has_bias: # multi-speaker model
            omega_q = 2 * F.sigmoid(
                F.squeeze(self.q_pos_affine(speaker_embed), axes=[-1]))
            omega_k = 2 * self.omega_initial * F.sigmoid(F.squeeze(
                self.k_pos_affine(speaker_embed), axes=[-1]))
        else: # single-speaker case
            batch_size = q.shape[0]
            omega_q = F.ones((batch_size, ), dtype="float32")
            omega_k = F.ones((batch_size, ), dtype="float32") * self.omega_default
        q += self.position_encoding_weight * positional_encoding(q, start_index, omega_q)
        k += self.position_encoding_weight * positional_encoding(k, 0, omega_k)

        q, k, v = self.q_affine(q), self.k_affine(k), self.v_affine(v)
        activations = F.matmul(q, k, transpose_y=True)
        activations /= np.sqrt(self.attention_dim)

        if self.training:
            # mask the <pad> parts from the encoder
            mask = F.sequence_mask(lengths, dtype="float32")
            attn_bias = F.scale(1. - mask, -1000)
            activations += F.unsqueeze(attn_bias, [1])
        elif force_monotonic:
            assert window is not None
            backward_step, forward_step = window
            T_enc = k.shape[1]
            batch_size, T_dec, _ = q.shape

            # actually T_dec = 1 here
            alpha = F.fill_constant((batch_size, T_dec), value=0, dtype="int64") \
                   if prev_coeffs is None \
                   else F.argmax(prev_coeffs, axis=-1)
            backward = F.sequence_mask(alpha - backward_step, maxlen=T_enc, dtype="bool")
            forward = F.sequence_mask(alpha + forward_step, maxlen=T_enc, dtype="bool")
            mask = F.cast(F.logical_xor(backward, forward), "float32")
            # print("mask's shape:", mask.shape)
            attn_bias = F.scale(1. - mask, -1000)
            activations += attn_bias

        # softmax
        coefficients = F.softmax(activations, axis=-1)
        # context vector
        coefficients = F.dropout(coefficients, 1. - self.keep_prob,
                                 dropout_implementation='upscale_in_train')
        contexts = F.matmul(coefficients, v)
        # context normalization
        enc_lengths = F.cast(F.unsqueeze(lengths, axes=[1, 2]), "float32")
        contexts *= F.sqrt(enc_lengths)
        # out affine
        contexts = self.out_affine(contexts)
        return contexts, coefficients
Example #27
    def forward(self, features):
        def FC(inputs, name, i, act):
            return L.fc(inputs,
                        self.hidden_size,
                        act=act,
                        param_attr=F.ParamAttr(
                            name='%s.fc.w_%d' % (name, i),
                            initializer=F.initializer.XavierInitializer(
                                fan_in=self.hidden_size,
                                fan_out=self.hidden_size)),
                        bias_attr=F.ParamAttr(
                            name='%s.fc.b_%d' % (name, i),
                            initializer=F.initializer.Constant(0.)))

        title_ids, comment_ids = features

        embedding_attr = F.ParamAttr(
            name='emb',
            initializer=F.initializer.XavierInitializer(
                fan_in=self.vocab_size, fan_out=self.embedding_size))

        title_encoded = L.embedding(title_ids,
                                    [self.vocab_size, self.embedding_size],
                                    param_attr=embedding_attr)
        comment_encoded = L.embedding(comment_ids,
                                      [self.vocab_size, self.embedding_size],
                                      param_attr=embedding_attr)

        # Vsum
        zero = L.fill_constant(shape=[1], dtype='int64', value=0)
        title_pad = L.cast(L.logical_not(L.equal(title_ids, zero)), 'float32')
        comment_pad = L.cast(L.logical_not(L.equal(comment_ids, zero)),
                             'float32')

        title_encoded = L.reduce_sum(title_encoded * title_pad, dim=1)
        title_encoded = L.softsign(title_encoded)
        comment_encoded = L.reduce_sum(comment_encoded * comment_pad, dim=1)
        comment_encoded = L.softsign(comment_encoded)

        for i in range(self.num_layers):
            title_encoded = FC(title_encoded, 'title', i, 'tanh')

        for i in range(self.num_layers):
            comment_encoded = FC(comment_encoded, 'comment', i, 'tanh')

        score = L.reduce_sum(title_encoded * comment_encoded,
                             dim=1,
                             keep_dim=True) / np.sqrt(self.hidden_size)
        if self.mode is propeller.RunMode.PREDICT:
            probs = L.sigmoid(score)
            return probs
        else:
            return score
Example #28
    def ffffffffffffffffffff(self, pred, target):
        '''
        Input boxes are in (cx, cy, w, h) format.
        '''
        assert pred.shape[0] == target.shape[0]

        pred = L.reshape(pred, [-1, 4])
        target = L.reshape(target, [-1, 4])

        pred = L.cast(pred, 'float32')
        target = L.cast(target, 'float32')

        # top-left corner of the intersection
        tl = L.elementwise_max((pred[:, :2] - pred[:, 2:] / 2),
                               (target[:, :2] - target[:, 2:] / 2))
        # bottom-right corner of the intersection
        br = L.elementwise_min((pred[:, :2] + pred[:, 2:] / 2),
                               (target[:, :2] + target[:, 2:] / 2))

        area_p = paddle.prod(pred[:, 2:], 1)  # area of the predicted boxes
        area_g = paddle.prod(target[:, 2:], 1)  # area of the ground-truth boxes

        # does the intersection actually exist?
        # en = (tl < br).type(tl.type()).prod(dim=1)
        en = L.cast(tl < br, 'float32')
        en = paddle.prod(en, 1)  # 1.0 where the boxes overlap, else 0.0

        area_i = paddle.prod(br - tl, 1) * en
        area_u = area_p + area_g - area_i
        iou = (area_i) / (area_u + 1e-16)

        if self.loss_type == "iou":
            loss = 1 - iou**2
        elif self.loss_type == "giou":
            c_tl = L.elementwise_min((pred[:, :2] - pred[:, 2:] / 2),
                                     (target[:, :2] - target[:, 2:] / 2))
            c_br = L.elementwise_max((pred[:, :2] + pred[:, 2:] / 2),
                                     (target[:, :2] + target[:, 2:] / 2))
            area_c = paddle.prod(c_br - c_tl, 1)

            # clip area_c to [1e-16, inf)
            area_c = L.clip(area_c, 1e-16, np.inf)
            giou = iou - (area_c - area_u) / area_c
            # clip giou to [-1.0, 1.0]
            giou = L.clip(giou, -1.0, 1.0)
            loss = 1 - giou
        if self.reduction == "mean":
            loss = loss.mean()
        elif self.reduction == "sum":
            loss = loss.sum()

        return loss
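
For reference, the row-wise IoU for (cx, cy, w, h) boxes in plain NumPy (a sketch; the helper name and eps are illustrative):

import numpy as np

def iou_cxcywh_np(pred, target, eps=1e-16):
    # pred, target: [n, 4] boxes as (cx, cy, w, h); returns the IoU of matching rows
    p_tl, p_br = pred[:, :2] - pred[:, 2:] / 2, pred[:, :2] + pred[:, 2:] / 2
    t_tl, t_br = target[:, :2] - target[:, 2:] / 2, target[:, :2] + target[:, 2:] / 2
    wh = np.clip(np.minimum(p_br, t_br) - np.maximum(p_tl, t_tl), 0, None)  # zero when the boxes do not overlap
    inter = wh.prod(axis=1)
    union = pred[:, 2:].prod(axis=1) + target[:, 2:].prod(axis=1) - inter
    return inter / (union + eps)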
Example #29
def gen_bias(encoder_inputs, decoder_inputs, step):
    decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2]
    attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1])
    decoder_bias = L.cast((L.matmul(attn_bias, 1. / attn_bias, transpose_y=True) >= 1.),
                          'float32')  #[1, 1, decoderlen, decoderlen]
    encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1])  #[bsz, 1, encoderlen]
    encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1])  #[bsz,decoderlen, encoderlen]
    decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1])  #[bsz, decoderlen, decoderlen]
    if step > 0:
        bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1)
    else:
        bias = L.concat([encoder_bias, decoder_bias], -1)
    return bias
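
A NumPy sketch showing that the decoder part of this bias reduces to a lower-triangular (causal) mask; the encoder part is simply all ones over the encoder positions (illustrative only):

import numpy as np

decoder_seqlen = 4
pos = np.arange(1, decoder_seqlen + 1, dtype=np.float32).reshape(-1, 1)   # [L, 1], values 1..L
decoder_bias = (pos @ (1.0 / pos).T >= 1.0).astype(np.float32)            # [L, L]
# row i may attend to columns j <= i:
# [[1. 0. 0. 0.]
#  [1. 1. 0. 0.]
#  [1. 1. 1. 0.]
#  [1. 1. 1. 1.]]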
Example #30
File: layers.py  Project: Yelrose/PGL
def sag_pool(gw, feature, ratio, graph_id, dataset, name, activation=L.tanh):
    """Implementation of self-attention graph pooling (SAGPool)

    This is an implementation of the paper SELF-ATTENTION GRAPH POOLING
    (https://arxiv.org/pdf/1904.08082.pdf)

    Args:
        gw: Graph wrapper object.

        feature: A tensor with shape (num_nodes, feature_size).

        ratio: The pooling ratio of nodes we want to select.

        graph_id: The graphs that the nodes belong to. 

        dataset: To differentiate FRANKENSTEIN dataset and other datasets.

        name: The name of SAGPool layer.
        
        activation: The activation function.

    Return:
        new_feature: A tensor with shape (num_nodes, feature_size), and the unselected
                     nodes' feature is masked by zero.

        ratio_length: The selected node numbers of each graph.

    """
    if dataset == "FRANKENSTEIN":
        gcn_ = gcn
    else:
        gcn_ = norm_gcn

    score = gcn_(gw=gw,
                 feature=feature,
                 hidden_size=1,
                 activation=None,
                 norm=gw.node_feat["norm"],
                 name=name)
    score = L.squeeze(score, axes=[])
    perm, ratio_length = topk_pool(gw, score, graph_id, ratio)

    mask = L.zeros_like(score)
    mask = L.cast(mask, dtype="float32")
    updates = L.ones_like(perm)
    updates = L.cast(updates, dtype="float32")
    mask = L.scatter(mask, perm, updates)
    new_feature = L.elementwise_mul(feature, mask, axis=0)
    temp_score = activation(score)
    new_feature = L.elementwise_mul(new_feature, temp_score, axis=0)
    return new_feature, ratio_length