Exemplo n.º 1
0
def rpn_anchor_target(anchors,
                      gt_boxes,
                      rpn_batch_size_per_im,
                      rpn_positive_overlap,
                      rpn_negative_overlap,
                      rpn_fg_fraction,
                      use_random=True,
                      batch_size=1,
                      weights=None):
    """Build per-image RPN training targets for a batch.

    For every image the anchors are matched against that image's
    ground-truth boxes, foreground/background anchors are sampled, and the
    label / matched-box / regression-delta targets are collected.

    Args:
        anchors: Anchor boxes, shared by every image of the batch.
        gt_boxes: Indexable collection; ``gt_boxes[i]`` holds the
            ground-truth boxes of image ``i``.
        rpn_batch_size_per_im: Forwarded to ``subsample_labels``.
        rpn_positive_overlap: Forwarded to ``label_box``.
        rpn_negative_overlap: Forwarded to ``label_box``.
        rpn_fg_fraction: Forwarded to ``subsample_labels``.
        use_random: Forwarded to ``subsample_labels``.
        batch_size: Number of images to process (``gt_boxes[0..batch_size-1]``).
        weights: Forwarded to ``bbox2delta``. ``None`` (the default) means
            ``[1., 1., 1., 1.]``; a fresh list is created on every call so no
            list object is shared between calls.

    Returns:
        tuple: ``(tgt_labels, tgt_bboxes, tgt_deltas)``, three lists with one
        entry per image. Labels are ``1`` for sampled foreground anchors,
        ``0`` for sampled background anchors and ``-1`` for everything else.
    """
    if weights is None:
        # Avoid a mutable default argument: build the default per call.
        weights = [1., 1., 1., 1.]

    tgt_labels = []
    tgt_bboxes = []
    tgt_deltas = []
    for i in range(batch_size):
        gt_bbox = gt_boxes[i]

        # Step 1: match anchors and ground-truth boxes (the trailing True
        # flag is passed to label_box unchanged).
        matches, match_labels, _matched_vals = label_box(
            anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True)
        # Step 2: sample foreground / background anchor indices.
        fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im,
                                            rpn_fg_fraction, 0, use_random)
        # Fill with the ignore label (-1), then set positive and negative labels.
        labels = paddle.full(match_labels.shape, -1, dtype='int32')
        labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
        labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
        # Step 3: gather the matched boxes and encode them relative to anchors.
        matched_gt_boxes = paddle.gather(gt_bbox, matches)
        tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)

        # Targets are constants for the loss; no gradient flows through them.
        labels.stop_gradient = True
        matched_gt_boxes.stop_gradient = True
        tgt_delta.stop_gradient = True
        tgt_labels.append(labels)
        tgt_bboxes.append(matched_gt_boxes)
        tgt_deltas.append(tgt_delta)

    return tgt_labels, tgt_bboxes, tgt_deltas
Exemplo n.º 2
0
def batch_distance2bbox(points, distance, max_shapes=None):
    """Turn batched point-to-side distances into corner-format boxes.

    Args:
        points (Tensor): [B, ..., 2], "xy" format
        distance (Tensor): [B, ..., 4], "ltrb" format
        max_shapes (Tensor, optional): [B, 2], "h,w" format, shape of the
            image. When given, every coordinate is clamped to the range
            [0, (w, h, w, h)].
    Returns:
        Tensor: Decoded bboxes, "x1y1x2y2" format.
    """
    left_top, right_bottom = paddle.split(distance, 2, -1)
    # `points` is deliberately kept as the second operand of both additions.
    top_left = -left_top + points
    bottom_right = right_bottom + points
    boxes = paddle.concat([top_left, bottom_right], -1)
    if max_shapes is None:
        return boxes

    # (h, w) -> (w, h) -> (w, h, w, h), one limit per box coordinate.
    limits = max_shapes.flip(-1).tile([1, 2])
    # Insert axes at position 1 until the limits have the same rank as the boxes.
    missing_dims = boxes.ndim - limits.ndim
    for _ in range(missing_dims):
        limits.unsqueeze_(1)
    boxes = paddle.where(boxes < limits, boxes, limits)
    boxes = paddle.where(boxes > 0, boxes, paddle.zeros_like(boxes))
    return boxes
Exemplo n.º 3
0
def aggr(batch, y, nxt_y, y0, alpha):
    """Write the aggregated, row-normalised labels of `batch` into `nxt_y`.

    Reads the module-level `graph` (for predecessors) and `indegree`
    (for the per-node scaling factor). `y` and `y0` are indexed with NumPy
    index arrays; `nxt_y[batch]` is overwritten in place. Nothing is returned.
    """
    node_ids = batch.numpy()
    predecessors = graph.predecessor(node_ids)
    own_label = paddle.to_tensor(y[node_ids])
    own_label0 = paddle.to_tensor(y0[node_ids])

    # For each node that has predecessors, repeat its row position inside the
    # batch once per predecessor; nodes without predecessors add nothing.
    owner_rows = [
        np.ones(len(p)) * row for row, p in enumerate(predecessors)
        if len(p) > 0
    ]
    flat_pred = np.concatenate(predecessors)
    owner_rows_pd = paddle.to_tensor(np.concatenate(owner_rows), dtype="int64")
    flat_pred_pd = paddle.to_tensor(flat_pred)

    neighbour_label = paddle.to_tensor(y[flat_pred])

    # Gathered but never read afterwards; kept so the call sequence is unchanged.
    pred_norm = paddle.gather(indegree, flat_pred_pd)
    own_norm = paddle.gather(indegree, paddle.to_tensor(batch, dtype="int64"))

    mixed = paddle.scatter(
        paddle.zeros_like(own_label), owner_rows_pd, neighbour_label)
    mixed = (1 - alpha) * (mixed + own_label) * own_norm + alpha * own_label0
    # Normalise every row so it sums to one.
    mixed = mixed / paddle.sum(mixed, -1, keepdim=True)
    nxt_y[batch] = mixed.numpy()
Exemplo n.º 4
0
 def __call__(self, bboxes, scores):
     """Run multiclass NMS and return one index row per kept box.

     Each returned row is the concatenation (along axis 1) of a batch
     column, the first column of the NMS output cast to int64 (named
     `clas` here), and the box index reported by `multiclass_nms`.
     """
     nms_kwargs = dict(
         background_label=-1,
         score_threshold=self.score_threshold,
         nms_top_k=-1,
         nms_threshold=self.nms_threshold,
         keep_top_k=self.keep_top_k,
         nms_eta=1.0,
         normalized=False,
         return_index=True)
     output, nms_rois_num, index = multiclass_nms(bboxes, scores, **nms_kwargs)
     first_col = paddle.slice(output, axes=[1], starts=[0], ends=[1])
     clas = paddle.cast(first_col, dtype="int64")
     index = paddle.cast(index, dtype="int64")
     if bboxes.shape[0] != 1:
         # Split the flat index into (batch, index-within-batch).
         bboxes_count = bboxes.shape[1]
         batch = paddle.divide(index, bboxes_count)
         index = paddle.mod(index, bboxes_count)
     else:
         # Single image: every kept box belongs to batch 0.
         batch = paddle.zeros_like(clas, dtype="int64")
     return paddle.concat([batch, clas, index], axis=1)
Exemplo n.º 5
0
 def forward(self, inputs):
     """Pool `keys` with attention scores computed against `querys`.

     `inputs` unpacks to `(querys, keys, sess_length)`. Positions beyond
     `sess_length` are replaced by a padding score before `self.soft` is
     applied; the result is the score-weighted matmul with `keys`.
     """
     querys, keys, sess_length = inputs
     seq_len = keys.shape[1]
     valid_mask = nn.functional.sequence_mask(sess_length, seq_len)
     # Repeat the query along a new axis 1 so it lines up with every key.
     tiled_query = paddle.tile(querys.unsqueeze(1), [1, seq_len, 1])
     hidden = paddle.concat(
         [tiled_query, keys, tiled_query - keys, tiled_query * keys], axis=-1)
     for layer in self.layers:
         hidden = self.activation(layer(hidden))
     # Swap the last two axes so the scores can left-multiply `keys`.
     scores = paddle.transpose(self.dnn(hidden), [0, 2, 1])
     if self.weight_normalization:
         # Very negative filler for masked positions.
         fill = paddle.ones_like(scores) * (-2**32 + 1)
     else:
         fill = paddle.zeros_like(scores)
     # unsqueeze(1) gives the mask the same rank as the scores.
     scores = paddle.where(valid_mask.unsqueeze(1) == 1, scores, fill)
     scores = self.soft(scores)
     return paddle.matmul(scores, keys)
Exemplo n.º 6
0
def drop_head(x, drop_n_heads, training=True):
    """Randomly zero out `drop_n_heads` of the context vectors per sample.

    Parameters
    ----------
    x : Tensor [shape=(batch_size, num_heads, time_steps, channels)]
        The input, multiple context vectors.

    drop_n_heads : int [0<= drop_n_heads <= num_heads]
        Number of vectors to drop.

    training : bool
        Whether the call happens during training. If `False`, the input is
        returned untouched.

    Returns
    -------
    Tensor
        `x` itself when nothing is dropped, an all-zero tensor when every
        head is dropped, otherwise `x` times a rescaled 0/1 head mask.
    """
    nothing_to_drop = (not training) or (drop_n_heads == 0)
    if nothing_to_drop:
        return x

    batch_size, num_heads, _time_steps, _channels = x.shape
    if drop_n_heads == num_heads:
        # Every head is dropped.
        return paddle.zeros_like(x)

    # Start with the first `drop_n_heads` heads disabled, then shuffle each
    # sample's row independently so the dropped heads differ per sample.
    keep = np.ones([batch_size, num_heads])
    keep[:, :drop_n_heads] = 0
    for row in keep:
        np.random.shuffle(row)
    # Rescale the surviving heads by num_heads / (number of kept heads).
    rescale = float(num_heads) / (num_heads - drop_n_heads)
    keep = rescale * np.reshape(keep, [batch_size, num_heads, 1, 1])
    return x * paddle.to_tensor(keep)
Exemplo n.º 7
0
Arquivo: model.py Projeto: Yelrose/PGL
    def forward(self, src, dsts):
        """Return the mean of the positive-pair and negative-pair losses.

        `src` and `dsts` are embedded with `self.emb`. Column 0 of `dsts`
        is treated as the positive target and the remaining columns as
        negatives (the original comments give src as [b, 1] and dsts as
        [b, 1+neg]).
        """
        src_vec = self.emb(src)
        dst_vecs = self.emb(dsts)

        positive = dst_vecs[:, 0:1]
        negative = dst_vecs[:, 1:]

        # Dot products of the source embedding with each target embedding.
        pos_score = paddle.matmul(src_vec, positive, transpose_y=True)
        neg_score = paddle.matmul(src_vec, negative, transpose_y=True)

        # Positives are scored against ones, negatives against zeros.
        pos_loss = self.loss(pos_score, paddle.ones_like(pos_score))
        neg_loss = self.loss(neg_score, paddle.zeros_like(neg_score))
        return (pos_loss + neg_loss) / 2
Exemplo n.º 8
0
    def post_process(self, bboxes, bbox_num, im_shape, scale_factor):
        """
        Rescale the 8-coordinate boxes coming out of NMS and clip them to
        the original image.

        Args:
            bboxes(Tensor): bboxes [N, 8]; rescaled IN PLACE by this call
            bbox_num(Tensor): bbox_num, passed through unchanged
            im_shape(Tensor): indexed as [0] / [1] after division by
                              scale_factor (assumed height, width -- confirm
                              against the caller)
            scale_factor(Tensor): element [0] divides the even columns,
                                  element [1] divides the odd columns
        Returns:
            tuple: (bbox, bbox_num) where bbox has shape [N, 8] and holds
                   the clipped coordinates.
        """
        origin_shape = paddle.floor(im_shape / scale_factor + 0.5)
        origin_h, origin_w = origin_shape[0], origin_shape[1]

        # Even columns are x coordinates, odd columns are y coordinates.
        bboxes[:, 0::2] = bboxes[:, 0::2] / scale_factor[0]
        bboxes[:, 1::2] = bboxes[:, 1::2] / scale_factor[1]

        zeros = paddle.zeros_like(origin_h)
        clipped = []
        for col in range(8):
            # x columns are limited by the width, y columns by the height.
            limit = origin_w if col % 2 == 0 else origin_h
            clipped.append(
                paddle.maximum(
                    paddle.minimum(bboxes[:, col], limit - 1), zeros))
        bbox = paddle.stack(clipped, axis=-1)
        return (bbox, bbox_num)
Exemplo n.º 9
0
    def forward(self, input_ids, token_type_ids=None, position_ids=None):
        """Sum word, position and (optionally) token-type embeddings.

        When `position_ids` is None, ids 0, 1, 2, ... along the last axis
        are generated. Token-type embeddings are only added when
        `self.type_vocab_size` is non-zero; otherwise any `token_type_ids`
        passed in are ignored with a warning. The sum goes through layer
        norm and dropout.
        """
        if position_ids is None:
            # cumsum of ones gives 1..L; subtracting ones makes it 0..L-1.
            unit = paddle.ones_like(input_ids, dtype="int64")
            position_ids = paddle.cumsum(unit, axis=-1) - unit
            position_ids.stop_gradient = True

        word_part = self.word_embeddings(input_ids)
        position_part = self.position_embeddings(position_ids)
        embeddings = word_part + position_part

        uses_token_types = self.type_vocab_size != 0
        if uses_token_types:
            if token_type_ids is None:
                token_type_ids = paddle.zeros_like(input_ids, dtype="int64")
            embeddings += self.token_type_embeddings(token_type_ids)
        elif token_type_ids is not None:
            logger.warning(
                "There is no need to pass the token type ids to SKEP based on RoBERTa model."
                "The input token type ids will be ignored.")

        return self.dropout(self.layer_norm(embeddings))
Exemplo n.º 10
0
    def forward(self, input_ids, token_type_ids=None, position_ids=None):
        """Sum word, token-type and position embeddings, then normalise.

        Args:
            input_ids: Token ids; axis 1 is used as the sequence length.
            token_type_ids: Optional segment ids. Defaults to all zeros with
                the shape of `input_ids`.
            position_ids: Optional position ids. When None, a 1-D vector is
                built: `self.cls_num` zeros followed by 1..content_len, where
                content_len is the sequence length minus `self.cls_num`.

        Returns:
            The embeddings after layer norm and dropout.
        """
        if position_ids is None:
            # The previous version also built `ones` / `seq_length` here via
            # ones_like + cumsum but never read them; that dead work is gone.
            content_len = paddle.shape(input_ids)[1] - self.cls_num
            position_ids = paddle.concat([
                paddle.zeros(shape=[self.cls_num], dtype="int64"),
                paddle.linspace(1, content_len, content_len, dtype="int64")
            ])
            position_ids.stop_gradient = True
        if token_type_ids is None:
            token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

        input_embedings = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = input_embedings + token_type_embeddings + position_embeddings

        embeddings = self.layer_norm(embeddings)
        embeddings = self.dropout(embeddings)

        return embeddings
Exemplo n.º 11
0
    def forward(self, features, num_voxels, coors):
        """Decorate the raw point features and run them through the PFN layers.

        The first three feature channels are treated as x, y, z. Each point
        gets its offset from the per-pillar mean and from the pillar centre
        (derived from `coors`, `self.vx/vy` and the offsets), optionally its
        L2 norm, and padded entries are zeroed before the PFN layers run.
        The final result is returned with all size-1 axes squeezed out.
        """
        xyz = features[:, :, :3]

        # Offset of every point from the mean of its pillar; the sum is
        # divided by the per-pillar point count.
        point_count = num_voxels.astype(features.dtype).reshape((-1, 1, 1))
        points_mean = xyz.sum(axis=1, keepdim=True) / point_count
        f_cluster = xyz - points_mean

        # Offset of every point from the centre of its pillar (x and y only).
        f_center = paddle.zeros_like(features[:, :, :2])
        f_center[:, :, 0] = features[:, :, 0] - (
            coors[:, 3].astype("float32").unsqueeze(1) * self.vx +
            self.x_offset)
        f_center[:, :, 1] = features[:, :, 1] - (
            coors[:, 2].astype("float32").unsqueeze(1) * self.vy +
            self.y_offset)

        # Stack all decorations onto the channel axis.
        decorated = [features, f_cluster, f_center]
        if self._with_distance:
            decorated.append(paddle.norm(xyz, 2, 2, keepdim=True))
        features = paddle.concat(decorated, axis=-1)

        # The decorations above ignore whether a slot is padding, so padded
        # slots are forced back to zero here.
        voxel_count = features.shape[1]
        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
        mask = paddle.unsqueeze(mask, -1).astype(features.dtype)
        features *= mask

        for pfn in self.pfn_layers:
            features = pfn(features)

        return features.squeeze()
Exemplo n.º 12
0
 def forward(self, inputs):
     """Return a zero-filled tensor created by `paddle.zeros_like(inputs)`."""
     return paddle.zeros_like(inputs, dtype=None, name=None)
Exemplo n.º 13
0
 def _criterion(loss_func, logit, is_real):
     """Apply `loss_func` to `logit` against an all-ones target when
     `is_real` is truthy, otherwise against an all-zeros target."""
     make_target = paddle.ones_like if is_real else paddle.zeros_like
     return loss_func(logit, make_target(logit))
Exemplo n.º 14
0
# Alternating discriminator / generator updates over (real_A, real_B) batches.
# NOTE(review): the generator's backward/step calls are not visible in this
# excerpt; only the loss computation is shown.
for epoch in range(EPOCHS):
    for data in tqdm(data_loader_train):
        real_A, real_B = data

        # ---- Discriminator update ----
        optimizerD.clear_grad()
        # Real pair D([real_A, real_B]) is scored against an all-ones target.
        real_AB = paddle.concat((real_A, real_B), 1)
        d_real_predict = discriminator(real_AB)
        d_real_loss = bce_loss(d_real_predict, paddle.ones_like(d_real_predict))

        # Fake pair D([real_A, fake_B]) is scored against an all-zeros target.
        # detach() cuts fake_B from the generator's graph for this step.
        fake_B = generator(real_A).detach()
        fake_AB = paddle.concat((real_A, fake_B), 1)
        d_fake_predict = discriminator(fake_AB)
        d_fake_loss = bce_loss(d_fake_predict, paddle.zeros_like(d_fake_predict))

        # Average the two discriminator losses and take one optimizer step.
        d_loss = (d_real_loss + d_fake_loss) / 2.
        d_loss.backward()
        optimizerD.step()

        # ---- Generator losses ----
        optimizerG.clear_grad()
        # fake_B is regenerated WITHOUT detach so gradients can reach the
        # generator; the fake pair is now scored against an all-ones target.
        fake_B = generator(real_A)
        fake_AB = paddle.concat((real_A, fake_B), 1)
        g_fake_predict = discriminator(fake_AB)
        g_bce_loss = bce_loss(g_fake_predict, paddle.ones_like(g_fake_predict))
        # The l1_loss term is weighted by 100 before being added.
        g_l1_loss = l1_loss(fake_B, real_B) * 100.
        g_loss = g_bce_loss + g_l1_loss *1.
        
Exemplo n.º 15
0
    def int_seghead(self,
                    ref_frame_embedding=None,
                    ref_scribble_label=None,
                    prev_round_label=None,
                    normalize_nearest_neighbor_distances=True,
                    global_map_tmp_dic=None,
                    local_map_dics=None,
                    interaction_num=None,
                    seq_names=None,
                    gt_ids=None,
                    k_nearest_neighbors=1,
                    frame_num=None,
                    first_inter=True):
        """Run the interaction segmentation head on every sample of the batch.

        For each sample `n` this method
          1. computes per-object nearest-neighbour features of the reference
             frame against itself,
          2. folds them into `global_map_tmp_dic[seq_names[n]][frame_num[n]]`
             (element-wise minimum with the stored entry),
          3. optionally initialises / resets the local-map storage,
          4. feeds the concatenation of frame embedding, scribble mask and
             previous-round mask to `self.inter_seghead`.

        Args:
            ref_frame_embedding: 4-D tensor unpacked as (bs, c, h, w).
            ref_scribble_label: Scribble labels; resized to (h, w) with
                nearest interpolation.
            prev_round_label: Previous-round labels; only read when
                `first_inter` is False.
            normalize_nearest_neighbor_distances: Not read in this method.
            global_map_tmp_dic: Dict keyed by sequence name; updated in
                place. NOTE(review): the default None would fail at the
                `not in` check below -- callers presumably always pass a dict.
            local_map_dics: Optional pair
                (local_map_tmp_dic, local_map_dist_dic); updated in place.
            interaction_num: 1-based interaction index used to address the
                local distance map.
            seq_names: Per-sample sequence names (dict keys).
            gt_ids: Per-sample largest object id; ids 0..gt_ids[n] are used.
            k_nearest_neighbors: Not read in this method.
            frame_num: Per-sample frame index into the stored maps.
            first_inter: True on the first interaction round.

        Returns:
            `dic_tmp` (sequence name -> prediction with axes 0 and 1
            swapped) when `local_map_dics` is None, otherwise the tuple
            `(dic_tmp, local_map_dics)`.
        """
        dic_tmp = {}
        bs, c, h, w = ref_frame_embedding.shape
        # Resize the label maps to the embedding resolution.
        scale_ref_scribble_label = paddle.nn.functional.interpolate(
            float_(ref_scribble_label), size=(h, w), mode='nearest')
        scale_ref_scribble_label = int_(scale_ref_scribble_label)
        if not first_inter:
            scale_prev_round_label = paddle.nn.functional.interpolate(
                float_(prev_round_label), size=(h, w), mode='nearest')
            scale_prev_round_label = int_(scale_prev_round_label)
        n_chunks = 500  # not read anywhere below
        for n in range(bs):

            # Object ids 0 .. gt_ids[n] for this sample.
            gt_id = paddle.arange(0, gt_ids[n] + 1)

            gt_id = int_(gt_id)

            seq_ref_frame_embedding = ref_frame_embedding[n]

            # ---- Local distance map ----
            # Move the channel axis last: (c, h, w) -> (h, w, c).
            seq_ref_frame_embedding = paddle.transpose(seq_ref_frame_embedding,
                                                       [1, 2, 0])
            seq_ref_scribble_label = paddle.transpose(
                scale_ref_scribble_label[n], [1, 2, 0])
            # The reference frame is used as both "previous" and "query" frame.
            nn_features_n = local_previous_frame_nearest_neighbor_features_per_object(
                prev_frame_embedding=seq_ref_frame_embedding,
                query_embedding=seq_ref_frame_embedding,
                prev_frame_labels=seq_ref_scribble_label,
                gt_ids=gt_id,
                max_distance=self.cfg['model_max_local_distance'])

            # ---- Global map update ----
            # First time this sequence is seen: allocate 1000 slots of ones.
            if seq_names[n] not in global_map_tmp_dic:
                global_map_tmp_dic[seq_names[n]] = paddle.ones_like(
                    nn_features_n).tile([1000, 1, 1, 1, 1])
            # Element-wise minimum of the new features and the stored entry.
            nn_features_n_ = paddle.where(
                nn_features_n <=
                global_map_tmp_dic[seq_names[n]][frame_num[n]].unsqueeze(0),
                nn_features_n,
                global_map_tmp_dic[seq_names[n]][frame_num[n]].unsqueeze(0))

            # Store the detached minimum back into the global map.
            global_map_tmp_dic[seq_names[n]][
                frame_num[n]] = nn_features_n_.detach()[0]
            # ---- Local map update ----
            if local_map_dics is not None:
                local_map_tmp_dic, local_map_dist_dic = local_map_dics
                if seq_names[n] not in local_map_dist_dic:
                    local_map_dist_dic[seq_names[n]] = paddle.zeros([1000, 9])
                if seq_names[n] not in local_map_tmp_dic:
                    local_map_tmp_dic[seq_names[n]] = paddle.ones_like(
                        nn_features_n).unsqueeze(0).tile([1000, 9, 1, 1, 1, 1])
                # Reset the distance entry of this frame / interaction round.
                local_map_dist_dic[seq_names[n]][frame_num[n]][interaction_num
                                                               - 1] = 0

                local_map_dics = (local_map_tmp_dic, local_map_dist_dic)

            # ---- Assemble the segmentation-head input ----
            # One copy of the frame embedding per object id.
            to_cat_current_frame_embedding = ref_frame_embedding[n].unsqueeze(
                0).tile((gt_id.shape[0], 1, 1, 1))
            # Computed but not included in `to_cat` below.
            to_cat_nn_feature_n = nn_features_n.squeeze(0).transpose(
                [2, 3, 0, 1])

            # Per-object scribble mask: label == object id.
            # NOTE(review): equality is evaluated on float-cast values.
            to_cat_scribble_mask_to_cat = (
                float_(seq_ref_scribble_label) == float_(gt_id)
            )
            to_cat_scribble_mask_to_cat = float_(
                to_cat_scribble_mask_to_cat.unsqueeze(-1).transpose(
                    [2, 3, 0, 1]))
            if not first_inter:
                seq_prev_round_label = scale_prev_round_label[n].transpose(
                    [1, 2, 0])

                # Per-object previous-round mask, built like the scribble mask.
                to_cat_prev_round_to_cat = (
                    float_(seq_prev_round_label) == float_(gt_id)
                )
                to_cat_prev_round_to_cat = float_(
                    to_cat_prev_round_to_cat.unsqueeze(-1).transpose(
                        [2, 3, 0, 1]))
            else:
                # First round: no previous result; entry 0 is set to ones and
                # every other entry stays zero.
                to_cat_prev_round_to_cat = paddle.zeros_like(
                    to_cat_scribble_mask_to_cat)
                to_cat_prev_round_to_cat[0] = 1.

            to_cat = paddle.concat(
                (to_cat_current_frame_embedding, to_cat_scribble_mask_to_cat,
                 to_cat_prev_round_to_cat), 1)

            pred_ = self.inter_seghead(to_cat)
            # Swap the first two axes of the prediction before storing it.
            pred_ = pred_.transpose([1, 0, 2, 3])
            dic_tmp[seq_names[n]] = pred_
        if local_map_dics is None:
            return dic_tmp
        else:
            return dic_tmp, local_map_dics
Exemplo n.º 16
0
    def get_pred(self, bboxes, bbox_num, im_shape, scale_factor):
        """
        Rescale, clip and filter the bbox from the output of NMS to
        get final prediction.

        Notes:
        The original note says only bs = 1 is supported, although the code
        below iterates over every entry of `bbox_num`.
        As a side effect the per-box original image shapes are stored in
        `self.origin_shape_list`.

        Args:
            bboxes (Tensor): The output bboxes with shape [N, 6] after decode
                and NMS, including labels, scores and bboxes.
            bbox_num (Tensor): The number of prediction boxes of each batch with
                shape [1], and is N.
            im_shape (Tensor): The shape of the input image.
            scale_factor (Tensor): The scale factor of the input image,
                ordered (scale_y, scale_x) per image.
        Returns:
            pred_result (Tensor): The final prediction results with shape [N, 6]
                including labels, scores and bboxes. Boxes that
                `nonempty_bbox` rejects keep their row but get label -1.
        """

        bboxes_list = []
        bbox_num_list = []
        id_start = 0
        # add fake bbox when output is empty for each batch
        for i in range(bbox_num.shape[0]):
            if bbox_num[i] == 0:
                bboxes_i = self.fake_bboxes
                bbox_num_i = self.fake_bbox_num
                # NOTE(review): the read offset advances by one even though
                # this image reports zero boxes -- presumably the NMS output
                # already holds a placeholder row for it; confirm upstream.
                id_start += 1
            else:
                bboxes_i = bboxes[id_start:id_start + bbox_num[i], :]
                bbox_num_i = bbox_num[i]
                id_start += bbox_num[i]
            bboxes_list.append(bboxes_i)
            bbox_num_list.append(bbox_num_i)
        bboxes = paddle.concat(bboxes_list)
        bbox_num = paddle.concat(bbox_num_list)

        # Original image size = network input size / scale, rounded.
        origin_shape = paddle.floor(im_shape / scale_factor + 0.5)

        origin_shape_list = []
        scale_factor_list = []
        # scale_factor: scale_y, scale_x
        for i in range(bbox_num.shape[0]):
            # Repeat image i's shape and scale once per box of that image.
            expand_shape = paddle.expand(origin_shape[i:i + 1, :],
                                         [bbox_num[i], 2])
            scale_y, scale_x = scale_factor[i][0], scale_factor[i][1]
            # Order matches the box layout x1, y1, x2, y2.
            scale = paddle.concat([scale_x, scale_y, scale_x, scale_y])
            expand_scale = paddle.expand(scale, [bbox_num[i], 4])
            origin_shape_list.append(expand_shape)
            scale_factor_list.append(expand_scale)

        self.origin_shape_list = paddle.concat(origin_shape_list)
        scale_factor_list = paddle.concat(scale_factor_list)

        # bboxes: [N, 6], label, score, bbox
        pred_label = bboxes[:, 0:1]
        pred_score = bboxes[:, 1:2]
        pred_bbox = bboxes[:, 2:]
        # rescale bbox to original image
        scaled_bbox = pred_bbox / scale_factor_list
        origin_h = self.origin_shape_list[:, 0]
        origin_w = self.origin_shape_list[:, 1]
        zeros = paddle.zeros_like(origin_h)
        # clip bbox to [0, original_size]
        x1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 0], origin_w), zeros)
        y1 = paddle.maximum(paddle.minimum(scaled_bbox[:, 1], origin_h), zeros)
        x2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 2], origin_w), zeros)
        y2 = paddle.maximum(paddle.minimum(scaled_bbox[:, 3], origin_h), zeros)
        pred_bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
        # filter empty bbox: rows are kept, but their label is replaced by -1
        keep_mask = nonempty_bbox(pred_bbox, return_mask=True)
        keep_mask = paddle.unsqueeze(keep_mask, [1])
        pred_label = paddle.where(keep_mask, pred_label,
                                  paddle.ones_like(pred_label) * -1)
        pred_result = paddle.concat([pred_label, pred_score, pred_bbox], axis=1)
        return pred_result
Exemplo n.º 17
0
def where(condition, x=None, y=None, name=None):
    r"""
    Return a tensor of elements selected from either $x$ or $y$, depending on $condition$.

    **Note**:
        ``paddle.where(condition)`` is identical to ``paddle.nonzero(condition, as_tuple=True)``.

    .. math::

      out_i =
      \begin{cases}
      x_i, \quad  \text{if}  \ condition_i \  is \ True \\
      y_i, \quad  \text{if}  \ condition_i \  is \ False \\
      \end{cases}


    Args:
        condition(Tensor): The condition to choose x or y. When True(nonzero), yield x, otherwise yield y.
        x(Tensor or Scalar, optional): x is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.
        y(Tensor or Scalar, optional): y is a Tensor or Scalar with data type float32, float64, int32, int64. Either both or neither of x and y should be given.

        name(str, optional): The default value is None. Normally there is no
            need for user to set this property. For more information, please
            refer to :ref:`api_guide_Name`.

    Returns:
        Tensor: A Tensor with the same data type as x.

    Raises:
        ValueError: If exactly one of x and y is given.

    Examples:
        .. code-block:: python

          import paddle

          x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2])
          y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0])
          out = paddle.where(x>1, x, y)

          print(out)
          #out: [1.0, 1.0, 3.2, 1.2]

          out = paddle.where(x>1)
          print(out)
          #out: (Tensor(shape=[2, 1], dtype=int64, place=CPUPlace, stop_gradient=True,
          #            [[2],
          #             [3]]),)
    """
    # Promote scalars to one-element tensors so they can be broadcast below;
    # the dtype is whatever NumPy infers for the scalar.
    if np.isscalar(x):
        x = paddle.full([1], x, np.array([x]).dtype.name)

    if np.isscalar(y):
        y = paddle.full([1], y, np.array([y]).dtype.name)

    # Single-argument form: behave like nonzero().
    if x is None and y is None:
        return nonzero(condition, as_tuple=True)

    if x is None or y is None:
        raise ValueError("either both or neither of x and y should be given")

    # Input validation is only performed outside dynamic mode.
    if not paddle.in_dynamic_mode():
        check_variable_and_dtype(condition, 'condition', ['bool'], 'where')
        check_variable_and_dtype(x, 'x',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')
        check_variable_and_dtype(y, 'y',
                                 ['float32', 'float64', 'int32', 'int64'],
                                 'where')

    condition_shape = list(condition.shape)
    x_shape = list(x.shape)
    y_shape = list(y.shape)

    if x_shape == y_shape and condition_shape == x_shape:
        # All three shapes already agree: nothing to broadcast.
        broadcast_condition = condition
        broadcast_x = x
        broadcast_y = y
    else:
        if core.is_compiled_with_xpu():
            # XPU builds: compute the selection arithmetically as
            # x * cond + y * (not cond) and return directly.
            cond_int = paddle.cast(condition, x.dtype)
            cond_not_int = paddle.cast(logical_not(condition), x.dtype)
            out1 = paddle.multiply(x, cond_int)
            out2 = paddle.multiply(y, cond_not_int)
            out = paddle.add(out1, out2)
            return out

        # Manual broadcast: summing zero tensors shaped like x, y and the
        # condition yields a zero tensor of the common broadcast shape;
        # adding it to each operand expands that operand to the same shape.
        zeros_like_x = paddle.zeros_like(x)
        zeros_like_y = paddle.zeros_like(y)
        zeros_like_condition = paddle.zeros_like(condition)
        zeros_like_condition = paddle.cast(zeros_like_condition, x.dtype)
        cast_cond = paddle.cast(condition, x.dtype)

        broadcast_zeros = paddle.add(zeros_like_x, zeros_like_y)
        broadcast_zeros = paddle.add(broadcast_zeros, zeros_like_condition)
        broadcast_x = paddle.add(x, broadcast_zeros)
        broadcast_y = paddle.add(y, broadcast_zeros)
        broadcast_condition = paddle.add(cast_cond, broadcast_zeros)
        # The condition was cast to x's dtype for the addition; cast it back.
        broadcast_condition = paddle.cast(broadcast_condition, 'bool')

    # Dispatch to the kernel matching the current execution mode.
    if in_dygraph_mode():
        return _C_ops.final_state_where(broadcast_condition, broadcast_x,
                                        broadcast_y)
    else:
        if _in_legacy_dygraph():
            return _C_ops.where(broadcast_condition, broadcast_x, broadcast_y)
        else:
            # Otherwise append a 'where' op through LayerHelper.
            helper = LayerHelper("where", **locals())
            out = helper.create_variable_for_type_inference(dtype=x.dtype)

            helper.append_op(type='where',
                             inputs={
                                 'Condition': broadcast_condition,
                                 'X': broadcast_x,
                                 'Y': broadcast_y
                             },
                             outputs={'Out': [out]})

            return out
Exemplo n.º 18
0
    def forward(self,
                src_ids,
                sent_ids=None,
                pos_ids=None,
                input_mask=None,
                attn_bias=None,
                past_cache=None,
                use_causal_mask=False):
        """
        Args:
            src_ids (`Variable` of shape `[batch_size, seq_len]`):
                Indices of input sequence tokens in the vocabulary.
            sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`):
                aka token_type_ids, Segment token indices to indicate first and second portions of the inputs.
                If None, all-zero ids shaped like `src_ids` are used.
            pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`):
                Indices of positions of each input sequence tokens in the position embeddings.
                If None, positions `0 .. seq_len - 1` are used.
            input_mask(optional `Variable` of shape `[batch_size, seq_len]`):
                Mask to avoid performing attention on the padding token indices of the encoder input.
                If None (and `attn_bias` is None), it is derived as `src_ids != 0`.
            attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len]`):
                3D version of `input_mask`; if given, it is used instead of `input_mask`
                (and `use_causal_mask` is not applied).
            past_cache(optional, tuple of two lists: cached key and cached value,
                each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`):
                cached key/value tensor that will be concatenated to the generated key/value when performing self attention.
                If set, `attn_bias` must not be None.
            use_causal_mask(bool):
                Only read when `attn_bias` is None. If True, the mask built from `input_mask`
                is additionally multiplied by a mask that keeps only entries whose row
                position is greater than or equal to the column position.
        Returns:
            encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`):
                output of the transformer stack
            info (Dictionary):
                additional middle-level info with keys `'hiddens'` and `'caches'`,
                as returned by the encoder stack.
        """
        assert len(
            src_ids.shape
        ) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(
            src_ids.shape))
        assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None'
        d_seqlen = paddle.shape(src_ids)[1]
        if pos_ids is None:
            # Default positions 0 .. seq_len - 1 as a [1, seq_len] int64 tensor.
            pos_ids = paddle.arange(0, d_seqlen, 1,
                                    dtype='int32').reshape([1,
                                                            -1]).cast('int64')
        if attn_bias is None:
            if input_mask is None:
                # Token id 0 is treated as padding.
                input_mask = paddle.cast(src_ids != 0, 'float32')
            assert len(input_mask.shape) == 2
            # Outer product of the mask with itself: entry (i, j) is 1 only
            # when both position i and position j are unmasked.
            input_mask = input_mask.unsqueeze(-1)
            attn_bias = input_mask.matmul(input_mask, transpose_y=True)
            if use_causal_mask:
                # sequence holds 1..seq_len; (i + 1) / (j + 1) >= 1 exactly
                # when i >= j, which gives a lower-triangular mask.
                sequence = paddle.reshape(
                    paddle.arange(0, d_seqlen, 1, dtype='float32') + 1.,
                    [1, 1, -1, 1])
                causal_mask = (sequence.matmul(1. / sequence, transpose_y=True)
                               >= 1.).cast('float32')
                attn_bias *= causal_mask
        else:
            assert len(
                attn_bias.shape
            ) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape
        # Turn the 0/1 mask into an additive bias: 0 where attention is
        # allowed, -10000 where it is blocked.
        attn_bias = (1. - attn_bias) * -10000.0
        # Explicitly repeat the bias for every attention head.
        attn_bias = attn_bias.unsqueeze(1).tile([1, self.n_head, 1,
                                                 1])  # avoid broadcast

        if sent_ids is None:
            sent_ids = paddle.zeros_like(src_ids)

        src_embedded = self.word_emb(src_ids)
        pos_embedded = self.pos_emb(pos_ids)
        sent_embedded = self.sent_emb(sent_ids)
        embedded = src_embedded + pos_embedded + sent_embedded

        embedded = self.dropout(self.ln(embedded))

        encoded, hidden_list, cache_list = self.encoder_stack(
            embedded, attn_bias, past_cache=past_cache)

        additional_info = {
            'hiddens': hidden_list,
            'caches': cache_list,
        }

        return encoded, additional_info
Exemplo n.º 19
0
def scatter_add_prim2orig(op, index_t, x, y):
    """Lower a scatter-add primitive to ``paddle.scatter`` plus ``paddle.add``.

    The updates ``y`` are scattered (with ``overwrite=False``) into a zero
    tensor created from ``x``, and the result is added to ``x``.

    Args:
        op: Primitive op; only its ``axis`` attribute is read.
        index_t: Index tensor forwarded to ``paddle.scatter``.
        x: Base tensor.
        y: Update tensor forwarded to ``paddle.scatter``.

    Returns:
        The sum of ``x`` and the scattered updates.

    Raises:
        AssertionError: If the op's ``axis`` attribute is not 0.
    """
    assert op.attr('axis') == 0, 'Only support axis==0 currently'
    accumulator = paddle.zeros_like(x=x, dtype=x.dtype)
    scattered = paddle.scatter(
        x=accumulator, index=index_t, updates=y, overwrite=False)
    return paddle.add(x, scattered)
Exemplo n.º 20
0
# Affine map between the tanh range and the valid pixel range
# [boxmin, boxmax]: pixel = tanh(w) * boxmul + boxplus.
boxmul = (boxmax - boxmin) / 2.
boxplus = (boxmin + boxmax) / 2.

# Label predicted by the model for the unmodified image.
output = model(paddle.to_tensor(img, dtype="float32"))
orig_label = np.argmax(output)
print("orig_label={}".format(orig_label), type(orig_label))  #345

succ_flag = False
for outer_step in range(binary_search_steps):
    print("o_bestl2={} confidence={}".format(o_bestl2, confidence))

    # Map the original image into arctanh space; the 0.999999 factor keeps
    # the arctanh argument strictly inside (-1, 1).
    timg = paddle.to_tensor(np.arctanh((img - boxplus) / boxmul * 0.999999),
                            dtype='float32')
    # The perturbation starts at zero and is the only tensor that needs
    # gradients. (A leftover pdb.set_trace() breakpoint that blocked every
    # run at this point has been removed.)
    modifier = paddle.zeros_like(timg, dtype='float32')
    modifier.stop_gradient = False

    # Do not track gradients for the model parameters, so they cannot be
    # modified.
    for param in model.parameters():
        param.stop_gradient = True

    # Define the optimizer; only `modifier` is optimized.
    #optimizer = torch.optim.Adam([modifier], lr=learning_rate)
    optimizer = paddle.optimizer.Adam(learning_rate=learning_rate,
Exemplo n.º 21
0
    def forward(self, hidden, target, keep_order=False):
        """Compute per-target negative log-likelihood with an adaptive softmax.

        With ``self.num_clusters == 0`` a single softmax over the whole
        vocabulary is used. Otherwise targets are bucketed by
        ``self.cutoffs``: bucket 0 is scored directly by the head softmax,
        and every later bucket ``i`` adds the head log-probability of its
        cluster column (``head_logprob[:, -i]``) to the log-probability
        inside the tail softmax of that bucket.

        Args:
            hidden: Hidden states; ``hidden.shape[0]`` must equal
                ``target.shape[0]``.
            target: Target class indices, one per row of ``hidden``.
            keep_order (bool): When True (or when ``self.keep_order`` is
                set) each loss is written back at the position of its
                target. Otherwise losses are packed bucket by bucket in the
                order the buckets are processed.

        Returns:
            Tensor holding one negative log-likelihood per target.

        Raises:
            AssertionError: If ``hidden`` and ``target`` disagree on their
                first dimension.
        """
        assert (hidden.shape[0] == target.shape[0])

        if self.num_clusters == 0:
            logit = self._compute_logits(hidden, self.out_layers_weight[0],
                                         self.out_layers_bias[0],
                                         self.out_projs[0])
            # log_softmax is numerically stable; log(softmax(x)) returns
            # -inf as soon as a probability underflows to zero.
            nll = -F.log_softmax(logit, axis=-1)
            idx = paddle.concat(
                [
                    paddle.arange(0, nll.shape[0]).unsqueeze([1]),
                    target.unsqueeze(1)
                ],
                axis=1)
            nll = paddle.gather_nd(nll, idx)
        else:
            # Collect the output weight/bias of every bucket.
            weights, biases = [], []
            for i in range(len(self.cutoffs)):
                if self.div_val == 1:
                    l_idx, r_idx = self.cutoff_ends[i], self.cutoff_ends[i + 1]
                    weight_i = self.out_layers_weight[0][l_idx:r_idx]
                    bias_i = self.out_layers_bias[0][l_idx:r_idx]
                else:
                    weight_i = self.out_layers_weight[i]
                    bias_i = self.out_layers_bias[i]

                if i == 0:
                    # The head also scores the clusters; these appended rows
                    # are read back below as head_logprob_i[:, -i].
                    weight_i = paddle.concat(
                        [weight_i, self.cluster_weight], axis=0)
                    bias_i = paddle.concat([bias_i, self.cluster_bias], axis=0)

                weights.append(weight_i)
                biases.append(bias_i)

            head_weight, head_bias, head_proj = weights[0], biases[
                0], self.out_projs[0]

            head_logit = self._compute_logits(hidden, head_weight, head_bias,
                                              head_proj)
            head_logprob = F.log_softmax(head_logit, axis=-1)

            nll = paddle.zeros_like(target, dtype=hidden.dtype)

            offset = 0
            cutoff_values = [0] + self.cutoffs
            # Both halves of the range mask use the same dtype so that the
            # product does not rely on implicit float/int type promotion.
            mask_dtype = paddle.get_default_dtype()
            for i in range(len(cutoff_values) - 1):
                l_idx, r_idx = cutoff_values[i], cutoff_values[i + 1]

                # Positions whose target falls inside [l_idx, r_idx).
                mask_i = paddle.cast(
                    target >= l_idx, dtype=mask_dtype) * paddle.cast(
                        target < r_idx, dtype=mask_dtype)
                indices_i = paddle.nonzero(mask_i).squeeze([1])

                if paddle.numel(indices_i) == 0:
                    continue
                # Re-base the targets so they index inside this bucket.
                target_i = paddle.gather(target, indices_i, axis=0) - l_idx
                head_logprob_i = paddle.gather(head_logprob, indices_i, axis=0)
                if i == 0:
                    target_i_idx = paddle.concat(
                        [
                            paddle.arange(0, head_logprob_i.shape[0]).unsqueeze(
                                [1]), target_i.unsqueeze([1])
                        ],
                        axis=1)
                    logprob_i = head_logprob_i.gather_nd(target_i_idx)
                else:
                    # NOTE(review): the head path passes self.out_projs[0]
                    # itself while this path passes `.weight` -- confirm
                    # which form _compute_logits expects.
                    weight_i, bias_i, proj_i = weights[i], biases[
                        i], self.out_projs[i].weight if self.out_projs[
                            i] is not None else None

                    hidden_i = paddle.gather(hidden, indices_i, axis=0)

                    tail_logit_i = self._compute_logits(hidden_i, weight_i,
                                                        bias_i, proj_i)
                    tail_logprob_i = F.log_softmax(tail_logit_i, axis=-1)

                    target_i_idx = paddle.concat(
                        [
                            paddle.arange(0, tail_logprob_i.shape[0]).unsqueeze(
                                [1]), target_i.unsqueeze([1])
                        ],
                        axis=1)
                    logprob_i = tail_logprob_i.gather_nd(target_i_idx)

                    # log p(cluster i) + log p(target | cluster i)
                    logprob_i = head_logprob_i[:, -i] + logprob_i

                if self.keep_order or keep_order:
                    nll = paddle.scatter(nll, indices_i, -logprob_i)
                else:
                    index = paddle.arange(offset, offset + logprob_i.shape[0],
                                          1)
                    nll = paddle.scatter(nll, index, -logprob_i)

                offset += logprob_i.shape[0]

        return nll
Exemplo n.º 22
0
def compute_fbank_matrix(sr: int,
                         n_fft: int,
                         n_mels: int = 128,
                         f_min: float = 0.0,
                         f_max: Optional[float] = None,
                         htk: bool = False,
                         norm: Union[str, float] = 'slaney',
                         dtype: str = 'float64') -> Tensor:
    """Compute a Mel filterbank matrix of triangular filters.

    Parameters:
        sr(int): the audio sample rate.
        n_fft(int): the number of fft bins.
        n_mels(int): the number of Mel bins.
        f_min(float): the lower cut-off frequency, below which the filter response is zero.
        f_max(float): the upper cut-off frequency, above which the filter response is zero.
            Defaults to ``sr / 2`` when None.
        htk: whether to use htk formula.
        norm(str|float): ``'slaney'`` scales every filter by 2 divided by the
            distance between its two neighbouring mel points. A number is used
            as ``p`` for ``paddle.nn.functional.normalize`` along the last
            axis. Any other value leaves the filters unscaled.
        dtype(str): the datatype of the returned fbank matrix.

    Returns:
        The fbank matrix of shape (n_mels, int(1+n_fft//2)).
    Shape:
        output: (n_mels, int(1+n_fft//2))
    Notes:
        This function is consistent with librosa.filters.mel().

    Examples:

        .. code-block:: python

        import paddle
        import paddleaudio.functional as F
        m = F.compute_fbank_matrix(16000, 512)
        print(m.shape)
        >>[128, 257]

    """
    upper_freq = float(sr) / 2 if f_max is None else f_max
    n_bins = int(1 + n_fft // 2)

    # One row per mel band, one column per FFT bin.
    weights = paddle.zeros((n_mels, n_bins), dtype=dtype)

    # Centre frequency of each FFT bin.
    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype)

    # n_mels + 2 mel points: each band uses its two neighbours as edges.
    mel_f = mel_frequencies(
        n_mels + 2, f_min=f_min, f_max=upper_freq, htk=htk, dtype=dtype)

    # Spacing between consecutive mel points and the offset of every mel
    # point from every FFT bin.
    fdiff = mel_f[1:] - mel_f[:-1]
    ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0)

    for band in range(n_mels):
        rising = -ramps[band] / fdiff[band]
        falling = ramps[band + 2] / fdiff[band + 1]
        # The triangle is the lower of the two slopes, clipped at zero.
        triangle = paddle.minimum(rising, falling)
        weights[band] = paddle.maximum(paddle.zeros_like(rising), triangle)

    if norm == 'slaney':
        # Slaney-style scaling: approximately constant energy per channel.
        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
        weights *= enorm.unsqueeze(1)
    elif isinstance(norm, (int, float)):
        weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1)

    return weights
Exemplo n.º 23
0
    def forward(self,
                anchor_bboxes,
                num_anchors_list,
                gt_labels,
                gt_bboxes,
                pad_gt_mask,
                bg_index,
                gt_scores=None,
                pred_bboxes=None):
        r"""Assign ground-truth boxes to anchors (ATSS-style assignment).

        This code is based on
            https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py

        The assignment is done in following steps
        1. compute iou between all bbox (bbox of all pyramid levels) and gt
        2. compute center distance between all bbox and gt
        3. on each pyramid level, for each gt, select k bbox whose center
           are closest to the gt center, so we total select k*l bbox as
           candidates for each gt
        4. get corresponding iou for these candidates, and compute the
           mean and std, set mean + std as the iou threshold
        5. select these candidates whose iou are strictly greater than
           the threshold as positive
        6. limit the positive sample's center in gt
        7. if an anchor box is assigned to multiple gts, the one with the
           highest iou will be selected.
        8. if ``self.force_gt_matching`` is set, additionally force matches
           based on ``compute_max_iou_gt``.
        Args:
            anchor_bboxes (Tensor, float32): pre-defined anchors, shape(L, 4),
                    "xmin, xmax, ymin, ymax" format
            num_anchors_list (List): num of anchors in each level
            gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
            gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
            pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
            bg_index (int): background index
            gt_scores (Tensor|None, float32) Score of gt_bboxes,
                    shape(B, n, 1), if None, then it will initialize with one_hot label
            pred_bboxes (Tensor, float32, optional): predicted bounding boxes, shape(B, L, 4)
        Returns:
            assigned_labels (Tensor): (B, L)
            assigned_bboxes (Tensor): (B, L, 4)
            assigned_scores (Tensor): (B, L, C), if pred_bboxes is not None, then output ious
        """
        assert gt_labels.ndim == gt_bboxes.ndim and \
               gt_bboxes.ndim == 3

        num_anchors, _ = anchor_bboxes.shape
        batch_size, num_max_boxes, _ = gt_bboxes.shape

        # negative batch: no gt at all, so every anchor is background and
        # boxes/scores are all zero
        if num_max_boxes == 0:
            assigned_labels = paddle.full([batch_size, num_anchors],
                                          bg_index,
                                          dtype=gt_labels.dtype)
            assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
            assigned_scores = paddle.zeros(
                [batch_size, num_anchors, self.num_classes])
            return assigned_labels, assigned_bboxes, assigned_scores

        # 1. compute iou between gt and anchor bbox, [B, n, L]
        ious = iou_similarity(gt_bboxes.reshape([-1, 4]), anchor_bboxes)
        ious = ious.reshape([batch_size, -1, num_anchors])

        # 2. compute center distance between all anchors and gt, [B, n, L]
        gt_centers = bbox_center(gt_bboxes.reshape([-1, 4])).unsqueeze(1)
        anchor_centers = bbox_center(anchor_bboxes)
        gt2anchor_distances = (gt_centers - anchor_centers.unsqueeze(0)) \
            .norm(2, axis=-1).reshape([batch_size, -1, num_anchors])

        # 3. on each pyramid level, selecting topk closest candidates
        # based on the center distance, [B, n, L]
        is_in_topk, topk_idxs = self._gather_topk_pyramid(
            gt2anchor_distances, num_anchors_list, pad_gt_mask)

        # 4. get corresponding iou for the these candidates, and compute the
        # mean and std, 5. set mean + std as the iou threshold
        # (IoUs of non-candidate anchors are zeroed by the multiplication)
        iou_candidates = ious * is_in_topk
        iou_threshold = paddle.index_sample(
            iou_candidates.flatten(stop_axis=-2),
            topk_idxs.flatten(stop_axis=-2))
        iou_threshold = iou_threshold.reshape([batch_size, num_max_boxes, -1])
        iou_threshold = iou_threshold.mean(axis=-1, keepdim=True) + \
                        iou_threshold.std(axis=-1, keepdim=True)
        # keep a candidate only when its iou is strictly above the threshold
        is_in_topk = paddle.where(
            iou_candidates > iou_threshold.tile([1, 1, num_anchors]),
            is_in_topk, paddle.zeros_like(is_in_topk))

        # 6. check the positive sample's center in gt, [B, n, L]
        is_in_gts = check_points_inside_bboxes(anchor_centers, gt_bboxes)

        # select positive sample, [B, n, L]
        mask_positive = is_in_topk * is_in_gts * pad_gt_mask

        # 7. if an anchor box is assigned to multiple gts,
        # the one with the highest iou will be selected.
        # mask_positive_sum counts, per anchor, how many gts it is positive for
        mask_positive_sum = mask_positive.sum(axis=-2)
        if mask_positive_sum.max() > 1:
            mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
                [1, num_max_boxes, 1])
            is_max_iou = compute_max_iou_anchor(ious)
            mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        # 8. make sure every gt_bbox matches the anchor
        if self.force_gt_matching:
            is_max_iou = compute_max_iou_gt(ious) * pad_gt_mask
            # anchors for which exactly one gt is flagged take that flag
            # instead of the mask computed above
            mask_max_iou = (is_max_iou.sum(-2, keepdim=True) == 1).tile(
                [1, num_max_boxes, 1])
            mask_positive = paddle.where(mask_max_iou, is_max_iou,
                                         mask_positive)
            mask_positive_sum = mask_positive.sum(axis=-2)
        # index (along the gt axis) of the gt selected for every anchor
        assigned_gt_index = mask_positive.argmax(axis=-2)

        # assigned target
        # shift each image's gt index by batch_index * n so it addresses the
        # flattened gt tensors used by the gathers below
        batch_ind = paddle.arange(end=batch_size,
                                  dtype=gt_labels.dtype).unsqueeze(-1)
        assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
        assigned_labels = paddle.gather(gt_labels.flatten(),
                                        assigned_gt_index.flatten(),
                                        axis=0)
        assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
        # anchors without any positive gt are labelled as background
        assigned_labels = paddle.where(
            mask_positive_sum > 0, assigned_labels,
            paddle.full_like(assigned_labels, bg_index))

        assigned_bboxes = paddle.gather(gt_bboxes.reshape([-1, 4]),
                                        assigned_gt_index.flatten(),
                                        axis=0)
        assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])

        # one-hot over num_classes + 1 entries, then drop the bg_index column
        assigned_scores = F.one_hot(assigned_labels, self.num_classes + 1)
        ind = list(range(self.num_classes + 1))
        ind.remove(bg_index)
        assigned_scores = paddle.index_select(assigned_scores,
                                              paddle.to_tensor(ind),
                                              axis=-1)
        if pred_bboxes is not None:
            # assigned iou: scale the one-hot scores by the iou between the
            # prediction and its assigned gt
            ious = batch_iou_similarity(gt_bboxes, pred_bboxes) * mask_positive
            ious = ious.max(axis=-2).unsqueeze(-1)
            assigned_scores *= ious
        elif gt_scores is not None:
            # scale the one-hot scores by the score of the assigned gt;
            # anchors without a positive gt get a zero score
            gather_scores = paddle.gather(gt_scores.flatten(),
                                          assigned_gt_index.flatten(),
                                          axis=0)
            gather_scores = gather_scores.reshape([batch_size, num_anchors])
            gather_scores = paddle.where(mask_positive_sum > 0, gather_scores,
                                         paddle.zeros_like(gather_scores))
            assigned_scores *= gather_scores.unsqueeze(-1)

        return assigned_labels, assigned_bboxes, assigned_scores
Exemplo n.º 24
0
    def __init__(self,
                 rank,
                 local_rank,
                 world_size,
                 batch_size,
                 resume,
                 margin_softmax,
                 num_classes,
                 sample_rate=1.0,
                 embedding_size=512,
                 prefix="./"):
        """Set up the slice of the softmax weight owned by this rank.

        Args:
            rank: Rank of this process; selects which classes it owns.
            local_rank: Stored on the instance as ``local_rank``.
            world_size: Number of ranks the classes are split across.
            batch_size: Stored on the instance as ``batch_size``.
            resume: When truthy, try to load the weight and its momentum
                from ``prefix``; each falls back to a fresh value on failure.
            margin_softmax: Stored on the instance as ``margin_softmax``.
            num_classes: Total number of classes over all ranks.
            sample_rate: Fraction of the local classes to sample.
            embedding_size: Second dimension of the weight.
            prefix: Directory holding the per-rank weight files.
        """
        super(PartialFC, self).__init__()
        self.num_classes = num_classes
        self.rank = rank
        self.local_rank = local_rank
        self.world_size = world_size
        self.batch_size = batch_size
        self.margin_softmax = margin_softmax
        self.sample_rate = sample_rate
        self.embedding_size = embedding_size
        self.prefix = prefix

        # Classes are split as evenly as possible: the first
        # (num_classes % world_size) ranks each own one extra class.
        base_count, extra = divmod(num_classes, world_size)
        self.num_local = base_count + int(rank < extra)
        self.class_start = base_count * rank + min(rank, extra)
        self.num_sample = int(self.sample_rate * self.num_local)

        weight_file = "rank:{}_softmax_weight.pkl".format(self.rank)
        momentum_file = "rank:{}_softmax_weight_mom.pkl".format(self.rank)
        self.weight_name = os.path.join(self.prefix, weight_file)
        self.weight_mom_name = os.path.join(self.prefix, momentum_file)

        load_errors = (FileNotFoundError, KeyError, IndexError)

        def fresh_weight():
            # Randomly initialised weight for the local classes.
            return paddle.normal(0, 0.01,
                                 (self.num_local, self.embedding_size))

        if not resume:
            self.weight = fresh_weight()
            self.weight_mom = paddle.zeros_like(self.weight)
            print("softmax weight init successfully!")
            print("softmax weight mom init successfully!")
        else:
            try:
                self.weight = paddle.load(self.weight_name)
                print("softmax weight resume successfully!")
            except load_errors:
                self.weight = fresh_weight()
                print("softmax weight resume fail!")

            try:
                self.weight_mom = paddle.load(self.weight_mom_name)
                print("softmax weight mom resume successfully!")
            except load_errors:
                self.weight_mom = paddle.zeros_like(self.weight)
                print("softmax weight mom resume fail!")

        self.index = None
        use_all_classes = int(self.sample_rate) == 1
        if use_all_classes:
            # No sampling: the trainable parameter is the full local weight
            # and update() is a no-op.
            self.update = lambda: 0
            full_init = paddle.nn.initializer.Assign(self.weight)
            self.sub_weight = paddle.create_parameter(
                shape=self.weight.shape,
                dtype='float32',
                default_initializer=full_init)
            self.sub_weight_mom = self.weight_mom
        else:
            # With sampling, start from a 1x1 parameter.
            placeholder_init = paddle.nn.initializer.Assign(
                paddle.empty((1, 1)))
            self.sub_weight = paddle.create_parameter(
                shape=[1, 1],
                dtype='float32',
                default_initializer=placeholder_init)
Exemplo n.º 25
0
 def test_device_error4():
     """Call ``paddle.zeros_like`` with dtype "float" on a static-graph input.

     NOTE(review): presumably the caller expects this call to raise --
     confirm against the enclosing test.
     """
     main_program = fluid.Program()
     startup_program = fluid.Program()
     with fluid.program_guard(main_program, startup_program):
         static_input = fluid.data(name="data", shape=[10], dtype="float32")
         paddle.zeros_like(static_input, dtype="float")
Exemplo n.º 26
0
def zero_(tensor: Tensor):
    """Overwrite the value of ``tensor`` with zeros.

    Returns:
        Whatever ``tensor.set_value`` returns.
    """
    zeros = paddle.zeros_like(tensor)
    return tensor.set_value(zeros)
Exemplo n.º 27
0
def _no_grad_zero_(tensor):
    """Overwrite ``tensor`` with zeros under ``paddle.no_grad`` and return it."""
    with paddle.no_grad():
        zeros = paddle.zeros_like(tensor)
        tensor.set_value(zeros)
    return tensor
Exemplo n.º 28
0
 def reset(self):
     """Reset the step and epoch counters and zero every state entry.

     Each value in ``self.state_dict`` is replaced, under the same key, by
     ``paddle.zeros_like`` of that value.
     """
     self.step, self.epoch = 0, 0
     for name, value in self.state_dict.items():
         self.state_dict[name] = paddle.zeros_like(value)
Exemplo n.º 29
0
    def _get_band_mask(self, blocked_query_mask, blocked_key_mask, batch_size,
                       sequence_length):
        '''
        Build the band (global + sliding-window) part of the block mask.

        For every non-global query block, the mask covers the front global
        key blocks, a window of W key blocks and the back global key blocks,
        concatenated in that order along the last axis.

        Args:
            blocked_query_mask: query mask split into blocks, [B, L, bs]
            blocked_key_mask: key mask split into blocks, [B, L, bs]
            batch_size: B
            sequence_length: T, with L = T // bs blocks

        Returns:
            band mask of shape [B, H, L-G, bs, GF*bs + W*bs + GB*bs]

        NOTE(review): the reshapes below assume G == GF + GB -- confirm
        against where these attributes are set.
        '''
        GB = self.num_global_blocks_back
        GF = self.num_global_blocks_front
        G = self.num_global_blocks
        R = self.num_rand_blocks  # not used in this method
        W = self.window_size
        bs = self.block_size
        T = sequence_length
        L = T // bs  # blocked length
        B = batch_size
        H = self.num_heads
        # G+W+R
        # query_mask: [B, L, bs]
        # key_mask: [B, L, bs]
        # [B, L-G, bs, 1] * [B, L-G, 1, G*bs] -> [B, L-G, bs, G*bs]
        # queries: every block except the front/back global blocks
        temp_query_mask = paddle.reshape(blocked_query_mask[:, GF:-GB],
                                         [B, L - G, bs, 1])
        # keys of the front global blocks, shared by all query blocks
        temp_key_mask_front = paddle.reshape(blocked_key_mask[:, :GF],
                                             [B, 1, 1, GF * bs])
        global_block_mask_front = paddle.matmul(temp_query_mask,
                                                temp_key_mask_front)

        # keys of the back global blocks, shared by all query blocks
        temp_key_mask_back = paddle.reshape(blocked_key_mask[:, -GB:],
                                            [B, 1, 1, GB * bs])
        global_block_mask_back = paddle.matmul(temp_query_mask,
                                               temp_key_mask_back)

        # create window block mask
        # First W // 2 query blocks: the key window starts at block GF and is
        # padded with zero blocks after it (zeros_like is applied to a slice
        # that supplies the padding).
        key_mask_list = []
        for query_block_id in range(GF, GF + W // 2):
            left_block_id = query_block_id - W // 2
            right_block_id = query_block_id + W // 2
            zero_key_mask = paddle.zeros_like(
                blocked_key_mask[:, -(W - (right_block_id + 1 - G)):-GB])
            temp_key_mask = paddle.concat(
                [blocked_key_mask[:, GF:(right_block_id + 1)], zero_key_mask],
                axis=1)
            temp_key_mask = paddle.unsqueeze(temp_key_mask, 1)
            key_mask_list.append(temp_key_mask)
        roll_key_mask1 = paddle.concat(key_mask_list, axis=1)
        roll_key_mask1 = paddle.reshape(roll_key_mask1, [0, 0, W * bs])
        key_mask_list = []

        # Middle query blocks: W shifted slices of band_length key blocks
        # each are concatenated along the last axis.
        band_length = L - G - W // 2 * 2
        for query_block_id in range(GF + W // 2, GF + W // 2 + W):
            left_block_id = query_block_id - W // 2
            right_block_id = query_block_id + W // 2
            key_mask_list.append(
                blocked_key_mask[:, left_block_id:left_block_id + band_length])
        window_key_mask = paddle.concat(key_mask_list, axis=2)
        window_key_mask = paddle.reshape(window_key_mask, [0, 0, W * bs])

        # Last W // 2 query blocks: the key window ends before the back
        # global blocks and is padded with zero blocks in front of it.
        key_mask_list = []
        for query_block_id in range((L - GB) - W // 2, L - GB):
            left_block_id = query_block_id - W // 2
            right_block_id = query_block_id + W // 2
            zero_key_mask = paddle.zeros_like(
                blocked_key_mask[:, GF:GF + W - (L - left_block_id - GB)])
            temp_key_mask = paddle.concat(
                [zero_key_mask, blocked_key_mask[:, left_block_id:-GB]],
                axis=1)
            temp_key_mask = paddle.unsqueeze(temp_key_mask, 1)
            key_mask_list.append(temp_key_mask)
        roll_key_mask2 = paddle.concat(key_mask_list, axis=1)
        roll_key_mask2 = paddle.reshape(roll_key_mask2, [0, 0, W * bs])

        # Stack the three groups of query blocks along the block axis.
        window_key_mask = paddle.concat(
            [roll_key_mask1, window_key_mask, roll_key_mask2], axis=1)
        window_key_mask = paddle.unsqueeze(window_key_mask, axis=2)
        # [B, L-G, bs, 1] * [B, L-G, 1, W*bs] -> [B, L-G, bs, W*bs]
        window_block_mask = paddle.matmul(temp_query_mask, window_key_mask)

        # Key layout along the last axis: front global | window | back global.
        band_mask = paddle.concat([
            global_block_mask_front, window_block_mask, global_block_mask_back
        ],
                                  axis=3)
        band_mask = paddle.unsqueeze(band_mask, 1)  # for head
        band_mask = paddle.expand(band_mask, [B, H, L - G, bs, -1])
        return band_mask
Exemplo n.º 30
0
    def prop_seghead(
        self,
        ref_frame_embedding=None,
        previous_frame_embedding=None,
        current_frame_embedding=None,
        ref_scribble_label=None,
        previous_frame_mask=None,
        normalize_nearest_neighbor_distances=True,
        use_local_map=True,
        seq_names=None,
        gt_ids=None,
        k_nearest_neighbors=1,
        global_map_tmp_dic=None,
        local_map_dics=None,
        interaction_num=None,
        start_annotated_frame=None,
        frame_num=None,
        dynamic_seghead=None,
    ):
        """Run the dynamic segmentation head on every sample of the batch.

        For each sample a global distance map (current frame against the
        reference frame) and a local distance map (current frame against the
        previous frame) are computed. Both are concatenated with the current
        embedding and the previous-frame mask and fed to ``dynamic_seghead``.

        Args:
            ref_frame_embedding: Reference-frame embeddings, indexed per sample.
            previous_frame_embedding: Previous-frame embeddings, indexed per
                sample.
            current_frame_embedding: Current-frame embeddings; its shape is
                unpacked as ``(bs, c, h, w)``.
            ref_scribble_label: Scribble labels of the reference frame.
            previous_frame_mask: Mask of the previous frame.
            normalize_nearest_neighbor_distances: If True, map the global
                distances through ``(sigmoid(x) - 0.5) * 2``.
            use_local_map: If True, use the local previous-frame matching;
                otherwise fall back to the global nearest-neighbour matching
                against the previous frame.
            seq_names: Sequence name of each sample; used as dictionary key.
            gt_ids: Per-sample ids forwarded to the matching helpers.
            k_nearest_neighbors: Forwarded to the matching helpers.
            global_map_tmp_dic: Optional memory of global maps keyed by
                sequence name; updated in place.
            local_map_dics: Optional pair ``(local_map_tmp_dic,
                local_map_dist_dic)`` of memories keyed by sequence name;
                updated in place.
            interaction_num: 1-based interaction round, used to index the
                local memories.
            start_annotated_frame: Index of the annotated frame, used to
                weight the local memory by frame distance.
            frame_num: Per-sample frame index.
            dynamic_seghead: Callable producing the prediction from the
                concatenated features.

        Returns:
            ``dic_tmp`` (sequence name -> prediction) when
            ``global_map_tmp_dic`` is None; ``(dic_tmp, global_map_tmp_dic)``
            when only ``local_map_dics`` is None; otherwise
            ``(dic_tmp, global_map_tmp_dic, local_map_dics)``.
        """
        cfg = self.cfg
        dic_tmp = {}
        bs, c, h, w = current_frame_embedding.shape
        # In test mode the scribble label is used at its given resolution;
        # otherwise it is resized to the embedding resolution.
        if cfg.get('test_mode'):
            scale_ref_scribble_label = float_(ref_scribble_label)
        else:
            scale_ref_scribble_label = paddle.nn.functional.interpolate(
                float_(ref_scribble_label), size=(h, w), mode='nearest')
        scale_ref_scribble_label = int_(scale_ref_scribble_label)
        scale_previous_frame_label = paddle.nn.functional.interpolate(
            float_(previous_frame_mask), size=(h, w), mode='nearest')
        scale_previous_frame_label = int_(scale_previous_frame_label)
        for n in range(bs):
            seq_current_frame_embedding = current_frame_embedding[n]
            seq_ref_frame_embedding = ref_frame_embedding[n]
            seq_prev_frame_embedding = previous_frame_embedding[n]
            # Move the channel axis last for the matching helpers.
            seq_ref_frame_embedding = seq_ref_frame_embedding.transpose(
                [1, 2, 0])
            seq_current_frame_embedding = seq_current_frame_embedding.transpose(
                [1, 2, 0])
            seq_ref_scribble_label = scale_ref_scribble_label[n].transpose(
                [1, 2, 0])
            # ---- Global map
            nn_features_n, ref_obj_ids = nearest_neighbor_features_per_object(
                reference_embeddings=seq_ref_frame_embedding,
                query_embeddings=seq_current_frame_embedding,
                reference_labels=seq_ref_scribble_label,
                k_nearest_neighbors=k_nearest_neighbors,
                gt_ids=gt_ids[n],
                n_chunks=10)
            if normalize_nearest_neighbor_distances:
                nn_features_n = (paddle.nn.functional.sigmoid(nn_features_n) -
                                 0.5) * 2

            if global_map_tmp_dic is not None:  # when testing, use global map memory
                if seq_names[n] not in global_map_tmp_dic:
                    global_map_tmp_dic[seq_names[n]] = paddle.ones_like(
                        nn_features_n).tile([1000, 1, 1, 1, 1])
                # Keep the element-wise minimum of the new map and the
                # stored map for this frame, then store it back.
                nn_features_n = paddle.where(
                    nn_features_n <= global_map_tmp_dic[seq_names[n]][
                        frame_num[n]].unsqueeze(0), nn_features_n,
                    global_map_tmp_dic[seq_names[n]][frame_num[n]].unsqueeze(
                        0))

                global_map_tmp_dic[seq_names[n]][
                    frame_num[n]] = nn_features_n.detach()[0]

            # ---- Local dist map
            seq_prev_frame_embedding = seq_prev_frame_embedding.transpose(
                [1, 2, 0])
            seq_previous_frame_label = scale_previous_frame_label[n].transpose(
                [1, 2, 0])

            if use_local_map:
                prev_frame_nn_features_n = local_previous_frame_nearest_neighbor_features_per_object(
                    prev_frame_embedding=seq_prev_frame_embedding,
                    query_embedding=seq_current_frame_embedding,
                    prev_frame_labels=seq_previous_frame_label,
                    gt_ids=ref_obj_ids,
                    max_distance=cfg['model_max_local_distance'])
            else:
                prev_frame_nn_features_n, _ = nearest_neighbor_features_per_object(
                    reference_embeddings=seq_prev_frame_embedding,
                    query_embeddings=seq_current_frame_embedding,
                    reference_labels=seq_previous_frame_label,
                    k_nearest_neighbors=k_nearest_neighbors,
                    gt_ids=gt_ids[n],
                    n_chunks=20)
                prev_frame_nn_features_n = (
                    paddle.nn.functional.sigmoid(prev_frame_nn_features_n) -
                    0.5) * 2

            if local_map_dics is not None:  # when testing, use local map memory
                local_map_tmp_dic, local_map_dist_dic = local_map_dics
                if seq_names[n] not in local_map_dist_dic:
                    print(seq_names[n], 'not in local_map_dist_dic')
                    # paddle.zeros takes the shape as one list argument; the
                    # former paddle.zeros(1000, 9) passed 9 as the dtype.
                    local_map_dist_dic[seq_names[n]] = paddle.zeros([1000, 9])
                if seq_names[n] not in local_map_tmp_dic:
                    print(seq_names[n], 'not in local_map_tmp_dic')
                    local_map_tmp_dic[seq_names[n]] = paddle.zeros_like(
                        prev_frame_nn_features_n).unsqueeze(0).tile(
                            [1000, 9, 1, 1, 1, 1])
                # Store the weight of this round: the inverse distance to
                # the annotated frame.
                # NOTE(review): this divides by zero when frame_num[n]
                # equals start_annotated_frame -- confirm the caller never
                # passes the annotated frame itself.
                local_map_dist_dic[seq_names[n]][
                    frame_num[n], interaction_num -
                    1] = 1.0 / (abs(frame_num[n] - start_annotated_frame)
                                )  # bugs fixed.
                local_map_tmp_dic[seq_names[n]][
                    frame_num[n],
                    interaction_num - 1] = prev_frame_nn_features_n.squeeze(
                        0).detach()  # bugs fixed.
                if interaction_num == 1:
                    prev_frame_nn_features_n = local_map_tmp_dic[seq_names[n]][
                        frame_num[n]][interaction_num - 1]
                    prev_frame_nn_features_n = prev_frame_nn_features_n.unsqueeze(
                        0)
                else:
                    # Use whichever of the current and previous round has
                    # the larger stored weight.
                    if local_map_dist_dic[seq_names[n]][frame_num[n]][interaction_num - 1] > \
                            local_map_dist_dic[seq_names[n]][frame_num[n]][interaction_num - 2]:
                        prev_frame_nn_features_n = local_map_tmp_dic[
                            seq_names[n]][frame_num[n]][interaction_num - 1]
                        prev_frame_nn_features_n = prev_frame_nn_features_n.unsqueeze(
                            0)
                    else:
                        prev_frame_nn_features_n = local_map_tmp_dic[
                            seq_names[n]][frame_num[n]][interaction_num - 2]
                        prev_frame_nn_features_n = prev_frame_nn_features_n.unsqueeze(
                            0)

                local_map_dics = (local_map_tmp_dic, local_map_dist_dic)

            # Per-object mask of the previous frame (label == object id).
            to_cat_previous_frame = (
                float_(seq_previous_frame_label) == float_(ref_obj_ids)
            )  # float comparision?

            # Repeat the current embedding once per object.
            to_cat_current_frame_embedding = current_frame_embedding[
                n].unsqueeze(0).tile((ref_obj_ids.shape[0], 1, 1, 1))

            to_cat_nn_feature_n = nn_features_n.squeeze(0).transpose(
                [2, 3, 0, 1])
            to_cat_previous_frame = float_(
                to_cat_previous_frame.unsqueeze(-1).transpose([2, 3, 0, 1]))
            to_cat_prev_frame_nn_feature_n = prev_frame_nn_features_n.squeeze(
                0).transpose([2, 3, 0, 1])
            to_cat = paddle.concat(
                (to_cat_current_frame_embedding, to_cat_nn_feature_n,
                 to_cat_prev_frame_nn_feature_n, to_cat_previous_frame), 1)
            pred_ = dynamic_seghead(to_cat)
            pred_ = pred_.transpose([1, 0, 2, 3])
            dic_tmp[seq_names[n]] = pred_

        if global_map_tmp_dic is None:
            return dic_tmp
        else:
            if local_map_dics is None:
                return dic_tmp, global_map_tmp_dic
            else:
                return dic_tmp, global_map_tmp_dic, local_map_dics