Example #1
    def sqrt_newton_schulz_autograd(self, A, numIters):
        A_shape = A.shape
        batchSize = A_shape[0]
        dim = A_shape[1]

        normA = A * A
        normA = paddle.sum(normA, axis=1)
        normA = paddle.sum(normA, axis=1)
        normA = paddle.sqrt(normA)
        normA1 = normA.reshape([batchSize, 1, 1])
        Y = paddle.divide(A, paddle.expand_as(normA1, A))
        I = paddle.eye(dim, dim).reshape([1, dim, dim])
        l0 = []
        for i in range(batchSize):
            l0.append(I)
        I = paddle.concat(l0, axis=0)
        I.stop_gradient = False
        Z = paddle.eye(dim, dim).reshape([1, dim, dim])
        l1 = []
        for i in range(batchSize):
            l1.append(Z)
        Z = paddle.concat(l1, axis=0)
        Z.stop_gradient = False

        for i in range(numIters):
            T = 0.5 * (3.0 * I - Z.bmm(Y))
            Y = Y.bmm(T)
            Z = T.bmm(Z)
        sA = Y * paddle.sqrt(normA1).reshape([batchSize, 1, 1])
        sA = paddle.expand_as(sA, A)
        return sA
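A standalone restatement of the same Newton-Schulz iteration, useful as a sanity check: squaring the returned root should approximately recover the input. The function name and the random SPD test batch below are illustrative, not part of the snippet above.

import paddle

def newton_schulz_sqrt(A, num_iters=20):
    # Batched Newton-Schulz iteration, restating the scheme used above.
    batch, dim = A.shape[0], A.shape[1]
    norm_a = paddle.sqrt((A * A).sum(axis=[1, 2])).reshape([batch, 1, 1])
    Y = A / norm_a
    I = paddle.eye(dim).expand([batch, dim, dim])
    Z = paddle.eye(dim).expand([batch, dim, dim])
    for _ in range(num_iters):
        T = 0.5 * (3.0 * I - paddle.bmm(Z, Y))
        Y = paddle.bmm(Y, T)
        Z = paddle.bmm(T, Z)
    return Y * paddle.sqrt(norm_a)

# Squaring the result should approximately recover the input.
B = paddle.randn([4, 8, 8])
A = paddle.bmm(B, B.transpose([0, 2, 1])) + 8.0 * paddle.eye(8)  # batch of SPD matrices
sA = newton_schulz_sqrt(A)
print(float(paddle.abs(paddle.bmm(sA, sA) - A).max()))  # small residual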
Example #2
    def forward(self, embedding, targets):
        if isinstance(embedding, dict):
            embedding = embedding['features']
        # Normalize embedding features
        embedding = F.normalize(embedding, axis=1)
        dist_mat = paddle.matmul(embedding, embedding, transpose_y=True)

        N = dist_mat.shape[0]
        is_pos = targets.reshape([N, 1]).expand([N, N]).equal(
            paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
        is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal(
            paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')

        # Mask scores related to itself
        is_pos = is_pos - paddle.eye(N, N)

        s_p = dist_mat * is_pos
        s_n = dist_mat * is_neg

        logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
        logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 -
                                                                     is_neg)

        loss = F.softplus(
            paddle.logsumexp(logit_p, axis=1) +
            paddle.logsumexp(logit_n, axis=1)).mean()

        return {"PairwiseCosface": loss}
Example #3
    def forward(self, node_feat, edge_feat):
        # get size
        num_tasks = node_feat.shape[0]
        num_data = node_feat.shape[1]

        # get eye matrix (batch_size x 2 x node_size x node_size)
        diag_mask = 1.0 - paddle.expand(
            paddle.eye(num_data),
            [num_tasks, self.edge_dim, num_data, num_data])

        # set diagonal as zero and normalize
        edge_feat = F.normalize(edge_feat * diag_mask, p=1, axis=-1)

        # compute attention and aggregate
        aggr_feat = paddle.bmm(
            paddle.concat(paddle.split(edge_feat, 2, 1),
                          self.edge_dim).squeeze(1), node_feat)

        node_feat = paddle.transpose(
            paddle.concat(
                [node_feat,
                 paddle.concat(paddle.split(aggr_feat, 2, 1), -1)], -1),
            (0, 2, 1))

        # non-linear transform
        node_feat = paddle.transpose(self.network(node_feat.unsqueeze(-1)),
                                     (0, 2, 1, 3)).squeeze(-1)
        return node_feat
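The eye-based diagonal mask followed by L1 normalization used above (and in the next example) can be exercised on its own; the shapes below are picked for illustration:

import paddle
import paddle.nn.functional as F

num_tasks, edge_dim, num_data = 2, 2, 5
edge_feat = paddle.rand([num_tasks, edge_dim, num_data, num_data])
diag_mask = 1.0 - paddle.expand(paddle.eye(num_data),
                                [num_tasks, edge_dim, num_data, num_data])
edge_feat = F.normalize(edge_feat * diag_mask, p=1, axis=-1)
print(float(edge_feat.sum(-1).min()), float(edge_feat.sum(-1).max()))    # rows sum to 1
print(float(paddle.diagonal(edge_feat, axis1=2, axis2=3).abs().max()))   # diagonal is 0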
Example #4
    def label2edge(self, label, mask_diag=True):
        # get size
        num_samples = label.shape[1]
        # reshape
        label_i = paddle.transpose(
            paddle.expand(label,
                          [num_samples, label.shape[0], label.shape[1]]),
            [1, 2, 0])
        label_j = label_i.transpose((0, 2, 1))
        # compute edge
        edge = paddle.cast(paddle.equal(label_i, label_j), 'float32')

        # expand
        edge = edge.unsqueeze(1)
        if self.edge_type == 'dist':
            edge = 1 - edge
        if self.edge_dim == 2:
            edge = paddle.concat([edge, 1 - edge], 1)

        if mask_diag:
            diag_mask = 1.0 - paddle.expand(
                paddle.eye(edge.shape[2]),
                [edge.shape[0], self.edge_dim, edge.shape[2], edge.shape[2]])
            edge = edge * diag_mask
        if self.edge_activation == 'softmax':
            edge = edge / edge.sum(-1).unsqueeze(-1)
        return edge
Example #5
 def build_inv_delta_C_paddle(self, C):
     """ Return inv_delta_C which is needed to calculate T """
     F = self.F
     hat_eye = paddle.eye(F, dtype='float64')  # F x F
     hat_C = paddle.norm(C.reshape([1, F, 2]) - C.reshape([F, 1, 2]),
                         axis=2) + hat_eye
     hat_C = (hat_C**2) * paddle.log(hat_C)
     delta_C = paddle.concat(  # F+3 x F+3
         [
             paddle.concat([paddle.ones((F, 1), dtype='float64'), C, hat_C],
                           axis=1),  # F x F+3
             paddle.concat([
                 paddle.zeros((2, 3), dtype='float64'),
                 paddle.transpose(C, perm=[1, 0])
             ],
                           axis=1),  # 2 x F+3
             paddle.concat([
                 paddle.zeros((1, 3), dtype='float64'),
                 paddle.ones((1, F), dtype='float64')
             ],
                           axis=1)  # 1 x F+3
         ],
         axis=0)
     inv_delta_C = paddle.inverse(delta_C)
     return inv_delta_C  # F+3 x F+3
Example #6
    def forward(self, nodes, edges, nums):
        start, cat_nodes = 0, []
        for num in nums:
            sample_nodes = nodes[start:start + num]
            cat_nodes.append(
                paddle.concat([
                    paddle.expand(sample_nodes.unsqueeze(1), [-1, num, -1]),
                    paddle.expand(sample_nodes.unsqueeze(0), [num, -1, -1])
                ], -1).reshape([num**2, -1]))
            start += num
        cat_nodes = paddle.concat([paddle.concat(cat_nodes), edges], -1)
        cat_nodes = self.relu(self.in_fc(cat_nodes))
        coefs = self.coef_fc(cat_nodes)

        start, residuals = 0, []
        for num in nums:
            residual = F.softmax(
                -paddle.eye(num).unsqueeze(-1) * 1e9 +
                coefs[start:start + num**2].reshape([num, num, -1]), 1)
            residuals.append((residual * cat_nodes[start:start + num**2]
                              .reshape([num, num, -1])).sum(1))
            start += num**2

        nodes += self.relu(self.out_fc(paddle.concat(residuals)))
        return [nodes, cat_nodes]
Example #7
    def forward(self, features, im_info, boxes=None):
        # prediction
        pred_cls_score_list = []
        pred_bbox_offsets_list = []
        for x in features:
            t = F.relu(self.rpn_conv(x))
            pred_cls_score_list.append(self.rpn_cls_score(t))
            pred_bbox_offsets_list.append(self.rpn_bbox_offsets(t))
        # get anchors
        all_anchors_list = []
        # stride: 64,32,16,8,4 p6->p2
        base_stride = 4
        off_stride = 2**(len(features) - 1)  # 16
        for fm in features:
            layer_anchors = self.anchors_generator(fm, base_stride, off_stride)
            off_stride = off_stride // 2
            all_anchors_list.append(layer_anchors)
        # sample from the predictions
        rpn_rois = find_top_rpn_proposals(self.training,
                                          pred_bbox_offsets_list,
                                          pred_cls_score_list,
                                          all_anchors_list, im_info)
        rpn_rois = rpn_rois.cast('float32')
        if self.training:
            rpn_labels, rpn_bbox_targets = fpn_anchor_target(
                boxes, im_info, all_anchors_list)
            #rpn_labels = rpn_labels.astype(np.int32)
            pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
                pred_cls_score_list, pred_bbox_offsets_list)
            # rpn loss
            valid_masks = rpn_labels >= 0
            # objectness_loss = softmax_loss(
            #     torch.gather(pred_cls_score,torch.nonzero(valid_masks)),
            #     torch.gather(rpn_labels,torch.nonzero(valid_masks)))

            objectness_loss = F.binary_cross_entropy(
                F.softmax(
                    paddle.gather(pred_cls_score, paddle.nonzero(valid_masks))),
                paddle.gather(
                    paddle.eye(2),
                    paddle.gather(rpn_labels, paddle.nonzero(valid_masks))))

            pos_masks = rpn_labels > 0
            # localization_loss = smooth_l1_loss(
            #     pred_bbox_offsets[pos_masks],
            #     rpn_bbox_targets[pos_masks],
            #     config.rpn_smooth_l1_beta)
            localization_loss = F.smooth_l1_loss(
                paddle.gather(pred_bbox_offsets, paddle.nonzero(pos_masks)),
                paddle.gather(rpn_bbox_targets, paddle.nonzero(pos_masks)),
                delta=config.rcnn_smooth_l1_beta)
            normalizer = 1 / valid_masks.cast('float32').sum()
            loss_rpn_cls = objectness_loss.sum() * normalizer
            loss_rpn_loc = localization_loss.sum() * normalizer
            loss_dict = {}
            loss_dict['loss_rpn_cls'] = loss_rpn_cls
            loss_dict['loss_rpn_loc'] = loss_rpn_loc
            return rpn_rois, loss_dict
        else:
            return rpn_rois
Example #8
 def forward(self):
     """
     forward
     """
     num_rows = self.config["num_rows"]
     num_columns = self.config["num_columns"]
     dtype = self.config["dtype"]
     x = paddle.eye(num_rows, num_columns=num_columns, dtype=dtype)
     return x
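For reference, paddle.eye takes the number of rows, an optional num_columns, and a dtype; a quick dygraph check:

import paddle

x = paddle.eye(3, num_columns=5, dtype='float64')
print(x.shape)   # [3, 5]
print(x.dtype)   # paddle.float64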
Example #9
    def __init__(self, rgb_range, rgb_mean, rgb_std, sign=-1):
        super(MeanShift, self).__init__(3, 3, kernel_size=1)
        std = paddle.to_tensor(rgb_std)
        self.weight.set_value(paddle.eye(3).reshape([3, 3, 1, 1]))
        self.weight.set_value(self.weight / (std.reshape([3, 1, 1, 1])))

        mean = paddle.to_tensor(rgb_mean)
        self.bias.set_value(sign * rgb_range * mean / std)

        self.weight.trainable = False
        self.bias.trainable = False
Example #10
 def __init__(self, num_classes=16, max_point=2048):
     super(PointNet_Clas, self).__init__()
     self.input_transform_net = nn.Sequential(nn.Conv1D(3, 64, 1),
                                              nn.BatchNorm(64), nn.ReLU(),
                                              nn.Conv1D(64, 128, 1),
                                              nn.BatchNorm(128), nn.ReLU(),
                                              nn.Conv1D(128, 1024, 1),
                                              nn.BatchNorm(1024), nn.ReLU(),
                                              nn.MaxPool1D(max_point))
     self.input_fc = nn.Sequential(
         nn.Linear(1024, 512), nn.ReLU(), nn.Linear(512, 256), nn.ReLU(),
         nn.Linear(256,
                   9,
                   weight_attr=paddle.framework.ParamAttr(
                       initializer=paddle.nn.initializer.Assign(
                           paddle.zeros((256, 9)))),
                   bias_attr=paddle.framework.ParamAttr(
                       initializer=paddle.nn.initializer.Assign(
                           paddle.reshape(paddle.eye(3), [-1])))))
     self.mlp_1 = nn.Sequential(
         nn.Conv1D(3, 64, 1),
         nn.BatchNorm(64),
         nn.ReLU(),
         nn.Conv1D(64, 64, 1),
         nn.BatchNorm(64),
         nn.ReLU(),
     )
     self.feature_transform_net = nn.Sequential(nn.Conv1D(64, 64, 1),
                                                nn.BatchNorm(64), nn.ReLU(),
                                                nn.Conv1D(64, 128, 1),
                                                nn.BatchNorm(128),
                                                nn.ReLU(),
                                                nn.Conv1D(128, 1024, 1),
                                                nn.BatchNorm(1024),
                                                nn.ReLU(),
                                                nn.MaxPool1D(max_point))
     self.feature_fc = nn.Sequential(nn.Linear(1024, 512), nn.ReLU(),
                                     nn.Linear(512, 256), nn.ReLU(),
                                     nn.Linear(256, 64 * 64))
     self.mlp_2 = nn.Sequential(
         nn.Conv1D(64, 64, 1),
         nn.BatchNorm(64),
         nn.ReLU(),
         nn.Conv1D(64, 128, 1),
         nn.BatchNorm(128),
         nn.ReLU(),
         nn.Conv1D(128, 1024, 1),
         nn.BatchNorm(1024),
         nn.ReLU(),
     )
     self.fc = nn.Sequential(nn.Linear(1024, 512), nn.ReLU(),
                             nn.Linear(512, 256), nn.ReLU(),
                             nn.Dropout(p=0.7), nn.Linear(256, num_classes))
Example #11
 def resize_mat(self, x, t):
     n, c, s, s1 = x.shape
     assert s == s1
     if t <= 1:
         return x
     x = paddle.reshape(x, (n * c, -1, 1, 1))
     x = x * paddle.eye(t, t, dtype=x.dtype)
     x = paddle.reshape(x, (n * c, s, s, t, t))
     x = paddle.concat(paddle.split(x, s, axis=1), axis=3)  # s chunks of size 1 along axis 1
     x = paddle.concat(paddle.split(x, s, axis=2), axis=4)  # s chunks of size 1 along axis 2
     x = paddle.reshape(x, (n, c, s * t, s * t))
     return x
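The expansion above is effectively a Kronecker product of each s x s slice with the t x t identity; a minimal sketch of that equivalence using paddle.kron (shapes are illustrative):

import paddle

s, t = 3, 2
m = paddle.rand([s, s])
expanded = paddle.kron(m, paddle.eye(t))   # every entry becomes a t x t diagonal block
print(expanded.shape)                      # [6, 6]
print(expanded[:2, :2].numpy(), m[0, 0].numpy())  # top-left block is m[0, 0] * I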
Example #12
    def __init__(self, mean_rgb, sub):
        super(MeanShift, self).__init__()

        sign = -1 if sub else 1
        r = mean_rgb[0] * sign
        g = mean_rgb[1] * sign
        b = mean_rgb[2] * sign

        self.shifter = nn.Conv2D(3, 3, 1, 1, 0)
        self.shifter.weight.set_value(paddle.eye(3).reshape([3, 3, 1, 1]))
        self.shifter.bias.set_value(np.array([r, g, b]).astype('float32'))
        # Freeze the mean shift layer
        for params in self.shifter.parameters():
            params.trainable = False
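Why paddle.eye(3).reshape([3, 3, 1, 1]) works here: it is a 1x1 convolution kernel that copies each channel, so only the per-channel bias shifts the values. A standalone sketch with toy offsets:

import paddle
import paddle.nn as nn

shift = nn.Conv2D(3, 3, 1)
shift.weight.set_value(paddle.eye(3).reshape([3, 3, 1, 1]))  # identity 1x1 kernel
shift.bias.set_value(paddle.to_tensor([0.1, -0.2, 0.3]))     # illustrative RGB offsets

x = paddle.rand([1, 3, 4, 4])
offset = paddle.to_tensor([0.1, -0.2, 0.3]).reshape([1, 3, 1, 1])
print(float(paddle.abs(shift(x) - (x + offset)).max()))      # ~0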
Example #13
    def test_out(self):
        with fluid.program_guard(fluid.Program()):
            data = paddle.eye(10)
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            result, = exe.run(fetch_list=[data])
            expected_result = np.eye(10, dtype="float32")
        self.assertEqual((result == expected_result).all(), True)

        with fluid.program_guard(fluid.Program()):
            data = paddle.eye(10, num_columns=7, dtype="float64")
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            result, = exe.run(fetch_list=[data])
            expected_result = np.eye(10, 7, dtype="float64")
        self.assertEqual((result == expected_result).all(), True)

        with fluid.program_guard(fluid.Program()):
            data = paddle.eye(10, dtype="int64")
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            result, = exe.run(fetch_list=[data])
            expected_result = np.eye(10, dtype="int64")
        self.assertEqual((result == expected_result).all(), True)
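The same checks written for dynamic-graph mode, as a minimal sketch:

import numpy as np
import paddle

paddle.disable_static()
np.testing.assert_array_equal(paddle.eye(10).numpy(), np.eye(10, dtype='float32'))
np.testing.assert_array_equal(paddle.eye(10, num_columns=7, dtype='float64').numpy(),
                              np.eye(10, 7, dtype='float64'))
np.testing.assert_array_equal(paddle.eye(10, dtype='int64').numpy(), np.eye(10, dtype='int64'))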
Example #14
    def sigmoid_focal_loss(self, x, label, fg_num, gamma=2.0, alpha=0.25):
        C = x.shape[1]
        eye = paddle.eye(C + 1, dtype='float32')
        one_hot = L.gather(eye, label)
        pos_mask = one_hot[:, 1:]  # positive-sample mask

        p = L.sigmoid(x)  # [batch_size * num_grid_cells, 80], predicted class probabilities
        pos_loss = pos_mask * (0 - L.log(p + 1e-9)) * L.pow(1 - p,
                                                            gamma) * alpha
        neg_loss = (1.0 - pos_mask) * (0 - L.log(1 - p + 1e-9)) * L.pow(
            p, gamma) * (1 - alpha)
        focal_loss = pos_loss + neg_loss
        if fg_num > 0.5:  # when there are no gt boxes (fg_num == 0), focal_loss is not divided by anything
            focal_loss = focal_loss / fg_num
        return focal_loss
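The paddle.eye + gather pattern at the top of this example is just a one-hot encoding in which label 0 is the background class; a standalone illustration (labels below are made up):

import paddle

num_classes = 4
labels = paddle.to_tensor([0, 2, 4, 1])          # 0 = background, 1..4 = foreground classes
eye = paddle.eye(num_classes + 1, dtype='float32')
one_hot = paddle.gather(eye, labels)             # [N, num_classes + 1]
pos_mask = one_hot[:, 1:]                        # drop the background column
print(pos_mask.numpy())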
Example #15
def perm_to_Pmat(perm, dim):
    pshape = perm.shape
    bs = int(np.product(perm.shape[:-1]).item())
    perm = perm.reshape((bs, pshape[-1]))
    oneslst = []
    for i in range(bs):
        idlst = np.arange(dim)
        perm_item = perm[i, :]
        for idx, p in enumerate(perm_item - 1):
            temp = idlst[idx]
            idlst[idx] = idlst[p]
            idlst[p] = temp

        ones = paddle.eye(dim)
        nmat = paddle.scatter(ones, paddle.to_tensor(idlst), ones)
        oneslst.append(nmat)
    return np.array(oneslst).reshape(list(pshape[:-1]) + [dim, dim])
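A usage sketch, assuming perm_to_Pmat above is in scope: pivots are given in 1-based LAPACK style, and each output slice should be a permutation matrix (pivot values are illustrative):

import numpy as np

perm = np.array([[2, 3, 3]])     # illustrative 1-based pivots for dim = 3
P = perm_to_Pmat(perm, 3)
print(P.shape)                   # (1, 3, 3)
print(P.sum(-1), P.sum(-2))      # each row and column sums to 1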
Example #16
def eye_(tensor):
    r"""Fills the 2-dimensional input `Tensor` with the identity
    matrix. Preserves the identity of the inputs in `Linear` layers, where as
    many inputs are preserved as possible.

    Args:
        tensor: a 2-dimensional `paddle.Tensor`

    Examples:
        >>> w = paddle.empty([3, 5])
        >>> eye_(w)
    """
    if tensor.ndimension() != 2:
        raise ValueError("Only tensors with 2 dimensions are supported")

    with paddle.no_grad():
        tensor.set_value(paddle.eye(*tensor.shape))
    return tensor
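A usage sketch, assuming eye_ above is in scope: with an identity weight, a square Linear layer reduces to adding its bias.

import paddle
import paddle.nn as nn

linear = nn.Linear(5, 5)
eye_(linear.weight)              # fill the weight in place with the identity
x = paddle.rand([2, 5])
print(float(paddle.abs(linear(x) - (x + linear.bias)).max()))  # ~0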
Example #17
def partial_trace_discontiguous(rho, preserve_qubits=None):
    r"""计算量子态的偏迹,可选取任意子系统。

    Args:
        rho (Tensor): 输入的量子态
        preserve_qubits (list): 要保留的量子比特,默认为 None,表示全保留
    """
    if preserve_qubits is None:
        return rho
    else:
        n = int(log2(rho.size) // 2)
        num_preserve = len(preserve_qubits)

        shape = paddle.ones((n + 1, ))
        shape = 2 * shape
        shape[n] = 2**n
        shape = paddle.cast(shape, "int32")
        identity = paddle.eye(2**n)
        identity = paddle.reshape(identity, shape=shape)
        discard = list()
        for idx in range(0, n):
            if idx not in preserve_qubits:
                discard.append(idx)
        addition = [n]
        preserve_qubits.sort()

        preserve_qubits = paddle.to_tensor(preserve_qubits)
        discard = paddle.to_tensor(discard)
        addition = paddle.to_tensor(addition)
        permute = paddle.concat([discard, preserve_qubits, addition])

        identity = paddle.transpose(identity, perm=permute)
        identity = paddle.reshape(identity, (2**n, 2**n))

        result = np.zeros((2**num_preserve, 2**num_preserve),
                          dtype="complex64")
        result = paddle.to_tensor(result)

        for i in range(0, 2**num_preserve):
            bra = identity[i * 2**num_preserve:(i + 1) * 2**num_preserve, :]
            result = result + matmul(matmul(bra, rho),
                                     transpose(bra, perm=[1, 0]))

        return result
Example #18
    def _contrastive(self, feats_, labels_):
        """
        Args:
            feats_ (Tensor): sampled pixel, shape = [total_classes, n_view, feat_dim], total_classes = batch_size * single image classes
            labels_ (Tensor): label, shape = [total_classes]
        """
        anchor_num, n_view = feats_.shape[0], feats_.shape[1]

        labels_ = labels_.reshape((-1, 1))
        mask = paddle.equal(labels_,
                            paddle.transpose(labels_,
                                             [1, 0])).astype('float32')

        contrast_count = n_view
        contrast_feature = paddle.concat(paddle.unbind(feats_, axis=1), axis=0)

        anchor_feature = contrast_feature
        anchor_count = contrast_count

        anchor_dot_contrast = paddle.matmul(
            anchor_feature, paddle.transpose(contrast_feature,
                                             [1, 0])) / self.temperature
        logits_max = paddle.max(anchor_dot_contrast, axis=1, keepdim=True)
        logits = anchor_dot_contrast - logits_max

        mask = paddle.tile(mask, [anchor_count, contrast_count])
        neg_mask = 1 - mask

        logits_mask = 1 - paddle.eye(mask.shape[0]).astype('float32')
        mask = mask * logits_mask

        neg_logits = paddle.exp(logits) * neg_mask
        neg_logits = neg_logits.sum(1, keepdim=True)

        exp_logits = paddle.exp(logits)

        log_prob = logits - paddle.log(exp_logits + neg_logits)

        mean_log_prob_pos = (mask * log_prob).sum(1) / mask.sum(1)

        loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos
        loss = loss.mean()

        return loss
Example #19
    def forward(self, node_feat, edge_feat=None):  # x: bs*N*num_feat
        # compute abs(x_i, x_j)
        x_i = node_feat.unsqueeze(2)
        x_j = paddle.transpose(x_i, (0, 2, 1, 3))
        x_ij = paddle.abs(x_i - x_j)  # size: bs x fs X N x N  (2,128,11,11)
        x_ij = paddle.transpose(x_ij, (0, 3, 2, 1))
        if self.adj_type == 'sim':
            x_ij = paddle.exp(-x_ij)

        sim_val = self.sim_network(x_ij)
        diag_mask = 1.0 - paddle.expand(
            paddle.eye(node_feat.shape[1]),
            [node_feat.shape[0], 1, node_feat.shape[1], node_feat.shape[1]])
        if self.activation == 'softmax':
            sim_val = self.softmax_with_mask(sim_val, diag_mask)
        elif self.activation == 'sigmoid':
            sim_val = F.sigmoid(sim_val) * diag_mask
        else:
            sim_val = sim_val * diag_mask

        if self.edge_dim == 2:
            if self.activation == 'softmax':
                dsim_val = self.softmax_with_mask(1 - sim_val, diag_mask)
            else:
                dsim_val = (1 - sim_val) * diag_mask
            adj_val = paddle.concat([sim_val, dsim_val], 1)
        else:
            adj_val = sim_val

        if self.top_k > 0:
            n_q, n_edge, n1, n2 = adj_val.shape
            k = min(self.top_k, n1)
            adj_temp = adj_val.reshape((n_q * n_edge * n1, n2))
            topk, indices = paddle.topk(adj_temp, k)
            mask = F.one_hot(indices, adj_temp.shape[1]).sum(1)
            mask = mask.reshape((n_q, n_edge, n1, n2))
            if self.activation == 'softmax':
                adj_val = self.softmax_with_mask(adj_val, mask)
            else:
                adj_val = adj_val * mask

        return adj_val, edge_feat
Example #20
    def forward(self, all_emb, q_emb=None, return_adj=False, return_emb=False):
        node_feat = all_emb
        if self.pre_dropout > 0:
            node_feat = self.predrop1(node_feat)
        edge_feat_list = []
        if return_adj:
            x_i = node_feat.unsqueeze(2)
            x_j = paddle.transpose(x_i, (0, 2, 1, 3))
            init_adj = paddle.abs(x_i - x_j)
            init_adj = paddle.transpose(
                init_adj, (0, 3, 2, 1))  # size: bs x fs x N x N  (2,128,11,11)
            if self.adj_type == 'sim':
                init_adj = paddle.exp(-init_adj)
            diag_mask = 1.0 - paddle.expand(
                paddle.eye(node_feat.shape[1]),
                [node_feat.shape[0], 1, node_feat.shape[1], node_feat.shape[1]])
            init_adj = init_adj * diag_mask
            edge_feat_list.append(init_adj)

        for i in range(self.num_layers):
            adj, _ = self.layer_edge[i](node_feat)
            node_feat_new = self.layer_node[i](node_feat, adj)
            if self.node_concat:
                node_feat = paddle.concat([node_feat, node_feat_new], 2)
            else:
                node_feat = node_feat_new
            edge_feat_list.append(adj)
        if self.pre_dropout > 0:
            node_feat = self.predrop2(node_feat)
        node_feat = self.fc1(node_feat)
        node_feat = self.res_alpha * all_emb + node_feat

        s_feat = node_feat[:, :-1, :]
        q_feat = node_feat[:, -1, :]

        s_logits = self.fc2(s_feat)
        q_logits = self.fc2(q_feat)
        if return_emb:
            return s_logits, q_logits, edge_feat_list, s_feat, q_feat
        else:
            return s_logits, q_logits, edge_feat_list
Example #21
 def test_num_rows_type_check():
     paddle.eye(-1, dtype="int64")
Example #22
 def to_one_hot(self, class_idx, num_classes=2):
     return paddle.eye(num_classes)[class_idx]
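Row-indexing an identity matrix is equivalent to paddle.nn.functional.one_hot; a quick check of the pattern (indices are illustrative):

import paddle
import paddle.nn.functional as F

idx = paddle.to_tensor([0, 1, 1, 0])
via_eye = paddle.eye(2)[idx]
via_api = F.one_hot(idx, num_classes=2)
print(bool(paddle.all(via_eye == via_api)))  # True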
Example #23
    def forward(
        self,
        input_ids=None,
        token_type_ids=None,
        attention_mask=None,
        mems=None,
        perm_mask=None,
        target_mapping=None,
        input_mask=None,
        head_mask=None,
        inputs_embeds=None,
        use_mems_train=False,
        use_mems_eval=False,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=False,
    ):

        if self.training:
            use_mems = use_mems_train
        else:
            use_mems = use_mems_eval

        # The original code for XLNet uses shapes [len, bsz] with the batch dimension at the end
        # but we want a unified interface in the library with the batch size on the first dimension
        # so we move here the first dimension (batch) to the end
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_ids = paddle.transpose(input_ids, perm=[1, 0])
            qlen, bsz = input_ids.shape[0], input_ids.shape[1]
        elif inputs_embeds is not None:
            inputs_embeds = paddle.transpose(inputs_embeds, perm=[1, 0])
            qlen, bsz = inputs_embeds.shape[0], inputs_embeds.shape[1]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        token_type_ids = token_type_ids.transpose(
            [1, 0]) if token_type_ids is not None else None
        input_mask = input_mask.transpose(
            [1, 0]) if input_mask is not None else None
        attention_mask = attention_mask.transpose(
            [1, 0]) if attention_mask is not None else None
        perm_mask = perm_mask.transpose([1, 2, 0
                                         ]) if perm_mask is not None else None
        target_mapping = target_mapping.transpose(
            [1, 2, 0]) if target_mapping is not None else None

        mlen = mems[0].shape[
            0] if mems is not None and mems[0] is not None else 0
        klen = mlen + qlen

        # Attention mask
        # Causal attention mask
        if self.attn_type == "uni":
            attn_mask = self.create_mask(qlen, mlen)
            attn_mask = paddle.unsqueeze(attn_mask, axis=[2, 3])
        elif self.attn_type == "bi":
            attn_mask = None
        else:
            raise ValueError("Unsupported attention type: {}".format(
                self.attn_type))

        # Data mask: input mask & perm mask
        assert input_mask is None or attention_mask is None, (
            "You can only use one of input_mask (uses 1 for padding) "
            "or attention_mask (uses 0 for padding, added for compatibility with BERT). Please choose one.")
        if input_mask is None and attention_mask is not None:
            input_mask = 1.0 - attention_mask
        if input_mask is not None and perm_mask is not None:
            data_mask = paddle.unsqueeze(input_mask, axis=0) + perm_mask
        elif input_mask is not None and perm_mask is None:
            data_mask = paddle.unsqueeze(input_mask, axis=0)
        elif input_mask is None and perm_mask is not None:
            data_mask = perm_mask
        else:
            data_mask = None

        if data_mask is not None:
            # All mems can be attended to
            if mlen > 0:
                mems_mask = paddle.cast(paddle.zeros(
                    [data_mask.shape[0], mlen, bsz]),
                                        dtype=dtype_float)
                data_mask = paddle.concat([mems_mask, data_mask], axis=1)
            if attn_mask is None:
                attn_mask = paddle.unsqueeze(data_mask, axis=-1)
            else:
                attn_mask += paddle.unsqueeze(data_mask, axis=-1)

        if attn_mask is not None:
            attn_mask = paddle.cast((attn_mask > 0), dtype=dtype_float)

        if attn_mask is not None:
            non_tgt_mask = paddle.cast(-paddle.eye(qlen), dtype=dtype_float)

            if mlen > 0:
                non_tgt_mask = paddle.concat([
                    paddle.cast(paddle.zeros([qlen, mlen]), dtype=dtype_float),
                    non_tgt_mask
                ],
                                             axis=-1)
            non_tgt_mask = paddle.cast((
                (attn_mask + paddle.unsqueeze(non_tgt_mask, axis=[2, 3])) > 0),
                                       dtype=dtype_float)
        else:
            non_tgt_mask = None

        # Word embeddings and prepare h & g hidden states
        if inputs_embeds is not None:
            word_emb_k = inputs_embeds
        else:
            word_emb_k = self.word_embedding(input_ids)

        output_h = self.dropout(word_emb_k)
        if target_mapping is not None:
            word_emb_q = self.mask_emb.expand(
                [target_mapping.shape[0], bsz, -1])
            output_g = self.dropout(word_emb_q)
        else:
            output_g = None

        # Segment embedding
        if token_type_ids is not None:
            # Convert `token_type_ids` to one-hot `seg_mat`
            if mlen > 0:
                mem_pad = paddle.zeros(shape=[mlen, bsz], dtype='int64')
                cat_ids = paddle.concat(x=[mem_pad, token_type_ids], axis=0)
            else:
                cat_ids = token_type_ids

            # `1` indicates not in the same segment [qlen x klen x bsz]
            seg_mat = paddle.cast(paddle.unsqueeze(token_type_ids, axis=1) !=
                                  paddle.unsqueeze(cat_ids, axis=0),
                                  dtype='int64')
            seg_mat = paddle.cast(F.one_hot(seg_mat, num_classes=2),
                                  dtype=dtype_float)
        else:
            seg_mat = None

        # Positional encoding
        pos_emb = self.relative_positional_encoding(qlen, klen, bsz=bsz)
        pos_emb = self.dropout(pos_emb)

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # Attention_probs has shape bsz x n_heads x N x N
        # Input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] (a head_mask for each layer)
        # And head_mask is converted to shape [num_hidden_layers x qlen x klen x bsz x n_head]
        if head_mask is not None:
            if head_mask.dim() == 1:
                head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(
                    0).unsqueeze(0)
                head_mask = head_mask.expand([self.n_layer, -1, -1, -1, -1])
            elif head_mask.dim() == 2:
                head_mask = head_mask.unsqueeze(1).unsqueeze(1).unsqueeze(1)
        else:
            head_mask = [None] * self.n_layer

        new_mems = ()
        if mems is None:
            mems = [None] * len(self.layer)

        attentions = [] if output_attentions else None
        hidden_states = [] if output_hidden_states else None
        for i, layer_module in enumerate(self.layer):
            if use_mems:
                # Cache new mems
                new_mems = new_mems + (self.cache_mem(output_h, mems[i]), )
            if output_hidden_states:
                hidden_states.append((
                    output_h, output_g) if output_g is not None else output_h)

            outputs = layer_module(
                output_h,
                output_g,
                attn_mask_h=non_tgt_mask,
                attn_mask_g=attn_mask,
                r=pos_emb,
                seg_mat=seg_mat,
                mems=mems[i],
                target_mapping=target_mapping,
                head_mask=head_mask[i],
                output_attentions=output_attentions,
            )
            output_h, output_g = outputs[:2]

            if output_attentions:
                attentions.append(outputs[2])

        # Add last hidden state
        if output_hidden_states:
            hidden_states.append((
                output_h, output_g) if output_g is not None else output_h)

        output = self.dropout(output_g if output_g is not None else output_h)

        # Prepare outputs, we transpose back here to shape [bsz, len, hidden_dim] (cf. beginning of forward() method)
        output = paddle.transpose(output, perm=[1, 0, 2])

        if not use_mems:
            new_mems = None

        if output_hidden_states:
            if output_g is not None:
                hidden_states = tuple(
                    paddle.transpose(h, perm=[1, 0, 2]) for hs in hidden_states
                    for h in hs)
            else:
                hidden_states = tuple(
                    paddle.transpose(hs, perm=[1, 0, 2])
                    for hs in hidden_states)

        if output_attentions:
            if target_mapping is not None:
                # When target_mapping is provided, there are 2-tuple of attentions
                attentions = tuple(
                    tuple(
                        paddle.transpose(att_stream, perm=[2, 3, 0, 1])
                        for att_stream in t) for t in attentions)
            else:
                attentions = tuple(
                    paddle.transpose(t, perm=[2, 3, 0, 1]) for t in attentions)

        if not return_dict:
            return tuple(
                v for v in [output, new_mems, hidden_states, attentions]
                if v is not None)
        return {
            "last_hidden_state": output,
            "mems": new_mems,
            "hidden_states": hidden_states,
            "attentions": attentions,
        }
Example #24
def minimize_bfgs(objective_func,
                  initial_position,
                  max_iters=50,
                  tolerance_grad=1e-7,
                  tolerance_change=1e-9,
                  initial_inverse_hessian_estimate=None,
                  line_search_fn='strong_wolfe',
                  max_line_search_iters=50,
                  initial_step_length=1.0,
                  dtype='float32',
                  name=None):
    r"""
    Minimizes a differentiable function `func` using the BFGS method.
    The BFGS is a quasi-Newton method for solving an unconstrained optimization problem over a differentiable function.
    Closely related is the Newton method for minimization. Consider the iterate update formula:

    .. math::
        x_{k+1} = x_{k} + H_k \nabla{f_k}

    If :math:`H_k` is the inverse Hessian of :math:`f` at :math:`x_k`, then it's the Newton method.
    If :math:`H_k` is a symmetric positive-definite approximation of the inverse Hessian, then
    it's a quasi-Newton method. In practice, the approximate Hessians are obtained using only
    the gradients, over either the whole search history (BFGS) or only part of it (L-BFGS).

    Reference: 
        Jorge Nocedal, Stephen J. Wright, Numerical Optimization, Second Edition, 2006. pp140: Algorithm 6.1 (BFGS Method).

    Args:
        objective_func: the objective function to minimize. ``objective_func`` accepts a 1D Tensor and returns a scalar.
        initial_position (Tensor): the starting point of the iterates, has the same shape with the input of ``objective_func`` . 
        max_iters (int, optional): the maximum number of minimization iterations. Default value: 50.
        tolerance_grad (float, optional): terminates if the gradient norm is smaller than this. Currently gradient norm uses inf norm. Default value: 1e-7.
        tolerance_change (float, optional): terminates if the change of function value/position/parameter between two iterations is smaller than this value. Default value: 1e-9.
        initial_inverse_hessian_estimate (Tensor, optional): the initial inverse hessian approximation at initial_position. It must be symmetric and positive definite. If not given, will use an identity matrix of order N, which is size of ``initial_position`` . Default value: None.
        line_search_fn (str, optional): indicates which line search method to use; only 'strong_wolfe' is supported right now. 'Hager-Zhang' may be supported in the future. Default value: 'strong_wolfe'.
        max_line_search_iters (int, optional): the maximum number of line search iterations. Default value: 50.
        initial_step_length (float, optional): step length used in the first iteration of line search. A different initial_step_length may lead to a different optimal result. For methods like Newton and quasi-Newton, the initial trial step length should always be 1.0. Default value: 1.0.
        dtype ('float32' | 'float64', optional): data type used in the algorithm, the data type of the input parameter must be consistent with the dtype. Default value: 'float32'.
        name (str, optional): Name for the operation. For more information, please refer to :ref:`api_guide_Name`. Default value: None.

    Returns:
        output(tuple):

            - is_converge (bool): Indicates whether found the minimum within tolerance.
            - num_func_calls (int): number of objective function called.
            - position (Tensor): the position of the last iteration. If the search converged, this value is the argmin of the objective function with regard to the initial position.
            - objective_value (Tensor): objective function value at the `position`.
            - objective_gradient (Tensor): objective function gradient at the `position`.
            - inverse_hessian_estimate (Tensor): the estimate of inverse hessian at the `position`.

    Examples:
        .. code-block:: python

            import paddle
            
            def func(x):
                return paddle.dot(x, x)

            x0 = paddle.to_tensor([1.3, 2.7])
            results = paddle.incubate.optimizer.functional.minimize_bfgs(func, x0)
            print("is_converge: ", results[0])
            print("the minimum of func is: ", results[2])
            # is_converge:  Tensor(shape=[1], dtype=bool, place=Place(gpu:0), stop_gradient=True,
            #        [True])
            # the minimum of func is:  Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
            #        [0., 0.])
    """

    if dtype not in ['float32', 'float64']:
        raise ValueError(
            "The dtype must be 'float32' or 'float64', but the specified is {}."
            .format(dtype))

    op_name = 'minimize_bfgs'
    check_input_type(initial_position, 'initial_position', op_name)

    I = paddle.eye(initial_position.shape[0], dtype=dtype)
    if initial_inverse_hessian_estimate is None:
        initial_inverse_hessian_estimate = I
    else:
        check_input_type(initial_inverse_hessian_estimate,
                         'initial_inverse_hessian_estimate', op_name)
        check_initial_inverse_hessian_estimate(
            initial_inverse_hessian_estimate)

    Hk = paddle.assign(initial_inverse_hessian_estimate)
    # use detach and assign to create new tensor rather than =, or xk will share memory and grad with initial_position
    xk = paddle.assign(initial_position.detach())

    value, g1 = _value_and_gradient(objective_func, xk)
    num_func_calls = paddle.full(shape=[1], fill_value=1, dtype='int64')

    # when the dim of x is 1000, it needs more than 30 iters for all elements to converge to the minimum.
    k = paddle.full(shape=[1], fill_value=0, dtype='int64')
    done = paddle.full(shape=[1], fill_value=False, dtype='bool')
    is_converge = paddle.full(shape=[1], fill_value=False, dtype='bool')

    def cond(k, done, is_converge, num_func_calls, xk, value, g1, Hk):
        return (k < max_iters) & ~done

    def body(k, done, is_converge, num_func_calls, xk, value, g1, Hk):
        #############    compute pk    #############
        pk = -paddle.matmul(Hk, g1)

        #############    compute alpha by line search    #############
        if line_search_fn == 'strong_wolfe':
            alpha, value, g2, ls_func_calls = strong_wolfe(
                f=objective_func,
                xk=xk,
                pk=pk,
                initial_step_length=initial_step_length,
                dtype=dtype)
        else:
            raise NotImplementedError(
                "Currently only support line_search_fn = 'strong_wolfe', but the specified is '{}'"
                .format(line_search_fn))
        num_func_calls += ls_func_calls

        #############    update Hk    #############
        sk = alpha * pk
        yk = g2 - g1

        xk = xk + sk
        g1 = g2

        sk = paddle.unsqueeze(sk, 0)
        yk = paddle.unsqueeze(yk, 0)

        rhok_inv = paddle.dot(yk, sk)
        rhok = paddle.static.nn.cond(
            rhok_inv == 0.,
            lambda: paddle.full(shape=[1], fill_value=1000.0, dtype=dtype),
            lambda: 1. / rhok_inv)

        Vk_transpose = I - rhok * sk * yk.t()
        Vk = I - rhok * yk * sk.t()
        Hk = paddle.matmul(paddle.matmul(Vk_transpose, Hk),
                           Vk) + rhok * sk * sk.t()

        k += 1

        #############    check convergence    #############
        gnorm = paddle.linalg.norm(g1, p=np.inf)
        pk_norm = paddle.linalg.norm(pk, p=np.inf)
        paddle.assign(
            done | (gnorm < tolerance_grad) | (pk_norm < tolerance_change),
            done)
        paddle.assign(done, is_converge)
        # when alpha=0, there is no chance to get xk change.
        paddle.assign(done | (alpha == 0.), done)
        return [k, done, is_converge, num_func_calls, xk, value, g1, Hk]

    paddle.static.nn.while_loop(
        cond=cond,
        body=body,
        loop_vars=[k, done, is_converge, num_func_calls, xk, value, g1, Hk])
    return is_converge, num_func_calls, xk, value, g1, Hk
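A variation on the docstring example: passing an explicit symmetric positive-definite initial inverse Hessian estimate instead of the default identity (the scaled identity below is just an illustration):

import paddle

def quad(x):
    return paddle.dot(x, x)

x0 = paddle.to_tensor([1.3, 2.7])
H0 = 0.5 * paddle.eye(2)   # any symmetric positive-definite matrix is accepted
results = paddle.incubate.optimizer.functional.minimize_bfgs(
    quad, x0, initial_inverse_hessian_estimate=H0)
print("is_converge:", results[0])
print("argmin:", results[2])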
Example #25
def lddt(predicted_points,
         true_points,
         true_points_mask,
         cutoff=15.,
         per_residue=False):
    """Measure (approximate) lDDT for a batch of coordinates.

    lDDT reference:
    Mariani, V., Biasini, M., Barbato, A. & Schwede, T. lDDT: A local
    superposition-free score for comparing protein structures and models using
    distance difference tests. Bioinformatics 29, 2722–2728 (2013).

    lDDT is a measure of the difference between the true distance matrix and the
    distance matrix of the predicted points.  The difference is computed only on
    points closer than cutoff *in the true structure*.

    This function does not compute the exact lDDT value that the original paper
    describes because it does not include terms for physical feasibility
    (e.g. bond length violations). Therefore this is only an approximate
    lDDT score.

    Args:
    predicted_points: (batch, length, 3) array of predicted 3D points
    true_points: (batch, length, 3) array of true 3D points
    true_points_mask: (batch, length, 1) binary-valued float array.  This mask
        should be 1 for points that exist in the true points.
    cutoff: Maximum distance for a pair of points to be included
    per_residue: If true, return score for each residue.  Note that the overall
        lDDT is not exactly the mean of the per_residue lDDT's because some
        residues have more contacts than others.

    Returns:
    An (approximate, see above) lDDT score in the range 0-1.
    """

    assert len(predicted_points.shape) == 3
    assert predicted_points.shape[-1] == 3
    assert true_points_mask.shape[-1] == 1
    assert len(true_points_mask.shape) == 3

    # Compute true and predicted distance matrices.
    dmat_true = paddle.sqrt(1e-10 + paddle.sum(
        (true_points[:, :, None] - true_points[:, None, :])**2, axis=-1))

    dmat_predicted = paddle.sqrt(1e-10 +
                                 paddle.sum((predicted_points[:, :, None] -
                                             predicted_points[:, None, :])**2,
                                            axis=-1))

    cutoff = paddle.to_tensor(cutoff)

    dists_to_score = (
        paddle.cast((dmat_true < cutoff), 'float32') * true_points_mask *
        paddle.transpose(true_points_mask, [0, 2, 1]) *
        (1. - paddle.eye(dmat_true.shape[1]))  # Exclude self-interaction.
    )

    # Shift unscored distances to be far away.
    dist_l1 = paddle.abs(dmat_true - dmat_predicted)

    # True lDDT uses a number of fixed bins.
    # We ignore the physical plausibility correction to lDDT, though.
    score = 0.25 * (paddle.cast((dist_l1 < 0.5), 'float32') + paddle.cast(
        (dist_l1 < 1.0), 'float32') + paddle.cast(
            (dist_l1 < 2.0), 'float32') + paddle.cast(
                (dist_l1 < 4.0), 'float32'))

    # Normalize over the appropriate axes.
    reduce_axes = (-1, ) if per_residue else (-2, -1)
    norm = 1. / (1e-10 + paddle.sum(dists_to_score, axis=reduce_axes))
    score = norm * (1e-10 +
                    paddle.sum(dists_to_score * score, axis=reduce_axes))

    return score
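A quick sanity check, assuming lddt above is in scope: identical predicted and true coordinates score 1, and per_residue=True returns one score per position (random points, for illustration):

import paddle

pts = paddle.rand([2, 16, 3]) * 10.0
mask = paddle.ones([2, 16, 1])
print(lddt(pts, pts, mask))                          # all ones
print(lddt(pts, pts, mask, per_residue=True).shape)  # [2, 16]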
Example #26
    def rmi_lower_bound(self, labels_4D, probs_4D):
        """
        calculate the lower bound of the region mutual information.
        Args:
                labels_4D   :   [N, C, H, W], dtype=float32
                probs_4D    :   [N, C, H, W], dtype=float32
        """
        assert labels_4D.shape == probs_4D.shape, \
            'shapes {} {}'.format(labels_4D.shape, probs_4D.shape)

        p, s = self.rmi_pool_size, self.rmi_pool_stride
        if self.rmi_pool_stride > 1:
            if self.rmi_pool_way == 0:
                labels_4D = F.max_pool2d(labels_4D,
                                         kernel_size=p,
                                         stride=s,
                                         padding=self.kernel_padding)
                probs_4D = F.max_pool2d(probs_4D,
                                        kernel_size=p,
                                        stride=s,
                                        padding=self.kernel_padding)
            elif self.rmi_pool_way == 1:
                labels_4D = F.avg_pool2d(labels_4D,
                                         kernel_size=p,
                                         stride=s,
                                         padding=self.kernel_padding)
                probs_4D = F.avg_pool2d(probs_4D,
                                        kernel_size=p,
                                        stride=s,
                                        padding=self.kernel_padding)
            elif self.rmi_pool_way == 2:
                shape = labels_4D.shape
                new_h, new_w = shape[2] // s, shape[3] // s
                labels_4D = F.interpolate(labels_4D,
                                          size=(new_h, new_w),
                                          mode='nearest')
                probs_4D = F.interpolate(probs_4D,
                                         size=(new_h, new_w),
                                         mode='bilinear',
                                         align_corners=True)
            else:
                raise NotImplementedError("Pool way of RMI is not defined!")

        label_shape = labels_4D.shape
        n, c = label_shape[0], label_shape[1]

        la_vectors, pr_vectors = self.map_get_pairs(labels_4D,
                                                    probs_4D,
                                                    radius=self.rmi_radius,
                                                    is_combine=0)

        la_vectors = paddle.reshape(la_vectors, [n, c, self.half_d, -1])
        la_vectors = paddle.cast(la_vectors, dtype='float64')
        la_vectors.stop_gradient = True

        pr_vectors = paddle.reshape(pr_vectors, [n, c, self.half_d, -1])
        pr_vectors = paddle.cast(pr_vectors, dtype='float64')

        diag_matrix = paddle.unsqueeze(paddle.unsqueeze(paddle.eye(
            self.half_d),
                                                        axis=0),
                                       axis=0)
        la_vectors = la_vectors - paddle.mean(la_vectors, axis=3, keepdim=True)

        la_cov = paddle.matmul(la_vectors,
                               paddle.transpose(la_vectors, [0, 1, 3, 2]))
        pr_vectors = pr_vectors - paddle.mean(pr_vectors, axis=3, keepdim=True)
        pr_cov = paddle.matmul(pr_vectors,
                               paddle.transpose(pr_vectors, [0, 1, 3, 2]))

        pr_cov_inv = self.inverse(pr_cov +
                                  paddle.cast(diag_matrix, dtype='float64') *
                                  _POS_ALPHA)

        la_pr_cov = paddle.matmul(la_vectors,
                                  paddle.transpose(pr_vectors, [0, 1, 3, 2]))

        appro_var = la_cov - paddle.matmul(
            paddle.matmul(la_pr_cov, pr_cov_inv),
            paddle.transpose(la_pr_cov, [0, 1, 3, 2]))

        rmi_now = 0.5 * self.log_det_by_cholesky(
            appro_var + paddle.cast(diag_matrix, dtype='float64') * _POS_ALPHA)

        rmi_per_class = paddle.cast(paddle.mean(paddle.reshape(
            rmi_now, [-1, self.num_classes]),
                                                axis=0),
                                    dtype='float32')
        rmi_per_class = paddle.divide(rmi_per_class,
                                      paddle.to_tensor(float(self.half_d)))

        rmi_loss = paddle.sum(rmi_per_class) if _IS_SUM else paddle.mean(
            rmi_per_class)

        return rmi_loss
Example #27
confidence = initial_const

#value of k
k = 40

#pixel value range
boxmin = -3.0
boxmax = 3.0

#number of classes (the PyTorch implementation uses 1000)
num_labels = 1000

#attack target label; must be one-hot encoded
#target_label = 288
target_label = 344
tlab = paddle.eye(num_labels)[target_label]
print("type of tlab: ", type(tlab))

print()

shape = (1, 3, 224, 224)

#initialization bounds for c
lower_bound = 0
c = initial_const
upper_bound = 1e10

# the best l2, score, and image attack
o_bestl2 = 1e10
o_bestscore = -1
o_bestattack = [np.zeros(shape)]
Example #28
 def test_num_columns_type_check():
     paddle.eye(10, num_columns=5.2, dtype="int64")
Example #29
    def forward_single(self, emb, instance, kernel, training_mask, bboxes):
        training_mask = (training_mask > 0.5).astype('int64')
        kernel = (kernel > 0.5).astype('int64')
        instance = instance * training_mask
        instance_kernel = paddle.reshape(instance * kernel, [-1])
        instance = paddle.reshape(instance, [-1])
        emb = paddle.reshape(emb, [self.feature_dim, -1])

        unique_labels, unique_ids = paddle.unique(instance_kernel, return_inverse=True)
        num_instance = unique_labels.shape[0]
        if num_instance <= 1:
            return 0

        emb_mean = paddle.zeros((self.feature_dim, num_instance), dtype='float32')
        for i, lb in enumerate(unique_labels):
            if lb == 0:
                continue
            ind_k = instance_kernel == lb
            emb_mean[:, i] = paddle.mean(emb[:, ind_k], axis=1)

        l_agg = paddle.zeros([num_instance], dtype='float32')
        for i, lb in enumerate(unique_labels):
            if lb == 0:
                continue
            ind = instance == lb
            emb_ = emb[:, ind]
            dist = (emb_ - emb_mean[:, i:i + 1]).norm(p=2, axis=0)
            dist = F.relu(dist - self.delta_v) ** 2
            l_agg[i] = paddle.mean(paddle.log(dist + 1.0))
        l_agg = paddle.mean(l_agg[1:])

        if num_instance > 2:
            emb_trans = paddle.transpose(emb_mean, perm=[1, 0])
            emb_interleave = paddle.tile(emb_trans, repeat_times=[num_instance, 1])

            emb_trans = paddle.transpose(emb_mean, perm=[1, 0])
            emb_tile = paddle.tile(emb_trans, repeat_times=[1, num_instance])
            emb_band = paddle.reshape(emb_tile, (-1, self.feature_dim))
            # print(seg_band)

            mask = (1 - paddle.eye(num_instance, dtype='int32'))
            mask = paddle.reshape(mask,(-1,1))
            mask = paddle.tile(mask, repeat_times=[1, self.feature_dim])
            mask = paddle.reshape(mask,(num_instance, num_instance, -1))
            mask[0, :, :] = 0
            mask[:, 0, :] = 0
            mask = paddle.reshape(mask, (num_instance * num_instance, -1))
            # print(mask)

            dist = emb_interleave - emb_band
            # dist = dist[mask > 0].view(-1, self.feature_dim).norm(p=2, dim=1)
            dist = paddle.reshape(dist[mask > 0], (-1, self.feature_dim)).norm(p=2, axis=1)

            dist = F.relu(2 * self.delta_d - dist) ** 2
            l_dis = paddle.mean(paddle.log(dist + 1.0))
        else:
            l_dis = 0

        l_agg = self.weights[0] * l_agg
        l_dis = self.weights[1] * l_dis
        l_reg = paddle.mean(paddle.log(paddle.norm(emb_mean, 2, 0) + 1.0)) * 0.001
        loss = l_agg + l_dis + l_reg
        return loss
Example #30
def eye(n, m):
    return Tensor(paddle.eye(n, m))