Example #1
def mean_dist(source_points, warped_points, L_pck):
    # compute the mean keypoint distance, normalized by L_pck
    batch_size = source_points.size(0)
    dist = torch.zeros((batch_size))
    for i in range(batch_size):
        p_src = source_points[i, :]
        p_wrp = warped_points[i, :]
        N_pts = torch.sum(torch.ne(p_src[0, :], -1) * torch.ne(p_src[1, :], -1))
        point_distance = torch.pow(torch.sum(torch.pow(p_src[:, :N_pts] - p_wrp[:, :N_pts], 2), 0), 0.5)
        L_pck_mat = L_pck[i].expand_as(point_distance)
        dist[i] = torch.mean(torch.div(point_distance, L_pck_mat))
    return dist
Example #2
def pck(source_points, warped_points, L_pck, alpha=0.1):
    # compute percentage of correct keypoints
    batch_size = source_points.size(0)
    pck = torch.zeros((batch_size))
    for i in range(batch_size):
        p_src = source_points[i, :]
        p_wrp = warped_points[i, :]
        N_pts = torch.sum(torch.ne(p_src[0, :], -1) * torch.ne(p_src[1, :], -1))
        point_distance = torch.pow(torch.sum(torch.pow(p_src[:, :N_pts] - p_wrp[:, :N_pts], 2), 0), 0.5)
        L_pck_mat = L_pck[i].expand_as(point_distance)
        correct_points = torch.le(point_distance, L_pck_mat * alpha)
        pck[i] = torch.mean(correct_points.float())
    return pck
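A minimal usage sketch for the two keypoint helpers above (not part of the original source): it assumes keypoints are stored as (batch, 2, N) tensors with unused slots padded with -1, and that L_pck holds one normalization length per sample.

import torch

batch_size, n_pts = 2, 4
source_points = torch.full((batch_size, 2, n_pts), -1.0)
warped_points = torch.full((batch_size, 2, n_pts), -1.0)
# three valid keypoints per sample; the fourth slot stays padded with -1
source_points[:, :, :3] = torch.rand(batch_size, 2, 3) * 100
warped_points[:, :, :3] = source_points[:, :, :3] + torch.randn(batch_size, 2, 3)
L_pck = torch.tensor([100.0, 100.0])

print(pck(source_points, warped_points, L_pck, alpha=0.1))  # per-sample PCK in [0, 1]
print(mean_dist(source_points, warped_points, L_pck))       # per-sample mean normalized distance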
Example #3
    def forward(self, s1, s2):
        # pylint: disable=arguments-differ
        """ """
        # Embeddings
        s1_embs = self._highway_layer(self._text_field_embedder(s1))
        s2_embs = self._highway_layer(self._text_field_embedder(s2))
        if self._elmo is not None:
            s1_elmo_embs = self._elmo(s1['elmo'])
            s2_elmo_embs = self._elmo(s2['elmo'])
            if "words" in s1:
                s1_embs = torch.cat([s1_embs, s1_elmo_embs['elmo_representations'][0]], dim=-1)
                s2_embs = torch.cat([s2_embs, s2_elmo_embs['elmo_representations'][0]], dim=-1)
            else:
                s1_embs = s1_elmo_embs['elmo_representations'][0]
                s2_embs = s2_elmo_embs['elmo_representations'][0]

        if self._cove is not None:
            s1_lens = torch.ne(s1['words'], self.pad_idx).long().sum(dim=-1).data
            s2_lens = torch.ne(s2['words'], self.pad_idx).long().sum(dim=-1).data
            s1_cove_embs = self._cove(s1['words'], s1_lens)
            s1_embs = torch.cat([s1_embs, s1_cove_embs], dim=-1)
            s2_cove_embs = self._cove(s2['words'], s2_lens)
            s2_embs = torch.cat([s2_embs, s2_cove_embs], dim=-1)
        s1_embs = self._dropout(s1_embs)
        s2_embs = self._dropout(s2_embs)

        # Set up masks
        s1_mask = util.get_text_field_mask(s1)
        s2_mask = util.get_text_field_mask(s2)
        s1_lstm_mask = s1_mask.float() if self._mask_lstms else None
        s2_lstm_mask = s2_mask.float() if self._mask_lstms else None

        # Sentence encodings with LSTMs
        s1_enc = self._phrase_layer(s1_embs, s1_lstm_mask)
        s2_enc = self._phrase_layer(s2_embs, s2_lstm_mask)
        if self._elmo is not None and len(s1_elmo_embs['elmo_representations']) > 1:
            s1_enc = torch.cat([s1_enc, s1_elmo_embs['elmo_representations'][1]], dim=-1)
            s2_enc = torch.cat([s2_enc, s2_elmo_embs['elmo_representations'][1]], dim=-1)
        s1_enc = self._dropout(s1_enc)
        s2_enc = self._dropout(s2_enc)

        # Max pooling
        s1_mask = s1_mask.unsqueeze(dim=-1)
        s2_mask = s2_mask.unsqueeze(dim=-1)
        s1_enc.data.masked_fill_(1 - s1_mask.byte().data, -float('inf'))
        s2_enc.data.masked_fill_(1 - s2_mask.byte().data, -float('inf'))
        s1_enc, _ = s1_enc.max(dim=1)
        s2_enc, _ = s2_enc.max(dim=1)

        return torch.cat([s1_enc, s2_enc, torch.abs(s1_enc - s2_enc), s1_enc * s2_enc], 1)
Example #4
    def forward(self, sent):
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        sent : Dict[str, torch.LongTensor]
            From a ``TextField``.

        Returns
        -------
        torch.FloatTensor
            The max-pooled sentence encoding.
        """
        sent_embs = self._highway_layer(self._text_field_embedder(sent))
        if self._cove is not None:
            sent_lens = torch.ne(sent['words'], self.pad_idx).long().sum(dim=-1).data
            sent_cove_embs = self._cove(sent['words'], sent_lens)
            sent_embs = torch.cat([sent_embs, sent_cove_embs], dim=-1)
        if self._elmo is not None:
            elmo_embs = self._elmo(sent['elmo'])
            if "words" in sent:
                sent_embs = torch.cat([sent_embs, elmo_embs['elmo_representations'][0]], dim=-1)
            else:
                sent_embs = elmo_embs['elmo_representations'][0]
        sent_embs = self._dropout(sent_embs)

        sent_mask = util.get_text_field_mask(sent).float()
        sent_lstm_mask = sent_mask if self._mask_lstms else None

        sent_enc = self._phrase_layer(sent_embs, sent_lstm_mask)
        if self._elmo is not None and len(elmo_embs['elmo_representations']) > 1:
            sent_enc = torch.cat([sent_enc, elmo_embs['elmo_representations'][1]], dim=-1)
        sent_enc = self._dropout(sent_enc)

        sent_mask = sent_mask.unsqueeze(dim=-1)
        sent_enc.data.masked_fill_(1 - sent_mask.byte().data, -float('inf'))
        return sent_enc.max(dim=1)[0]
Example #5
def compute_mask(v, padding_idx=0):
    """
    compute mask on given tensor v
    :param v: input tensor of token ids
    :param padding_idx: index that marks padding positions
    :return: float mask with 1.0 at non-padding positions and 0.0 elsewhere
    """
    mask = torch.ne(v, padding_idx).float()
    return mask
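A quick usage sketch (assumed input values) showing compute_mask() on a padded batch of token ids, with the default padding_idx of 0:

import torch

v = torch.tensor([[5, 2, 7, 0, 0],
                  [3, 0, 0, 0, 0]])
print(compute_mask(v))
# tensor([[1., 1., 1., 0., 0.],
#         [1., 0., 0., 0., 0.]])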
Example #6
    def box_loss(self, gt_label, gt_offset, pred_offset):
        # get the mask of samples whose gt_label != 0
        mask = torch.ne(gt_label, 0)
        # convert the mask to indices
        chose_index = torch.nonzero(mask)
        chose_index = torch.squeeze(chose_index)
        # only valid elements contribute to the loss
        valid_gt_offset = gt_offset[chose_index, :]
        valid_pred_offset = pred_offset[chose_index, :]
        valid_pred_offset = torch.squeeze(valid_pred_offset)
        return self.loss_box(valid_pred_offset, valid_gt_offset)
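A standalone sketch of the same masking pattern, with the loss function assumed to be nn.MSELoss (the original self.loss_box may differ): torch.ne() keeps only the samples whose label is non-zero, and the box loss is computed on those rows only.

import torch
import torch.nn as nn

loss_box = nn.MSELoss()
gt_label = torch.tensor([0, 1, -1, 0, 1])  # 0 marks samples that should not contribute
gt_offset = torch.randn(5, 4)
pred_offset = torch.randn(5, 4)

mask = torch.ne(gt_label, 0)
chose_index = torch.squeeze(torch.nonzero(mask))
valid_gt_offset = gt_offset[chose_index, :]
valid_pred_offset = pred_offset[chose_index, :]
print(loss_box(valid_pred_offset, valid_gt_offset))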
Example #7
def compute_accuracy(y_pred, y_true, mask_index):
    y_pred, y_true = normalize_sizes(y_pred, y_true)

    _, y_pred_indices = y_pred.max(dim=1)

    correct_indices = torch.eq(y_pred_indices, y_true).float()
    valid_indices = torch.ne(y_true, mask_index).float()

    n_correct = (correct_indices * valid_indices).sum().item()
    n_valid = valid_indices.sum().item()

    return n_correct / n_valid * 100
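A standalone sketch of the masked-accuracy idea above, using dummy logits (normalize_sizes() from the original example is not reproduced here): torch.ne() builds a validity mask that excludes padding targets from both the numerator and the denominator.

import torch

mask_index = 0
y_pred = torch.randn(8, 5)                 # (num_tokens, vocab) logits
y_true = torch.randint(0, 5, (8,))

_, y_pred_indices = y_pred.max(dim=1)
correct = torch.eq(y_pred_indices, y_true).float()
valid = torch.ne(y_true, mask_index).float()
accuracy = (correct * valid).sum().item() / max(valid.sum().item(), 1) * 100
print(accuracy)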
Example #8
def rpn_class_loss(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.

    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for FG/BG.
    """
    # Get anchor classes. Convert the -1/+1 match to 0/1 values.    
    anchor_class = torch.eq(rpn_match, 1)
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.    
    indices = torch.ne(rpn_match, 0.)

    rpn_class_logits = torch.masked_select(rpn_class_logits, indices)
    anchor_class = torch.masked_select(anchor_class, indices)

    rpn_class_logits = rpn_class_logits.contiguous().view(-1, 2)

    anchor_class = anchor_class.contiguous().view(-1).type(torch.cuda.LongTensor)
    loss = F.cross_entropy(rpn_class_logits, anchor_class, weight=None)
    return loss
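An illustrative call for rpn_class_loss() with random data (shapes follow the docstring). Because of the hard-coded torch.cuda.LongTensor cast, this particular implementation only runs on a CUDA device:

import torch

if torch.cuda.is_available():
    rpn_match = torch.randint(-1, 2, (2, 64, 1)).float().cuda()  # -1 / 0 / +1 per anchor
    rpn_class_logits = torch.randn(2, 64, 2).cuda()
    print(rpn_class_loss(rpn_match, rpn_class_logits))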
Example #9
    def forward(self, prediction, annotations):
        # Get the inputs
        classifications, regressions, anchors = prediction

        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            #bbox_annotation = annotations[j, :, :]
            bbox_annotation = torch.from_numpy(annotations[j].astype(
                np.float32)).to(anchors.device)
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                regression_losses.append(torch.tensor(0).float().cuda())
                classification_losses.append(torch.tensor(0).float().cuda())

                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            # num_anchors x num_annotations
            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

            #import pdb
            # pdb.set_trace()

            # compute the loss for classification
            # targets = torch.ones_like(classification.shape) * -1
            # targets = targets.cuda()
            targets = torch.ones_like(classification) * -1

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

            # alpha_factor = torch.ones(targets.shape).cuda() * alpha
            alpha_factor = torch.ones_like(targets) * alpha

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            # cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                   torch.zeros_like(cls_loss))

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:,
                                                 2] - assigned_annotations[:,
                                                                           0]
                gt_heights = assigned_annotations[:,
                                                  3] - assigned_annotations[:,
                                                                            1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

                negative_indices = 1 + (~positive_indices)

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                regression_losses.append(torch.tensor(0).float().cuda())

        classification_loss = torch.stack(classification_losses).mean(
            dim=0, keepdim=True)
        classification_loss = classification_loss.mean()

        regression_loss = torch.stack(regression_losses).mean(dim=0,
                                                              keepdim=True)
        regression_loss = regression_loss.mean()

        loss = classification_loss + regression_loss
        return loss, regression_loss, classification_loss
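A standalone sketch of the focal-loss weighting used above, on a handful of dummy anchors: a target of 1 marks a positive class, 0 a negative, and rows of -1 mark ignored anchors that torch.ne() later zeroes out of the loss.

import torch

alpha, gamma = 0.25, 2.0
classification = torch.rand(6, 3).clamp(1e-4, 1.0 - 1e-4)  # per-anchor class probabilities
targets = torch.tensor([[1., 0., 0.],
                        [0., 0., 1.],
                        [0., 0., 0.],
                        [-1., -1., -1.],   # ignored anchor
                        [0., 1., 0.],
                        [-1., -1., -1.]])  # ignored anchor

alpha_factor = torch.ones_like(targets) * alpha
alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

bce = -(targets * torch.log(classification) +
        (1.0 - targets) * torch.log(1.0 - classification))
cls_loss = focal_weight * bce
cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros_like(cls_loss))
print(cls_loss.sum())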
Example #10
 def forward(self, x):
     mask = torch.ne(x, self.pad_idx).float()
     return mask
Example #11
    def forward(self,
                classifications,
                regressions,
                anchors,
                annotations,
                alpha=0.25,
                gamma=2.0,
                cuda=True):
        # Setup
        dtype = regressions.dtype
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        # Get the anchors and convert them to center/width/height form
        anchor = anchors[0, :, :].to(dtype)
        # Convert to (center, width, height) form
        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):
            # Get the ground-truth boxes
            bbox_annotation = annotations[j]

            # Get the classification and regression predictions for this image
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]
            # Smooth the labels (clamp the predictions)
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if len(bbox_annotation) == 0:
                alpha_factor = torch.ones_like(classification) * alpha

                if cuda:
                    alpha_factor = alpha_factor.cuda()
                alpha_factor = 1. - alpha_factor
                focal_weight = classification
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(torch.log(1.0 - classification))

                cls_loss = focal_weight * bce

                if cuda:
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))
                classification_losses.append(cls_loss.sum())
                continue

            # Get the targets for the predictions
            targets, num_positive_anchors, positive_indices, assigned_annotations = get_target(
                anchor, bbox_annotation, classification, cuda)

            alpha_factor = torch.ones_like(targets) * alpha
            if cuda:
                alpha_factor = alpha_factor.cuda()
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if cuda:
                zeros = zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)
            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))
            # smooth_l1
            if positive_indices.sum() > 0:
                targets = encode_bbox(assigned_annotations, positive_indices,
                                      anchor_widths, anchor_heights,
                                      anchor_ctr_x, anchor_ctr_y)

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if cuda:
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        c_loss = torch.stack(classification_losses).mean()
        r_loss = torch.stack(regression_losses).mean()
        loss = c_loss + r_loss
        return loss, c_loss, r_loss
Example #12
 def __ne__(self, other):
     return torch.ne(self, other)
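For reference, torch.ne() compares element-wise (with broadcasting), so a Tensor.__ne__ defined this way returns a boolean tensor rather than a single bool:

import torch

a = torch.tensor([1, 2, 3])
b = torch.tensor([1, 0, 3])
print(torch.ne(a, b))  # tensor([False,  True, False])
print(torch.ne(a, 2))  # tensor([ True, False,  True])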
Example #13
 def easy(x, y):
     c = torch.ne(x, y)
     return c
Example #14
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        class_losses = []
        bbox_losses = []

        anchor = anchors[0, :, :]
        # anchor: (num_anchors x 4); the 4 columns are the box corners (x1, y1, x2, y2)
        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        # centre coordinates of anchor
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():
                    alpha_factor = torch.ones(classification.shape).cuda() * alpha

                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    # cls_loss = focal_weight * torch.pow(bce, gamma)
                    cls_loss = focal_weight * bce
                    class_losses.append(cls_loss.sum())
                    bbox_losses.append(torch.tensor(0).float())

                else:
                    alpha_factor = torch.ones(classification.shape) * alpha

                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    # cls_loss = focal_weight * torch.pow(bce, gamma)
                    cls_loss = focal_weight * bce
                    class_losses.append(cls_loss.sum())
                    bbox_losses.append(torch.tensor(0).float())

                continue

            # use a lowercase name so the IoU() function is not shadowed inside the loop
            iou = IoU(anchors[0, :, :], bbox_annotation[:, :4])  # num_anchors x num_annotations

            IoU_max, IoU_argmax = torch.max(iou, dim=1)  # num_anchors x 1

            # import pdb
            # pdb.set_trace()

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1

            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            if torch.cuda.is_available():
                alpha_factor = torch.ones(targets.shape).cuda() * alpha
            else:
                alpha_factor = torch.ones(targets.shape) * alpha

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            if torch.cuda.is_available():
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            else:
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape))

            class_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                if torch.cuda.is_available():
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
                else:
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])

                negative_indices = ~positive_indices

                regression_diff = torch.abs(targets - regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                bbox_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    bbox_losses.append(torch.tensor(0).float().cuda())
                else:
                    bbox_losses.append(torch.tensor(0).float())

        return torch.stack(class_losses).mean(dim=0, keepdim=True), torch.stack(bbox_losses).mean(dim=0,
                                                                                                  keepdim=True)
Example #15
    def forward(self,
                outs,
                snt_state,
                snt_padding_mask,
                copy_seq,
                target=None,
                work=False):
        x, alignment_weight = self.alignment_layer(
            outs,
            snt_state,
            snt_state,
            key_padding_mask=snt_padding_mask,
            need_weights='one')
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.alignment_layer_norm(outs + x)
        residual = x
        x = F.relu(self.fc1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        outs = self.ff_layer_norm(residual + x)

        seq_len, bsz, _ = outs.size()
        outs_concept = torch.tanh(self.transfer(outs))
        outs_concept = F.dropout(outs_concept,
                                 p=self.dropout,
                                 training=self.training)

        gen_gate, map_gate, copy_gate = F.softmax(self.diverter(outs_concept),
                                                  -1).chunk(3, dim=-1)
        copy_gate = torch.cat([copy_gate, map_gate], -1)

        probs = gen_gate * F.softmax(self.generator(outs_concept), -1)

        tot_ext = 1 + copy_seq.max().item()
        vocab_size = probs.size(-1)

        if tot_ext - vocab_size > 0:
            ext_probs = probs.new_zeros(
                (1, 1, tot_ext - vocab_size)).expand(seq_len, bsz, -1)
            probs = torch.cat([probs, ext_probs], -1)
        #copy_seq: src_len x bsz x 2
        #copy_gate: tgt_len x bsz x 2
        #alignment_weight: tgt_len x bsz x src_len
        #index: tgt_len x bsz x (src_len x 2)
        index = copy_seq.transpose(0, 1).contiguous().view(1, bsz, -1).expand(
            seq_len, -1, -1)
        copy_probs = (copy_gate.unsqueeze(2) *
                      alignment_weight.unsqueeze(-1)).view(seq_len, bsz, -1)
        probs = probs.scatter_add_(-1, index, copy_probs)
        ll = torch.log(probs + 1e-12)

        if work:
            return ll, outs

        if not self.training:
            _, pred = torch.max(ll, -1)
            total_concepts = torch.ne(
                target, self.vocabs['predictable_concept'].padding_idx)
            acc = torch.eq(
                pred,
                target).masked_select(total_concepts).float().sum().item()
            tot = total_concepts.sum().item()
            print('conc acc', acc / tot)

        concept_loss = -ll.gather(dim=-1,
                                  index=target.unsqueeze(-1)).squeeze(-1)
        concept_mask = torch.eq(target,
                                self.vocabs['predictable_concept'].padding_idx)
        concept_loss = concept_loss.masked_fill_(concept_mask, 0.).sum(0)
        return concept_loss, outs
Example #16
def _input_split_sort(input, offsets, padding_idx):
    """
    In the circumstance of row-wise sharding of weight, we need to distribute
    the sorted lookup IDs of embeddingBag to each rank by range. The constraint
    here is that we cannot simply sort the whole input: each interval has to be
    kept separate, because the result is aggregated per interval.

    If the index in the placement is not equal to the rank number, we need to
    do the rearrangement based on the order given by the Sharding Spec (placement).

    We also calculate the split_size with padding_idx excluded per interval
    so that we can use it as the divisor to calculate the mean correctly.

    Args:
        input: tensor to be applied op on.
        offsets: start index of each interval in the 1D case.
        padding_idx: the embedding vector at padding_idx is
            excluded from the reduction.

    Return:
        input_split_sorted_list: list of ID positions sorted per interval.
        input_split_sorted_indices: sorted indices for per_sample_weights
            rearrangements.
        split_sizes_1d: size of each split for 1D input because it can be
            different in such scenario.
        split_sizes_1d_with_padding: size of each split for 1D input with
            padding_idx excluded. This is for the divisor of `mean` mode.
    """
    input_size = input.size()
    input_split_sorted_list = []
    split_sizes_1d = []
    split_sizes_1d_with_padding = []
    padding_idx = padding_idx if padding_idx is not None else -1

    # For 2D tensor, we just first sort and then append row by row into a list.
    if len(input_size) > 1:
        indice_offset = 0
        sorted_input, input_split_sorted_indices = torch.sort(input)
        for i in range(0, sorted_input.size(0)):
            input_split_sorted_list.append(sorted_input[i])
            input_split_sorted_indices[i] += indice_offset
            indice_offset += input.size(1)
            split_sizes_1d_with_padding.append(
                torch.sum(torch.ne(sorted_input[i], padding_idx)).item())
        input_split_sorted_indices = torch.reshape(input_split_sorted_indices,
                                                   (-1, ))
    # Split 1D input tensor based on the given offsets.
    else:
        input_split_sorted_indices_list = []
        offset_len = len(offsets)
        split_size = offsets[1:offset_len] - offsets[0:-1]
        split_sizes_1d = split_size.tolist()
        if torch.sum(split_size) < input.size(0):
            split_sizes_1d.append(input.size(0) - offsets[-1].item())
        indice_offset = 0
        for idx, split_result in enumerate(torch.split(input, split_sizes_1d)):
            split_result_sorted, indices = torch.sort(split_result)
            input_split_sorted_list.append(split_result_sorted)
            split_sizes_1d_with_padding.append(
                torch.sum(torch.ne(split_result_sorted, padding_idx)).item())
            input_split_sorted_indices_list.append(indices + indice_offset)
            indice_offset += split_sizes_1d[idx]
        input_split_sorted_indices = torch.cat(input_split_sorted_indices_list)

    return (
        input_split_sorted_list,
        input_split_sorted_indices,
        split_sizes_1d,
        split_sizes_1d_with_padding,
    )
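A usage sketch for _input_split_sort() on a 1-D bag input (dummy values): offsets delimit two bags, and padding_idx 0 is excluded from the per-bag counts that later serve as the divisor for 'mean' mode.

import torch

input = torch.tensor([4, 0, 9, 2, 2, 0, 7])
offsets = torch.tensor([0, 3])  # bag 0 = [4, 0, 9], bag 1 = [2, 2, 0, 7]
sorted_list, sorted_indices, sizes, sizes_no_pad = _input_split_sort(
    input, offsets, padding_idx=0)
print(sorted_list)    # per-bag sorted IDs: [0, 4, 9] and [0, 2, 2, 7]
print(sizes)          # [3, 4]
print(sizes_no_pad)   # [2, 3]  (padding_idx excluded)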
Example #17
def _handle_row_wise_sharding(
    input,
    world_size,
    weight,
    local_shard,
    offsets,
    per_sample_weights,
    mode,
    max_norm,
    norm_type,
    padding_idx,
    rank,
    pg,
):
    """
    Entry-point function to handle the logic of row-wise sharding of weight
    for embeddingBag. (Detailed explanations of the logic can be found in
    the comment for sharded_embedding_bag.)

    Args:
        input: list of ID used for lookup and aggregation.
        world_size: number of ranks.
        weight: sharded weight tensor.
        local_shard: row-wise sharded local weight used for lookup.
        offsets: list of start positions of each bag for 1D input.
        per_sample_weights: weights for weighted sum mode.
        mode: aggregation method of each bag.
        max_norm: If given, each embedding vector with norm larger
            than max_norm is renormalized to have norm max_norm.
            Note: this will modify weight in-place.
        norm_type: The p in the p-norm to compute for the max_norm option.
        padding_idx: If specified, the entries at padding_idx do
            not contribute to the gradient; therefore, the embedding
            vector at padding_idx is not updated during training,
            i.e. it remains as a fixed “pad”.
            Note that the embedding vector at padding_idx is
            excluded from the reduction.
        rank: rank of the CUDA process.
        pg: process group.

    Returns:
        gathered_output: final result of lookup and aggregation.
    """
    # We sort each interval defined by offset. If 2D, each interval is a row.
    input_size = input.size()
    (
        input_split_sorted_list,
        input_split_sorted_indices,
        split_sizes_1d,
        split_sizes_1d_with_padding,
    ) = _input_split_sort(input, offsets, padding_idx)

    # Within each interval of the sorted list, we first need to distribute
    # each ID to its bucket (rank) and also ensure the rearrangement has been
    # done in case the placement idx is not equal to the rank.
    # We then compute some simple stats on each interval for the next step.
    # If the user specifies per_sample_weights, we need to rearrange them so
    # they stay in sync with the IDs, and then distribute them to each rank.
    (
        input_combined,
        input_combined_split_sizes,
        offsets_rearrange_list,
        offsets_rearrange_sizes,
        per_sample_weights,
        sharded_dim_size_max,
        padding_idx,
    ) = _sorted_input_distribute_prepare(
        input_split_sorted_list,
        input_split_sorted_indices,
        world_size,
        input,
        weight,
        per_sample_weights,
        rank,
        padding_idx,
    )

    # Send ID/offsets/per_sample_weights to different bucket(rank).
    (
        gathered_input,
        output_offsets_tensor_list,
        output_split_sizes,
        gathered_per_sample_weights,
    ) = _distribute_input(
        input_combined,
        input_combined_split_sizes,
        offsets_rearrange_list,
        offsets_rearrange_sizes,
        sharded_dim_size_max,
        world_size,
        input,
        per_sample_weights,
        pg,
    )

    # Perform the embedding bag look-up and aggregation
    results = []
    for i, inp in enumerate(gathered_input):
        per_sample_weights = (gathered_per_sample_weights[i]
                              if gathered_per_sample_weights is not None else
                              None)
        # If the input is empty, passing in max_norm causes
        # errors in CUDA.
        if max_norm is not None and inp.size(0) == 0:
            max_norm = None

        # Perform local embedding look up and aggregation.
        result = torch.nn.functional.embedding_bag(
            inp,
            local_shard,
            offsets=output_offsets_tensor_list[i],
            mode=mode if mode != "mean" else "sum",
            per_sample_weights=per_sample_weights,
            max_norm=max_norm,
            norm_type=norm_type,
            padding_idx=padding_idx,
        )
        if mode != "max":
            results.append(result)
        # For the max case, if there is no look-up from some ranks,
        # it will return all zeros for those rows. In that case, we need
        # to set the rows to -inf; otherwise, in the final
        # aggregation negative values will be rounded up to zero.
        elif inp.size(0) == 0:
            result[:] = -float("Inf")
            results.append(result)
        else:
            for idx, current_offset in enumerate(
                    output_offsets_tensor_list[i]):
                next_offset = current_offset
                if idx == len(output_offsets_tensor_list[i]) - 1:
                    next_offset = output_split_sizes[i]
                else:
                    next_offset = output_offsets_tensor_list[i][idx + 1]
                # When there is no interval in the current rank or all IDs
                # are equal to padding_idx, we then need to ensure they
                # don't contribute to the final result.
                if (current_offset == next_offset) or (
                        padding_idx is not None and not torch.any(
                            torch.ne(inp[current_offset:next_offset],
                                     padding_idx))):
                    result[idx] = -float("Inf")
            results.append(result)

    # Gather all the aggregated results appropriately by using reduce_scatter.
    row_size = input.size(0) if len(input_size) > 1 else len(split_sizes_1d)
    gathered_output = torch.empty(row_size,
                                  weight.size(1),
                                  device=input.device)
    op = ReduceOp.SUM if mode != "max" else ReduceOp.MAX
    dist.reduce_scatter(gathered_output, results, op=op, group=pg)

    # For mean, we cannot do the division until the very end, because the sum
    # of the means is not equal to the mean of the sum (the divisors differ).
    if mode == "mean":
        split_sizes_1d_tensor = torch.tensor(split_sizes_1d_with_padding,
                                             dtype=torch.float,
                                             device=input.device)
        # Make sure divisor is not zero.
        split_sizes_1d_tensor[split_sizes_1d_tensor == 0.0] = 1.0
        return (torch.div(gathered_output.t().contiguous(),
                          split_sizes_1d_tensor).t().contiguous())

    # Return the appropriate local result.
    return gathered_output
Example #18
 def check_adv(cls, logits, label, targeted, confidence):
     if targeted:
         return torch.eq(torch.argmax(logits - confidence, 1),
                         label.squeeze())
     return torch.ne(torch.argmax(logits - confidence, 1), label.squeeze())
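A small sketch of the untargeted branch of check_adv() (dummy logits, confidence 0): an example counts as adversarial when the argmax of the adjusted logits differs from the true label, which is exactly what torch.ne() reports.

import torch

logits = torch.tensor([[2.0, 0.5, 0.1],
                       [0.2, 1.5, 0.3]])
label = torch.tensor([[0], [0]])
confidence = 0.0
is_adv = torch.ne(torch.argmax(logits - confidence, 1), label.squeeze())
print(is_adv)  # tensor([False,  True])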
Example #19
    def forward(self, classifications, bbox_regressions, ldm_regressions,
                anchors, annotations):

        device = classifications.device
        batch_size = classifications.shape[0]
        classification_losses = []
        bbox_regression_losses = []
        ldm_regression_losses = []

        anchor = anchors[0, :, :]
        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):
            classification = classifications[j, :, :]
            bbox_regression = bbox_regressions[j, :, :]
            ldm_regression = ldm_regressions[j, :, :]

            annotation = annotations[j, :, :]
            annotation = annotation[annotation[:, 0] > 0]
            bbox_annotation = annotation[:, :4]
            ldm_annotation = annotation[:, 4:]

            if bbox_annotation.shape[0] == 0:
                bbox_regression_losses.append(
                    torch.tensor(0., requires_grad=True, device=device))
                classification_losses.append(
                    torch.tensor(0., requires_grad=True, device=device))
                ldm_regression_losses.append(
                    torch.tensor(0., requires_grad=True, device=device))
                continue

            # IoU between anchors and annotations
            IoU = box_iou(anchor, bbox_annotation)
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            targets = torch.ones_like(classification) * -1
            # those whose iou<0.3 have no object
            negative_indices = torch.lt(IoU_max, 0.3)
            targets[negative_indices, :] = 0
            targets[negative_indices, 1] = 1
            # those whose iou>0.5 have object
            positive_indices = torch.ge(IoU_max, 0.5)
            targets[positive_indices, :] = 0
            targets[positive_indices, 0] = 1
            # keep positive and negative ratios with 1:3
            num_positive_anchors = positive_indices.sum()
            keep_negative_anchors = num_positive_anchors * 3

            bbox_assigned_annotations = bbox_annotation[IoU_argmax, :]
            ldm_assigned_annotations = ldm_annotation[IoU_argmax, :]
            ldm_sum = ldm_assigned_annotations.sum(dim=1)
            ge0_mask = ldm_sum > 0
            ldm_positive_indices = ge0_mask & positive_indices
            # OHEM
            # negative_losses = classification[negative_indices, 1] * -1
            negative_losses = self.centropy(
                classification[negative_indices],
                targets[negative_indices].argmax(dim=1))
            sorted_losses, _ = torch.sort(negative_losses, descending=True)
            if sorted_losses.numel() > keep_negative_anchors:
                sorted_losses = sorted_losses[:keep_negative_anchors]
            # positive_losses = classification[positive_indices, 0] * -1
            positive_losses = self.centropy(
                classification[positive_indices],
                targets[positive_indices].argmax(dim=1))
            # focal loss
            focal_loss = False
            if focal_loss:
                alpha = 0.25
                gamma = 2.0

                alpha_factor = torch.ones_like(targets) * alpha
                alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                           1. - alpha_factor)
                focal_weight = torch.where(torch.eq(targets, 1.),
                                           1. - classification, classification)
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(targets * torch.log(classification) +
                        (1.0 - targets) * torch.log(1.0 - classification))
                cls_loss = focal_weight * bce
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                       torch.zeros_like(cls_loss))
                classification_losses.append(
                    cls_loss.sum() /
                    torch.clamp(num_positive_anchors.float(), min=1.0))
            else:
                if positive_indices.sum() > 0:
                    classification_losses.append(positive_losses.mean() +
                                                 sorted_losses.mean())
                else:
                    classification_losses.append(
                        torch.tensor(0., requires_grad=True, device=device))

            # compute bboxes loss
            if positive_indices.sum() > 0:
                # bbox
                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                bbox_assigned_annotations = bbox_assigned_annotations[
                    positive_indices, :]
                gt_widths = bbox_assigned_annotations[:,
                                                      2] - bbox_assigned_annotations[:,
                                                                                     0]
                gt_heights = bbox_assigned_annotations[:,
                                                       3] - bbox_assigned_annotations[:,
                                                                                      1]
                gt_ctr_x = bbox_assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = bbox_assigned_annotations[:, 1] + 0.5 * gt_heights

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / (anchor_widths_pi +
                                                             1e-14)
                targets_dy = (gt_ctr_y -
                              anchor_ctr_y_pi) / (anchor_heights_pi + 1e-14)
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                bbox_targets = torch.stack(
                    (targets_dx, targets_dy, targets_dw, targets_dh))
                bbox_targets = bbox_targets.t()
                # Rescale
                bbox_targets = bbox_targets / torch.tensor(
                    [[0.1, 0.1, 0.2, 0.2]], device=device)

                # smooth L1 box losses
                bbox_regression_loss = self.smoothl1(
                    bbox_targets, bbox_regression[positive_indices, :])
                bbox_regression_losses.append(bbox_regression_loss)
            else:
                bbox_regression_losses.append(
                    torch.tensor(0., requires_grad=True, device=device))

            # compute landmarks loss
            if ldm_positive_indices.sum() > 0:
                ldm_assigned_annotations = ldm_assigned_annotations[
                    ldm_positive_indices, :]

                anchor_widths_l = anchor_widths[ldm_positive_indices]
                anchor_heights_l = anchor_heights[ldm_positive_indices]
                anchor_ctr_x_l = anchor_ctr_x[ldm_positive_indices]
                anchor_ctr_y_l = anchor_ctr_y[ldm_positive_indices]

                l0_x = (ldm_assigned_annotations[:, 0] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l0_y = (ldm_assigned_annotations[:, 1] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
                l1_x = (ldm_assigned_annotations[:, 2] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l1_y = (ldm_assigned_annotations[:, 3] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
                l2_x = (ldm_assigned_annotations[:, 4] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l2_y = (ldm_assigned_annotations[:, 5] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
                l3_x = (ldm_assigned_annotations[:, 6] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l3_y = (ldm_assigned_annotations[:, 7] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
                l4_x = (ldm_assigned_annotations[:, 8] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l4_y = (ldm_assigned_annotations[:, 9] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)

                ldm_targets = torch.stack((l0_x, l0_y, l1_x, l1_y, l2_x, l2_y,
                                           l3_x, l3_y, l4_x, l4_y))
                ldm_targets = ldm_targets.t()
                # Rescale
                scale = torch.ones(1, 10, device=device) * 0.1
                ldm_targets = ldm_targets / scale

                ldm_regression_loss = self.smoothl1(
                    ldm_targets, ldm_regression[ldm_positive_indices, :])
                ldm_regression_losses.append(ldm_regression_loss)
            else:
                ldm_regression_losses.append(
                    torch.tensor(0., requires_grad=True).cuda())

        batch_cls_losses = torch.stack(classification_losses).mean()
        batch_box_losses = torch.stack(bbox_regression_losses).mean()
        batch_lmk_losses = torch.stack(ldm_regression_losses).mean()
        losses = batch_cls_losses + self.lmd1 * batch_box_losses + self.lmd2 * batch_lmk_losses

        return losses
Example #20
    def forward(self, classifications, regressions, anchors, annotations):
        def extract(box):
            w, h = box[:, 2] - box[:, 0], box[:, 3] - box[:, 1]
            return w, h, box[:, 0] + .5 * w, box[:, 1] + .5 * h

        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor_widths, anchor_heights, anchor_ctr_x, anchor_ctr_y = extract(
            anchors[0, :, :])

        for classification, regression, bbox_annotation in zip(
                classifications, regressions, annotations):
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                regression_losses.append(
                    torch.tensor(0).float().to(self.device))
                classification_losses.append(
                    torch.tensor(0).float().to(self.device))
                continue

            classification = torch.clamp(classification, 1e-4, 1. - 1e-4)
            IoU = compute_iou_(
                anchors[0, :, :],
                bbox_annotation[:, :4])  # num_anchors x num_annotations
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

            # compute the loss for classification
            targets = (torch.ones(classification.shape) * -1).to(self.device)

            targets[torch.lt(IoU_max, .4), :] = 0

            positive_indices = torch.ge(IoU_max, .5)
            num_positive_anchors = positive_indices.sum()
            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

            alpha_factor = torch.ones(targets.shape).to(
                self.device) * self.alpha

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma)

            bce = -(targets * torch.log(classification) +
                    (1. - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            cls_loss = torch.where(torch.ne(targets, -1.), cls_loss,
                                   torch.zeros(cls_loss.shape).to(self.device))

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.float(), min=1.))

            # compute the loss for regression
            if positive_indices.sum().item() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths, gt_heights, gt_ctr_x, gt_ctr_y = extract(
                    assigned_annotations)

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                targets = targets / torch.Tensor([[.1, .1, .2, .2]]).to(
                    self.device)

                negative_indices = ~positive_indices

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff,
                             1. / 9.), .5 * 9. * torch.pow(regression_diff, 2),
                    regression_diff - .5 / 9.)
                regression_losses.append(regression_loss.mean())

            else:
                regression_losses.append(
                    torch.tensor(0).float().to(self.device))

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
            torch.stack(regression_losses).mean(dim=0, keepdim=True)
Example #21
    def search(self, src, is_greedy=True, max_length=255):
        mask, x_length = None, None

        if isinstance(src, tuple):
            x, x_length = src
            mask = self.generate_mask(x, x_length)
        else:
            x = src
        batch_size = x.size(0)

        emb_src = self.emb_src(x)
        h_src, h_0_tgt = self.encoder((emb_src, x_length))
        h_0_tgt, c_0_tgt = h_0_tgt
        h_0_tgt = h_0_tgt.transpose(0, 1).contiguous().view(
            batch_size, -1, self.hidden_size).transpose(0, 1).contiguous()
        c_0_tgt = c_0_tgt.transpose(0, 1).contiguous().view(
            batch_size, -1, self.hidden_size).transpose(0, 1).contiguous()
        h_0_tgt = (h_0_tgt, c_0_tgt)

        # Fill a vector, which has 'batch_size' dimension, with BOS value.
        y = x.new(batch_size, 1).zero_() + data_loader.BOS
        is_undone = x.new_ones(batch_size, 1).float()
        decoder_hidden = h_0_tgt
        h_t_tilde, y_hats, indice = None, [], []

        # Repeat the loop while the sum of the 'is_undone' flags is bigger than 0
        # and the current time-step is smaller than the maximum length.
        while is_undone.sum() > 0 and len(indice) < max_length:
            # Unlike the training procedure,
            # take only the last time-step's output during inference.
            emb_t = self.emb_dec(y)
            # |emb_t| = (batch_size, 1, word_vec_dim)

            decoder_output, decoder_hidden = self.decoder(
                emb_t, h_t_tilde, decoder_hidden)
            context_vector = self.attn(h_src, decoder_output, mask)
            h_t_tilde = self.tanh(
                self.concat(torch.cat([decoder_output, context_vector],
                                      dim=-1)))
            y_hat = self.generator(h_t_tilde)
            # |y_hat| = (batch_size, 1, output_size)
            y_hats += [y_hat]

            if is_greedy:
                y = torch.topk(y_hat, 1, dim=-1)[1].squeeze(-1)
            else:
                # Take a random sampling based on the multinoulli distribution.
                y = torch.multinomial(y_hat.exp().view(batch_size, -1), 1)
            # Put PAD if the sample is done.
            y = y.masked_fill_((1. - is_undone).bool(), data_loader.PAD)
            is_undone = is_undone * torch.ne(y, data_loader.EOS).float()
            # |y| = (batch_size, 1)
            # |is_undone| = (batch_size, 1)
            indice += [y]

        y_hats = torch.cat(y_hats, dim=1)
        indice = torch.cat(indice, dim=-1)
        # |y_hat| = (batch_size, length, output_size)
        # |indice| = (batch_size, length)

        return y_hats, indice
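A standalone sketch of the stopping logic inside the decoding loop above, with assumed token ids (EOS = 2, PAD = 0): is_undone stays 1 for a sequence until it emits EOS, after which torch.ne() zeroes its flag and later samples get masked to PAD.

import torch

EOS, PAD = 2, 0
is_undone = torch.ones(3, 1)
y = torch.tensor([[5], [2], [7]])                  # sequence 1 just emitted EOS
y = y.masked_fill_((1. - is_undone).bool(), PAD)   # nothing masked on this step
is_undone = is_undone * torch.ne(y, EOS).float()
print(is_undone)  # tensor([[1.], [0.], [1.]])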
Example #22
 def compute_mask(self, x):
     mask = torch.ne(x, 0).float()
     if self.enable_cuda:
         mask = mask.cuda()
     return mask
Example #23
    input_seq[batch_size - 1, max_src_seq_len - 1] = 0
    input_seq[batch_size - 1, max_src_seq_len - 2] = 0
    input_seq[batch_size - 2, max_src_seq_len - 1] = 0
    input_seq[1][5] = 1
    input_seq[3][2] = 1
    input_seq[3][5] = 1
    input_seq[3][6] = 1
    input_seq[0][2] = 1

    input_seq_oov = np.copy(input_seq)
    input_seq_oov[1][5] = 20
    input_seq_oov[3][2] = 20
    input_seq_oov[3][5] = 21
    input_seq_oov[3][6] = 22
    input_seq_oov[0][2] = 20

    input_seq = torch.LongTensor(input_seq)
    input_seq_oov = torch.LongTensor(input_seq_oov)

    src_mask = torch.ne(input_seq, 0)
    src_mask = src_mask.type(torch.FloatTensor)
    max_num_oovs = 3

    t = 5
    trg_side_memory_bank = torch.randn((batch_size, t - 1, decoder_size))

    final_dist, h_next, context, attn_dist, p_gen, coverage = decoder(
        y, h, memory_bank, src_mask, max_num_oovs, input_seq_oov, coverage,
        trg_side_memory_bank)
    print("Pass")
Example #24
    def generate(self, batch_data, eval_data):
        generate_corpus = []
        idx2token = eval_data.target_idx2token

        source_text = batch_data['source_idx']
        source_length = batch_data['source_length']
        source_embeddings = self.source_token_embedder(source_text)
        encoder_outputs, encoder_states = self.encoder(source_embeddings, source_length)

        if self.bidirectional:
            encoder_outputs = encoder_outputs[:, :, self.hidden_size:] + encoder_outputs[:, :, :self.hidden_size]
            if (self.rnn_type == 'lstm'):
                encoder_states = (encoder_states[0][::2], encoder_states[1][::2])
            else:
                encoder_states = encoder_states[::2]

        encoder_masks = torch.ne(source_text, self.padding_token_idx)
        for bid in range(source_text.size(0)):
            decoder_states = encoder_states[:, bid, :].unsqueeze(1)
            encoder_output = encoder_outputs[bid, :, :].unsqueeze(0)
            encoder_mask = encoder_masks[bid, :].unsqueeze(0)
            generate_tokens = []
            input_seq = torch.LongTensor([[self.sos_token_idx]]).to(self.device)

            if (self.strategy == 'beam_search'):
                hypothesis = Beam_Search_Hypothesis(
                    self.beam_size, self.sos_token_idx, self.eos_token_idx, self.device, idx2token
                )

            for gen_idx in range(self.target_max_length):
                decoder_input = self.target_token_embedder(input_seq)
                if self.attention_type is not None:
                    decoder_outputs, decoder_states, _ = self.decoder(
                        decoder_input, decoder_states, encoder_output, encoder_mask
                    )
                else:
                    decoder_outputs, decoder_states = self.decoder(decoder_input, decoder_states)

                token_logits = self.vocab_linear(decoder_outputs)
                if (self.strategy == 'topk_sampling'):
                    token_idx = topk_sampling(token_logits).item()
                elif (self.strategy == 'greedy_search'):
                    token_idx = greedy_search(token_logits).item()
                elif (self.strategy == 'beam_search'):
                    if self.attention_type is not None:
                        input_seq, decoder_states, encoder_output, encoder_mask = \
                            hypothesis.step(gen_idx, token_logits, decoder_states, encoder_output, encoder_mask)
                    else:
                        input_seq, decoder_states = hypothesis.step(gen_idx, token_logits, decoder_states)

                if (self.strategy in ['topk_sampling', 'greedy_search']):
                    if token_idx == self.eos_token_idx:
                        break
                    else:
                        generate_tokens.append(idx2token[token_idx])
                        input_seq = torch.LongTensor([[token_idx]]).to(self.device)
                elif (self.strategy == 'beam_search'):
                    if (hypothesis.stop()):
                        break

            if (self.strategy == 'beam_search'):
                generate_tokens = hypothesis.generate()

            generate_corpus.append(generate_tokens)

        return generate_corpus
Exemplo n.º 25
0
def test_ne(x, y):
    c = torch.ne(torch.add(x, y), y)
    return c
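For exact integer arithmetic, torch.ne(x + y, y) is simply a non-zero test on x; a quick sketch:

import torch

x = torch.tensor([0, 1, -2])
y = torch.tensor([3, 3, 3])
print(torch.ne(torch.add(x, y), y))   # tensor([False,  True,  True])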
Exemplo n.º 26
0
    def test_model_trains(self):
        # Performs one step of training and verifies that the weights are updated, implying some training occurs.
        with TemporaryDirectory() as tmpdirname:
            T = torch.cuda.FloatTensor
            latent = np.random.rand(64, 1, 512)
            os.makedirs(os.path.dirname(tmpdirname + '/encoded_smiles.latent'),
                        exist_ok=True)
            with open(tmpdirname + '/encoded_smiles.latent', 'w') as f:
                json.dump(latent.tolist(), f)

            C = CreateModelRunner(input_data_path=tmpdirname +
                                  '/encoded_smiles.latent',
                                  output_model_folder=tmpdirname)
            C.run()
            D = Discriminator.load(tmpdirname + '/discriminator.txt')
            G = Generator.load(tmpdirname + '/generator.txt')
            G.cuda()
            D.cuda()
            optimizer_G = torch.optim.Adam(G.parameters())
            optimizer_D = torch.optim.Adam(D.parameters())
            json_smiles = open(tmpdirname + '/encoded_smiles.latent', "r")
            latent_space_mols = np.array(json.load(json_smiles))
            testSampler = Sampler(G)
            latent_space_mols = latent_space_mols.reshape(
                latent_space_mols.shape[0], 512)
            dataloader = torch.utils.data.DataLoader(
                LatentMolsDataset(latent_space_mols),
                shuffle=True,
                batch_size=64,
                drop_last=True)
            for _, real_mols in enumerate(dataloader):
                real_mols = real_mols.type(T)
                before_G_params = []
                before_D_params = []
                for param in G.parameters():
                    before_G_params.append(param.view(-1))
                before_G_params = torch.cat(before_G_params)
                for param in D.parameters():
                    before_D_params.append(param.view(-1))
                before_D_params = torch.cat(before_D_params)

                optimizer_D.zero_grad()
                fake_mols = testSampler.sample(real_mols.shape[0])
                real_validity = D(real_mols)
                fake_validity = D(fake_mols)
                # Computing the gradient penalty is not relevant here. The test only checks whether
                # the weights change (i.e., that some training occurs), not that the training is done properly.
                d_loss = -torch.mean(real_validity) + torch.mean(fake_validity)
                d_loss.backward()
                optimizer_D.step()
                optimizer_G.zero_grad()
                fake_mols = testSampler.sample(real_mols.shape[0])
                fake_validity = D(fake_mols)
                g_loss = -torch.mean(fake_validity)
                g_loss.backward()
                optimizer_G.step()
                after_G_params = []
                after_D_params = []
                for param in G.parameters():
                    after_G_params.append(param.view(-1))
                after_G_params = torch.cat(after_G_params)
                for param in D.parameters():
                    after_D_params.append(param.view(-1))
                after_D_params = torch.cat(after_D_params)
                self.assertTrue(
                    torch.any(torch.ne(after_G_params, before_G_params)))
                self.assertTrue(
                    torch.any(torch.ne(after_D_params, before_D_params)))

                break
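The torch.ne-based weight check at the end of the test can be reproduced on its own; a toy sketch with a hypothetical linear layer standing in for the GAN models:

import torch

layer = torch.nn.Linear(4, 2)
before = torch.cat([p.detach().clone().view(-1) for p in layer.parameters()])
optimizer = torch.optim.SGD(layer.parameters(), lr=0.1)
loss = layer(torch.randn(8, 4)).pow(2).mean()
loss.backward()
optimizer.step()
after = torch.cat([p.detach().view(-1) for p in layer.parameters()])
print(torch.any(torch.ne(after, before)))   # tensor(True) once the weights have moved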
Exemplo n.º 27
0
    def forward(self, x):
        return torch.ne(x, 3)
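Called on a tensor, this simply flags every element that differs from 3; for example:

import torch

print(torch.ne(torch.tensor([1, 3, 5, 3]), 3))   # tensor([ True, False,  True, False])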
Exemplo n.º 28
0
    def forward(self, classifications, bbox_regressions, ldm_regressions,
                anchors, annotations):
        batch_size = classifications.shape[0]
        classification_losses = []
        bbox_regression_losses = []
        ldm_regression_losses = []

        anchor = anchors[0, :, :]
        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        #temp
        positive_indices_list = []

        for j in range(batch_size):
            classification = classifications[j, :, :]
            bbox_regression = bbox_regressions[j, :, :]
            ldm_regression = ldm_regressions[j, :, :]

            annotation = annotations[j, :, :]
            # annotation = annotation[annotation[:,0] != -1]
            annotation = annotation[annotation[:, 0] > 0]
            bbox_annotation = annotation[:, :4]
            ldm_annotation = annotation[:, 4:]

            if bbox_annotation.shape[0] == 0:
                bbox_regression_losses.append(torch.tensor(0).float().cuda())
                classification_losses.append(torch.tensor(0).float().cuda())
                ldm_regression_losses.append(torch.tensor(0).float().cuda())

                # temp
                positive_indices_list.append([])

                continue

            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])
            #IoU, filt_iou = filt_IoU(anchors[0, :, :], bbox_annotation, ldm_annotation)

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            targets = torch.ones(classification.shape) * -1
            targets = targets.cuda()

            # anchors with IoU < 0.3 are treated as background (no object)
            negative_indices = torch.lt(IoU_max, 0.3)
            targets[negative_indices, :] = 0
            targets[negative_indices, 1] = 1

            # anchors with IoU >= 0.5 are treated as containing an object
            positive_indices = torch.ge(IoU_max, 0.5)

            #temp
            positive_indices_list.append(positive_indices)

            num_positive_anchors = positive_indices.sum()

            # keep the positive:negative anchor ratio at 1:3
            keep_negative_anchors = num_positive_anchors * 3

            bbox_assigned_annotations = bbox_annotation[IoU_argmax, :]
            ldm_assigned_annotations = ldm_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, 0] = 1

            # ignore targets with no landmarks
            # f_IoU_max ,f_IoU_argmax = torch.max(filt_iou, dim=1)
            # ldm_positive_indices = torch.ge(f_IoU_max, 0.5)

            ldm_sum = ldm_assigned_annotations.sum(dim=1)
            ge0_mask = ldm_sum > 0
            ldm_positive_indices = ge0_mask & positive_indices

            # OHEM
            negative_losses = classification[negative_indices, 1] * -1
            sorted_losses, _ = torch.sort(negative_losses, descending=True)
            if sorted_losses.numel() > keep_negative_anchors:
                sorted_losses = sorted_losses[:keep_negative_anchors]
            positive_losses = classification[positive_indices, 0] * -1

            focal_loss = False
            # focal loss
            if focal_loss:
                alpha = 0.25
                gamma = 2.0
                alpha_factor = torch.ones(targets.shape).cuda() * alpha

                alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                           1. - alpha_factor)
                focal_weight = torch.where(torch.eq(targets, 1.),
                                           1. - classification, classification)
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(targets * torch.log(classification) +
                        (1.0 - targets) * torch.log(1.0 - classification))

                cls_loss = focal_weight * bce

                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                       torch.zeros(cls_loss.shape).cuda())

                classification_losses.append(
                    cls_loss.sum() /
                    torch.clamp(num_positive_anchors.float(), min=1.0))
            else:
                if positive_indices.sum() > 0:
                    classification_losses.append(positive_losses.mean() +
                                                 sorted_losses.mean())
                else:
                    classification_losses.append(
                        torch.tensor(0).float().cuda())

            # compute bboxes loss
            if positive_indices.sum() > 0:
                # bbox
                bbox_assigned_annotations = bbox_assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = bbox_assigned_annotations[:, 2] - bbox_assigned_annotations[:, 0]
                gt_heights = bbox_assigned_annotations[:, 3] - bbox_assigned_annotations[:, 1]
                gt_ctr_x = bbox_assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = bbox_assigned_annotations[:, 1] + 0.5 * gt_heights

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / (anchor_widths_pi +
                                                             1e-14)
                targets_dy = (gt_ctr_y -
                              anchor_ctr_y_pi) / (anchor_heights_pi + 1e-14)
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                bbox_targets = torch.stack(
                    (targets_dx, targets_dy, targets_dw, targets_dh))
                bbox_targets = bbox_targets.t()

                # Rescale
                bbox_targets = bbox_targets / torch.Tensor(
                    [[0.1, 0.1, 0.2, 0.2]]).cuda()

                # smooth L1
                # box losses
                bbox_regression_loss = self.smoothl1(
                    bbox_targets, bbox_regression[positive_indices, :])
                bbox_regression_losses.append(bbox_regression_loss)
            else:
                bbox_regression_losses.append(torch.tensor(0).float().cuda())

            # compute landmarks loss
            if ldm_positive_indices.sum() > 0:
                ldm_assigned_annotations = ldm_assigned_annotations[
                    ldm_positive_indices, :]

                anchor_widths_l = anchor_widths[ldm_positive_indices]
                anchor_heights_l = anchor_heights[ldm_positive_indices]
                anchor_ctr_x_l = anchor_ctr_x[ldm_positive_indices]
                anchor_ctr_y_l = anchor_ctr_y[ldm_positive_indices]

                l0_x = (ldm_assigned_annotations[:, 0] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l0_y = (ldm_assigned_annotations[:, 1] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
                l1_x = (ldm_assigned_annotations[:, 2] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l1_y = (ldm_assigned_annotations[:, 3] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
                l2_x = (ldm_assigned_annotations[:, 4] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l2_y = (ldm_assigned_annotations[:, 5] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
                l3_x = (ldm_assigned_annotations[:, 6] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l3_y = (ldm_assigned_annotations[:, 7] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)
                l4_x = (ldm_assigned_annotations[:, 8] -
                        anchor_ctr_x_l) / (anchor_widths_l + 1e-14)
                l4_y = (ldm_assigned_annotations[:, 9] -
                        anchor_ctr_y_l) / (anchor_heights_l + 1e-14)

                ldm_targets = torch.stack((l0_x, l0_y, l1_x, l1_y, l2_x, l2_y,
                                           l3_x, l3_y, l4_x, l4_y))
                ldm_targets = ldm_targets.t()

                # Rescale
                scale = torch.ones(1, 10) * 0.1
                ldm_targets = ldm_targets / scale.cuda()

                ldm_regression_loss = self.smoothl1(
                    ldm_targets, ldm_regression[ldm_positive_indices, :])
                ldm_regression_losses.append(ldm_regression_loss)
            else:
                ldm_regression_losses.append(torch.tensor(0).float().cuda())

        return torch.stack(classification_losses), torch.stack(
            bbox_regression_losses), torch.stack(ldm_regression_losses)
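The ignore-label trick used in the focal-loss branch above (targets set to -1 contribute nothing) can be sketched with toy tensors:

import torch

targets = torch.tensor([[1., 0.], [-1., -1.], [0., 1.]])
cls_loss = torch.rand(3, 2)
cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros_like(cls_loss))
# the middle row (ignored anchors) is zeroed out before the loss is summed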
Exemplo n.º 29
0
    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        use_cache=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutputWithPast]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        transformer_outputs = self.transformer(
            input_ids,
            past_key_values=past_key_values,
            attention_mask=attention_mask,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        hidden_states = transformer_outputs[0]
        logits = self.score(hidden_states)

        if input_ids is not None:
            batch_size = input_ids.shape[0]
        else:
            batch_size = inputs_embeds.shape[0]

        if self.config.pad_token_id is None and batch_size != 1:
            raise ValueError(
                "Cannot handle batch sizes > 1 if no padding token is defined."
            )
        if self.config.pad_token_id is None:
            sequence_lengths = -1
        else:
            if input_ids is not None:
                sequence_lengths = torch.ne(
                    input_ids, self.config.pad_token_id).sum(-1) - 1
            else:
                sequence_lengths = -1
                logger.warning(
                    f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
                    "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
                )

        pooled_logits = logits[torch.arange(batch_size, device=logits.device),
                               sequence_lengths]

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long
                                              or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(pooled_logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(pooled_logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(pooled_logits.view(-1, self.num_labels),
                                labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(pooled_logits, labels)
        if not return_dict:
            output = (pooled_logits, ) + transformer_outputs[1:]
            return ((loss, ) + output) if loss is not None else output

        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=pooled_logits,
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )
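The sequence_lengths computation above just locates the last non-padding position per row; a standalone sketch (pad id assumed to be 0, toy logits):

import torch

pad_token_id = 0
input_ids = torch.tensor([[11, 12, 13, 0, 0],
                          [21, 22, 0, 0, 0]])
sequence_lengths = torch.ne(input_ids, pad_token_id).sum(-1) - 1   # tensor([2, 1])
logits = torch.randn(2, 5, 3)
pooled_logits = logits[torch.arange(2), sequence_lengths]          # shape (2, 3)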
Exemplo n.º 30
0
    def nNanElement(self, x):
        return torch.sum(torch.ne(x, x).float())
Exemplo n.º 31
0
    def forward(self, s1, s2):
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        s1 : Dict[str, torch.LongTensor]
            From a ``TextField``.
        s2 : Dict[str, torch.LongTensor]
            From a ``TextField``.  The model assumes that this s2 contains the answer to the
            s1, and predicts the beginning and ending positions of the answer within the
            s2.

        Returns
        -------
        pair_rep : torch.FloatTensor?
            Tensor representing the final output of the BiDAF model
            to be plugged into the next module

        """
        s1_embs = self._highway_layer(self._text_field_embedder(s1))
        s2_embs = self._highway_layer(self._text_field_embedder(s2))
        if self._elmo is not None:
            s1_elmo_embs = self._elmo(s1['elmo'])
            s2_elmo_embs = self._elmo(s2['elmo'])
            if "words" in s1:
                s1_embs = torch.cat(
                    [s1_embs, s1_elmo_embs['elmo_representations'][0]], dim=-1)
                s2_embs = torch.cat(
                    [s2_embs, s2_elmo_embs['elmo_representations'][0]], dim=-1)
            else:
                s1_embs = s1_elmo_embs['elmo_representations'][0]
                s2_embs = s2_elmo_embs['elmo_representations'][0]
        if self._cove is not None:
            s1_lens = torch.ne(s1['words'],
                               self.pad_idx).long().sum(dim=-1).data
            s2_lens = torch.ne(s2['words'],
                               self.pad_idx).long().sum(dim=-1).data
            s1_cove_embs = self._cove(s1['words'], s1_lens)
            s1_embs = torch.cat([s1_embs, s1_cove_embs], dim=-1)
            s2_cove_embs = self._cove(s2['words'], s2_lens)
            s2_embs = torch.cat([s2_embs, s2_cove_embs], dim=-1)
        s1_embs = self._dropout(s1_embs)
        s2_embs = self._dropout(s2_embs)

        if self._mask_lstms:
            s1_mask = s1_lstm_mask = util.get_text_field_mask(s1).float()
            s2_mask = s2_lstm_mask = util.get_text_field_mask(s2).float()
            s1_mask_2 = util.get_text_field_mask(s1).float()
            s2_mask_2 = util.get_text_field_mask(s2).float()
        else:
            s1_lstm_mask, s2_lstm_mask, s2_lstm_mask_2 = None, None, None

        s1_enc = self._phrase_layer(s1_embs, s1_lstm_mask)
        s2_enc = self._phrase_layer(s2_embs, s2_lstm_mask)

        # Similarity matrix
        # Shape: (batch_size, s2_length, s1_length)
        similarity_mat = self._matrix_attention(s2_enc, s1_enc)

        # s2 representation
        # Shape: (batch_size, s2_length, s1_length)
        s2_s1_attention = util.last_dim_softmax(similarity_mat, s1_mask)
        # Shape: (batch_size, s2_length, encoding_dim)
        s2_s1_vectors = util.weighted_sum(s1_enc, s2_s1_attention)
        # batch_size, seq_len, 4*enc_dim
        s2_w_context = torch.cat([s2_enc, s2_s1_vectors], 2)
        # s1 representation, using same attn method as for the s2 representation
        s1_s2_attention = util.last_dim_softmax(
            similarity_mat.transpose(1, 2).contiguous(), s2_mask)
        # Shape: (batch_size, s1_length, encoding_dim)
        s1_s2_vectors = util.weighted_sum(s2_enc, s1_s2_attention)
        s1_w_context = torch.cat([s1_enc, s1_s2_vectors], 2)
        if self._elmo is not None and self._deep_elmo:
            s1_w_context = torch.cat(
                [s1_w_context, s1_elmo_embs['elmo_representations'][1]],
                dim=-1)
            s2_w_context = torch.cat(
                [s2_w_context, s2_elmo_embs['elmo_representations'][1]],
                dim=-1)
        s1_w_context = self._dropout(s1_w_context)
        s2_w_context = self._dropout(s2_w_context)

        modeled_s2 = self._dropout(
            self._modeling_layer(s2_w_context, s2_lstm_mask))
        s2_mask_2 = s2_mask_2.unsqueeze(dim=-1)
        modeled_s2.data.masked_fill_(1 - s2_mask_2.byte().data, -float('inf'))
        s2_enc_attn = modeled_s2.max(dim=1)[0]
        modeled_s1 = self._dropout(
            self._modeling_layer(s1_w_context, s1_lstm_mask))
        s1_mask_2 = s1_mask_2.unsqueeze(dim=-1)
        modeled_s1.data.masked_fill_(1 - s1_mask_2.byte().data, -float('inf'))
        s1_enc_attn = modeled_s1.max(dim=1)[0]

        return torch.cat([
            s1_enc_attn, s2_enc_attn,
            torch.abs(s1_enc_attn - s2_enc_attn), s1_enc_attn * s2_enc_attn
        ], 1)
Exemplo n.º 32
0
    def getNanMask(self, x):
        return torch.ne(x, x)
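Because NaN is the only value that compares unequal to itself, torch.ne(x, x) is a NaN mask; for example:

import torch

x = torch.tensor([1.0, float('nan'), 3.0])
print(torch.ne(x, x))                      # tensor([False,  True, False])
print(torch.sum(torch.ne(x, x).float()))   # tensor(1.) -> one NaN element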
Exemplo n.º 33
0
    def forward(self, s1, s2):
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        s1 : Dict[str, torch.LongTensor]
            From a ``TextField``.
        s2 : Dict[str, torch.LongTensor]
            From a ``TextField``.  The model assumes that this s2 contains the answer to the
            s1, and predicts the beginning and ending positions of the answer within the
            s2.

        Returns
        -------
        pair_rep : torch.FloatTensor?
            Tensor representing the final output of the BiDAF model
            to be plugged into the next module

        """
        s1_embs = self._highway_layer(self._text_field_embedder(s1))
        s2_embs = self._highway_layer(self._text_field_embedder(s2))
        if self._elmo is not None:
            s1_elmo_embs = self._elmo(s1['elmo'])
            s2_elmo_embs = self._elmo(s2['elmo'])
            if "words" in s1:
                s1_embs = torch.cat([s1_embs, s1_elmo_embs['elmo_representations'][0]], dim=-1)
                s2_embs = torch.cat([s2_embs, s2_elmo_embs['elmo_representations'][0]], dim=-1)
            else:
                s1_embs = s1_elmo_embs['elmo_representations'][0]
                s2_embs = s2_elmo_embs['elmo_representations'][0]
        if self._cove is not None:
            s1_lens = torch.ne(s1['words'], self.pad_idx).long().sum(dim=-1).data
            s2_lens = torch.ne(s2['words'], self.pad_idx).long().sum(dim=-1).data
            s1_cove_embs = self._cove(s1['words'], s1_lens)
            s1_embs = torch.cat([s1_embs, s1_cove_embs], dim=-1)
            s2_cove_embs = self._cove(s2['words'], s2_lens)
            s2_embs = torch.cat([s2_embs, s2_cove_embs], dim=-1)
        s1_embs = self._dropout(s1_embs)
        s2_embs = self._dropout(s2_embs)

        if self._mask_lstms:
            s1_mask = s1_lstm_mask = util.get_text_field_mask(s1).float()
            s2_mask = s2_lstm_mask = util.get_text_field_mask(s2).float()
            s1_mask_2 = util.get_text_field_mask(s1).float()
            s2_mask_2 = util.get_text_field_mask(s2).float()
        else:
            s1_lstm_mask, s2_lstm_mask, s2_lstm_mask_2 = None, None, None

        s1_enc = self._phrase_layer(s1_embs, s1_lstm_mask)
        s2_enc = self._phrase_layer(s2_embs, s2_lstm_mask)

        # Similarity matrix
        # Shape: (batch_size, s2_length, s1_length)
        similarity_mat = self._matrix_attention(s2_enc, s1_enc)

        # s2 representation
        # Shape: (batch_size, s2_length, s1_length)
        s2_s1_attention = util.last_dim_softmax(similarity_mat, s1_mask)
        # Shape: (batch_size, s2_length, encoding_dim)
        s2_s1_vectors = util.weighted_sum(s1_enc, s2_s1_attention)
        # batch_size, seq_len, 4*enc_dim
        s2_w_context = torch.cat([s2_enc, s2_s1_vectors], 2)
        # s1 representation, using same attn method as for the s2 representation
        s1_s2_attention = util.last_dim_softmax(similarity_mat.transpose(1, 2).contiguous(), s2_mask)
        # Shape: (batch_size, s1_length, encoding_dim)
        s1_s2_vectors = util.weighted_sum(s2_enc, s1_s2_attention)
        s1_w_context = torch.cat([s1_enc, s1_s2_vectors], 2)
        if self._elmo is not None and self._deep_elmo:
            s1_w_context = torch.cat([s1_w_context, s1_elmo_embs['elmo_representations'][1]], dim=-1)
            s2_w_context = torch.cat([s2_w_context, s2_elmo_embs['elmo_representations'][1]], dim=-1)
        s1_w_context = self._dropout(s1_w_context)
        s2_w_context = self._dropout(s2_w_context)

        modeled_s2 = self._dropout(self._modeling_layer(s2_w_context, s2_lstm_mask))
        s2_mask_2 = s2_mask_2.unsqueeze(dim=-1)
        modeled_s2.data.masked_fill_(1 - s2_mask_2.byte().data, -float('inf'))
        s2_enc_attn = modeled_s2.max(dim=1)[0]
        modeled_s1 = self._dropout(self._modeling_layer(s1_w_context, s1_lstm_mask))
        s1_mask_2 = s1_mask_2.unsqueeze(dim=-1)
        modeled_s1.data.masked_fill_(1 - s1_mask_2.byte().data, -float('inf'))
        s1_enc_attn = modeled_s1.max(dim=1)[0]

        return torch.cat([s1_enc_attn, s2_enc_attn, torch.abs(s1_enc_attn - s2_enc_attn),
                          s1_enc_attn * s2_enc_attn], 1)
Exemplo n.º 34
0
    def forward(self, act_classifications, sub_regressions, obj_regressions,
                anchors, union_annotations, **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = act_classifications.shape[0]
        # obj_classification_losses = []
        act_classification_losses = []
        # regression_losses = []
        sub_regression_losses = []
        obj_regression_losses = []
        diff_regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all image sizes are the same, which it is
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            act_classification = act_classifications[
                j, :, :]  # (h*w*feat_num, num_classes)
            sub_regression = sub_regressions[
                j, :, :]  # (h*w*feat_num*num_anchor, 4)
            obj_regression = obj_regressions[
                j, :, :]  # (h*w*feat_num*num_anchor, num_union_class, 4)

            bbox_annotation = union_annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 0] >=
                                              0]  # (num_union, K)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():
                    act_classification_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                    sub_regression_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                    obj_regression_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                    diff_regression_losses.append(
                        torch.tensor(0).to(dtype).cuda())

                else:
                    act_classification_losses.append(torch.tensor(0).to(dtype))
                    sub_regression_losses.append(torch.tensor(0).to(dtype))
                    obj_regression_losses.append(torch.tensor(0).to(dtype))
                    diff_regression_losses.append(torch.tensor(0).to(dtype))

                continue

            act_classification = torch.clamp(
                act_classification, 1e-4,
                1.0 - 1e-4)  # (h*w*feat_num, num_classes)

            IoU = calc_iou(
                anchor[:, :],
                bbox_annotation[:, 8:12])  # (h*w*anchor_num, num_union)
            IoA_sub = calc_ioa(
                anchor[:, :],
                bbox_annotation[:, :4])  # (h*w*anchor_num, num_union)
            IoA_obj = calc_ioa(
                anchor[:, :],
                bbox_annotation[:, 4:8])  # (h*w*anchor_num, num_union)

            IoU_max, IoU_argmax = torch.max(
                IoU, dim=1)  # different strides; (h*w*anchor_num, )

            Union_IoU = (IoU > 0.25) * (IoA_sub > 0.25) * (IoA_obj > 0.25)
            Union_IoU = Union_IoU.float()

            IoU_max_ge, IoU_argmax_ge = torch.max(
                0.5 * (IoU + torch.sqrt(IoA_sub * IoA_obj)) * Union_IoU,
                dim=1)  # (h*w*anchor_num, )

            # compute the loss for classification
            act_targets = torch.ones_like(
                act_classification,
                dtype=torch.float32) * -1  # (h*w*feat_num, num_classes)

            if torch.cuda.is_available():
                act_targets = act_targets.cuda()

            act_targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4,

            positive_indices = torch.max(Union_IoU,
                                         dim=1)[0] > 0  # (h*w*anchor_num, 1)
            positive_indices_reg = torch.ge(IoU_max_ge,
                                            0.1)  # actually same as above

            num_positive_anchors = positive_indices.sum()

            assigned_act_annotation_all_fore = torch.mm(
                Union_IoU, bbox_annotation[:,
                                           13:])  # (h*w*anchor_num, num_class)
            assigned_act_annotation_all_fore = torch.clamp(
                assigned_act_annotation_all_fore, 0,
                1)  # (h*w*anchor_num, num_class)

            assigned_act_annotation = bbox_annotation[
                IoU_argmax_ge, 13:]  # (h*w*anchor_num, num_class)
            assigned_annotations = bbox_annotation[IoU_argmax_ge, :]

            assigned_act_annotations_ignore = assigned_act_annotation_all_fore - assigned_act_annotation
            assigned_act_annotations_ignore = assigned_act_annotations_ignore[
                positive_indices]
            # assert assigned_act_annotations_ignore.max() <= 1
            # assert assigned_act_annotations_ignore.min() >= 0

            act_targets[positive_indices, :] = 0
            act_targets[positive_indices, :] = assigned_act_annotation[
                positive_indices, :]

            act_targets = act_targets[positive_indices]
            act_classification = act_classification[positive_indices]
            act_targets = act_targets - assigned_act_annotations_ignore

            alpha_factor_act = torch.ones_like(act_targets,
                                               dtype=torch.float32) * alpha

            if torch.cuda.is_available():
                alpha_factor_act = alpha_factor_act.cuda()
            alpha_factor_act = torch.where(torch.eq(act_targets,
                                                    1.), alpha_factor_act,
                                           1. - alpha_factor_act)

            focal_weight_act = torch.where(torch.eq(act_targets, 1.),
                                           1. - act_classification,
                                           act_classification)
            focal_weight_act = alpha_factor_act * torch.pow(
                focal_weight_act, gamma)

            act_bce = -(
                act_targets * torch.log(act_classification) +
                (1.0 - act_targets) * torch.log(1.0 - act_classification))

            if self.dataset == "vcoco":
                act_cls_loss = focal_weight_act * act_bce
            else:
                act_cls_loss = focal_weight_act * act_bce * self.hoi_weight.to(
                    dtype).cuda()  # classification loss

            act_zeros = torch.zeros_like(act_cls_loss)

            act_cls_loss = torch.where(
                torch.ne(act_targets, -1.0), act_cls_loss,
                act_zeros)  # ignore loss if IoU is too small
            act_classification_losses.append(
                act_cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices_reg.sum() > 0:
                assigned_annotations_sub = assigned_annotations[
                    positive_indices_reg, 0:4]
                assigned_annotations_obj = assigned_annotations[
                    positive_indices_reg, 4:8]

                sub_regression_pi = sub_regression[positive_indices_reg, :]
                obj_regression_pi = obj_regression[positive_indices_reg, :]

                anchor_widths_pi = anchor_widths[positive_indices_reg]
                anchor_heights_pi = anchor_heights[positive_indices_reg]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices_reg]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices_reg]

                sub_regression_loss = regression_loss(
                    anchor_widths_pi, anchor_heights_pi, anchor_ctr_x_pi,
                    anchor_ctr_y_pi, assigned_annotations_sub,
                    sub_regression_pi)
                obj_regression_loss = regression_loss(
                    anchor_widths_pi, anchor_heights_pi, anchor_ctr_x_pi,
                    anchor_ctr_y_pi, assigned_annotations_obj,
                    obj_regression_pi)

                diff_regression_loss = union_regression_loss(
                    anchor_widths_pi, anchor_heights_pi, anchor_ctr_x_pi,
                    anchor_ctr_y_pi, assigned_annotations_sub,
                    assigned_annotations_obj, sub_regression_pi,
                    obj_regression_pi)

                sub_regression_losses.append(sub_regression_loss.mean())
                obj_regression_losses.append(obj_regression_loss.mean())
                diff_regression_losses.append(diff_regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    sub_regression_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                    obj_regression_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                    diff_regression_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                else:
                    sub_regression_losses.append(torch.tensor(0).to(dtype))
                    obj_regression_losses.append(torch.tensor(0).to(dtype))
                    diff_regression_losses.append(torch.tensor(0).to(dtype))

        return torch.stack(act_classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(sub_regression_losses).mean(dim=0, keepdim=True), \
               torch.stack(obj_regression_losses).mean(dim=0, keepdim=True), \
               torch.stack(diff_regression_losses).mean(dim=0, keepdim=True)
Exemplo n.º 35
0
def main():
  """Inference for semantic segmentation.
  """
  # Retrieve experiment configurations.
  args = parse_args('Inference for semantic segmentation.')
  config.network.kmeans_num_clusters = separate_comma(args.kmeans_num_clusters)
  config.network.label_divisor = args.label_divisor

  # Create directories to save results.
  semantic_dir = os.path.join(args.save_dir, 'semantic_gray')
  semantic_rgb_dir = os.path.join(args.save_dir, 'semantic_color')

  # Create color map.
  color_map = vis_utils.load_color_map(config.dataset.color_map_path)
  color_map = color_map.numpy()

  # Create data loaders.
  test_dataset = ListDataset(
      data_dir=args.data_dir,
      data_list=args.data_list,
      img_mean=config.network.pixel_means,
      img_std=config.network.pixel_stds,
      size=None,
      random_crop=False,
      random_scale=False,
      random_mirror=False,
      training=False)
  test_image_paths = test_dataset.image_paths

  # Create models.
  if config.network.backbone_types == 'panoptic_pspnet_101':
    embedding_model = resnet_101_pspnet(config).cuda()
  elif config.network.backbone_types == 'panoptic_deeplab_101':
    embedding_model = resnet_101_deeplab(config).cuda()
  else:
    raise ValueError('Not support ' + config.network.backbone_types)

  if config.network.prediction_types == 'segsort':
    prediction_model = segsort(config)
  else:
    raise ValueError('Not support ' + config.network.prediction_types)

  embedding_model = embedding_model.to("cuda:0")
  prediction_model = prediction_model.to("cuda:0")
  embedding_model.eval()
  prediction_model.eval()
      
  # Load trained weights.
  model_path_template = os.path.join(args.snapshot_dir, 'model-{:d}.pth')
  save_iter = config.train.max_iteration - 1
  embedding_model.load_state_dict(
      torch.load(model_path_template.format(save_iter))['embedding_model'],
      resume=True)
  prediction_model.load_state_dict(
      torch.load(model_path_template.format(save_iter))['prediction_model'])

  # Define CRF.
  postprocessor = DenseCRF(
      iter_max=args.crf_iter_max,
      pos_xy_std=args.crf_pos_xy_std,
      pos_w=args.crf_pos_w,
      bi_xy_std=args.crf_bi_xy_std,
      bi_rgb_std=args.crf_bi_rgb_std,
      bi_w=args.crf_bi_w,)

  # Load memory prototypes.
  semantic_memory_prototypes, semantic_memory_prototype_labels = None, None
  if args.semantic_memory_dir is not None:
    semantic_memory_prototypes, semantic_memory_prototype_labels = (
      segsort_others.load_memory_banks(args.semantic_memory_dir))
    semantic_memory_prototypes = semantic_memory_prototypes.to("cuda:0")
    semantic_memory_prototype_labels = semantic_memory_prototype_labels.to("cuda:0")

    # Remove ignore class.
    valid_prototypes = torch.ne(
        semantic_memory_prototype_labels,
        config.dataset.semantic_ignore_index).nonzero()
    valid_prototypes = valid_prototypes.view(-1)
    semantic_memory_prototypes = torch.index_select(
        semantic_memory_prototypes,
        0,
        valid_prototypes)
    semantic_memory_prototype_labels = torch.index_select(
        semantic_memory_prototype_labels,
        0,
        valid_prototypes)

  # Start inference.
  for data_index in tqdm(range(len(test_dataset))):
    # Image path.
    image_path = test_image_paths[data_index]
    base_name = os.path.basename(image_path).replace('.jpg', '.png')

    # Image resolution.
    image_batch, label_batch, _ = test_dataset[data_index]
    image_h, image_w = image_batch['image'].shape[-2:]

    # Resize the input image.
    if config.test.image_size > 0:
      image_batch['image'] = transforms.resize_with_interpolation(
          image_batch['image'].transpose(1, 2, 0),
          config.test.image_size,
          method='bilinear').transpose(2, 0, 1)
      for lab_name in ['semantic_label', 'instance_label']:
        label_batch[lab_name] = transforms.resize_with_interpolation(
            label_batch[lab_name],
            config.test.image_size,
            method='nearest')
    resize_image_h, resize_image_w = image_batch['image'].shape[-2:]

    # Crop and Pad the input image.
    image_batch['image'] = transforms.resize_with_pad(
        image_batch['image'].transpose(1, 2, 0),
        config.test.crop_size,
        image_pad_value=0).transpose(2, 0, 1)
    image_batch['image'] = torch.FloatTensor(
        image_batch['image'][np.newaxis, ...]).to("cuda:0")
    pad_image_h, pad_image_w = image_batch['image'].shape[-2:]

    # Create the fake labels where clustering ignores 255.
    fake_label_batch = {}
    for label_name in ['semantic_label', 'instance_label']:
      lab = np.zeros((resize_image_h, resize_image_w),
                     dtype=np.uint8)
      lab = transforms.resize_with_pad(
          lab,
          config.test.crop_size,
          image_pad_value=config.dataset.semantic_ignore_index)

      fake_label_batch[label_name] = torch.LongTensor(
          lab[np.newaxis, ...]).to("cuda:0")

    # Put label batch to gpu 1.
    for k, v in label_batch.items():
      label_batch[k] = torch.LongTensor(v[np.newaxis, ...]).to("cuda:0")

    # Create the ending index of each patch.
    stride_h, stride_w = config.test.stride
    crop_h, crop_w = config.test.crop_size
    npatches_h = math.ceil(1.0 * (pad_image_h-crop_h) / stride_h) + 1
    npatches_w = math.ceil(1.0 * (pad_image_w-crop_w) / stride_w) + 1
    patch_ind_h = np.linspace(
        crop_h, pad_image_h, npatches_h, dtype=np.int32)
    patch_ind_w = np.linspace(
        crop_w, pad_image_w, npatches_w, dtype=np.int32)

    # Create place holder for full-resolution embeddings.
    embeddings = {}
    counts = torch.FloatTensor(
        1, 1, pad_image_h, pad_image_w).zero_().to("cuda:0")
    with torch.no_grad():
      for ind_h in patch_ind_h:
        for ind_w in patch_ind_w:
          sh, eh = ind_h - crop_h, ind_h
          sw, ew = ind_w - crop_w, ind_w
          crop_image_batch = {
            k: v[:, :, sh:eh, sw:ew] for k, v in image_batch.items()}

          # Feed-forward.
          crop_embeddings = embedding_model.generate_embeddings(
              crop_image_batch, resize_as_input=True)

          # Initialize embedding.
          for name in crop_embeddings:
            if crop_embeddings[name] is None:
              continue
            crop_emb = crop_embeddings[name].to("cuda:0")
            if name in ['embedding']:
              crop_emb = common_utils.normalize_embedding(
                  crop_emb.permute(0, 2, 3, 1).contiguous())
              crop_emb = crop_emb.permute(0, 3, 1, 2)
            else:
              continue

            if name not in embeddings.keys():
              embeddings[name] = torch.FloatTensor(
                  1,
                  crop_emb.shape[1],
                  pad_image_h,
                  pad_image_w).zero_().to("cuda:0")
            embeddings[name][:, :, sh:eh, sw:ew] += crop_emb
          counts[:, :, sh:eh, sw:ew] += 1

    for k in embeddings.keys():
      embeddings[k] /= counts

    # KMeans.
    lab_div = config.network.label_divisor
    fake_sem_lab = fake_label_batch['semantic_label']
    fake_inst_lab = fake_label_batch['instance_label']
    clustering_outputs = embedding_model.generate_clusters(
        embeddings.get('embedding', None),
        fake_sem_lab,
        fake_inst_lab)
    embeddings.update(clustering_outputs)

    # Generate predictions.
    outputs = prediction_model(
        embeddings,
        {'semantic_memory_prototype': semantic_memory_prototypes,
         'semantic_memory_prototype_label': semantic_memory_prototype_labels},
        with_loss=False, with_prediction=True)
    semantic_topk = outputs['semantic_score']

    # DenseCRF post-processing.
    semantic_prob = common_utils.one_hot(
        semantic_topk, max_label=config.dataset.num_classes)
    semantic_prob = semantic_prob.sum(dim=1).float() / semantic_topk.shape[1]
    semantic_prob = semantic_prob.view(resize_image_h, resize_image_w, -1)
    semantic_prob = semantic_prob.data.cpu().numpy().astype(np.float32)
    semantic_prob = semantic_prob.transpose(2, 0, 1)

    image = image_batch['image'].data.cpu().numpy().astype(np.float32)
    image = image[0, :, :resize_image_h, :resize_image_w].transpose(1, 2, 0)
    image *= np.reshape(config.network.pixel_stds, (1, 1, 3))
    image += np.reshape(config.network.pixel_means, (1, 1, 3))
    image = image * 255
    image = image.astype(np.uint8)

    semantic_prob = postprocessor(image, semantic_prob)

    # Save semantic predictions.
    semantic_pred = np.argmax(semantic_prob, axis=0).astype(np.uint8)
    semantic_pred = cv2.resize(
        semantic_pred,
        (image_w, image_h),
        interpolation=cv2.INTER_NEAREST)

    semantic_pred_name = os.path.join(
        semantic_dir, base_name)
    if not os.path.isdir(os.path.dirname(semantic_pred_name)):
      os.makedirs(os.path.dirname(semantic_pred_name))
    Image.fromarray(semantic_pred, mode='L').save(semantic_pred_name)

    semantic_pred_rgb = color_map[semantic_pred]
    semantic_pred_rgb_name = os.path.join(
        semantic_rgb_dir, base_name)
    if not os.path.isdir(os.path.dirname(semantic_pred_rgb_name)):
      os.makedirs(os.path.dirname(semantic_pred_rgb_name))
    Image.fromarray(semantic_pred_rgb, mode='RGB').save(
        semantic_pred_rgb_name)
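The prototype filtering above keeps only memory entries whose label differs from the ignore index; a small sketch (ignore index assumed to be 255, random prototypes):

import torch

ignore_index = 255
prototype_labels = torch.tensor([3, 255, 7, 255, 1])
prototypes = torch.randn(5, 4)
valid = torch.ne(prototype_labels, ignore_index).nonzero().view(-1)
prototypes = torch.index_select(prototypes, 0, valid)
prototype_labels = torch.index_select(prototype_labels, 0, valid)   # tensor([3, 7, 1])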
Exemplo n.º 36
0
    def test_comparison_ops_with_type_promotion(self, device):
        value_for_type = {
            torch.uint8: (1 << 5),
            torch.int8: (1 << 5),
            torch.int16: (1 << 10),
            torch.int32: (1 << 20),
            torch.int64: (1 << 35),
            torch.float16: (1 << 10),
            torch.float32: (1 << 20),
            torch.float64: (1 << 35),
            torch.complex64: (1 << 20),
            torch.complex128: (1 << 35)
        }
        comparison_ops = [
            dict(
                name="lt",
                out_op=lambda x, y, d: torch.lt(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.lt(x, y),
                compare_op=lambda x, y: x < y,
            ),
            dict(
                name="le",
                out_op=lambda x, y, d: torch.le(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.le(x, y),
                compare_op=lambda x, y: x <= y,
            ),
            dict(
                name="gt",
                out_op=lambda x, y, d: torch.gt(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.gt(x, y),
                compare_op=lambda x, y: x > y,
            ),
            dict(
                name="ge",
                out_op=lambda x, y, d: torch.ge(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.ge(x, y),
                compare_op=lambda x, y: x >= y,
            ),
            dict(
                name="eq",
                out_op=lambda x, y, d: torch.eq(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.eq(x, y),
                compare_op=lambda x, y: x == y,
            ),
            dict(
                name="ne",
                out_op=lambda x, y, d: torch.ne(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.ne(x, y),
                compare_op=lambda x, y: x != y,
            ),
        ]
        for op in comparison_ops:
            for dt1 in torch.testing.get_all_math_dtypes(device):
                for dt2 in torch.testing.get_all_math_dtypes(device):
                    if (dt1.is_complex or dt2.is_complex
                        ) and not (op["name"] == "eq" or op["name"] == "ne"):
                        continue
                    val1 = value_for_type[dt1]
                    val2 = value_for_type[dt2]
                    t1 = torch.tensor([val1], dtype=dt1, device=device)
                    t2 = torch.tensor([val2], dtype=dt2, device=device)
                    expected = torch.tensor([op["compare_op"](val1, val2)],
                                            dtype=torch.bool)

                    out_res = op["out_op"](t1, t2, device)
                    self.assertEqual(out_res, expected)
                    self.assertTrue(out_res.dtype == torch.bool)
                    self.assertTrue(t1.dtype == dt1)
                    self.assertTrue(t2.dtype == dt2)

                    out_res = op["ret_op"](t1, t2)
                    self.assertEqual(out_res, expected)
                    self.assertTrue(out_res.dtype == torch.bool)
                    self.assertTrue(t1.dtype == dt1)
                    self.assertTrue(t2.dtype == dt2)

                    # test that comparing a zero dim tensor with another zero dim tensor has type promotion behavior
                    t1 = torch.tensor(val1, dtype=dt1, device=device)
                    t2 = torch.tensor(val2, dtype=dt2, device=device)
                    expected = torch.tensor(op["compare_op"](val1, val2),
                                            dtype=torch.bool)

                    out_res = op["out_op"](t1, t2, device)
                    self.assertEqual(out_res, expected)
                    self.assertTrue(out_res.dtype == torch.bool)
                    self.assertTrue(t1.dtype == dt1)
                    self.assertTrue(t2.dtype == dt2)

                    out_res = op["ret_op"](t1, t2)
                    self.assertEqual(out_res, expected)
                    self.assertTrue(out_res.dtype == torch.bool)
                    self.assertTrue(t1.dtype == dt1)
                    self.assertTrue(t2.dtype == dt2)
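The key property exercised by the test: comparison ops promote mixed dtypes internally but always return a bool tensor, leaving the inputs untouched. For instance:

import torch

t1 = torch.tensor([1], dtype=torch.int32)
t2 = torch.tensor([1.0], dtype=torch.float64)
res = torch.ne(t1, t2)
print(res, res.dtype)       # tensor([False]) torch.bool
print(t1.dtype, t2.dtype)   # torch.int32 torch.float64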
Exemplo n.º 37
0
    def test_ne(self):
        x = torch.randn(1, 2, 3, 1, requires_grad=False).int()
        y = torch.randn(1, 4, requires_grad=False).int()
        self.assertONNX(lambda x, y: torch.ne(x, y), (x, y))
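assertONNX is a helper from PyTorch's ONNX test suite; outside of it, the same check is roughly an export of a tiny torch.ne module (the file name here is hypothetical):

import torch

class NeModel(torch.nn.Module):
    def forward(self, x, y):
        return torch.ne(x, y)

x = torch.randn(1, 2, 3, 1).int()
y = torch.randn(1, 4).int()
torch.onnx.export(NeModel(), (x, y), "ne.onnx")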
Exemplo n.º 38
0
    def forward(self, classifications, regressions, anchors, annotations,
                **kwargs):

        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all image sizes are the same, which it is
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[
                j, :, :]  # (h*w*feat_num, num_classes)
            regression = regressions[j, :, :]  # (h*w*feat_num, num_anchor*4)

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] !=
                                              -1]  # (num_boxes, 5)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                    classification_losses.append(
                        torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))
                    classification_losses.append(torch.tensor(0).to(dtype))

                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # different strides

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1
            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4

            positive_indices = torch.ge(IoU_max, 0.5)  # IoU > 0.5

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[
                positive_indices,
                4].long()] = 1  # set the corresponding categories as 1

            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda()

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce  # classification loss

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                   zeros)  # ignore loss if IoU is too small

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(), classifications.detach(), regressBoxes,
                clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True)
    def forward(self, x):
        # padding mask: 1.0 for real tokens, 0.0 where x equals the padding index
        mask = torch.ne(x, self.pad_idx).float()
        return mask
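A minimal usage sketch for the padding-mask forward above, assuming it sits in a small nn.Module that stores pad_idx (the wrapper name PadMask and pad_idx=0 below are illustrative, not part of the snippet):

import torch
import torch.nn as nn


class PadMask(nn.Module):
    """Hypothetical wrapper around the forward shown above."""

    def __init__(self, pad_idx=0):
        super().__init__()
        self.pad_idx = pad_idx

    def forward(self, x):
        # 1.0 for real tokens, 0.0 for padding positions
        return torch.ne(x, self.pad_idx).float()


tokens = torch.tensor([[5, 7, 2, 0, 0],
                       [3, 0, 0, 0, 0]])  # 0 is the padding index here
mask = PadMask(pad_idx=0)(tokens)
# tensor([[1., 1., 1., 0., 0.],
#         [1., 0., 0., 0., 0.]])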
Exemplo n.º 40
0
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = self.alpha
        gamma = self.gamma
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            # If self.loss_with_no_bboxes is True, the focal loss also takes pictures
            # without bboxes into account. So far, however, I haven't figured out the
            # exact parameters that make this method work. (customized by Yu Han Huang)
            if bbox_annotation.shape[0] == 0 and not self.loss_with_no_bboxes:
                regression_losses.append(torch.tensor(0).float().cuda())
                classification_losses.append(torch.tensor(0).float().cuda())

                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
            if bbox_annotation.shape[0] != 0:
                IoU = calc_iou(
                    anchors[0, :, :],
                    bbox_annotation[:, :4])  # num_anchors x num_annotations

                IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # [num_anchors]
                #print(IoU_max.shape)
                #import pdb
                #pdb.set_trace()

                # compute the loss for classification
                targets = torch.ones(classification.shape) * -1
                targets = targets.cuda()

                targets[torch.lt(IoU_max, 0.4), :] = 0

                positive_indices = torch.ge(IoU_max, 0.4)
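                # note: both IoU thresholds are 0.4 here, so every anchor ends up either
                # background (IoU < 0.4) or positive (IoU >= 0.4); none keep the
                # ignore label -1 set above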

                num_positive_anchors = positive_indices.sum()

                assigned_annotations = bbox_annotation[IoU_argmax, :]

                targets[positive_indices, :] = 0
                targets[positive_indices,
                        assigned_annotations[positive_indices, 4].long()] = 1

                alpha_factor = torch.ones(targets.shape).cuda() * alpha

                alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                           1. - alpha_factor)
                focal_weight = torch.where(torch.eq(targets, 1.),
                                           1. - classification, classification)
                focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

                bce = -(targets * torch.log(classification) +
                        (1.0 - targets) * torch.log(1.0 - classification))

                # cls_loss = focal_weight * torch.pow(bce, gamma)
                cls_loss = focal_weight * bce

                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                       torch.zeros(cls_loss.shape).cuda())

                classification_losses.append(
                    cls_loss.sum() /
                    torch.clamp(num_positive_anchors.float(), min=1.0))

            # This is the part of the code where pictures without bboxes are taken
            # into account (customized by Yu Han Huang)
            elif self.loss_with_no_bboxes:
                targets = torch.ones(classification.shape)
                targets = targets.cuda()
                targets[torch.le(classification[:, 0], 0.5), :] = 0
                targets_num = targets.sum()
                targets = 1 - targets
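                # after this inversion, targets are 0 only for anchors whose class-0
                # score exceeded 0.5; the BCE term below, -(1 - targets) * log(1 - p),
                # pushes all class scores of those confident anchors towards 0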
                alpha_factor = torch.ones(
                    targets.shape).cuda() * self.no_bboxes_alpha
                alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                           1. - alpha_factor)
                focal_weight = torch.where(torch.eq(targets, 1.),
                                           1. - classification, classification)
                focal_weight = alpha_factor * torch.pow(
                    focal_weight, self.no_bboxes_gamma)
                bce = -((1.0 - targets) * torch.log(1.0 - classification))
                cls_loss = focal_weight * bce
                classification_losses.append(
                    cls_loss.sum() / torch.clamp(targets_num.float(), min=1.0))

            # compute the loss for regression
            if bbox_annotation.shape[0] != 0:
                if positive_indices.sum() > 0:
                    assigned_annotations = assigned_annotations[
                        positive_indices, :]

                    anchor_widths_pi = anchor_widths[positive_indices]
                    anchor_heights_pi = anchor_heights[positive_indices]
                    anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                    anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                    gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                    gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                    gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                    gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                    # clip widths to 1
                    gt_widths = torch.clamp(gt_widths, min=1)
                    gt_heights = torch.clamp(gt_heights, min=1)

                    targets_dx = (gt_ctr_x -
                                  anchor_ctr_x_pi) / anchor_widths_pi
                    targets_dy = (gt_ctr_y -
                                  anchor_ctr_y_pi) / anchor_heights_pi
                    targets_dw = torch.log(gt_widths / anchor_widths_pi)
                    targets_dh = torch.log(gt_heights / anchor_heights_pi)

                    targets = torch.stack(
                        (targets_dx, targets_dy, targets_dw, targets_dh))
                    targets = targets.t()

                    # normalize the regression targets by the commonly used
                    # box-coder scales (0.1, 0.1, 0.2, 0.2)
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

                    # unused below; ~ avoids the deprecated (1 - bool_tensor) form
                    negative_indices = ~positive_indices

                    regression_diff = torch.abs(
                        targets - regression[positive_indices, :])

                    regression_loss = torch.where(
                        torch.le(regression_diff, 1.0 / 9.0),
                        0.5 * 9.0 * torch.pow(regression_diff, 2),
                        regression_diff - 0.5 / 9.0)
                    regression_losses.append(regression_loss.mean())
                else:
                    regression_losses.append(torch.tensor(0).float().cuda())
            else:
                regression_losses.append(torch.tensor(0).float().cuda())

        return torch.stack(classification_losses).mean(
            dim=0,
            keepdim=True), torch.stack(regression_losses).mean(dim=0,
                                                               keepdim=True)
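A minimal sketch of how a forward like the one in this last example can be invoked. It assumes the surrounding module is called FocalLoss and can be constructed without arguments (the constructor is not shown in the snippet, so FocalLoss() is only illustrative), that classifications are post-sigmoid scores in (0, 1), that anchors come as [1, num_anchors, 4] boxes in (x1, y1, x2, y2) order, that annotations are [batch, max_boxes, 5] rows of (x1, y1, x2, y2, class_id) padded with -1, and that a CUDA device is available, since the snippet hard-codes .cuda():

import torch

B, A, C = 2, 100, 20  # images, anchors, classes (illustrative sizes)

classifications = torch.rand(B, A, C).cuda()    # post-sigmoid class scores in (0, 1)
regressions = torch.randn(B, A, 4).cuda()       # predicted box offsets
top_left = torch.rand(1, A, 2) * 200.0
anchors = torch.cat([top_left, top_left + 50.0], dim=2).cuda()  # [1, A, 4], (x1, y1, x2, y2)

annotations = -torch.ones(B, 3, 5).cuda()       # up to 3 boxes per image, padded with -1
annotations[0, 0] = torch.tensor([30.0, 40.0, 120.0, 160.0, 4.0]).cuda()  # (x1, y1, x2, y2, class_id)

criterion = FocalLoss()  # module whose forward is shown above; name and defaults assumed
cls_loss, reg_loss = criterion(classifications, regressions, anchors, annotations)
print(cls_loss.item(), reg_loss.item())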