Exemplo n.º 1
0
def train_generator_batch(image, label, *, gm, netG, netloss):
    """Run one training step of the bidirectional recurrent generator.

    The clip is walked in both temporal directions at once: frame ``t`` and
    frame ``T - t - 1`` are concatenated along the batch axis, so one call to
    ``netG`` advances the forward and the backward recurrence together.

    Args:
        image: input clip; its shape is unpacked as ``(B, T, _, h, w)``.
        label: target handed to ``netloss``.
        gm: object used as a context manager around the forward pass and
            providing ``backward(loss)``.
        netG: generator exposing ``flownet``, ``do_upsample`` and
            ``hidden_channels``, and callable as ``netG(hidden, flow, frame)``.
        netloss: callable ``(prediction, label) -> loss``.

    Returns:
        The loss; when ``dist.is_distributed()`` it is the all-reduced sum
        divided by the world size.
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    netG.train()
    with gm:
        fwd_states, bwd_states = [], []
        state = F.zeros((2 * B, netG.hidden_channels, h, w))
        for t in range(T):
            mirror = T - t - 1
            current = F.concat([image[:, t, ...], image[:, mirror, ...]],
                               axis=0)
            if t == 0:
                # No earlier frame exists yet: the frame is its own reference.
                reference = current
            else:
                reference = F.concat(
                    [image[:, t - 1, ...], image[:, mirror + 1, ...]], axis=0)
            flow = netG.flownet(current, reference)
            state = netG(state, flow, current)
            # First half of the batch is the forward pass, second half the
            # time-reversed pass.
            fwd_states.append(state[0:B, ...])
            bwd_states.append(state[B:2 * B, ...])
        # The backward state list is in reversed time order, hence the
        # mirrored index.
        frames = [
            netG.do_upsample(fwd_states[t], bwd_states[T - t - 1])
            for t in range(T)
        ]
        res = F.stack(frames, axis=1)  # [B,T,3,H,W] per the original note
        loss = netloss(res + biup, label)
        gm.backward(loss)
        if dist.is_distributed():
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
    return loss
Exemplo n.º 2
0
def bbox_transform_inv_opr(bbox, deltas):
    """Transform the learned deltas to the final bbox coordinates.

    The four box coordinates and the four deltas are indexed along axis 1.

    Args:
        bbox: reference boxes; columns 0..3 are read as ``x1, y1, x2, y2``.
        deltas: regression outputs; columns 0..3 are read as
            ``dx, dy, dw, dh``.

    Returns:
        Tensor of predicted boxes stacked along axis 1 as
        ``x1, y1, x2, y2``.
    """
    # Upper bound applied to dw/dh before exponentiation.
    max_delta = math.log(1000.0 / 16)

    # Width/height use the "+ 1" (inclusive pixel) convention.
    bbox_width = bbox[:, 2] - bbox[:, 0] + 1
    bbox_height = bbox[:, 3] - bbox[:, 1] + 1
    bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width
    bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height
    pred_ctr_x = bbox_ctr_x + deltas[:, 0] * bbox_width
    pred_ctr_y = bbox_ctr_y + deltas[:, 1] * bbox_height

    dw = F.minimum(deltas[:, 2], max_delta)
    dh = F.minimum(deltas[:, 3], max_delta)
    pred_width = bbox_width * F.exp(dw)
    pred_height = bbox_height * F.exp(dh)

    pred_x1 = pred_ctr_x - 0.5 * pred_width
    pred_y1 = pred_ctr_y - 0.5 * pred_height
    pred_x2 = pred_ctr_x + 0.5 * pred_width
    pred_y2 = pred_ctr_y + 0.5 * pred_height
    pred_boxes = F.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=1)
    return pred_boxes
Exemplo n.º 3
0
def test_generator_batch(image, *, netG):
    """Run the bidirectional recurrent generator in evaluation mode.

    Frame ``t`` and frame ``T - t - 1`` are concatenated along the batch axis
    so a single ``netG`` call advances both temporal directions.

    Args:
        image: input clip; its shape is unpacked as ``(B, T, _, h, w)``
            (the original note gives ``[1,100,3,180,320]`` as an example).
        netG: generator exposing ``flownet``, ``do_upsample`` and
            ``hidden_channels``, and callable as ``netG(hidden, flow, frame)``.

    Returns:
        The stacked per-frame outputs plus ``get_bilinear(image)``.
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    netG.eval()

    fwd_states, bwd_states = [], []
    state = F.zeros((2 * B, netG.hidden_channels, h, w))
    for t in range(T):
        mirror = T - t - 1
        current = F.concat([image[:, t, ...], image[:, mirror, ...]], axis=0)
        if t == 0:
            # No earlier frame exists yet: the frame is its own reference.
            reference = current
        else:
            reference = F.concat(
                [image[:, t - 1, ...], image[:, mirror + 1, ...]], axis=0)
        flow = netG.flownet(current, reference)
        state = netG(state, flow, current)
        fwd_states.append(state[0:B, ...])
        bwd_states.append(state[B:2 * B, ...])

    # The backward list is stored in reversed time order.
    frames = [
        netG.do_upsample(fwd_states[t], bwd_states[T - t - 1])
        for t in range(T)
    ]
    res = F.stack(frames, axis=1)  # [B,T,3,H,W] per the original note
    return res + biup
Exemplo n.º 4
0
 def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
     """Turn per-point distance deltas into corner coordinates.

     Columns 0 and 1 of ``anchors`` are used as the reference point. Every
     group of four delta columns is read as (left, top, right, bottom):
     the first two are subtracted from the point, the last two added.

     Returns:
         Tensor with the same shape as ``deltas``.
     """
     point_x = F.expand_dims(anchors[:, 0], axis=1)
     point_y = F.expand_dims(anchors[:, 1], axis=1)
     corners = [
         point_x - deltas[:, 0::4],
         point_y - deltas[:, 1::4],
         point_x + deltas[:, 2::4],
         point_y + deltas[:, 3::4],
     ]
     # Stacking on a new last axis and reshaping interleaves the four
     # coordinates back into the layout of ``deltas``.
     return F.stack(corners, axis=2).reshape(deltas.shape)
Exemplo n.º 5
0
    def get_center_offsets(self, featmap, stride):
        """Build the per-cell ``(x, y, x, y)`` shifts for a feature map.

        Args:
            featmap: feature map; ``featmap.shape[2:]`` is unpacked as
                ``(height, width)``.
            stride: multiplier applied to the cell indices.

        Returns:
            Tensor with one row per cell (row-major over the grid) and four
            columns ``x, y, x, y``.
        """
        fm_height, fm_width = featmap.shape[2:]
        grid_shape = (fm_height, fm_width)

        xs = F.linspace(0, fm_width - 1, fm_width) * stride
        ys = F.linspace(0, fm_height - 1, fm_height) * stride

        # Expand both 1-D axes to the full grid, then flatten row-major.
        flat_x = F.broadcast_to(xs.reshape(1, -1), grid_shape).flatten()
        flat_y = F.broadcast_to(ys.reshape(-1, 1), grid_shape).flatten()

        return F.stack([flat_x, flat_y, flat_x, flat_y], axis=1)
Exemplo n.º 6
0
    def generate_anchors_opr(self,
                             fm_3x3,
                             fm_stride,
                             anchor_scales=(8, 16, 32, 64, 128),
                             anchor_ratios=(1, 2, 3),
                             base_size=4):
        """Tile the base anchors over every cell of a feature map.

        Args:
            fm_3x3: feature map; axes 2 and 3 are read as height and width,
                and its ``device`` is used for the created tensors.
            fm_stride: multiplier applied to the cell indices.
            anchor_scales: scales forwarded to ``generate_anchors``.
            anchor_ratios: ratios forwarded to ``generate_anchors``.
            base_size: base size forwarded to ``generate_anchors``.

        Returns:
            Detached tensor with one shifted base anchor per row.
        """
        base = generate_anchors(base_size=base_size,
                                ratios=np.array(anchor_ratios),
                                scales=np.array(anchor_scales))
        device = fm_3x3.device
        anchors = mge.tensor(base).to(device)

        height, width = fm_3x3.shape[2], fm_3x3.shape[3]
        grid_shape = (height, width)
        xs = F.linspace(0, width - 1, width).to(device) * fm_stride
        ys = F.linspace(0, height - 1, height).to(device) * fm_stride

        flat_x = F.broadcast_to(xs.reshape(1, -1), grid_shape).flatten()
        flat_y = F.broadcast_to(ys.reshape(-1, 1), grid_shape).flatten()
        shifts = F.stack([flat_x, flat_y, flat_x, flat_y], axis=1)

        # Broadcasting adds every base anchor to every cell shift.
        num_coords = anchors.shape[1]
        shifted = (F.expand_dims(anchors, axis=0) +
                   F.expand_dims(shifts, axis=1))
        return shifted.reshape(-1, num_coords).detach()
Exemplo n.º 7
0
    def anchor_iou_target_opr(self, boxes, im_info, all_anchors,
                              rpn_bbox_offsets):
        """Compute, per image, the best two overlaps for each anchor.

        Each anchor is duplicated so it can carry two predictions. For every
        image the predictions are decoded, their overlaps with the ground
        truth boxes are computed, and for every anchor the best overlap of the
        first prediction and the best overlap of the second prediction
        (excluding the box the first one picked) are returned.

        Args:
            boxes: batched ground-truth boxes; column 4 is compared against
                ``config.anchor_ignore_label``.
            im_info: per-image info; column 5 is used as the number of valid
                ground-truth boxes.
            all_anchors: anchors; the first four columns are the coordinates.
            rpn_bbox_offsets: batched predicted offsets, reshaped to rows of
                four per image.

        Returns:
            Tensor concatenated over images along axis 0; each entry stacks
            the two per-anchor overlap vectors along axis 1.
        """
        # The duplicated anchor table does not depend on the image, so it is
        # built once instead of once per loop iteration.
        an, ac = all_anchors.shape[0], all_anchors.shape[1]
        anchors = F.broadcast_to(F.expand_dims(all_anchors, 1),
                                 (an, 2, ac)).reshape(-1, ac)

        res = []
        for i in range(rpn_bbox_offsets.shape[0]):
            gtboxes = boxes[i, :im_info[i, 5].astype(np.int32)]
            offsets = rpn_bbox_offsets[i].reshape(-1, 4).detach()

            dtboxes = bbox_transform_inv_opr(anchors[:, :4], offsets[:, :4])
            overlaps = box_overlap_opr(dtboxes, gtboxes[:, :4])

            # Zero out overlaps with ground-truth boxes carrying the ignore
            # label.
            ignore_mask = 1 - F.equal(
                gtboxes[:, 4], config.anchor_ignore_label).astype(np.float32)
            ignore_mask = F.expand_dims(ignore_mask, axis=0)
            overlaps = overlaps * ignore_mask

            # Split the two predictions of each anchor onto axis 0.
            overlaps = overlaps.reshape(-1, 2,
                                        overlaps.shape[1]).transpose(1, 0, 2)
            a, b = overlaps[0], overlaps[1]

            index = F.argmax(a, axis=1)
            a = F.nn.indexing_one_hot(a, index, 1)
            # Prevent the second prediction from choosing the same
            # ground-truth box as the first.
            b = F.scatter(b, 1, index.reshape(-1, 1), F.zeros([b.shape[0], 1]))
            index = F.argmax(b, axis=1)
            b = F.nn.indexing_one_hot(b, index, 1)

            res.append(F.expand_dims(F.stack([a, b], axis=1), axis=0))

        result = F.concat(res, 0)
        return result
Exemplo n.º 8
0
    def forward(self, pred_cls_list, rpn_num_prob_list, pred_reg_list,
                anchors_list, rpn_iou_list, boxes, im_info):
        """Compute the RPN training losses.

        Args:
            pred_cls_list: per-level classification outputs, concatenated
                along axis 1.
            rpn_num_prob_list: per-level "number of proposals" outputs,
                concatenated along axis 1.
            pred_reg_list: per-level box offset outputs, concatenated along
                axis 1.
            anchors_list: per-level anchors; the level index is appended to
                each anchor as an extra column.
            rpn_iou_list: per-level IoU outputs, concatenated along axis 1.
            boxes: ground-truth boxes forwarded to the target builders.
            im_info: image info forwarded to the target builders.

        Returns:
            dict with keys ``rpn_cls_loss``, ``rpn_bbox_loss``,
            ``rpn_iou_loss`` and ``rpn_num_loss``; the bbox and IoU losses are
            weighted by 2.
        """
        all_anchors_list = [
            F.concat([a, i * F.ones([a.shape[0], 1]).to(a.device)], axis=1)
            for i, a in enumerate(anchors_list)
        ]
        all_anchors_final = F.concat(all_anchors_list, axis=0)

        rpn_bbox_offset_final = F.concat(pred_reg_list, axis=1)
        rpn_cls_prob_final = F.concat(pred_cls_list, axis=1)
        rpn_iou_prob_final = F.concat(rpn_iou_list, axis=1)
        rpn_num_per_points_final = F.concat(rpn_num_prob_list, axis=1)

        rpn_labels, rpn_target_boxes = rpn_anchor_target_opr(
            boxes, im_info, all_anchors_final)
        ious_target = self.anchor_iou_target_opr(boxes, im_info,
                                                 all_anchors_final,
                                                 rpn_bbox_offset_final)

        n = rpn_labels.shape[0]
        target_boxes = rpn_target_boxes.reshape(n, -1, 4)
        rpn_cls_prob_final = rpn_cls_prob_final.reshape(n, -1, 1)
        offsets_final = rpn_bbox_offset_final.reshape(n, -1, 4)

        # Split the two labels carried by each anchor.
        rpn_labels = rpn_labels.transpose(2, 0, 1)
        a, b = rpn_labels[0], rpn_labels[1]

        # Where both labels are 0 the second one becomes -1.
        ignores = b - F.equal(a, 0).astype(np.float32) * F.equal(b, 0).astype(
            np.float32)
        labels = F.stack([a, ignores], axis=2).reshape(n, -1)
        cls_loss = sigmoid_cross_entropy_retina(rpn_cls_prob_final,
                                                labels,
                                                alpha=config.focal_loss_alpha,
                                                gamma=config.focal_loss_gamma)
        rpn_bbox_loss = smooth_l1_loss_retina(offsets_final, target_boxes,
                                              labels)

        rpn_labels = labels.reshape(n, -1, 2)
        rpn_iou_loss = iou_l1_loss(rpn_iou_prob_final, ious_target, rpn_labels)

        # Whether one anchor produces one proposal or two: count the positive
        # labels per anchor and shift by one. ``flatten`` already yields a
        # 1-D tensor.
        nlabels = ((labels.reshape(n, -1, 2) > 0).sum(2)).flatten() - 1
        c = rpn_num_per_points_final.shape[2]
        rpn_num_per_points_final = rpn_num_per_points_final.reshape(-1, c)
        rpn_num_loss = softmax_loss(rpn_num_per_points_final, nlabels)

        loss_dict = {
            'rpn_cls_loss': cls_loss,
            'rpn_bbox_loss': 2 * rpn_bbox_loss,
            'rpn_iou_loss': 2 * rpn_iou_loss,
            'rpn_num_loss': rpn_num_loss,
        }
        return loss_dict
Exemplo n.º 9
0
def mesh_grid(B, H, W):
    """Return a coordinate grid with x in channel 0 and y in channel 1.

    Args:
        B: number of copies along the leading axis.
        H: grid height.
        W: grid width.

    Returns:
        Tensor stacked along axis 1 (``B2HW`` per the original note).
    """
    # Column indices, repeated for every row and batch entry.
    col_idx = F.tile(F.arange(0, W), (B, H, 1))
    # Row indices are tiled as (B, W, H) and then swapped into (B, H, W).
    row_idx = F.tile(F.arange(0, H), (B, W, 1)).transpose(0, 2, 1)
    return F.stack([col_idx, row_idx], 1)
Exemplo n.º 10
0
def _compute_pos_area(gtboxes, ratio=0.3):
    """Return a box around each ground-truth centre, scaled by ``ratio``.

    Args:
        gtboxes: boxes whose columns 0..3 are read as ``x1, y1, x2, y2``.
        ratio: fraction of the box width/height extended on each side of the
            centre returned by ``_compute_center``.

    Returns:
        Tensor stacked along axis 1 as ``left, top, right, bottom``.
    """
    heights = gtboxes[:, 3] - gtboxes[:, 1]
    widths = gtboxes[:, 2] - gtboxes[:, 0]
    centres = _compute_center(gtboxes)
    left = centres[:, 0] - ratio * widths
    right = centres[:, 0] + ratio * widths
    top = centres[:, 1] - ratio * heights
    bottom = centres[:, 1] + ratio * heights
    return F.stack([left, top, right, bottom], axis=1)
Exemplo n.º 11
0
    def compute_gemini_loss(self, prob, bbox_targets, labels):
        """Return the order-invariant loss over paired predictions.

        Consecutive rows of ``prob`` are treated as a pair. The loss is
        computed for both orderings of each pair, the smaller one is kept,
        and the total is divided by the number of pairs whose two labels are
        both greater than -1 (at least 1).
        """
        width = prob.shape[1]
        paired = prob.reshape(-1, 2, width).transpose(1, 0, 2)
        first, second = paired[0], paired[1]

        candidates = F.stack([
            self.compute_emd_loss(first, second, bbox_targets, labels),
            self.compute_emd_loss(second, first, bbox_targets, labels),
        ], axis=1)

        valid_pairs = (labels > -1).reshape(-1, 2).sum(axis=1) > 1
        normalizer = F.maximum(valid_pairs.sum(), 1)
        return candidates.min(axis=1).sum() / normalizer
Exemplo n.º 12
0
    def compute_gemini_loss_opr(self, prob, bbox_targets, labels):
        """Return the order-invariant loss over paired predictions.

        ``prob`` is reshaped so that each row holds two predictions. The loss
        is computed for both orderings of the pair, the smaller one is kept
        per row, and the sum is divided by the number of rows (at least 1).

        Returns:
            dict with the single key ``rcnn_emd_loss``.
        """
        prob = prob.reshape(prob.shape[0], 2, -1).transpose(1, 0, 2)
        a, b = prob[0], prob[1]
        loss0 = self.compute_emd_loss_opr(a, b, bbox_targets, labels)
        loss1 = self.compute_emd_loss_opr(b, a, bbox_targets, labels)
        # ``F.stack`` takes ``axis``; ``dim`` is the PyTorch spelling.
        loss = F.stack([loss0, loss1], axis=1)
        # ``Tensor.min(axis=...)`` returns the values directly, so no ``[0]``
        # is needed (indexing would keep only the first row's minimum).
        # NOTE(review): ``loss.shape[0]`` may be a plain int here; confirm
        # ``F.maximum`` accepts two Python scalars in the MegEngine version
        # in use.
        emd_loss = loss.min(axis=1).sum() / F.maximum(loss.shape[0], 1)
        loss = {'rcnn_emd_loss': emd_loss}
        return loss
Exemplo n.º 13
0
 def generate_anchors_by_features(self, sizes, device):
     """Build the anchors of every feature level.

     Args:
         sizes: one entry per feature level, forwarded to
             ``create_anchor_grid``; its length must equal
             ``self.num_features``.
         device: device forwarded to ``create_anchor_grid``.

     Returns:
         List with one ``(-1, 4)`` tensor per level, holding every base
         anchor shifted to every grid position.
     """
     assert len(sizes) == self.num_features, (
         "input features expected {}, got {}".format(self.num_features, len(sizes))
     )
     per_level = []
     for size, stride, base_anchor in zip(sizes, self.strides, self.base_anchors):
         grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
         shifts = F.stack([grid_x, grid_y, grid_x, grid_y], axis=1)
         # Broadcasting pairs every grid shift with every base anchor.
         shifted = F.expand_dims(shifts, axis=1) + F.expand_dims(base_anchor, axis=0)
         per_level.append(shifted.reshape(-1, 4))
     return per_level
Exemplo n.º 14
0
def mesh_grid_mge(B, H, W):
    """Return a homogeneous coordinate grid: x, y and a channel of ones.

    Args:
        B: number of copies along the leading axis.
        H: grid height.
        W: grid width.

    Returns:
        Tensor stacked along axis 1 (``B3HW`` per the original note).
    """
    # Column indices, repeated for every row and batch entry.
    col_idx = F.tile(F.arange(0, W), (B, H, 1))
    # Row indices are tiled as (B, W, H) and then swapped into (B, H, W).
    row_idx = F.tile(F.arange(0, H), (B, W, 1)).transpose(0, 2, 1)
    unit = F.ones_like(col_idx)
    return F.stack([col_idx, row_idx, unit], 1)
Exemplo n.º 15
0
def train_generator_batch(image, label, *, gm, netG, netloss):
    """Run one training step of the feature-aggregation generator.

    Per-frame features are extracted with ``netG.rgb``, aggregated once in
    forward and once in backward temporal order with ``netG.aggr``, and the
    two states of each frame are upsampled together with ``netG.upsample``.

    Args:
        image: input clip; its shape is unpacked as ``(B, T, _, h, w)`` and
            it is reshaped with 3 channels.
        label: target handed to ``netloss``.
        gm: object used as a context manager around the forward pass and
            providing ``backward(loss)``.
        netG: generator exposing ``rgb``, ``aggr`` and ``upsample``.
        netloss: callable ``(prediction, label, mask) -> loss``.

    Returns:
        The loss; when ``dist.is_distributed()`` it is the all-reduced sum
        divided by the world size.
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    # An edge-based weighting of the label was previously computed here; it
    # is disabled, so the mask is a constant 1.
    HR_mask = 1
    netG.train()
    with gm:
        # Extract features for all frames in one batched call.
        feats = netG.rgb(image.reshape(B * T, 3, h, w)).reshape(B, T, -1, h, w)

        # Forward pass: starts from the first frame's features.
        state = feats[:, 0, ...]
        forward_hiddens = [state]
        for t in range(1, T):
            state = netG.aggr(F.concat([state, feats[:, t, ...]], axis=1))
            forward_hiddens.append(state)

        # Backward pass: starts from the last frame's features, so the list
        # ends up in reversed time order.
        state = feats[:, T - 1, ...]
        backward_hiddens = [state]
        for t in range(T - 2, -1, -1):
            state = netG.aggr(F.concat([state, feats[:, t, ...]], axis=1))
            backward_hiddens.append(state)

        # Upsample every frame from its forward and backward state.
        frames = [
            netG.upsample(
                F.concat([forward_hiddens[t], backward_hiddens[T - t - 1]],
                         axis=1)) for t in range(T)
        ]
        res = F.stack(frames, axis=1)  # [B,T,3,H,W] per the original note
        res = res + biup
        loss = netloss(res, label, HR_mask)
        gm.backward(loss)
        if dist.is_distributed():
            loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size()
    return loss
Exemplo n.º 16
0
    def encode(self, bbox: Tensor, gt: Tensor) -> Tensor:
        """Encode ``gt`` relative to ``bbox`` as normalized regression targets.

        Both inputs are converted by ``self._box_ltrb_to_cs_opr`` into
        (width, height, centre x, centre y). The targets are the centre
        offsets divided by the reference size and the log size ratios; they
        are then shifted by ``self.reg_mean`` and divided by ``self.reg_std``.

        Returns:
            Tensor stacked along axis 1 as ``dx, dy, dw, dh``.
        """
        ref_w, ref_h, ref_cx, ref_cy = self._box_ltrb_to_cs_opr(bbox)
        gt_w, gt_h, gt_cx, gt_cy = self._box_ltrb_to_cs_opr(gt)

        components = [
            (gt_cx - ref_cx) / ref_w,
            (gt_cy - ref_cy) / ref_h,
            F.log(gt_w / ref_w),
            F.log(gt_h / ref_h),
        ]
        target = F.stack(components, axis=1)

        target -= self.reg_mean
        target /= self.reg_std
        return target
Exemplo n.º 17
0
    def get_ground_truth(self, anchors, batched_gt_boxes, batched_num_gts):
        """Build per-anchor labels and regression targets for a batch.

        Args:
            anchors: anchors shared by all images.
            batched_gt_boxes: ground-truth boxes per image; the first four
                columns are coordinates and column 4 is the class.
            batched_num_gts: number of valid ground-truth boxes per image.

        Returns:
            Tuple ``(labels, offsets)``, each stacked over images along
            axis 0 and detached.
        """
        per_image_labels = []
        per_image_offsets = []

        for bid in range(batched_gt_boxes.shape[0]):
            valid_gt = batched_gt_boxes[bid, :batched_num_gts[bid]]

            iou = layers.get_iou(valid_gt[:, :4], anchors)
            match_indices, labels = self.matcher(iou)
            matched = valid_gt[match_indices]

            # Anchors the matcher labelled 1 take the class of their matched
            # box.
            positive = labels == 1
            labels[positive] = matched[positive, 4].astype(np.int32)
            offsets = self.box_coder.encode(anchors, matched[:, :4])

            per_image_labels.append(labels)
            per_image_offsets.append(offsets)

        stacked_labels = F.stack(per_image_labels, axis=0).detach()
        stacked_offsets = F.stack(per_image_offsets, axis=0).detach()
        return stacked_labels, stacked_offsets
Exemplo n.º 18
0
 def generate_anchors_by_features(self, sizes, device):
     """Build the anchor points of every feature level.

     Args:
         sizes: one entry per feature level, forwarded to
             ``create_anchor_grid``; its length must equal
             ``self.num_features``.
         device: device forwarded to ``create_anchor_grid``.

     Returns:
         List with one ``(-1, 2)`` tensor per level, in which every grid
         point appears ``self.num_anchors`` times in a row.
     """
     assert len(sizes) == self.num_features, (
         "input features expected {}, got {}".format(self.num_features, len(sizes))
     )
     per_level = []
     for size, stride in zip(sizes, self.strides):
         grid_x, grid_y = create_anchor_grid(size, self.offset, stride, device)
         points = F.stack([grid_x, grid_y], axis=1)
         repeated_shape = (points.shape[0], self.num_anchors, 2)
         # FIXME: need F.repeat
         repeated = F.broadcast_to(F.expand_dims(points, axis=1), repeated_shape)
         per_level.append(repeated.reshape(-1, 2))
     return per_level
Exemplo n.º 19
0
    def compute_emd_loss(self, a, b, bbox_targets, labels):
        """Compute the per-pair loss for one ordering of two predictions.

        ``a`` and ``b`` are interleaved row by row, split into box outputs
        (all but the last ``self.n`` columns) and class scores (the last
        ``self.n`` columns), and compared against the flattened targets.

        Returns:
            Classification plus box loss, summed over the two predictions
            of each pair.
        """
        c = a.shape[1]
        # Interleave so that rows 2k and 2k+1 are the k-th entries of a and b.
        prob = F.stack([a, b], axis=1).reshape(-1, c)
        pred_bbox, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
        bbox_targets, labels = bbox_targets.reshape(-1, 4), labels.flatten()

        cls_loss = softmax_loss_opr(cls_scores, labels)
        pred_bbox = pred_bbox.reshape(-1, self.n, 4)
        rcnn_bbox_loss = smooth_l1_loss_rcnn_opr(pred_bbox, bbox_targets, labels,
            config.rcnn_smooth_l1_beta)
        loss = cls_loss + rcnn_bbox_loss
        loss = loss.reshape(-1, 2).sum(axis=1)
        return loss
Exemplo n.º 20
0
    def decode_outputs(self, outputs):
        """Convert raw head outputs to absolute boxes, in place.

        For each ``(hsize, wsize)`` in ``self.hw`` and matching stride in
        ``self.strides`` a grid of cell coordinates and a stride column are
        built. Channels 0..1 of ``outputs`` become
        ``(value + grid) * stride`` and channels 2..3 become
        ``exp(value) * stride``.

        Returns:
            The same ``outputs`` object, modified in place.
        """
        level_grids = []
        level_strides = []
        for (hsize, wsize), stride in zip(self.hw, self.strides):
            xv, yv = meshgrid(F.arange(hsize), F.arange(wsize))
            cells = F.stack((xv, yv), 2).reshape(1, -1, 2)
            level_grids.append(cells)
            # One stride value per grid cell of this level.
            level_strides.append(F.full((*cells.shape[:2], 1), stride))

        grids = F.concat(level_grids, axis=1)
        strides = F.concat(level_strides, axis=1)

        outputs[..., :2] = (outputs[..., :2] + grids) * strides
        outputs[..., 2:4] = F.exp(outputs[..., 2:4]) * strides
        return outputs
Exemplo n.º 21
0
    def compute_emd_loss_opr(self, a, b, bbox_targets, labels):
        """Compute the per-pair loss for one ordering of two predictions.

        ``a`` and ``b`` are interleaved row by row, split into box offsets
        (all but the last ``self.n`` columns) and class scores (the last
        ``self.n`` columns). Pairs whose two labels are both negative
        contribute zero.

        Returns:
            Classification plus regression loss summed per pair and masked.
        """
        labels = labels.flatten()
        width = a.shape[1]
        # Rows 2k and 2k+1 are the k-th entries of a and b.
        interleaved = F.stack([a, b], axis=1).reshape(-1, width)
        offsets = interleaved[:, :-self.n]
        cls_score = interleaved[:, -self.n:]

        cls_loss = softmax_loss_opr(cls_score, labels)
        offsets = offsets.reshape(offsets.shape[0], -1, 4)
        bbox_targets = bbox_targets.reshape(-1, 4)
        reg_loss = smooth_l1_loss_rcnn_opr(offsets, bbox_targets,
            labels, sigma = config.rcnn_smooth_l1_beta)

        both_negative = (labels < 0).reshape(-1, 2).sum(axis=1) > 1
        vlabel = 1 - both_negative
        pair_loss = (cls_loss + 1 * reg_loss).reshape(-1, 2).sum(axis=1)
        return pair_loss * vlabel
Exemplo n.º 22
0
def bbox_transform_opr(bbox, gt):
    """Transform the bounding box and ground truth to the loss targets.

    The 4 box coordinates are in axis 1. Widths and heights use the "+ 1"
    convention.

    Returns:
        Tensor stacked along axis 1 as ``dx, dy, dw, dh``.
    """

    def _size_and_centre(boxes):
        # (width, height, centre x, centre y) of x1, y1, x2, y2 boxes.
        width = boxes[:, 2] - boxes[:, 0] + 1
        height = boxes[:, 3] - boxes[:, 1] + 1
        centre_x = boxes[:, 0] + 0.5 * width
        centre_y = boxes[:, 1] + 0.5 * height
        return width, height, centre_x, centre_y

    ref_w, ref_h, ref_cx, ref_cy = _size_and_centre(bbox)
    gt_w, gt_h, gt_cx, gt_cy = _size_and_centre(gt)

    components = [
        (gt_cx - ref_cx) / ref_w,
        (gt_cy - ref_cy) / ref_h,
        F.log(gt_w / ref_w),
        F.log(gt_h / ref_h),
    ]
    return F.stack(components, axis=1)
Exemplo n.º 23
0
    def forward(self, fpn_fms, rcnn_rois, labels=None, bbox_targets=None):
        """Run the RCNN head: losses in training, scored boxes otherwise.

        Args:
            fpn_fms: list of FPN feature maps; the first entry is dropped and
                the rest reversed (on a copy) to match ``stride``.
            rcnn_rois: RoIs; columns 1..4 are used as box coordinates at
                inference.
            labels: training labels, forwarded to ``roi_pool`` and the loss.
            bbox_targets: training box targets, forwarded likewise.

        Returns:
            In training, a dict with ``loss_rcnn_emd`` and, when
            ``self.refinement`` is set, ``final_rcnn_emd``. Otherwise a
            tensor of predicted boxes with the class probabilities
            concatenated on the last axis.
        """
        # stride: 64,32,16,8,4 -> 4, 8, 16, 32
        fpn_fms = fpn_fms[1:]
        fpn_fms.reverse()
        stride = [4, 8, 16, 32]
        poo5, rcnn_rois, labels, bbox_targets = roi_pool(
            fpn_fms, rcnn_rois, stride, (7, 7), 'roi_align', labels,
            bbox_targets)
        poo5 = F.flatten(poo5, start_axis=1)
        fc1 = F.relu(self.fc1(poo5))
        fc2 = F.relu(self.fc2(fc1))

        # Two prediction branches per RoI, interleaved row by row.
        a = self.a(fc2)
        b = self.b(fc2)
        prob = F.stack([a, b], axis=1).reshape(-1, a.shape[1])

        if self.refinement:
            final_prob = self.refinement_module(prob, fc2)

        if self.training:

            emd_loss = self.compute_gemini_loss(prob, bbox_targets, labels)
            loss_dict = {}
            loss_dict['loss_rcnn_emd'] = emd_loss
            # Guard with the same flag that produced ``final_prob``; the
            # bound method ``self.refinement_module`` is always truthy and
            # would lead to a NameError when refinement is disabled.
            if self.refinement:
                final_emd_loss = self.compute_gemini_loss(
                    final_prob, bbox_targets, labels)
                loss_dict['final_rcnn_emd'] = final_emd_loss
            return loss_dict
        else:

            offsets, cls_scores = prob[:, :-self.n], prob[:, -self.n:]
            pred_bbox = offsets.reshape(-1, self.n, 4)
            cls_prob = F.softmax(cls_scores, axis=1)
            n = rcnn_rois.shape[0]
            # Duplicate each RoI so it lines up with the two predictions.
            rois = F.broadcast_to(F.expand_dims(rcnn_rois[:, 1:5], axis=1),
                                  (n, 2, 4)).reshape(-1, 4)
            normalized = config.rcnn_bbox_normalize_targets
            pred_boxes = restore_bbox(rois, pred_bbox, normalized, config)
            pred_bbox = F.concat(
                [pred_boxes, F.expand_dims(cls_prob, axis=2)], axis=2)
            return pred_bbox
Exemplo n.º 24
0
    def refinement_module(self, prob, fc2):
        """Refine the two predictions using the first-stage outputs.

        The first-stage box and score at index 1 of each row are repeated six
        times and concatenated to the duplicated ``fc2`` features. The result
        goes through ``self.fc3``, and the two halves are fed to ``self.q``
        and ``self.r``.

        Args:
            prob: first-stage outputs, reshaped to rows of ``5 * self.n``.
            fc2: 2-D features from the shared head.

        Returns:
            Refined outputs, interleaved and reshaped to rows of
            ``10 * self.n``.
        """
        m = prob.reshape(-1, 5*self.n)
        offsets, scores = m[:, :-self.n], m[:, -self.n:]
        offsets = offsets.reshape(-1, self.n, 4)
        cls_scores = F.expand_dims(F.softmax(scores, axis=1), axis=2)
        # Keep only index 1 along the second axis: four offsets plus a score.
        pred_boxes = F.concat([offsets, cls_scores], axis=2)[:, 1]
        num_boxes, box_dim = pred_boxes.shape
        pred_boxes = F.broadcast_to(
            F.expand_dims(pred_boxes, axis=1),
            (num_boxes, 6, box_dim)).reshape(num_boxes, -1)

        num_rois, feat_dim = fc2.shape
        # Duplicate every feature row so it lines up with the two
        # predictions.
        fc3 = F.broadcast_to(F.expand_dims(fc2, axis=1),
                             (num_rois, 2, feat_dim)).reshape(-1, feat_dim)
        fc3 = F.concat([fc3, pred_boxes], axis=1)
        fc3 = self.relu(self.fc3(fc3))
        fc3 = fc3.reshape(num_rois, 2, -1).transpose(1, 0, 2)

        a = self.q(fc3[0])
        b = self.r(fc3[1])
        prob = F.stack([a, b], axis=1).reshape(-1, 10*self.n)
        return prob
Exemplo n.º 25
0
    def get_output_and_grid(self, output, k, stride, dtype):
        """Decode one level's raw output to absolute boxes and return its grid.

        The grid cached in ``self.grids[k]`` is rebuilt when its axes 2..3
        differ from those of ``output``. Channels 0..1 of the reshaped output
        become ``(value + grid) * stride`` and channels 2..3 become
        ``exp(value) * stride``.

        Args:
            output: raw head output; its last two axes are height and width.
            k: index into ``self.grids``.
            stride: multiplier for this level.
            dtype: type the rebuilt grid is converted to.

        Returns:
            Tuple ``(output, grid)`` with ``grid`` viewed as ``(1, -1, 2)``.
        """
        grid = self.grids[k]

        num_images = output.shape[0]
        channels = 5 + self.num_classes
        hsize, wsize = output.shape[-2:]
        if grid.shape[2:4] != output.shape[2:4]:
            yv, xv = meshgrid([F.arange(hsize), F.arange(wsize)])
            cells = F.stack((xv, yv), 2)
            grid = cells.reshape(1, 1, hsize, wsize, 2).type(dtype)
            self.grids[k] = grid

        # Move the channel axis last, then merge anchors and positions.
        output = output.view(num_images, self.n_anchors, channels, hsize, wsize)
        output = output.permute(0, 1, 3, 4, 2)
        output = output.reshape(num_images, self.n_anchors * hsize * wsize, -1)

        grid = grid.view(1, -1, 2)
        output[..., :2] = (output[..., :2] + grid) * stride
        output[..., 2:4] = F.exp(output[..., 2:4]) * stride
        return output, grid
Exemplo n.º 26
0
    def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
        """Decode normalized regression deltas into corner boxes.

        ``deltas`` is de-normalized with ``self.reg_std`` and
        ``self.reg_mean`` and then applied to the anchors' size and centre as
        returned by ``self._box_ltrb_to_cs_opr(anchors, 1)``. Every group of
        four delta columns is read as ``dx, dy, dw, dh``.

        The input ``deltas`` tensor is not modified.

        Returns:
            Tensor with one row per anchor and the decoded
            ``x1, y1, x2, y2`` of every group flattened along axis 1.
        """
        # Out-of-place on purpose: in-place ``*=`` / ``+=`` would change the
        # caller's tensor and make a second decode of the same deltas wrong.
        deltas = deltas * self.reg_std + self.reg_mean

        (
            anchor_width,
            anchor_height,
            anchor_ctr_x,
            anchor_ctr_y,
        ) = self._box_ltrb_to_cs_opr(anchors, 1)
        pred_ctr_x = anchor_ctr_x + deltas[:, 0::4] * anchor_width
        pred_ctr_y = anchor_ctr_y + deltas[:, 1::4] * anchor_height
        pred_width = anchor_width * F.exp(deltas[:, 2::4])
        pred_height = anchor_height * F.exp(deltas[:, 3::4])

        pred_x1 = pred_ctr_x - 0.5 * pred_width
        pred_y1 = pred_ctr_y - 0.5 * pred_height
        pred_x2 = pred_ctr_x + 0.5 * pred_width
        pred_y2 = pred_ctr_y + 0.5 * pred_height

        # Stack on a new last axis, then flatten to interleave coordinates.
        pred_box = F.stack([pred_x1, pred_y1, pred_x2, pred_y2], axis=2)
        pred_box = pred_box.reshape(pred_box.shape[0], -1)

        return pred_box
Exemplo n.º 27
0
def test_generator_batch(image, *, netG):
    """Run the feature-aggregation generator in evaluation mode.

    Per-frame features are extracted with ``netG.rgb``, aggregated once in
    forward and once in backward temporal order with ``netG.aggr``, and the
    two states of each frame are upsampled together with ``netG.upsample``.
    Progress of the three loops is reported through ``tqdm``.

    Args:
        image: input clip; its shape is unpacked as ``(B, T, _, h, w)`` and
            it is reshaped with 3 channels.
        netG: generator exposing ``rgb``, ``aggr`` and ``upsample``.

    Returns:
        The stacked per-frame outputs plus ``get_bilinear(image)``.
    """
    B, T, _, h, w = image.shape
    biup = get_bilinear(image)
    netG.eval()

    # Extract features for all frames in one batched call.
    feats = netG.rgb(image.reshape(B * T, 3, h, w)).reshape(B, T, -1, h, w)

    # Forward pass: starts from the first frame's features.
    state = feats[:, 0, ...]
    forward_hiddens = [state]
    for t in tqdm(range(1, T)):
        state = netG.aggr(F.concat([state, feats[:, t, ...]], axis=1))
        forward_hiddens.append(state)

    # Backward pass: starts from the last frame's features, so the list ends
    # up in reversed time order.
    state = feats[:, T - 1, ...]
    backward_hiddens = [state]
    for t in tqdm(range(T - 2, -1, -1)):
        state = netG.aggr(F.concat([state, feats[:, t, ...]], axis=1))
        backward_hiddens.append(state)

    # Upsample every frame from its forward and backward state.
    frames = []
    for t in tqdm(range(T)):
        merged = F.concat([forward_hiddens[t], backward_hiddens[T - t - 1]],
                          axis=1)
        frames.append(netG.upsample(merged))

    res = F.stack(frames, axis=1)  # [B,T,3,H,W] per the original note
    return res + biup
Exemplo n.º 28
0
    def get_ground_truth(self, anchors_list, batched_gt_boxes,
                         batched_num_gts):
        """Build per-point labels, offsets and centerness for a batch.

        Each point is assigned the smallest-area ground-truth box among those
        that (a) fall in the size range of the point's level and (b) contain
        the point, or contain it within the centre-sampling region when
        ``self.cfg.center_sampling_radius > 0``. Points with no candidate get
        label 0.

        Args:
            anchors_list: per-level anchor points.
            batched_gt_boxes: ground-truth boxes per image; the first four
                columns are coordinates and column 4 is the class.
            batched_num_gts: number of valid ground-truth boxes per image.

        Returns:
            Tuple ``(labels, offsets, ctrness)``, each stacked over images
            along axis 0 and detached.
        """
        labels_list = []
        offsets_list = []
        ctrness_list = []

        all_level_anchors = F.concat(anchors_list, axis=0)

        # The per-point size range depends only on the anchors and the
        # config, so it is built once rather than once per image.
        object_sizes_of_interest = F.concat([
            F.broadcast_to(
                F.expand_dims(mge.tensor(size, dtype=np.float32), axis=0),
                (anchors_i.shape[0], 2)) for anchors_i, size in zip(
                    anchors_list, self.cfg.object_sizes_of_interest)
        ],
                                            axis=0)

        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            offsets = self.point_coder.encode(
                all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1))

            max_offsets = F.max(offsets, axis=2)
            is_cared_in_the_level = (
                (max_offsets >= F.expand_dims(object_sizes_of_interest[:, 0],
                                              axis=0))
                & (max_offsets <= F.expand_dims(object_sizes_of_interest[:, 1],
                                                axis=0)))

            if self.cfg.center_sampling_radius > 0:
                gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
                is_in_boxes = []
                for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                    radius = stride * self.cfg.center_sampling_radius
                    # Square around the centre, clipped to the box itself.
                    center_boxes = F.concat([
                        F.maximum(gt_centers - radius, gt_boxes[:, :2]),
                        F.minimum(gt_centers + radius, gt_boxes[:, 2:4]),
                    ],
                                            axis=1)
                    center_offsets = self.point_coder.encode(
                        anchors_i, F.expand_dims(center_boxes, axis=1))
                    is_in_boxes.append(F.min(center_offsets, axis=2) > 0)
                is_in_boxes = F.concat(is_in_boxes, axis=1)
            else:
                is_in_boxes = F.min(offsets, axis=2) > 0

            gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] -
                                                           gt_boxes[:, 1])
            # FIXME: use repeat instead of broadcast_to
            areas = F.broadcast_to(F.expand_dims(gt_area, axis=1),
                                   offsets.shape[:2])
            # Disqualified (box, point) pairs get infinite area so argmin
            # never prefers them.
            areas[~is_cared_in_the_level] = float("inf")
            areas[~is_in_boxes] = float("inf")

            match_indices = F.argmin(areas, axis=0)
            gt_boxes_matched = gt_boxes[match_indices]
            anchor_min_area = F.indexing_one_hot(areas, match_indices, axis=0)

            labels = gt_boxes_matched[:, 4].astype(np.int32)
            labels[anchor_min_area == float("inf")] = 0
            offsets = self.point_coder.encode(all_level_anchors,
                                              gt_boxes_matched[:, :4])

            left_right = offsets[:, [0, 2]]
            top_bottom = offsets[:, [1, 3]]
            ctrness = F.sqrt(
                F.maximum(
                    F.min(left_right, axis=1) / F.max(left_right, axis=1), 0) *
                F.maximum(
                    F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), 0))

            labels_list.append(labels)
            offsets_list.append(offsets)
            ctrness_list.append(ctrness)

        return (
            F.stack(labels_list, axis=0).detach(),
            F.stack(offsets_list, axis=0).detach(),
            F.stack(ctrness_list, axis=0).detach(),
        )
Exemplo n.º 29
0
 def run(data1, data2):
     """Stack ``data1`` and ``data2`` along axis ``ai``.

     ``ai`` is not defined in this function; it is resolved from an outer
     scope.
     """
     pair = [data1, data2]
     return F.stack(pair, axis=ai)
Exemplo n.º 30
0
    def get_ground_truth(self, anchors_list, batched_gt_boxes,
                         batched_num_gts):
        """Build per-point labels, offsets and centerness for a batch.

        For each ground-truth box the ``self.cfg.anchor_topk`` points of
        every level returned by ``F.topk`` on the centre distances become
        candidates. A candidate is foreground when its IoU reaches the mean
        plus the standard deviation of that box's candidate IoUs and the
        point lies inside the box. Each point is then matched to the box with
        the highest remaining IoU; points with none get label 0.

        Args:
            anchors_list: per-level anchor points.
            batched_gt_boxes: ground-truth boxes per image; the first four
                columns are coordinates and column 4 is the class.
            batched_num_gts: number of valid ground-truth boxes per image.

        Returns:
            Tuple ``(labels, offsets, ctrness)``, each stacked over images
            along axis 0 and detached.
        """
        labels_list = []
        offsets_list = []
        ctrness_list = []

        all_level_anchors = F.concat(anchors_list, axis=0)
        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]
            # The box centres depend only on the image, not on the level, so
            # they are computed once per image.
            gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2

            ious = []
            candidate_idxs = []
            base = 0
            for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                # Each point is expanded to a square anchor of side
                # stride * anchor_scale for the IoU computation.
                ious.append(
                    layers.get_iou(
                        gt_boxes[:, :4],
                        F.concat([
                            anchors_i - stride * self.cfg.anchor_scale / 2,
                            anchors_i + stride * self.cfg.anchor_scale / 2,
                        ],
                                 axis=1)))
                distances = F.sqrt(
                    F.sum((F.expand_dims(gt_centers, axis=1) - anchors_i)**2,
                          axis=2))
                _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
                # ``base`` turns per-level indices into indices over all
                # levels.
                candidate_idxs.append(base + topk_idxs)
                base += anchors_i.shape[0]
            ious = F.concat(ious, axis=1)
            candidate_idxs = F.concat(candidate_idxs, axis=1)

            candidate_ious = F.gather(ious, 1, candidate_idxs)
            ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True) +
                        F.std(candidate_ious, axis=1, keepdims=True))
            is_foreground = F.scatter(
                F.zeros(ious.shape), 1, candidate_idxs,
                F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)

            is_in_boxes = F.min(self.point_coder.encode(
                all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)),
                                axis=2) > 0

            # Disqualified (box, point) pairs get IoU -1 so argmax never
            # prefers them.
            ious[~is_foreground] = -1
            ious[~is_in_boxes] = -1

            match_indices = F.argmax(ious, axis=0)
            gt_boxes_matched = gt_boxes[match_indices]
            anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

            labels = gt_boxes_matched[:, 4].astype(np.int32)
            labels[anchor_max_iou == -1] = 0
            offsets = self.point_coder.encode(all_level_anchors,
                                              gt_boxes_matched[:, :4])

            left_right = offsets[:, [0, 2]]
            top_bottom = offsets[:, [1, 3]]
            ctrness = F.sqrt(
                F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1),
                       lower=0) *
                F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1),
                       lower=0))

            labels_list.append(labels)
            offsets_list.append(offsets)
            ctrness_list.append(ctrness)

        return (
            F.stack(labels_list, axis=0).detach(),
            F.stack(offsets_list, axis=0).detach(),
            F.stack(ctrness_list, axis=0).detach(),
        )