def forward(self, input, gt_boxes_list, anchors_np, rpn_targets=None):
        """Single-stage (RetinaNet-style) forward pass.

        Args:
            input: batched image tensor (batch dimension first).
            gt_boxes_list: per-image ground-truth boxes; used only when
                ``self.is_training`` and ``rpn_targets`` is None.
            anchors_np: numpy anchor array shared by the whole batch.
            rpn_targets: optional precomputed (labels, bbtargets, bbwghts);
                computed here from the ground truth when None.

        Returns:
            (rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets,
            rpn_bbwghts); the three target entries are None at inference.
        """
        endpoints = self.backbone(input)

        # Extra coarse pyramid levels P6/P7 stacked on the FPN outputs.
        P6 = self.conv6(endpoints['C5'])
        P7 = self.conv7(self.relu6(P6))
        Ps = self.pyramid(endpoints)
        Ps.append(P6)
        Ps.append(P7)

        # Shared RPN head applied to every pyramid level.
        rpn_outs = [self.rpn(f) for f in Ps]
        rpn_logit, rpn_box = self._rerange(rpn_outs)
        rpn_prob = F.sigmoid(rpn_logit) if self.rpn_activation == 'sigmoid' \
            else F.softmax(rpn_logit, dim=-1)
        # BUG FIX: Tensor.detach() is out-of-place; the original discarded
        # its result, so rpn_prob was never actually detached.
        rpn_prob = rpn_prob.detach()

        if self.is_training:
            if rpn_targets is None:
                rpn_targets = compute_rpn_targets_in_batch(
                    gt_boxes_list, anchors_np)
                rpn_labels, _, rpn_bbtargets, rpn_bbwghts = everything2cuda(
                    rpn_targets)
            else:
                rpn_labels, rpn_bbtargets, rpn_bbwghts = rpn_targets
        else:
            rpn_labels = rpn_bbtargets = rpn_bbwghts = None

        return rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts
Exemplo n.º 2
0
    def compute_rcnn_targets(self, rois, img_ids, gt_boxes_list):
        """Build per-RoI classification labels and box-regression targets.

        Args:
            rois: proposal boxes (tensor, first dim = number of RoIs).
            img_ids: image index of each RoI within the batch.
            gt_boxes_list: list (one entry per image) of numpy GT boxes;
                assumed 5 columns per box (coords + class) — see the
                ``view(-1, 5)`` below.

        Returns:
            (rcnn_labels, rcnn_bbox_targets, rcnn_bbwghts) with the box
            tensors reshaped to (-1, 4).
        """
        # Tag every GT box with the index of the image it came from.
        gt_img_inds = [
            np.full((gt.shape[0], ), i, dtype=np.int64)
            for i, gt in enumerate(gt_boxes_list)
        ]
        gt_img_inds = np.concatenate(gt_img_inds, axis=0)
        gt_boxes = np.concatenate(gt_boxes_list, axis=0).astype(np.float32)
        # BUG FIX: removed a stray np.set_printoptions(...) debug call that
        # mutated numpy's global printing state as a hidden side effect.

        if gt_boxes.size > 0:
            gt_boxes = everything2cuda(gt_boxes).view(-1, 5)
            gt_img_inds = everything2cuda(gt_img_inds).view(-1)
            rcnn_labels, rcnn_bbtargets, rcnn_bbwgts = self.roi_target(
                rois, img_ids, gt_boxes, gt_img_inds)
        else:
            # No ground truth at all: every RoI becomes background (label 0)
            # with zeroed regression targets and weights.
            num_rois = rois.size(0)
            rcnn_labels = torch.LongTensor(num_rois).zero_().cuda()
            rcnn_bbtargets = torch.FloatTensor(num_rois, 4).zero_().cuda()
            rcnn_bbwgts = torch.FloatTensor(num_rois, 4).zero_().cuda()

        rcnn_bbwghts = rcnn_bbwgts.view(-1, 4)
        rcnn_bbox_targets = rcnn_bbtargets.view(-1, 4)
        return rcnn_labels, rcnn_bbox_targets, rcnn_bbwghts
Exemplo n.º 3
0
global_step = 0
timer = Timer()

for ep in range(start_epoch, cfg.max_epoch):
    if ep in cfg.lr_decay_epoches and cfg.solver == 'SGD':
        lr *= cfg.lr_decay
        adjust_learning_rate(optimizer, lr)
        print('adjusting learning rate {:.6f}'.format(lr))

    for step, batch in enumerate(train_data):
        timer.tic()

        input, anchors_np, im_scale_list, image_ids, gt_boxes_list, rpn_targets, _, _ = batch
        #
        gt_boxes_list = ScatterList(gt_boxes_list)
        input = everything2cuda(input)
        rpn_targets = everything2cuda(rpn_targets)
        #
        outs = model(input, gt_boxes_list, anchors_np, rpn_targets=rpn_targets)

        if cfg.model_type == 'maskrcnn':
            rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \
            rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = outs
            #
            outputs = []
            #
            targets = []
        elif cfg.model_type == 'retinanet':
            # Thinking like this: single-stage detector take rpn results as final results
            rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts = outs
            #
Exemplo n.º 4
0
def main():
    """Evaluate a restored detector over ``test_data``.

    Builds the model from ``cfg``, restores weights (required), runs the
    test loader, post-processes detections and logs images to tensorboard.

    Returns:
        dict with keys 'stage1', 'stage2' and 'gt', each a list of results
        in the dataset's formal detection format.
    """
    # config model and lr
    # Anchors per location; cfg.anchor_scales may be nested (per-level lists),
    # in which case anchor_shift also multiplies the count.
    num_anchors = len(cfg.anchor_ratios) * len(cfg.anchor_scales[0]) * len(cfg.anchor_shift) \
        if isinstance(cfg.anchor_scales[0], list) else \
        len(cfg.anchor_ratios) * len(cfg.anchor_scales)

    resnet = resnet50 if cfg.backbone == 'resnet50' else resnet101
    detection_model = MaskRCNN if cfg.model_type.lower(
    ) == 'maskrcnn' else RetinaNet

    # is_training=False: forward() skips target computation (see model code).
    model = detection_model(resnet(pretrained=True, maxpool5=cfg.maxpool5),
                            num_classes=cfg.num_classes,
                            num_anchors=num_anchors,
                            strides=cfg.strides,
                            in_channels=cfg.in_channels,
                            f_keys=cfg.f_keys,
                            num_channels=256,
                            is_training=False,
                            activation=cfg.class_activation)

    lr = cfg.lr
    start_epoch = 0
    # A checkpoint is mandatory for evaluation; refuse random weights.
    if cfg.restore is not None:
        meta = load_net(cfg.restore, model)
        print(meta)
        if meta[0] >= 0:
            start_epoch = meta[0] + 1
            lr = meta[1]
        print('Restored from %s, starting from %d epoch, lr:%.6f' %
              (cfg.restore, start_epoch, lr))
    else:
        raise ValueError('restore is not set')

    model.cuda()
    model.eval()

    class_names = test_data.dataset.classes
    print('dataset len: {}'.format(len(test_data.dataset)))

    # Tensorboard writer for detection/GT visualizations.
    tb_dir = os.path.join(cfg.train_dir, cfg.backbone + '_' + cfg.datasetname,
                          'test', time.strftime("%h%d_%H"))
    writer = tbx.FileWriter(tb_dir)

    # main loop
    timer_all = Timer()
    timer_post = Timer()
    all_results1 = []  # stage-1 (RPN/proposal) detections
    all_results2 = []  # stage-2 (RCNN) detections, if the model has one
    all_results_gt = []  # ground truth rendered as detections (score 1.0)
    for step, batch in enumerate(test_data):

        timer_all.tic()

        # NOTE: Targets is in NHWC order!!
        # input, anchors_np, im_scale_list, image_ids, gt_boxes_list = batch
        # input = everything2cuda(input)
        input_t, anchors_np, im_scale_list, image_ids, gt_boxes_list = batch
        # volatile=True: legacy pre-0.4 PyTorch no-grad inference flag.
        input = everything2cuda(input_t, volatile=True)

        outs = model(input, gt_boxes_list=None, anchors_np=anchors_np)

        # Unpack outputs depending on one- vs two-stage architecture.
        if cfg.model_type == 'maskrcnn':
            rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \
            rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = outs
            outputs = [
                rois, roi_img_ids, rpn_logit, rpn_box, rpn_prob, rcnn_logit,
                rcnn_box, rcnn_prob, anchors
            ]
            targets = []  # unused at eval time; kept from the training path
        elif cfg.model_type == 'retinanet':
            rpn_logit, rpn_box, rpn_prob, _, _, _ = outs
            outputs = [rpn_logit, rpn_box, rpn_prob]
        else:
            raise ValueError('Unknown model type: %s' % cfg.model_type)

        timer_post.tic()

        # Decode + NMS the raw outputs into per-stage detection arrays.
        dets_dict = model.get_final_results(
            outputs,
            everything2cuda(anchors_np),
            score_threshold=0.01,
            max_dets=cfg.max_det_num * cfg.batch_size,
            overlap_threshold=cfg.overlap_threshold)
        if 'stage1' in dets_dict:
            Dets = dets_dict['stage1']
        else:
            raise ValueError('No stage1 results:', dets_dict.keys())
        # Single-stage models have no 'stage2'; reuse stage1 results.
        Dets2 = dets_dict['stage2'] if 'stage2' in dets_dict else Dets

        t3 = timer_post.toc()
        t = timer_all.toc()

        # Rescale detections to original image sizes for the formal format.
        formal_res1 = dataset.to_detection_format(copy.deepcopy(Dets),
                                                  image_ids, im_scale_list)
        formal_res2 = dataset.to_detection_format(copy.deepcopy(Dets2),
                                                  image_ids, im_scale_list)
        all_results1 += formal_res1
        all_results2 += formal_res2

        # Convert GT boxes (class id in column 4, kept only when >= 1) into
        # detection rows [x1, y1, x2, y2, score=1.0, class].
        Dets_gt = []
        for gb in gt_boxes_list:
            cpy_mask = gb[:, 4] >= 1
            gb = gb[cpy_mask]
            n = cpy_mask.astype(np.int32).sum()
            res_gt = np.zeros((n, 6))
            res_gt[:, :4] = gb[:, :4]
            res_gt[:, 4] = 1.
            res_gt[:, 5] = gb[:, 4]
            Dets_gt.append(res_gt)
        formal_res_gt = dataset.to_detection_format(Dets_gt, image_ids,
                                                    im_scale_list)
        all_results_gt += formal_res_gt

        # Periodically render detections and GT to tensorboard images.
        if step % cfg.log_image == 0:
            input_np = everything2numpy(input)
            summary_out = []
            Is = single_shot.draw_detection(input_np,
                                            Dets,
                                            class_names=class_names)
            Is = Is.astype(np.uint8)
            summary_out += log_images(Is, image_ids, step, prefix='Detection/')

            Is = single_shot.draw_detection(input_np,
                                            Dets2,
                                            class_names=class_names)
            Is = Is.astype(np.uint8)
            summary_out += log_images(Is,
                                      image_ids,
                                      step,
                                      prefix='Detection2/')

            Imgs = single_shot.draw_gtboxes(input_np,
                                            gt_boxes_list,
                                            class_names=class_names)
            Imgs = Imgs.astype(np.uint8)
            summary_out += log_images(Imgs,
                                      image_ids,
                                      float(step),
                                      prefix='GT')

            for s in summary_out:
                writer.add_summary(s, float(step))

        # Progress line, overwritten in place via end='\r'.
        if step % cfg.display == 0:
            print(time.strftime("%H:%M:%S ") +
                  'Epoch %d iter %d: speed %.3fs (%.3fs)' % (0, step, t, t3) +
                  ' ImageIds: ' + ', '.join(str(s) for s in image_ids),
                  end='\r')

    res_dict = {
        'stage1': all_results1,
        'stage2': all_results2,
        'gt': all_results_gt
    }
    return res_dict
Exemplo n.º 5
0
    def forward(self, input, gt_boxes_list, anchors_np, rpn_targets=None):
        """Two-stage (Mask R-CNN style) forward pass: RPN then RCNN head.

        Args:
            input: batched image tensor (batch dimension first).
            gt_boxes_list: per-image ground-truth boxes (training only).
            anchors_np: numpy anchor array shared by the whole batch.
            rpn_targets: optional precomputed RPN targets; when None and
                training, they are computed from ``gt_boxes_list``.

        Returns:
            RPN outputs/targets, anchors, sampled rois with image ids, and
            RCNN outputs/targets (target entries are None at inference).
        """
        batch_size = input.size(0)
        anchors = torch.from_numpy(anchors_np).cuda()
        endpoints = self.backbone(input)

        # Shared RPN head over every FPN level.
        Ps = self.pyramid(endpoints)
        rpn_outs = [self.rpn(f) for f in Ps]

        rpn_logit, rpn_box = self._rerange(rpn_outs, last_dimension=2)
        rpn_prob = F.sigmoid(rpn_logit) if self.rpn_activation == 'sigmoid' \
            else F.softmax(rpn_logit, dim=-1)
        # BUG FIX: Tensor.detach() is out-of-place; the original discarded
        # its result, so rpn_prob was never actually detached.
        rpn_prob = rpn_prob.detach()

        if self.is_training:
            assert input.size(0) == len(gt_boxes_list), '%d vs %d' % (
                input.size(0), len(gt_boxes_list))
            if rpn_targets is None:
                rpn_targets = compute_rpn_targets_in_batch(
                    gt_boxes_list, anchors_np)
                rpn_labels, _, rpn_bbtargets, rpn_bbwghts = everything2cuda(
                    rpn_targets)
            else:
                rpn_labels, rpn_bbtargets, rpn_bbwghts = rpn_targets

            # Generate proposals (NMS at IoU 0.7), then sample RoIs for
            # RCNN training.
            rois, probs, roi_img_ids = self._stage_one_results(
                rpn_box,
                rpn_prob,
                anchors,
                top_n=20000 * batch_size,
                overlap_threshold=0.7,
                top_n_post_nms=2000)
            rois, roi_labels, roi_img_ids = sample_rois(
                rois, roi_img_ids, gt_boxes_list)
        else:
            rpn_labels = rpn_bbtargets = rpn_bbwghts = None
            rois, probs, roi_img_ids = self._stage_one_results(
                rpn_box,
                rpn_prob,
                anchors,
                top_n=6000 * batch_size,
                overlap_threshold=0.7)
            # Drop low-confidence proposals at inference time.
            rois, probs, roi_img_ids = self._thresholding(
                rois, probs, roi_img_ids, 0.05)

        # Stage two: RoI-align features from the pyramid, run the RCNN head.
        rcnn_feats = self.pyramid_roi_align(Ps, rois, roi_img_ids)
        rcnn_logit, rcnn_box = self.rcnn(rcnn_feats)
        rcnn_prob = F.sigmoid(rcnn_logit) if self.activation == 'sigmoid' \
            else F.softmax(rcnn_logit, dim=-1)
        # BUG FIX: same out-of-place detach issue as rpn_prob above.
        rcnn_prob = rcnn_prob.detach()

        if self.is_training:
            rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = self.compute_rcnn_targets(
                rois, roi_img_ids, gt_boxes_list)
            assert rcnn_labels.size(0) == rois.size(0) == roi_img_ids.size(0)
        else:
            rcnn_labels = rcnn_bbtargets = rcnn_bbwghts = None

        return rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \
               rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts
Exemplo n.º 6
0
summary_out = []

global_step = 0
timer = Timer()
for ep in range(start_epoch, cfg.max_epoch):
    if ep in cfg.lr_decay_epoches and cfg.solver == 'SGD':
        lr *= cfg.lr_decay
        adjust_learning_rate(optimizer, lr)
        print('adjusting learning rate %.6f' % lr)

    for step, batch in enumerate(train_data):
        timer.tic()

        input, anchors_np, im_scale_list, image_ids, gt_boxes_list, rpn_targets, _, _ = batch
        gt_boxes_list = ScatterList(gt_boxes_list)
        input = everything2cuda(input)
        rpn_targets = everything2cuda(rpn_targets)
        outs = model(input, gt_boxes_list, anchors_np, rpn_targets=rpn_targets)

        if cfg.model_type == 'maskrcnn':
            rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \
            rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = outs
            outputs = [
                rois, roi_img_ids, rpn_logit, rpn_box, rpn_prob, rcnn_logit,
                rcnn_box, rcnn_prob, anchors
            ]
            targets = [
                rpn_labels, rpn_bbtargets, rpn_bbwghts, rcnn_labels,
                rcnn_bbtargets, rcnn_bbwghts
            ]
        elif cfg.model_type == 'retinanet':
Exemplo n.º 7
0
    def forward(self, input, gt_boxes_list, anchors_np, rpn_targets=None):
        """Two-stage (Mask R-CNN style) forward pass: RPN then RCNN head.

        Args:
            input: batched image tensor (batch dimension first).
            gt_boxes_list: per-image ground-truth boxes (training only).
            anchors_np: numpy anchor array shared by the whole batch.
            rpn_targets: optional precomputed RPN targets; when None and
                training, they are computed from ``gt_boxes_list``.

        Returns:
            RPN outputs/targets, anchors, sampled rois with image ids, and
            RCNN outputs/targets (target entries are None at inference).
        """
        batch_size = input.size(0)
        # torch.from_numpy():
        # the returned tensor and ndarray share the same memory.
        anchors = torch.from_numpy(anchors_np).cuda()
        endpoints = self.backbone(input)

        # Currently No ZoomNet.

        Ps = self.pyramid(endpoints)
        rpn_outs = []
        # f means "Floor" in Feature Pyramids.
        for i, f in enumerate(Ps):
            rpn_outs.append(self.rpn(f))

        rpn_logit, rpn_box = self._rerange(rpn_outs, last_dimension=2)
        rpn_prob = F.sigmoid(rpn_logit) if self.rpn_activation == 'sigmoid' \
            else F.softmax(rpn_logit, dim=-1)
        # Re-assignment matters: a bare "rpn_prob.detach()" would discard
        # the detached result (detach is out-of-place).
        rpn_prob = rpn_prob.detach()

        if self.is_training:
            assert input.size(0) == len(gt_boxes_list), \
                '{:d} vs {:d}'.format(input.size(0), len(gt_boxes_list))
            if rpn_targets is None:
                rpn_targets = compute_rpn_targets_in_batch(
                    gt_boxes_list, anchors_np)
                rpn_labels, _, rpn_bbtargets, rpn_bbwghts = everything2cuda(
                    rpn_targets)
            else:
                rpn_labels, rpn_bbtargets, rpn_bbwghts = rpn_targets
            # end if-else

            # Generate proposals (NMS at IoU 0.7), then sample RoIs for
            # RCNN training.
            rois, probs, roi_img_ids = self._stage_one_results(rpn_box, rpn_prob, anchors, \
                                                               top_n=20000 * batch_size, \
                                                               overlap_threshold=0.7, \
                                                               top_n_post_nms=2000)
            rois, roi_labels, roi_img_ids = sample_rois(
                rois, roi_img_ids, gt_boxes_list)
        else:
            rpn_labels = rpn_bbtargets = rpn_bbwghts = None
            rois, probs, roi_img_ids = self._stage_one_results(rpn_box, rpn_prob, anchors, \
                                                               top_n=6000 * batch_size, \
                                                               overlap_threshold=0.7)
            # Drop low-confidence proposals at inference time.
            rois, probs, roi_img_ids = self._thresholding(
                rois, probs, roi_img_ids, 0.05)
        # end if-else

        # Stage two: RoI-align features from the pyramid, run the RCNN head.
        rcnn_feats = self.pyramid_roi_align(Ps, rois, roi_img_ids)
        rcnn_logit, rcnn_box = self.rcnn(rcnn_feats)
        rcnn_prob = F.sigmoid(
            rcnn_logit) if self.activation == 'sigmoid' else F.softmax(
                rcnn_logit, dim=-1)
        rcnn_prob = rcnn_prob.detach()

        if self.is_training:
            # BUG FIX: compute_rcnn_targets() was called with no arguments,
            # which raises TypeError — it requires the sampled rois, their
            # image ids and the ground-truth boxes.
            rcnn_labels, rcnn_bbtargets, rcnn_bbwghts = self.compute_rcnn_targets(
                rois, roi_img_ids, gt_boxes_list)
            assert rcnn_labels.size(0) == rois.size(0) == roi_img_ids.size(0), \
                'Dimension mismatch.'
        else:
            rcnn_labels = rcnn_bbtargets = rcnn_bbwghts = None

        return rpn_logit, rpn_box, rpn_prob, rpn_labels, rpn_bbtargets, rpn_bbwghts, anchors, \
               rois, roi_img_ids, rcnn_logit, rcnn_box, rcnn_prob, rcnn_labels, rcnn_bbtargets, rcnn_bbwghts
Exemplo n.º 8
0
def main():
    """Evaluate a restored detector over ``test_data`` (variant with
    anchors precomputed once and cached on the model).

    Returns:
        dict with keys 'stage1', 'stage2' and 'gt'; 'gt' stays empty in
        this variant (ground truth is only drawn to tensorboard).
    """
    # config model and lr
    # Anchors per location; cfg.anchor_scales may be nested per-level lists.
    num_anchors = len(cfg.anchor_ratios) * len(cfg.anchor_scales[0]) \
        if isinstance(cfg.anchor_scales[0], list) else \
        len(cfg.anchor_ratios) * len(cfg.anchor_scales)

    resnet = resnet50 if cfg.backbone == 'resnet50' else resnet101
    detection_model = MaskRCNN if cfg.model_type.lower(
    ) == 'maskrcnn' else RetinaNet

    # is_training=False: forward() skips target computation.
    model = detection_model(resnet(pretrained=True),
                            num_classes=cfg.num_classes,
                            num_anchors=num_anchors,
                            strides=cfg.strides,
                            in_channels=cfg.in_channels,
                            f_keys=cfg.f_keys,
                            num_channels=256,
                            is_training=False,
                            activation=cfg.class_activation)

    lr = cfg.lr
    start_epoch = 0
    # A checkpoint is mandatory for evaluation; refuse random weights.
    if cfg.restore is not None:
        meta = load_net(cfg.restore, model)
        print(meta)
        if meta[0] >= 0:
            start_epoch = meta[0] + 1
            lr = meta[1]
        print('Restored from %s, starting from %d epoch, lr:%.6f' %
              (cfg.restore, start_epoch, lr))
    else:
        raise ValueError('restore is not set')

    model.cuda()
    model.eval()

    # Flatten the dataset's per-level anchors into one (N, 4) array and
    # cache it on the model so forward() does not need anchors per batch.
    ANCHORS = np.vstack(
        [anc.reshape([-1, 4]) for anc in test_data.dataset.ANCHORS])
    model.anchors = everything2cuda(ANCHORS.astype(np.float32))

    class_names = test_data.dataset.classes
    print('dataset len: {}'.format(len(test_data.dataset)))

    # Tensorboard writer for detection/GT visualizations.
    tb_dir = os.path.join(cfg.train_dir, cfg.backbone + '_' + cfg.datasetname,
                          'test', time.strftime("%h%d_%H"))
    writer = tbx.FileWriter(tb_dir)
    summary_out = []  # re-initialized inside the loop before each use

    # main loop
    timer_all = Timer()
    timer_post = Timer()
    all_results1 = []  # stage-1 detections
    all_results2 = []  # stage-2 detections (falls back to stage-1)
    all_results_gt = []  # never appended to in this variant
    for step, batch in enumerate(test_data):

        timer_all.tic()

        # NOTE: Targets is in NHWC order!!
        input, image_ids, gt_boxes_list, image_ori = batch
        input = everything2cuda(input)

        # NOTE(review): the return value of model(input) is unused;
        # get_final_results below takes no outputs, so the model presumably
        # caches its raw outputs internally — verify against the model code.
        outs = model(input)

        timer_post.tic()

        # Decode + NMS into per-stage detection arrays.
        dets_dict = model.get_final_results(
            score_threshold=0.05,
            max_dets=cfg.max_det_num * cfg.batch_size,
            overlap_threshold=cfg.overlap_threshold)
        if 'stage1' in dets_dict:
            Dets = dets_dict['stage1']
        else:
            raise ValueError('No stage1 results:', dets_dict.keys())
        # Single-stage models have no 'stage2'; reuse stage1 results.
        Dets2 = dets_dict['stage2'] if 'stage2' in dets_dict else Dets

        t3 = timer_post.toc()
        t = timer_all.toc()

        # Map detections back to the original (pre-resize) image sizes.
        formal_res1 = dataset.to_detection_format(
            copy.deepcopy(Dets),
            image_ids,
            ori_sizes=[im.shape for im in image_ori])
        formal_res2 = dataset.to_detection_format(
            copy.deepcopy(Dets2),
            image_ids,
            ori_sizes=[im.shape for im in image_ori])
        all_results1 += formal_res1
        all_results2 += formal_res2

        # Periodically render detections and GT to tensorboard images.
        if step % cfg.log_image == 0:
            input_np = everything2numpy(input)
            summary_out = []
            Is = single_shot.draw_detection(input_np,
                                            Dets,
                                            class_names=class_names)
            Is = Is.astype(np.uint8)
            summary_out += log_images(Is, image_ids, step, prefix='Detection/')

            Is = single_shot.draw_detection(input_np,
                                            Dets2,
                                            class_names=class_names)
            Is = Is.astype(np.uint8)
            summary_out += log_images(Is,
                                      image_ids,
                                      step,
                                      prefix='Detection2/')

            Imgs = single_shot.draw_gtboxes(input_np,
                                            gt_boxes_list,
                                            class_names=class_names)
            Imgs = Imgs.astype(np.uint8)
            summary_out += log_images(Imgs,
                                      image_ids,
                                      float(step),
                                      prefix='GT')

            for s in summary_out:
                writer.add_summary(s, float(step))

        # Progress line, overwritten in place via end='\r'.
        if step % cfg.display == 0:
            print(time.strftime("%H:%M:%S ") +
                  'Epoch %d iter %d: speed %.3fs (%.3fs)' % (0, step, t, t3) +
                  ' ImageIds: ' + ', '.join(str(s) for s in image_ids),
                  end='\r')

    res_dict = {
        'stage1': all_results1,
        'stage2': all_results2,
        'gt': all_results_gt
    }
    return res_dict