def forward(self, x):
        with timer.env('backbone'):
            outs = self.backbone(x)

        with timer.env('fpn'):
            outs = [outs[i] for i in cfg.backbone.selected_layers]
            outs = self.fpn(outs)
            '''
            outs:
            (n, 3, 550, 550) -> backbone -> (n, 256, 138, 138) -> fpn -> (n, 256, 69, 69) P3
                                            (n, 512, 69, 69)             (n, 256, 35, 35) P4
                                            (n, 1024, 35, 35)            (n, 256, 18, 18) P5
                                            (n, 2048, 18, 18)            (n, 256, 9, 9)   P6
                                                                         (n, 256, 5, 5)   P7
            '''
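        # Anchors are generated lazily on the first forward pass, once the FPN
        # feature-map sizes are known; self.anchors then becomes a tensor and
        # this branch is skipped on later calls.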
        if isinstance(self.anchors, list):
            for i, shape in enumerate([list(aa.shape) for aa in outs]):
                self.anchors += make_anchors(shape[2], shape[3], cfg.scales[i])
            self.anchors = torch.Tensor(self.anchors).view(-1, 4)

        with timer.env('proto'):
            # outs[0]: (n, 256, 69, 69), the feature map from P3
            proto_out = self.proto_net(outs[0])  # proto_out: (n, 32, 138, 138)
            proto_out = F.relu(proto_out, inplace=True)
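            # (n, 32, 138, 138) -> (n, 138, 138, 32): channels-last, so the mask
            # coefficients can later be applied with a single matmul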
            proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

        with timer.env('pred_heads'):
            predictions = {'box': [], 'class': [], 'coef': []}

            for i in self.selected_layers:  # self.selected_layers [0, 1, 2, 3, 4]
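                # prediction_layers holds a single module that is shared across
                # all FPN levels, hence the fixed index 0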
                p = self.prediction_layers[0](outs[i])

                for k, v in p.items():
                    predictions[k].append(v)

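        # concatenate the per-level predictions along the anchor dimension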
        for k, v in predictions.items():
            predictions[k] = torch.cat(v, -2)

        predictions['proto'] = proto_out
        predictions['anchors'] = self.anchors

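        # during training the raw class logits are returned for the loss;
        # at inference they are softmaxed into class scores instead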
        if self.training:
            if cfg.train_semantic:  # True
                predictions['segm'] = self.semantic_seg_conv(outs[0])
            return predictions

        else:
            predictions['class'] = F.softmax(predictions['class'], -1)
            return predictions
Example #2
    if args.image is not None:
        images = glob.glob(args.image + '/*.jpg')
        num = len(images)

        for i, one_img in enumerate(images):
            img_name = one_img.split('/')[-1]
            img_origin = torch.from_numpy(cv2.imread(one_img)).float()
            if cuda:
                img_origin = img_origin.cuda()
            img_h, img_w = img_origin.shape[0], img_origin.shape[1]
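            # FastBaseTransform resizes the image to the network input size and
            # normalizes it before the forward pass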
            img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
            net_outs = net(img_trans)
            nms_outs = NMS(net_outs, args.traditional_nms)

            show_lincomb = bool(args.show_lincomb and args.image)
            with timer.env('after nms'):
                results = after_nms(nms_outs,
                                    img_h,
                                    img_w,
                                    show_lincomb=show_lincomb,
                                    crop_masks=not args.no_crop,
                                    visual_thre=args.visual_thre,
                                    img_name=img_name)
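                # synchronize so the 'after nms' timer reflects finished GPU work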
                if cuda:
                    torch.cuda.synchronize()

            img_numpy = draw_img(results, img_origin, args)

            cv2.imwrite(f'results/images/{img_name}', img_numpy)
            print(f'\r{i + 1}/{num}', end='')
Example #3
def evaluate(net,
             dataset,
             max_num=-1,
             during_training=False,
             cocoapi=False,
             traditional_nms=False):
    frame_times = MovingAverage()
    dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
    dataset_indices = list(range(len(dataset)))
    dataset_indices = dataset_indices[:dataset_size]
    progress_bar = ProgressBar(40, dataset_size)

    # For each class and iou, stores tuples (score, isPositive)
    # Index ap_data[type][iouIdx][classIdx]
    ap_data = {
        'box': [[APDataObject() for _ in cfg.dataset.class_names]
                for _ in iou_thresholds],
        'mask': [[APDataObject() for _ in cfg.dataset.class_names]
                 for _ in iou_thresholds]
    }
    make_json = Make_json()

    for i, image_idx in enumerate(dataset_indices):
        timer.reset()

        with timer.env('Data loading'):
            img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

            batch = img.unsqueeze(0)
            if cuda:
                batch = batch.cuda()

        with timer.env('Network forward'):
            net_outs = net(batch)
            nms_outs = NMS(net_outs, traditional_nms)
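            # match detections against the ground truth and accumulate
            # (score, is_positive) pairs into ap_data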
            prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd,
                         dataset.ids[image_idx], make_json, cocoapi)

        # First couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include those in the FPS calculations.
        fps = 0
        if i > 1 and not during_training:
            frame_times.add(timer.total_time())
            fps = 1 / frame_times.get_avg()

        progress = (i + 1) / dataset_size * 100
        progress_bar.set_val(i + 1)
        print('\rProcessing:  %s  %d / %d (%.2f%%)  %.2f fps  ' %
              (repr(progress_bar), i + 1, dataset_size, progress, fps),
              end='')

    if cocoapi:
        make_json.dump()
        print('\nJson files dumped, saved in: \'results/\', start evaluating.')

        gt_annotations = COCO(cfg.dataset.valid_info)
        bbox_dets = gt_annotations.loadRes('results/bbox_detections.json')
        mask_dets = gt_annotations.loadRes('results/mask_detections.json')

        print('\nEvaluating BBoxes:')
        bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
        bbox_eval.evaluate()
        bbox_eval.accumulate()
        bbox_eval.summarize()

        print('\nEvaluating Masks:')
        mask_eval = COCOeval(gt_annotations, mask_dets, 'segm')
        mask_eval.evaluate()
        mask_eval.accumulate()
        mask_eval.summarize()
        return

    table, box_row, mask_row = calc_map(ap_data)
    print(table)
    return table, box_row, mask_row
Example #4
def prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd, image_id,
                 make_json, cocoapi):
    """ Returns a list of APs for this image, with each element being for a class  """

    with timer.env('After NMS'):
        class_ids, classes, boxes, masks = after_nms(nms_outs, h, w)

        if class_ids.size(0) == 0:
            return

        class_ids = list(class_ids.cpu().numpy().astype(int))
        classes = list(classes.cpu().numpy().astype(float))
        masks = masks.view(-1, h * w).cuda() if cuda else masks.view(-1, h * w)
        boxes = boxes.cuda() if cuda else boxes

    if cocoapi:
        with timer.env('Output json'):
            boxes = boxes.cpu().numpy()
            masks = masks.view(-1, h, w).cpu().numpy()

            for i in range(masks.shape[0]):
                # Make sure that the bounding box actually makes sense and a mask was produced
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] -
                                                  boxes[i, 0]) > 0:
                    make_json.add_bbox(image_id, class_ids[i], boxes[i, :],
                                       classes[i])
                    make_json.add_mask(image_id, class_ids[i], masks[i, :, :],
                                       classes[i])
        return

    with timer.env('Prepare gt'):
        gt_boxes = torch.Tensor(gt[:, :4])
        gt_boxes[:, [0, 2]] *= w
        gt_boxes[:, [1, 3]] *= h
        gt_classes = list(gt[:, 4].astype(int))
        gt_masks = torch.Tensor(gt_masks).view(-1, h * w)

        if num_crowd > 0:
            split = lambda x: (x[-num_crowd:], x[:-num_crowd])
            crowd_boxes, gt_boxes = split(gt_boxes)
            crowd_masks, gt_masks = split(gt_masks)
            crowd_classes, gt_classes = split(gt_classes)

    with timer.env('Eval Setup'):
        num_pred = len(class_ids)
        num_gt = len(gt_classes)

        mask_iou_cache = mask_iou(masks, gt_masks)
        bbox_iou_cache = bbox_iou(boxes.float(), gt_boxes.float())

        if num_crowd > 0:
            crowd_mask_iou_cache = mask_iou(masks, crowd_masks, iscrowd=True)
            crowd_bbox_iou_cache = bbox_iou(boxes.float(),
                                            crowd_boxes.float(),
                                            iscrowd=True)
        else:
            crowd_mask_iou_cache = None
            crowd_bbox_iou_cache = None

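        # pair each eval type with lookups into the pre-computed IoU matrices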
        iou_types = [('box', lambda i, j: bbox_iou_cache[i, j].item(),
                      lambda i, j: crowd_bbox_iou_cache[i, j].item()),
                     ('mask', lambda i, j: mask_iou_cache[i, j].item(),
                      lambda i, j: crowd_mask_iou_cache[i, j].item())]

    timer.start('Main loop')
    for _class in set(class_ids + gt_classes):
        num_gt_for_class = sum([1 for x in gt_classes if x == _class])

        for iouIdx in range(len(iou_thresholds)):
            iou_threshold = iou_thresholds[iouIdx]

            for iou_type, iou_func, crowd_func in iou_types:
                gt_used = [False] * len(gt_classes)
                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positives(num_gt_for_class)

                for i in range(num_pred):
                    if class_ids[i] != _class:
                        continue

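                    # greedily match this detection to the still-unused gt of
                    # the same class with the highest IoU above the threshold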
                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue

                        iou = iou_func(i, j)

                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(classes[i], True)
                    else:
                        # If the detection matches a crowd, we can just ignore it
                        matched_crowd = False

                        if num_crowd > 0:
                            for j in range(len(crowd_classes)):
                                if crowd_classes[j] != _class:
                                    continue

                                iou = crowd_func(i, j)

                                if iou > iou_threshold:
                                    matched_crowd = True
                                    break

                        # All this crowd code so that we can make sure that our eval code gives the
                        # same result as COCOEval. There aren't even that many crowd annotations to
                        # begin with, but accuracy is of the utmost importance.
                        if not matched_crowd:
                            ap_obj.push(classes[i], False)
    timer.stop('Main loop')
Example #5
def person_segmentation(img_path):
    with torch.no_grad():
        cuda = torch.cuda.is_available()
        if cuda:
            cudnn.benchmark = True
            cudnn.fastest = True
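            # newly created tensors (e.g. the anchors) default to CUDA after this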
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        net = Yolact()
        net.load_weights(model_weight_path, cuda)
        net.eval()
        print('Model loaded.\n')

        if cuda:
            net = net.cuda()

        img_name = img_path.split('/')[-1]
        img_origin = torch.from_numpy(cv2.imread(img_path)).float()
        # img_origin_dict[img_name] = img_origin.numpy()
        if cuda:
            img_origin = img_origin.cuda()
        img_h, img_w = img_origin.shape[0], img_origin.shape[1]
        img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
        net_outs = net(img_trans)
        nms_outs = NMS(net_outs, traditional_nms)

        with timer.env('after nms'):
            results = after_nms(nms_outs,
                                img_h,
                                img_w,
                                show_lincomb=show_lincomb,
                                crop_masks=not no_crop,
                                visual_thre=visual_thre,
                                img_name=img_name)
            # masks are binary 0/1 maps
            class_ids, classes, boxes, masks = results
            # convert everything to numpy for post-processing
            class_ids = class_ids.numpy()
            classes = classes.numpy()
            boxes = boxes.numpy()
            masks = masks.numpy()
            # keep only 'person' detections (class id 0)
            person_ids = np.squeeze(np.argwhere(class_ids == 0), axis=1)
            class_ids = class_ids[person_ids]
            classes = classes[person_ids]
            boxes = boxes[person_ids]
            masks = masks[person_ids]

            # select the single person with the highest score (classes holds the scores)
            if np.size(class_ids) != 0:
                max_score_person_id = np.argmax(classes).reshape(1, )
                class_ids = class_ids[max_score_person_id]
                classes = classes[max_score_person_id]
                boxes = boxes[max_score_person_id]
                masks = masks[max_score_person_id]
                # img_mask_dict[img_name] = masks[0]
                # img_bbox_dict[img_name] = boxes[0]
            results = (torch.from_numpy(class_ids), torch.from_numpy(classes),
                       torch.from_numpy(boxes), torch.from_numpy(masks))
            if cuda:
                torch.cuda.synchronize()
            img_numpy = draw_img(results,
                                 img_origin,
                                 visual_thre=visual_thre,
                                 hide_mask=False,
                                 class_color=False,
                                 hide_bbox=False,
                                 hide_score=False)
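            # NOTE: assumes at least one person was detected; masks is empty otherwise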
            return img_numpy, masks[0]