def forward(self, x):
    with timer.env('backbone'):
        outs = self.backbone(x)

    with timer.env('fpn'):
        outs = [outs[i] for i in cfg.backbone.selected_layers]
        outs = self.fpn(outs)
        '''
        outs:
        (n, 3, 550, 550) -> backbone -> (n, 256, 138, 138) -> fpn -> (n, 256, 69, 69)  P3
                                        (n, 512, 69, 69)             (n, 256, 35, 35)  P4
                                        (n, 1024, 35, 35)            (n, 256, 18, 18)  P5
                                        (n, 2048, 18, 18)            (n, 256, 9, 9)    P6
                                                                     (n, 256, 5, 5)    P7
        '''

    if isinstance(self.anchors, list):
        for i, shape in enumerate([list(aa.shape) for aa in outs]):
            self.anchors += make_anchors(shape[2], shape[3], cfg.scales[i])
        self.anchors = torch.Tensor(self.anchors).view(-1, 4)

    with timer.env('proto'):
        # outs[0]: [2, 256, 69, 69], the feature map from P3
        proto_out = self.proto_net(outs[0])  # proto_out: (n, 32, 138, 138)
        proto_out = F.relu(proto_out, inplace=True)
        proto_out = proto_out.permute(0, 2, 3, 1).contiguous()

    with timer.env('pred_heads'):
        predictions = {'box': [], 'class': [], 'coef': []}

        for i in self.selected_layers:  # self.selected_layers: [0, 1, 2, 3, 4]
            p = self.prediction_layers[0](outs[i])

            for k, v in p.items():
                predictions[k].append(v)

    for k, v in predictions.items():
        predictions[k] = torch.cat(v, -2)

    predictions['proto'] = proto_out
    predictions['anchors'] = self.anchors

    if self.training:
        if cfg.train_semantic:  # True
            predictions['segm'] = self.semantic_seg_conv(outs[0])
        return predictions
    else:
        predictions['class'] = F.softmax(predictions['class'], -1)
        return predictions
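Note that the same prediction head (self.prediction_layers[0]) is applied to every FPN level, and the anchor list is only built on the first forward pass, then cached as a (num_anchors, 4) tensor. As a sanity check, the total anchor count for a 550x550 input can be derived from the feature-map sizes in the docstring above; this is a minimal sketch that assumes three anchor shapes per spatial location (the usual YOLACT default), which is not shown in this snippet.

# Hypothetical sanity check: total anchor count for a 550x550 input,
# assuming 3 anchor aspect ratios per location at each FPN level.
feature_map_sizes = [69, 35, 18, 9, 5]   # P3..P7 spatial sizes from the docstring above
anchors_per_location = 3                 # assumption: len(cfg.aspect_ratios) == 3
num_anchors = sum(s * s * anchors_per_location for s in feature_map_sizes)
print(num_anchors)  # 19248; under the above assumption this equals predictions['anchors'].shape[0]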
if args.image is not None:
    images = glob.glob(args.image + '/*.jpg')
    num = len(images)

    for i, one_img in enumerate(images):
        img_name = one_img.split('/')[-1]
        img_origin = torch.from_numpy(cv2.imread(one_img)).float()
        if cuda:
            img_origin = img_origin.cuda()
        img_h, img_w = img_origin.shape[0], img_origin.shape[1]
        img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
        net_outs = net(img_trans)
        nms_outs = NMS(net_outs, args.traditional_nms)
        show_lincomb = bool(args.show_lincomb and args.image_path)

        with timer.env('after nms'):
            results = after_nms(nms_outs, img_h, img_w, show_lincomb=show_lincomb,
                                crop_masks=not args.no_crop, visual_thre=args.visual_thre,
                                img_name=img_name)
            if cuda:
                torch.cuda.synchronize()

        img_numpy = draw_img(results, img_origin, args)

        cv2.imwrite(f'results/images/{img_name}', img_numpy)
        print(f'\r{i + 1}/{num}', end='')
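This block expects args.image to be a directory of .jpg files and writes the annotated images to results/images/. The argument parser itself is not part of this snippet; a minimal sketch of a matching setup is shown below, where the default values are illustrative assumptions rather than the repository's actual defaults.

# Hypothetical argparse setup matching the flags referenced above.
import argparse

parser = argparse.ArgumentParser(description='YOLACT image detection')
parser.add_argument('--image', default=None, type=str, help='directory containing .jpg images to detect')
parser.add_argument('--image_path', default=None, type=str, help='also referenced above when deciding show_lincomb')
parser.add_argument('--traditional_nms', action='store_true', help='use traditional NMS instead of fast NMS')
parser.add_argument('--show_lincomb', action='store_true', help='visualize the linear combination of prototypes')
parser.add_argument('--no_crop', action='store_true', help='do not crop masks with the predicted boxes')
parser.add_argument('--visual_thre', default=0.3, type=float, help='score threshold for visualization')
args = parser.parse_args()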
def evaluate(net, dataset, max_num=-1, during_training=False, cocoapi=False, traditional_nms=False):
    frame_times = MovingAverage()
    dataset_size = len(dataset) if max_num < 0 else min(max_num, len(dataset))
    dataset_indices = list(range(len(dataset)))
    dataset_indices = dataset_indices[:dataset_size]
    progress_bar = ProgressBar(40, dataset_size)

    # For each class and iou, stores tuples (score, isPositive)
    # Index ap_data[type][iouIdx][classIdx]
    ap_data = {'box': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds],
               'mask': [[APDataObject() for _ in cfg.dataset.class_names] for _ in iou_thresholds]}
    make_json = Make_json()

    for i, image_idx in enumerate(dataset_indices):
        timer.reset()

        with timer.env('Data loading'):
            img, gt, gt_masks, h, w, num_crowd = dataset.pull_item(image_idx)

            batch = img.unsqueeze(0)
            if cuda:
                batch = batch.cuda()

        with timer.env('Network forward'):
            net_outs = net(batch)
            nms_outs = NMS(net_outs, traditional_nms)
            prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd,
                         dataset.ids[image_idx], make_json, cocoapi)

        # The first couple of images take longer because we're constructing the graph.
        # Since that's technically initialization, don't include those in the FPS calculations.
        fps = 0
        if i > 1 and not during_training:
            frame_times.add(timer.total_time())
            fps = 1 / frame_times.get_avg()

        progress = (i + 1) / dataset_size * 100
        progress_bar.set_val(i + 1)
        print('\rProcessing: %s %d / %d (%.2f%%) %.2f fps ' % (
            repr(progress_bar), i + 1, dataset_size, progress, fps), end='')
    else:
        if cocoapi:
            make_json.dump()
            print('\nJson files dumped, saved in: \'results/\', start evaluating.')

            gt_annotations = COCO(cfg.dataset.valid_info)
            bbox_dets = gt_annotations.loadRes('results/bbox_detections.json')
            mask_dets = gt_annotations.loadRes('results/mask_detections.json')

            print('\nEvaluating BBoxes:')
            bbox_eval = COCOeval(gt_annotations, bbox_dets, 'bbox')
            bbox_eval.evaluate()
            bbox_eval.accumulate()
            bbox_eval.summarize()

            print('\nEvaluating Masks:')
            mask_eval = COCOeval(gt_annotations, mask_dets, 'segm')
            mask_eval.evaluate()
            mask_eval.accumulate()
            mask_eval.summarize()
            return

        table, box_row, mask_row = calc_map(ap_data)
        print(table)
        return table, box_row, mask_row
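iou_thresholds, which indexes ap_data above, is defined elsewhere in the module. Assuming it follows the standard COCO convention (which is what YOLACT-style evaluation normally uses), it would be:

# Assumed definition: the standard COCO IoU thresholds 0.50:0.05:0.95
iou_thresholds = [x / 100 for x in range(50, 100, 5)]  # [0.5, 0.55, ..., 0.95]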
def prep_metrics(ap_data, nms_outs, gt, gt_masks, h, w, num_crowd, image_id, make_json, cocoapi):
    """ Accumulate per-class (score, is_positive) pairs for this image into ap_data,
        or dump detections to json when cocoapi is True. """
    with timer.env('After NMS'):
        class_ids, classes, boxes, masks = after_nms(nms_outs, h, w)
        if class_ids.size(0) == 0:
            return

        class_ids = list(class_ids.cpu().numpy().astype(int))
        classes = list(classes.cpu().numpy().astype(float))
        masks = masks.view(-1, h * w).cuda() if cuda else masks.view(-1, h * w)
        boxes = boxes.cuda() if cuda else boxes

    if cocoapi:
        with timer.env('Output json'):
            boxes = boxes.cpu().numpy()
            masks = masks.view(-1, h, w).cpu().numpy()

            for i in range(masks.shape[0]):
                # Make sure that the bounding box actually makes sense and a mask was produced
                if (boxes[i, 3] - boxes[i, 1]) * (boxes[i, 2] - boxes[i, 0]) > 0:
                    make_json.add_bbox(image_id, class_ids[i], boxes[i, :], classes[i])
                    make_json.add_mask(image_id, class_ids[i], masks[i, :, :], classes[i])
        return

    with timer.env('Prepare gt'):
        gt_boxes = torch.Tensor(gt[:, :4])
        gt_boxes[:, [0, 2]] *= w
        gt_boxes[:, [1, 3]] *= h
        gt_classes = list(gt[:, 4].astype(int))
        gt_masks = torch.Tensor(gt_masks).view(-1, h * w)

        if num_crowd > 0:
            split = lambda x: (x[-num_crowd:], x[:-num_crowd])
            crowd_boxes, gt_boxes = split(gt_boxes)
            crowd_masks, gt_masks = split(gt_masks)
            crowd_classes, gt_classes = split(gt_classes)

    with timer.env('Eval Setup'):
        num_pred = len(class_ids)
        num_gt = len(gt_classes)

        mask_iou_cache = mask_iou(masks, gt_masks)
        bbox_iou_cache = bbox_iou(boxes.float(), gt_boxes.float())

        if num_crowd > 0:
            crowd_mask_iou_cache = mask_iou(masks, crowd_masks, iscrowd=True)
            crowd_bbox_iou_cache = bbox_iou(boxes.float(), crowd_boxes.float(), iscrowd=True)
        else:
            crowd_mask_iou_cache = None
            crowd_bbox_iou_cache = None

        iou_types = [('box', lambda i, j: bbox_iou_cache[i, j].item(),
                      lambda i, j: crowd_bbox_iou_cache[i, j].item()),
                     ('mask', lambda i, j: mask_iou_cache[i, j].item(),
                      lambda i, j: crowd_mask_iou_cache[i, j].item())]

    timer.start('Main loop')
    for _class in set(class_ids + gt_classes):
        num_gt_for_class = sum([1 for x in gt_classes if x == _class])

        for iouIdx in range(len(iou_thresholds)):
            iou_threshold = iou_thresholds[iouIdx]

            for iou_type, iou_func, crowd_func in iou_types:
                gt_used = [False] * len(gt_classes)
                ap_obj = ap_data[iou_type][iouIdx][_class]
                ap_obj.add_gt_positives(num_gt_for_class)

                for i in range(num_pred):
                    if class_ids[i] != _class:
                        continue

                    max_iou_found = iou_threshold
                    max_match_idx = -1
                    for j in range(num_gt):
                        if gt_used[j] or gt_classes[j] != _class:
                            continue

                        iou = iou_func(i, j)
                        if iou > max_iou_found:
                            max_iou_found = iou
                            max_match_idx = j

                    if max_match_idx >= 0:
                        gt_used[max_match_idx] = True
                        ap_obj.push(classes[i], True)
                    else:
                        # If the detection matches a crowd, we can just ignore it
                        matched_crowd = False

                        if num_crowd > 0:
                            for j in range(len(crowd_classes)):
                                if crowd_classes[j] != _class:
                                    continue

                                iou = crowd_func(i, j)
                                if iou > iou_threshold:
                                    matched_crowd = True
                                    break

                        # All this crowd code so that we can make sure that our eval code gives the
                        # same result as COCOEval. There aren't even that many crowd annotations to
                        # begin with, but accuracy is of the utmost importance.
                        if not matched_crowd:
                            ap_obj.push(classes[i], False)
    timer.stop('Main loop')
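APDataObject is only used here through add_gt_positives and push. The repository's actual class is not shown in this snippet; a hypothetical minimal accumulator with the same interface could look like this:

# Hypothetical sketch of the accumulator interface used above (not the repository's actual class).
class APDataObject:
    """Collects (confidence, is_true_positive) pairs for one class at one IoU threshold."""

    def __init__(self):
        self.data_points = []      # list of (score, is_positive) tuples
        self.num_gt_positives = 0  # total number of ground-truth instances of this class

    def add_gt_positives(self, num_positives):
        self.num_gt_positives += num_positives

    def push(self, score, is_true):
        self.data_points.append((score, is_true))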
def person_segmetation(img_path):
    with torch.no_grad():
        cuda = torch.cuda.is_available()
        if cuda:
            cudnn.benchmark = True
            cudnn.fastest = True
            torch.set_default_tensor_type('torch.cuda.FloatTensor')
        else:
            torch.set_default_tensor_type('torch.FloatTensor')

        net = Yolact()
        net.load_weights(model_weight_path, cuda)
        net.eval()
        print('Model loaded.\n')

        if cuda:
            net = net.cuda()

        img_name = img_path.split('/')[-1]
        img_origin = torch.from_numpy(cv2.imread(img_path)).float()
        # img_origin_dict[img_name] = img_origin.numpy()
        if cuda:
            img_origin = img_origin.cuda()
        img_h, img_w = img_origin.shape[0], img_origin.shape[1]
        img_trans = FastBaseTransform()(img_origin.unsqueeze(0))
        net_outs = net(img_trans)
        nms_outs = NMS(net_outs, traditional_nms)

        with timer.env('after nms'):
            results = after_nms(nms_outs, img_h, img_w, show_lincomb=show_lincomb,
                                crop_masks=not no_crop, visual_thre=visual_thre,
                                img_name=img_name)
            # masks are binary (0/1) maps
            class_ids, classes, boxes, masks = results
            # convert everything to numpy first
            class_ids, classes, boxes, masks = class_ids.numpy(), classes.numpy(), boxes.numpy(), masks.numpy()

            # keep only the 'person' detections (class id 0)
            person_ids = np.squeeze(np.argwhere(class_ids == 0), axis=1)
            class_ids = class_ids[person_ids]
            classes = classes[person_ids]
            boxes = boxes[person_ids]
            masks = masks[person_ids]

            # keep the single person with the highest score
            if np.size(class_ids) != 0:
                max_score_person_id = np.argmax(classes).reshape(1, )
                class_ids = class_ids[max_score_person_id]
                classes = classes[max_score_person_id]
                boxes = boxes[max_score_person_id]
                masks = masks[max_score_person_id]
                # img_mask_dict[img_name] = masks[0]
                # img_bbox_dict[img_name] = boxes[0]

            results = (torch.from_numpy(class_ids), torch.from_numpy(classes),
                       torch.from_numpy(boxes), torch.from_numpy(masks))

            if cuda:
                torch.cuda.synchronize()

        img_numpy = draw_img(results, img_origin, visual_thre=visual_thre,
                             hide_mask=False, class_color=False,
                             hide_bbox=False, hide_score=False)

        # note: if no person was detected, masks is empty and masks[0] raises an IndexError
        return img_numpy, masks[0]
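A minimal usage sketch, assuming the module-level globals referenced above (model_weight_path, traditional_nms, show_lincomb, no_crop, visual_thre) are already defined; the image path below is made up:

# Hypothetical call: segment the highest-scoring person in one image
vis_img, person_mask = person_segmetation('samples/person.jpg')
cv2.imwrite('results/person_seg.jpg', vis_img)
print(person_mask.shape)  # (img_h, img_w), a binary 0/1 mask of the selected person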