def Predict(self, im_in, area):
    # initialize the tensor holders here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if self.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variables
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if self.cuda > 0:
        cfg.CUDA = True
    if self.cuda > 0:
        self.fasterRCNN.cuda()

    self.fasterRCNN.eval()

    # im_in = cv2.imread(im_file)
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im_in = im_in[:, :, ::-1]
    im = cv2.cvtColor(im_in, cv2.COLOR_BGR2RGB)

    blobs, im_scales = self._get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if self.class_agnostic:
                if self.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        _ = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
        pred_boxes = _.cuda() if self.cuda > 0 else _

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    ItemAndBoxes_all = []
    im2show = np.copy(im)
    for j in xrange(1, len(self.pascal_classes)):
        inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if self.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            im2show, ItemAndBoxes = vis_detections(im2show,
                                                   self.pascal_classes[j],
                                                   cls_dets.cpu().numpy(),
                                                   self.visThresh)
            ItemAndBoxes_all.append(ItemAndBoxes)

    # Keep the three highest-scoring detections, then order them left to right.
    ItemAndBoxes_all = sorted(ItemAndBoxes_all, key=lambda x: x[2], reverse=True)
    ItemAndBoxes_all = ItemAndBoxes_all[0:3]
    ItemAndBoxes_all = sorted(ItemAndBoxes_all, key=lambda x: x[1][0])

    if self.vis == 1:
        cv2.namedWindow("result", 0)
        cv2.resizeWindow("result", 1080, 720)
        cv2.imshow('result', im2show)
        cv2.waitKey(0)

    result_path = os.path.join(self.image_dir, str(area) + ".jpg")
    cv2.imwrite(result_path, im2show)
    return {
        "Left": ItemAndBoxes_all[0][0],
        "Mid": ItemAndBoxes_all[1][0],
        "Right": ItemAndBoxes_all[2][0]
    }
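# Note: Predict() reports the top three detections as Left/Mid/Right by first
# ranking entries by score and then re-sorting the survivors by the x1 of each
# box. A minimal sketch of just that ordering step, with hypothetical
# (label, box, score) entries shaped like the vis_detections output above:
items = [("cat", (300, 10, 380, 90), 0.91),
         ("dog", (40, 12, 120, 95), 0.88),
         ("cup", (160, 8, 230, 85), 0.75),
         ("hat", (500, 20, 560, 70), 0.30)]
items = sorted(items, key=lambda x: x[2], reverse=True)[:3]  # keep top-3 by score
items = sorted(items, key=lambda x: x[1][0])                 # order left-to-right by x1
print({"Left": items[0][0], "Mid": items[1][0], "Right": items[2][0]})
# -> {'Left': 'dog', 'Mid': 'cup', 'Right': 'cat'}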
def detect(self, im_in):
    if len(im_in.shape) == 2:  # if gray == 1 ch
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)

    # im = im_in[:,:,::-1]
    blobs, im_scales = self._get_image_blob(im_in)  # Image in as BGR order
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
    self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
    self.gt_boxes.resize_(1, 1, 5).zero_()
    self.num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = self.fasterRCNN(self.im_data, self.im_info,
                                 self.gt_boxes, self.num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if self.cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if self.cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if self.args.class_agnostic:
                if self.args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(self.cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    ret_bbox_score_class = []  # bbox(4), score(1), class_name(1)
    for j in range(1, len(self.classes)):
        if self.classes[j] in self.display_classes.keys():
            inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if self.args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], self.cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                for k in range(cls_dets.shape[0]):
                    # tensor to numpy
                    ret_bbox_score_class.append(
                        [tuple(int(np.round(x.cpu())) for x in cls_dets[k, :4]),
                         cls_dets[k, 4].item(),
                         self.classes[j]])
    return ret_bbox_score_class
def test(dataset="kaggle_pna", test_ds="test", arch="couplenet", net="res152", load_dir="save", output_dir="output", cuda=True, large_scale=False, class_agnostic=False, checksession=1, checkepoch=1, checkpoint=10021, batch_size=1, vis=False, anchor_scales=4, min_conf=.5, **kwargs): print("Test Arguments: {}".format(locals())) # Import network definition if arch == 'rcnn': from model.faster_rcnn.vgg16 import vgg16 from model.faster_rcnn.resnet import resnet elif arch == 'rfcn': from model.rfcn.resnet_atrous import resnet elif arch == 'couplenet': from model.couplenet.resnet_atrous import resnet from roi_data_layer.pnaRoiBatchLoader import roibatchLoader from roi_data_layer.pna_roidb import combined_roidb image_read_func = lambda path: pydicom.dcmread(path).pixel_array print('Called with kwargs:') print(kwargs) # Warning to use cuda if available if torch.cuda.is_available() and not cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) # Anchor settings: ANCHOR_SCALES: [8, 16, 32] or [4, 8, 16, 32] if anchor_scales == 3: scales = [8, 16, 32] elif anchor_scales == 4: scales = [4, 8, 16, 32] else: scales = [8, 16, 32] # Dataset related settings: MAX_NUM_GT_BOXES: 20, 30, 50 np.random.seed(cfg.RNG_SEED) if test_ds == "val": imdbval_name = "pna_2018_val" elif test_ds == "test": imdbval_name = "pna_2018_test" set_cfgs = ['ANCHOR_SCALES', str(scales), 'ANCHOR_RATIOS', '[0.5,1,2]'] cfg_file = "cfgs/{}_ls.yml".format( net) if large_scale else "cfgs/{}.yml".format(net) import model model_repo_path = os.path.dirname( os.path.dirname(os.path.dirname(model.__file__))) if cfg_file is not None: cfg_from_file(os.path.join(model_repo_path, cfg_file)) if set_cfgs is not None: cfg_from_list(set_cfgs) test_kwargs = kwargs.pop("TEST", None) resnet_kwargs = kwargs.pop("RESNET", None) mobilenet_kwargs = kwargs.pop("MOBILENET", None) if test_kwargs is not None: for key, value in test_kwargs.items(): cfg["TEST"][key] = value if resnet_kwargs is not None: for key, value in resnet_kwargs.items(): cfg["RESNET"][key] = value if mobilenet_kwargs is not None: for key, value in mobilenet_kwargs.items(): cfg["MOBILENET"][key] = value if kwargs is not None: for key, value in kwargs.items(): cfg[key] = value print('Using config:') cfg.MODEL_DIR = os.path.abspath(cfg.MODEL_DIR) cfg.SUBMISSION_DIR = os.path.abspath(cfg.SUBMISSION_DIR) cfg.TEST_DATA_CLEAN_PATH = os.path.abspath(cfg.TEST_DATA_CLEAN_PATH) pprint.pprint(cfg) # create output directory # output_dir = os.path.join(output_dir, arch, net, dataset) output_dir = cfg.SUBMISSION_DIR if not os.path.exists(output_dir): os.makedirs(output_dir) cfg.TRAIN.USE_FLIPPED = False imdb, roidb, ratio_list, ratio_index = combined_roidb(imdbval_name, False) imdb.competition_mode(on=True) imdb.sub_mode = True print('{:d} roidb entries'.format(len(roidb))) # Trained network weights path # input_dir = load_dir + "/" + arch + "/" + net + "/" + dataset input_dir = cfg.MODEL_DIR if not os.path.exists(input_dir): raise Exception( 'There is no input directory for loading network from ' + input_dir) load_name = os.path.join( input_dir, '{}_{}_{}_{}.pth'.format(arch, checksession, checkepoch, checkpoint)) # Initialize the network: if net == 'vgg16': # model = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic) print("Pretrained model is not downloaded and network is not used") elif net == 'res18': model = resnet(imdb.classes, 18, pretrained=False, class_agnostic=class_agnostic) elif net == 'res34': model = resnet(imdb.classes, 34, 
pretrained=False, class_agnostic=class_agnostic) elif net == 'res50': model = resnet(imdb.classes, 50, pretrained=False, class_agnostic=class_agnostic) elif net == 'res101': model = resnet(imdb.classes, 101, pretrained=True, class_agnostic=class_agnostic) elif net == 'res152': model = resnet(imdb.classes, 152, pretrained=True, class_agnostic=class_agnostic) else: print("network is not defined") pdb.set_trace() # Create network architecture model.create_architecture() # Load pre-trained network weights print("load checkpoint %s" % (load_name)) checkpoint = torch.load(load_name) model.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print('load model successfully!') # Initialize the tensor holder im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # Copy tensors in CUDA memory if cuda: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # Make variable im_data = Variable(im_data, volatile=True) im_info = Variable(im_info, volatile=True) num_boxes = Variable(num_boxes, volatile=True) gt_boxes = Variable(gt_boxes, volatile=True) # Set cuda usage if cuda: cfg.CUDA = True # Copy network to CUDA memroy if cuda: model.cuda() # Start test or evaluation start = time.time() max_per_image = 100 # Visualize output bounding boxes if vis: thresh = 0.05 else: thresh = 0.0 save_name = arch + '_' + net num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] img_dataset = roibatchLoader(roidb, ratio_list, ratio_index, batch_size, imdb.num_classes, training=False, normalize=False) dataloader = torch.utils.data.DataLoader( img_dataset, batch_size=batch_size, shuffle=False, num_workers=0, # args.num_workers pin_memory=True) data_iter = iter(dataloader) _t = {'im_detect': time.time(), 'misc': time.time()} det_file = os.path.join(output_dir, 'detections.pkl') # Turn on model evaluation mode, i.e. 
train=False model.eval() empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0)) img_dataset.resize_batch() for i in range(num_images): data = next(data_iter) im_data.data.resize_(data[0].size()).copy_(data[0]) im_info.data.resize_(data[1].size()).copy_(data[1]) gt_boxes.data.resize_(data[2].size()).copy_(data[2]) num_boxes.data.resize_(data[3].size()).copy_(data[3]) det_tic = time.time() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = model(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(batch_size, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(batch_size, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= data[1][0][2] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = image_read_func(imdb.image_path_at(i)) im2show = np.copy(im) for j in xrange(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel( ) > 0: # tensor.numel() -> returns number of elements in tensor cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if class_agnostic: # Find any object cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() if vis: cv2.imwrite('result.png', im2show) pdb.set_trace() # cv2.imshow('test', im2show) # cv2.waitKey(0) with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('Evaluating detections') imdb.evaluate_detections(all_boxes, output_dir) print('Kaggle submission file') if dataset == 'kaggle_pna': cipher = {'rcnn': 'alpha', 'rfcn': 'beta', 'couplenet': 'gamma'} created = datetime.now().strftime("%Y%m%d%H%M") sub_file = cipher[arch] + '_' + created + 
'_cls-{}_submission.txt' print('Submit file that ends with "_cls-3_submission.txt" file.') submission_file = os.path.join(output_dir, sub_file) imdb.write_kaggle_submission_file(all_boxes, submission_file, min_conf=min_conf) end = time.time() print("Deleting irrelevant files...") delete_irrelevant_files(cfg.SUBMISSION_DIR) print("test time: %0.4fs" % (end - start))
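# Note: the max_per_image cap in test() keeps only the highest-scoring
# detections per image across all classes by thresholding at the
# max_per_image-th largest score. A quick NumPy check of that step on
# hypothetical per-class score arrays:
import numpy as np

class_scores = [np.array([0.9, 0.2]), np.array([0.8, 0.6, 0.1])]
max_per_image = 3
image_scores = np.hstack(class_scores)
if len(image_scores) > max_per_image:
    image_thresh = np.sort(image_scores)[-max_per_image]       # 3rd-largest score: 0.6
    class_scores = [s[s >= image_thresh] for s in class_scores]
print(class_scores)  # [array([0.9]), array([0.8, 0.6])]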
def forward(self, im_data, im_info, gt_boxes, num_boxes):
    batch_size = im_data.size(0)

    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
    RCNN_loss_cls, RCNN_loss_bbox, rois_label \
        = self.FRCN(im_data, im_info, gt_boxes, num_boxes)

    # get global and local region from Faster R-CNN
    base_feat = self.FRCN.RCNN_base(im_data)
    # print(rois.data.cpu().numpy())

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    box_deltas = self.FRCN._bbox_pred.data

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        if self.class_agnostic:
            if self.use_cuda > 0:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
            box_deltas = box_deltas.view(1, -1, 4)
        else:
            if self.use_cuda > 0:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
            box_deltas = box_deltas.view(1, -1, 4 * len(self.classes))

    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    # get global region
    thresh = 0.01
    region_g = np.ndarray((0, 5))
    region_l = np.ndarray((0, 5))

    for j in range(1, 4):
        inds = torch.nonzero(scores[:, j] >= thresh).view(-1)
        inds_l = torch.nonzero(scores[:, j + 3] >= thresh).view(-1)
        # print(inds)
        if inds.numel() > 0 and inds_l.numel() > 0:
            cls_scores = scores[:, j][inds]
            cls_scores_l = scores[:, j + 3][inds_l]
            # print(cls_scores)
            # print(cls_scores_l)
            _, order = torch.sort(cls_scores, 0, True)
            _, order_l = torch.sort(cls_scores_l, 0, True)
            if self.class_agnostic:
                cls_boxes = pred_boxes[inds]
                cls_boxes_l = pred_boxes[inds_l]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_boxes_l = pred_boxes[inds_l][:, (j + 3) * 4:(j + 4) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets_l = torch.cat((cls_boxes_l, cls_scores_l.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            cls_dets_l = cls_dets_l[order]

            region_g = np.vstack((region_g, cls_dets))
            region_l = np.vstack((region_l, cls_dets_l))
            """
            keep = nms(cls_dets, 0.9, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            keep = nms(cls_dets_l, 0.9, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets_l = cls_dets_l[keep.view(-1).long()]
            cls_dets = cls_dets[order]
            cls_dets_l = cls_dets_l[order_l]
            sort_ind = np.argsort(cls_dets[..., -1])
            high_ind = sort_ind[-self.minibatch//2:]
            low_ind = sort_ind[:self.minibatch//2]
            region_g = np.vstack((region_g, cls_dets[high_ind]))
            region_g = np.vstack((region_g, cls_dets[low_ind]))
            sort_ind = np.argsort(cls_dets_l[..., -1])
            high_ind = sort_ind[-self.minibatch//2:]
            low_ind = sort_ind[:self.minibatch//2]
            region_l = np.vstack((region_l, cls_dets_l[high_ind]))
            region_l = np.vstack((region_l, cls_dets_l[low_ind]))
            """
            # region_g = np.vstack((region_g, cls_dets[np.argmax(cls_dets[..., -1])]))
            # region_l = np.vstack((region_l, cls_dets_l[np.argmax(cls_dets_l[..., -1])]))

    if not self.training:
        self.minibatch = 1

    if self.training:
        keep = nms(torch.tensor(region_g).cuda(), 0.9,
                   force_cpu=not cfg.USE_GPU_NMS)
        if type(keep) is not list:
            keep = keep.view(-1).long()
        region_g = region_g[keep]
        sort_ind = np.argsort(region_g[..., -1])
        high_ind_g = sort_ind[-self.minibatch // 2:]
        low_ind_g = sort_ind[:self.minibatch // 2]

        keep = nms(torch.tensor(region_l).cuda(), 0.9,
                   force_cpu=not cfg.USE_GPU_NMS)
        if type(keep) is not list:
            keep = keep.view(-1).long()
        region_l = region_l[keep]
        sort_ind = np.argsort(region_l[..., -1])
        high_ind_l = sort_ind[-self.minibatch // 2:]
        low_ind_l = sort_ind[:self.minibatch // 2]

        high_num = min(len(high_ind_g), len(high_ind_l))
        high_ind_g = high_ind_g[:high_num]
        high_ind_l = high_ind_l[:high_num]
        low_num = min(len(low_ind_g), len(low_ind_l))
        low_ind_g = low_ind_g[:low_num]
        low_ind_l = low_ind_l[:low_num]

        proposal_g = np.vstack((region_g[high_ind_g], region_g[low_ind_g]))
        proposal_l = np.vstack((region_l[high_ind_l], region_l[low_ind_l]))
        # self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g)
        # self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l)

        gt_boxes = gt_boxes.cpu().numpy()[0, :2]
        gt_g = gt_boxes[np.where(gt_boxes[..., -1] < 4)[0]]
        gt_l = gt_boxes[np.where(gt_boxes[..., -1] >= 4)[0]]

        # compute pair ground truth
        def compute_iou(ps, gt, th=0.5):
            iou_x1 = np.maximum(ps[..., 0], gt[0])
            iou_y1 = np.maximum(ps[..., 1], gt[1])
            iou_x2 = np.minimum(ps[..., 2], gt[2])
            iou_y2 = np.minimum(ps[..., 3], gt[3])
            iou_w = np.maximum(iou_x2 - iou_x1, 0)
            iou_h = np.maximum(iou_y2 - iou_y1, 0)
            iou_area = iou_w * iou_h
            gt_area = (gt[2] - gt[0]) * (gt[3] - gt[1])
            p_area = (ps[..., 2] - ps[..., 0]) * (ps[..., 3] - ps[..., 1])
            overlap = iou_area / (gt_area + p_area - iou_area)
            count = np.zeros((ps.shape[0]), dtype=int)
            count[overlap >= self.gt_iou] += 1
            return count

        cou = compute_iou(proposal_g, gt_g[0]) + compute_iou(proposal_l, gt_l[0])
        ## 2019.2.13
        # glcc_gt = np.zeros((proposal_g.shape[0]), dtype=int)
        # glcc_gt[cou == 2] = gt_g[0, -1]
        glcc_gt = np.array([gt_g[0, -1]], dtype=int)
        glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda()
        self.glcc_gt.data.resize_(glcc_gt.size()).copy_(glcc_gt)
    else:
        # test phase
        proposal_g = region_g[np.argmax(region_g[..., -1])][None, ...]
        proposal_l = region_l[np.argmax(region_l[..., -1])][None, ...]
        # self.proposal_g.data.resize_(proposal_g.size()).copy_(proposal_g)
        # self.proposal_l.data.resize_(proposal_l.size()).copy_(proposal_l)

    # if true, then show detected global and local regions
    if False:
        gt_boxes = gt_boxes.astype(np.int)
        im = im_data.cpu().numpy()[0]
        im = np.transpose(im, (1, 2, 0))[..., ::-1]
        im -= im.min()
        im /= im.max()
        plt.imshow(im.astype(np.float))
        ax = plt.axes()
        ax.add_patch(plt.Rectangle((region_g[0, 0], region_g[0, 1]),
                                   region_g[0, 2] - region_g[0, 0],
                                   region_g[0, 3] - region_g[0, 1],
                                   fill=False, edgecolor='red', linewidth=1))
        ax.add_patch(plt.Rectangle((region_l[0, 0], region_l[0, 1]),
                                   region_l[0, 2] - region_l[0, 0],
                                   region_l[0, 3] - region_l[0, 1],
                                   fill=False, edgecolor='yellow', linewidth=1))
        ax.add_patch(plt.Rectangle((gt_boxes[0, 0], gt_boxes[0, 1]),
                                   gt_boxes[0, 2] - gt_boxes[0, 0],
                                   gt_boxes[0, 3] - gt_boxes[0, 1],
                                   fill=False, edgecolor='green', linewidth=1))
        ax.add_patch(plt.Rectangle((gt_boxes[1, 0], gt_boxes[1, 1]),
                                   gt_boxes[1, 2] - gt_boxes[1, 0],
                                   gt_boxes[1, 3] - gt_boxes[1, 1],
                                   fill=False, edgecolor='white', linewidth=1))
        plt.show()

    rois_g = np.zeros((1, proposal_g.shape[0], 5), dtype=np.float32)
    rois_g[0, :, 1:5] = proposal_g[:, :4]
    # rois_g /= 16.
    rois_l = np.zeros((1, proposal_l.shape[0], 5), dtype=np.float32)
    rois_l[0, :, 1:5] = proposal_l[:, :4]
    # rois_l /= 16.
    rois_g = torch.tensor(rois_g, dtype=torch.float).cuda()
    rois_l = torch.tensor(rois_l, dtype=torch.float).cuda()
    self.rois_g.data.resize_(rois_g.size()).copy_(rois_g)
    self.rois_l.data.resize_(rois_l.size()).copy_(rois_l)

    # global region
    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(self.rois_g.view(-1, 5),
                                   base_feat.size()[2:], self.FRCN.grid_size)
        grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                              3).contiguous()
        pooled_feat_g = self.FRCN.RCNN_roi_crop(base_feat,
                                                Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat_g = F.max_pool2d(pooled_feat_g, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat_g = self.FRCN.RCNN_roi_align(base_feat, self.rois_g.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat_g = self.FRCN.RCNN_roi_pool(base_feat, self.rois_g.view(-1, 5))

    # local region
    if cfg.POOLING_MODE == 'crop':
        grid_xy = _affine_grid_gen(self.rois_l.view(-1, 5),
                                   base_feat.size()[2:], self.FRCN.grid_size)
        grid_yx = torch.stack([grid_xy.data[..., 1], grid_xy.data[..., 0]],
                              3).contiguous()
        pooled_feat_l = self.FRCN.RCNN_roi_crop(base_feat,
                                                Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat_l = F.max_pool2d(pooled_feat_l, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat_l = self.FRCN.RCNN_roi_align(base_feat, self.rois_l.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat_l = self.FRCN.RCNN_roi_pool(base_feat, self.rois_l.view(-1, 5))

    # print(pooled_feat_g.cpu().detach().numpy().shape)
    x = torch.cat((pooled_feat_g, pooled_feat_l), dim=1)
    # print(x.cpu().detach().numpy().shape)
    x = self.glcc_conv1(x)
    x = F.relu(x)
    x = x.view(-1, self.roipool * self.roipool * 512)
    x = self.glcc_fc1(x)
    x = F.relu(x)
    x = nn.Dropout()(x)
    x = self.glcc_fc2(x)
    x = F.relu(x)
    x = nn.Dropout()(x)
    glcc_out = self.glcc_fc_out(x)

    if self.training:
        glcc_gt = torch.tensor(glcc_gt, dtype=torch.long).cuda()
        glcc_loss = F.cross_entropy(glcc_out, self.glcc_gt)
    else:
        glcc_loss = 0.

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
        RCNN_loss_cls, RCNN_loss_bbox, rois_label, glcc_out, glcc_loss
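# Note: compute_iou above is the standard intersection-over-union,
# overlap = intersection / (gt_area + proposal_area - intersection).
# A hand check of that formula on one hypothetical box pair:
import numpy as np

ps = np.array([0., 0., 10., 10.])   # hypothetical proposal (x1, y1, x2, y2)
gt = np.array([5., 5., 15., 15.])   # hypothetical ground-truth box
inter_w = max(min(ps[2], gt[2]) - max(ps[0], gt[0]), 0)  # 5
inter_h = max(min(ps[3], gt[3]) - max(ps[1], gt[1]), 0)  # 5
inter = inter_w * inter_h                                # 25
union = 100.0 + 100.0 - inter                            # 175
print(inter / union)  # ~0.143: below a 0.5 threshold, so this pair is not counted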
def test_net(fasterRCNN, image, img_blob, img_scales, items, labels, i):
    im_data, im_info, num_boxes, gt_boxes = items

    im_info_np = np.array(
        [[img_blob.shape[1], img_blob.shape[2], img_scales[0]]],
        dtype=np.float32)

    im_data_pt = torch.from_numpy(img_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if config.TEST_BBOX_REG:
        box_deltas = bbox_pred.data
        if config.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if config.cuda:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_MEANS)
            box_deltas = box_deltas.view(1, -1, 4 * len(labels))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= img_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    copy_img = np.copy(image[:, :, ::-1])
    bubbles = []
    for j in range(1, len(labels)):
        inds = torch.nonzero(scores[:, j] > config.THRESH).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], config.TEST_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            copy_img, vis_img, bubbles, boxes = sbd_utils.divideBubbleFromImage(
                copy_img, image[:, :, ::-1], labels[j], cls_dets.cpu().numpy(),
                config.CLASS_THRESH, bg=config.BACKGROUND)

    copy_img, vis_img, cuts = sbd_utils.divideCutFromImage(
        copy_img, image[:, :, ::-1], i, bg=config.BACKGROUND)
    alpha_image = sbd_utils.addImageToAlphaChannel(copy_img, copy_img,
                                                   FLAG='conversion')
    vis_img, texts = text.detection(vis_img, bubbles, boxes)

    return alpha_image, vis_img, cuts, bubbles, texts
def test_epoch(fasterRCNN, val_load, epoch):
    fasterRCNN.eval()
    tps = list()
    fps = list()
    fns = list()
    for idx, blob in enumerate(val_load):
        print('\r{}/{}'.format(idx, len(val_load)), end='')
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(*blob())

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    # box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
                    box_deltas = box_deltas.view(1, -1, 4 * 2)

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, blob.im_sizes.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        inds = torch.nonzero(scores[:, 1] > cfg.obj_score_thres).view(-1)  # 0.5
        if inds.numel() > 0:
            cls_scores = scores[:, 1][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds, 4:]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], 0.1)
            cls_dets = cls_dets[keep.view(-1).long()]
            cls_dets = cls_dets.cpu()
            cls_box_dets = cls_dets[:, :-1]
            cls_box_scores = cls_dets[:, -1].numpy()
        else:
            print(' nothing was detected.')
            cls_dets = None
            cls_box_dets = None

        gts_box = blob.gt_boxes.squeeze()[:blob.num_boxes.item(), :-1].cpu()
        tp, fp, fn = confusion_matrix(cls_box_dets, gts_box)
        tps.append(tp)
        fps.append(fp)
        fns.append(fn)

        preds_path = os.path.join(args.save_dir, args.dataset, args.net,
                                  'preds_boxes', 'epoch_{}'.format(-1))
        if not os.path.exists(preds_path):
            os.makedirs(preds_path)
        if cls_dets is not None:
            with open(os.path.join(preds_path,
                                   '{}.txt'.format(blob.img_names[0])), 'w') as f:
                cls_dets = cls_dets.numpy() * blob.im_sizes[0, 2].item()
                cls_dets[:, -1] /= blob.im_sizes[0, 2].item()
                for i, e in enumerate(cls_dets):
                    # f.write(str(cls_box_scores[i]) + ' ' + ' '.join(map(str, e)) + '\n')
                    f.write(' '.join(map(str, e)) + '\n')

    P = sum(tps) / (sum(tps) + sum(fps) + 1e-6)
    R = sum(tps) / (sum(tps) + sum(fns) + 1e-6)
    printf('epoch: ', epoch)
    printf("precision:", P)
    printf('recall:', R)
    F1 = (2 * P * R) / (P + R + 1e-6)
    printf("F1:", F1)
    print("F1:", F1)
    printf('\n\n\n')
    return F1
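# Note: test_epoch() reduces the accumulated TP/FP/FN counts to
# P = TP / (TP + FP), R = TP / (TP + FN), F1 = 2PR / (P + R), with small
# epsilons guarding against division by zero. A check with hypothetical counts:
tps, fps, fns = [8, 6], [2, 1], [1, 3]
P = sum(tps) / (sum(tps) + sum(fps) + 1e-6)  # 14 / 17 ~= 0.824
R = sum(tps) / (sum(tps) + sum(fns) + 1e-6)  # 14 / 18 ~= 0.778
F1 = (2 * P * R) / (P + R + 1e-6)            # ~= 0.800
print(P, R, F1)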
def run_model(support_im_paths, query_path, cnt_shot, output_path_folder):
    # support
    # support_root_dir = 'datasets/supports'
    # class_dir = 'horse'
    # n_shot = 2
    # im_paths = list(Path(os.path.join(support_root_dir, class_dir)).glob('*.jpg'))
    CWD = os.getcwd()
    print(support_im_paths)
    n_shot = len(support_im_paths)
    random.seed(0)
    im_path_list = random.sample(support_im_paths, k=n_shot)
    im_list = []
    # fig = plt.figure(num=None, figsize=(8, 8), dpi=50, facecolor='w', edgecolor='k')
    for i, im_path in enumerate(im_path_list):
        im = Image.open(im_path)
        im_list.append(np.asarray(im))
    support_data = support_im_preprocess(im_list, cfg, 320, n_shot)

    # query
    im = np.asarray(Image.open(query_path))
    im2show = im.copy()
    query_data, im_info, gt_boxes, num_boxes = query_im_preprocess(im, cfg)

    # prepare data
    data = [query_data, im_info, gt_boxes, num_boxes, support_data]
    im_data, im_info, num_boxes, gt_boxes, support_ims = prepare_variable()
    with torch.no_grad():
        im_data.resize_(data[0].size()).copy_(data[0])
        im_info.resize_(data[1].size()).copy_(data[1])
        gt_boxes.resize_(data[2].size()).copy_(data[2])
        num_boxes.resize_(data[3].size()).copy_(data[3])
        support_ims.resize_(data[4].size()).copy_(data[4])

    # model
    cfg_from_list(
        ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'])
    model_dir = os.path.join(CWD, 'models')
    load_path = os.path.join(model_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(1, 11, 34467))
    model = get_model('multi', load_path, n_shot)

    start_time = time.time()
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = model(im_data, im_info, gt_boxes, num_boxes, support_ims, gt_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    box_deltas = bbox_pred.data
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4)
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    # re-scale boxes to the original image scale
    pred_boxes /= data[1][0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    thresh = 0.05
    inds = torch.nonzero(scores[:, 1] > thresh).view(-1)
    cls_scores = scores[:, 1][inds]
    _, order = torch.sort(cls_scores, 0, True)
    cls_boxes = pred_boxes[inds, :]
    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
    cls_dets = cls_dets[order]
    keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
    cls_dets = cls_dets[keep.view(-1).long()]

    # Suppress implausibly large boxes (wider or taller than half the image).
    for i in range(cls_dets.shape[0]):
        w = cls_dets[i, 2] - cls_dets[i, 0]
        h = cls_dets[i, 3] - cls_dets[i, 1]
        if w > 0.5 * im2show.shape[1] or h > 0.5 * im2show.shape[0]:
            cls_dets[i, 4] = 0
    end_time = time.time()

    im2show = vis_detections(im2show, ' ', cls_dets.cpu().numpy(), 0.5)
    output_path = os.path.join(output_path_folder,
                               'result' + str(cnt_shot) + '.jpg')
    cv2.imwrite(output_path, im2show[:, :, ::-1])
    print(cls_dets)
    print(end_time - start_time)
def forward(self, input):
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, :, 1]  # batch_size x num_rois x 1
    bbox_deltas = input[1]      # batch_size x num_rois x 4
    im_info = input[2]
    cfg_key = input[3]
    feat_shapes = input[4]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    anchors = torch.from_numpy(
        generate_anchors_all_pyramids(
            self._fpn_scales, self._anchor_ratios, feat_shapes,
            self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)
    num_anchors = anchors.size(0)

    anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)
    # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()

    scores_keep = scores
    proposals_keep = proposals

    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # # 3. remove predicted boxes with either height or width < threshold
        # # (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]

        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
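# Note: bbox_transform_inv decodes (dx, dy, dw, dh) deltas into boxes. A minimal
# single-box NumPy sketch of the standard Faster R-CNN decoding it applies in
# batch (assuming the usual py-faster-rcnn +1 width/height convention):
import numpy as np

def deltas_to_box(anchor, deltas):
    x1, y1, x2, y2 = anchor
    dx, dy, dw, dh = deltas
    w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
    cx, cy = x1 + 0.5 * w, y1 + 0.5 * h
    pcx, pcy = dx * w + cx, dy * h + cy        # shift the center
    pw, ph = np.exp(dw) * w, np.exp(dh) * h    # scale width and height
    return (pcx - 0.5 * pw, pcy - 0.5 * ph, pcx + 0.5 * pw, pcy + 0.5 * ph)

print(deltas_to_box((0, 0, 15, 15), (0.1, 0.0, np.log(2.0), 0.0)))
# center shifts right by 1.6 px, width doubles: (-6.4, 0.0, 25.6, 16.0)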
def test_net(model=None, image=None, params=None, bg=None, cls=None):
    blob, scale, label = params
    with torch.no_grad():
        # pre-processing data for passing net
        im_data = Variable(torch.FloatTensor(1))
        im_info = Variable(torch.FloatTensor(1))
        num_boxes = Variable(torch.LongTensor(1))
        gt_boxes = Variable(torch.FloatTensor(1))

        im_info_np = np.array([[blob.shape[1], blob.shape[2], scale[0]]],
                              dtype=np.float32)
        im_data_pt = torch.from_numpy(blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            # resize
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = model(im_data, im_info, gt_boxes, num_boxes)  # predict

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if opt.TEST_BBOX_REG:
            box_deltas = bbox_pred.data
            if opt.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                if opt.cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(label))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

        pred_boxes /= scale[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        image = np.copy(image[:, :, ::-1])
        demo = image.copy()
        bubbles = []
        dets_bubbles = []
        for j in range(1, len(label)):
            inds = torch.nonzero(scores[:, j] > opt.THRESH).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], opt.TEST_NMS)
                cls_dets = cls_dets[keep.view(-1).long()].cpu().numpy()
                # post-processing: get contours of speech bubbles
                demo, image, bubbles, dets_bubbles = bubble_utils.get_cnt_bubble(
                    image, image.copy(), label[j], cls_dets, cls, bg=bg)

    return demo, image, bubbles, dets_bubbles
scores = cls_prob.data
boxes = rois.data[:, :, 1:5]

if cfg.TEST.BBOX_REG:
    box_deltas = bbox_pred.data
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        if args.class_agnostic:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(batch_size, -1, 4)
        else:
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(batch_size, -1,
                                         4 * len(imagenet_vid_classes))

    pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
else:
    # Simply repeat the boxes, once for each class
    raise NotImplementedError

# Assume scales are the same for all frames in the same video
im_scale = im_info.data[0][-1]
pred_boxes /= im_scale
# pred_boxes = pred_boxes.squeeze()
# scores = scores.squeeze()

vid_pred_boxes.append(pred_boxes)
vid_scores.append(scores)

curr_frame_t0 = frames['frame_number'].squeeze()[0]
print("Processed frame : t={} / {}"
      .format(curr_frame_t0, video_dataset._n_frames - 1))
def detect(self, bbx):
    with torch.no_grad():
        vis = False
        thresh = 0.05
        im_data = torch.FloatTensor(1).to(self.device)
        im_info = torch.FloatTensor(1).to(self.device)
        num_boxes = torch.LongTensor(1).to(self.device)
        gt_boxes = torch.FloatTensor(1).to(self.device)

        # total_tic = time.time()
        x, y, w, h = [int(p) for p in bbx]
        x = max(x, 0)
        y = max(y, 0)
        im = self.img[y:(y + h), x:(x + w)]
        # print(' (x=%d, y=%d), %d * %d, (%d, %d) - cropsize: %d * %d'
        #       % (x, y, w, h, x + w, y + h, im.shape[1], im.shape[0]))
        w, h = im.shape[1], im.shape[0]
        refine_bbx = [0, 0, w, h]
        if w * h == 0:
            print('What? %d * %d' % (w, h))
            # raw_input('Continue?')
            return False

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.data.resize_(1, 1, 5).zero_()
        num_boxes.data.resize_(1).zero_()
        # pdb.set_trace()

        # det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).to(self.device) \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).to(self.device)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            _ = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
            pred_boxes = _.to(self.device)

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        # det_toc = time.time()
        # detect_time = det_toc - det_tic
        # misc_tic = time.time()

        if vis:
            im2show = np.copy(im)

        j = 15  # j = 15: 'person' in the Pascal VOC class list
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        step = 0
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            dets = cls_dets.cpu().numpy()
            for i in range(dets.shape[0]):
                if dets[i, -1] > cf:
                    x1, y1, w1, h1 = dets[i][:4]
                    det = [x1, y1, w1 - x1, h1 - y1]
                    ratio = self.a_train_set.IOU(det, refine_bbx)
                    if ratio[0] > iou:
                        # IOU between prediction and detection should not be limited
                        step += 1

            if vis:
                print(cls_dets)
                dets = cls_dets.cpu().numpy()
                # for i in range(dets.shape[0]):
                #     bbox = tuple(int(np.round(x)) for x in dets[i, :4])
                #     score = dets[i, -1]
                #     if score > thresh:
                #         crop = im[bbox[1]:bbox[3], bbox[0]:bbox[2]]
                #         cv2.imwrite('in_place/%02d.jpg' % step, crop)
                #         step += 1
                im2show = vis_detections(im2show, self.pascal_classes[j], dets)

        # misc_toc = time.time()
        # nms_time = misc_toc - misc_tic

        if vis:
            cv2.imshow('test', im2show)
            cv2.waitKey(0)
            # result_path = os.path.join('results', imglist[num_images][:-4] + "_det.jpg")
            # cv2.imwrite(result_path, im2show)

        if step:
            return True
        return False
def predict1():
    data = {"success": False}
    im_info1 = {}

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variables
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)

    if args.cuda > 0:
        cfg.CUDA = True
    if args.cuda > 0:
        fasterRCNN.cuda()

    fasterRCNN.eval()

    start = time.time()
    max_per_image = 100
    thresh = 0.05
    vis = True

    file_dir = os.path.join(basedir, 'upload/')
    print('file_dir', file_dir)
    webcam_num = args.webcam_num
    # Set up webcam or get image directories
    if webcam_num >= 0:
        cap = cv2.VideoCapture(webcam_num)
        num_images = 0
    else:
        imglist = os.listdir(file_dir)
        num_images = len(imglist)
    print('Loaded Photo: {} images.'.format(num_images))

    while (num_images >= 0):
        total_tic = time.time()
        if webcam_num == -1:
            num_images -= 1

        # Get image from the webcam
        if webcam_num >= 0:
            if not cap.isOpened():
                raise RuntimeError("Webcam could not open. Please check connection.")
            ret, frame = cap.read()
            im_in = np.array(frame)
        # Load the demo image
        else:
            im_file = os.path.join(file_dir, imglist[num_images])
            print("im_file", im_file)
            # im = cv2.imread(im_file)
            im_in = np.array(imread(im_file))

        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                              dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()
        # pdb.set_trace()

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        jindex = []
        info = {}
        info['predictions'] = list()
        filename = os.path.split(im_file)
        print("filename", filename[1])
        info['filename'] = filename[1]
        image1 = Image.open(im_file)
        print('image1.size', image1.size)
        info['width'] = image1.size[0]
        info['height'] = image1.size[1]

        if vis:
            im2show = np.copy(im)
        for j in range(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                print('j', j)
                jindex.append(j)
                if vis:
                    im2show = vis_detections(im2show, j, cls_dets.cpu().numpy(), 0.5)
                pred = vis_results(j, cls_dets.cpu().numpy(), 0.5)
                print('pred', pred)
                if pred != []:
                    info['predictions'].append(pred)

        # print("cls_dets.cpu().numpy()", cls_dets.cpu().numpy())
        # print('cls_dets', cls_dets)
        # box_re = cls_dets.cpu().numpy()
        # print('box_re', box_re)
        # # Loop over the results and add them to the list of returned predictions
        # info = {}
        # filename = os.path.split(im_file)
        # print("filename", filename[1])
        # info['filename'] = filename[1]
        # image1 = Image.open(im_file)
        # print('image1.size', image1.size)
        # info['width'] = image1.size[0]
        # info['height'] = image1.size[1]
        # info['predictions'] = list()
        # j = 0
        # for box in box_re:
        #     r = {"BoxList": [str(i) for i in np.rint(box[:4]).astype(int)]}
        #     r["BoxList"].append(jindex[j])
        #     j = j + 1
        #     info['predictions'].append(r)
        # # Indicate that the request was a success.
        # s = {}
        data["success"] = True
        # s = {im_file: info}
        im_info1[filename[1]] = info
        data['im_info'] = im_info1
        print(data)
        new_data = process(data)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r'
                             .format(num_images + 1, len(imglist),
                                     detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
            result_path = os.path.join(file_dir, imglist[num_images][:-4] + "_det.jpg")
            # cv2.imwrite(result_path, im2show)
        else:
            im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
            cv2.imshow("frame", im2showRGB)
            total_toc = time.time()
            total_time = total_toc - total_tic
            frame_rate = 1 / total_time
            print('Frame rate:', frame_rate)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    if webcam_num >= 0:
        cap.release()
        cv2.destroyAllWindows()

    return flask.jsonify(new_data)
def detect(self, dataset, foldername, filename, ch, vis, bbox_log):
    image_num = os.path.splitext(filename)[0]
    output_folder = 'output/' + dataset + "_ch" + str(ch)
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    total_tic = time.time()

    # im = cv2.imread(im_file)
    im_file = foldername + "/" + filename
    im_in = np.array(imread(im_file))
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        self.gt_boxes.resize_(1, 1, 5).zero_()
        self.num_boxes.resize_(1).zero_()
    # pdb.set_trace()

    det_tic = time.time()
    rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, rois_label = self.fasterRCNN(
        self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if self.myargs.class_agnostic:
                if self.myargs.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.myargs.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()

    if vis:
        im2show = np.copy(im)

    for j in xrange(1, len(self.pascal_classes)):
        inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if self.myargs.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            if bbox_log:
                bbox_list = cls_dets.cpu().numpy()
                for bb in bbox_list:
                    start_x = int(bb[0])
                    start_y = int(bb[1])
                    end_x = int(bb[2])
                    end_y = int(bb[3])
                    confidence = bb[4]
                    if confidence > 0.5:
                        fo.write(str(ch) + "," + image_num + "," +
                                 str(start_x) + "," + str(start_y) + "," +
                                 str(end_x) + "," + str(end_y) + "," +
                                 str(confidence) + "\n")

            if vis:
                im2show = vis_detections(im2show, self.pascal_classes[j],
                                         cls_dets.cpu().numpy(), 0.5)

    misc_toc = time.time()
    nms_time = misc_toc - misc_tic

    # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r'
    #                  .format(num_images + 1, len(imglist), detect_time, nms_time))
    # sys.stdout.flush()

    if vis:
        result_path = os.path.join(output_folder, str(image_num) + ".jpg")
        cv2.imwrite(result_path, im2show)
def forward(self, im_data, im_info, gt_boxes, gt_boxes_sens, num_boxes): batch_size = im_data[0].size(0) im_info = im_info.data gt_boxes = gt_boxes.data gt_boxes_sens = gt_boxes_sens.data num_boxes = num_boxes.data # feed image data to base model to obtain base feature map base_feat_c = self.RCNN_base_c(im_data[0]) base_feat_t = self.RCNN_base_t(im_data[1]) base_feat_fused = 0.5 * (base_feat_c + base_feat_t) base_feat_fused = self.RCNN_base_fused(base_feat_fused) conv5_c = self.RCNN_base_f1(base_feat_c) conv5_t = self.RCNN_base_f2(base_feat_t) # feed fused base feature map to RPN to obtain rois rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat_fused, im_info, gt_boxes, num_boxes) # if it is training phase, then use ground truth bboxes for refining if self.training: # 50% jitter probability if np.random.rand(1)[0]>0.5: jitter = (torch.randn(1,256,4)/20).cuda() else: jitter = (torch.zeros(1,256,4)).cuda() # feed jitter to obtain rois_align_target roi_data = self.RCNN_proposal_target(rois, gt_boxes, gt_boxes_sens, num_boxes, jitter, im_info) rois, rois_jittered, rois_label, rois_target, rois_align_target, rois_inside_ws, rois_outside_ws = roi_data rois_label = Variable(rois_label.view(-1).long()) rois_target = Variable(rois_target.view(-1, rois_target.size(2))) rois_align_target = Variable(rois_align_target.view(-1, rois_align_target.size(2))) rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2))) rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2))) else: rois_jittered = copy.deepcopy(rois) rois_label = None rois_target = None rois_align_target = None rois_inside_ws = None rois_outside_ws = None rpn_loss_cls = 0 rpn_loss_bbox = 0 # Region Feature Alignment module ctx_rois = bbox_contextual_batch(rois) clip_boxes(ctx_rois[:,:,1:], im_info, batch_size) ctx_rois = Variable(ctx_rois) ctx_rois_jittered = bbox_contextual_batch(rois_jittered) clip_boxes(ctx_rois_jittered[:,:,1:], im_info, batch_size) ctx_rois_jittered = Variable(ctx_rois_jittered) if cfg.POOLING_MODE == 'crop': grid_xy = _affine_grid_gen(ctx_rois.view(-1, 5), conv5_c.size()[2:], self.grid_size) grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous() pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach()) grid_xy = _affine_grid_gen(ctx_rois_jittered.view(-1, 5), conv5_t.size()[2:], self.grid_size) grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous() pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2) pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat_c = self.RCNN_roi_align(conv5_c, ctx_rois.view(-1, 5)) pooled_feat_t = self.RCNN_roi_align(conv5_t, ctx_rois_jittered.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat_c = self.RCNN_roi_pool(conv5_c, ctx_rois.view(-1,5)) pooled_feat_t = self.RCNN_roi_pool(conv5_t, ctx_rois_jittered.view(-1,5)) pooled_feat_res = pooled_feat_t - pooled_feat_c # feed pooled features to top model pooled_feat_res = self._head_to_tail_align(pooled_feat_res) bbox_align_pred = self.RCNN_bbox_align_pred(pooled_feat_res) RCNN_loss_bbox_align = 0 # Apply bounding-box regression deltas box_deltas = bbox_align_pred.data box_deltas_zeros = torch.zeros(box_deltas.shape).cuda() box_deltas = torch.cat((box_deltas, box_deltas_zeros), 1) # Optionally normalize targets by a precomputed mean and stdev # The roi alignment process is 
class_agnostic box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(batch_size, -1, 4) rois_sens = rois_jittered.new(rois_jittered.size()).zero_() rois_sens[:,:,1:5] = bbox_transform_inv(rois_jittered[:,:,1:5], box_deltas, batch_size) clip_boxes(rois_sens[:,:,1:5], im_info, batch_size) rois = Variable(rois) rois_sens = Variable(rois_sens) if cfg.POOLING_MODE == 'crop': grid_xy = _affine_grid_gen(rois.view(-1, 5), conv5_c.size()[2:], self.grid_size) grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous() pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach()) grid_xy = _affine_grid_gen(rois_sens.view(-1, 5), conv5_t.size()[2:], self.grid_size) grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous() pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach()) if cfg.CROP_RESIZE_WITH_MAX_POOL: pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2) pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2) elif cfg.POOLING_MODE == 'align': pooled_feat_c = self.RCNN_roi_align(conv5_c, rois.view(-1, 5)) pooled_feat_t = self.RCNN_roi_align(conv5_t, rois_sens.view(-1, 5)) elif cfg.POOLING_MODE == 'pool': pooled_feat_c = self.RCNN_roi_pool(conv5_c, rois.view(-1, 5)) pooled_feat_t = self.RCNN_roi_pool(conv5_t, rois_sens.view(-1, 5)) cls_score_ref = self.confidence_ref(self.RCNN_top_ref(pooled_feat_c.view(pooled_feat_c.size(0), -1))) cls_score_sens = self.confidence_sens(self.RCNN_top_sens(pooled_feat_t.view(pooled_feat_t.size(0), -1))) cls_prob_ref = F.softmax(cls_score_ref, 1) cls_prob_sens = F.softmax(cls_score_sens, 1) confidence_ref = torch.abs(cls_prob_ref[:,1]-cls_prob_ref[:,0]) confidence_sens = torch.abs(cls_prob_sens[:,1]-cls_prob_sens[:,0]) confidence_ref = confidence_ref.unsqueeze(1).unsqueeze(2).unsqueeze(3) confidence_sens = confidence_sens.unsqueeze(1).unsqueeze(2).unsqueeze(3) pooled_feat_c = confidence_ref * pooled_feat_c pooled_feat_t = confidence_sens * pooled_feat_t pooled_feat = pooled_feat_c + pooled_feat_t # feed pooled features to top model pooled_feat = self._head_to_tail(pooled_feat) # compute bbox offset bbox_pred = self.RCNN_bbox_pred(pooled_feat) if self.training and not self.class_agnostic: # select the corresponding columns according to roi labels bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4) bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)) bbox_pred = bbox_pred_select.squeeze(1) # compute object classification probability cls_score = self.RCNN_cls_score(pooled_feat) cls_prob = F.softmax(cls_score, 1) RCNN_loss_cls = 0 RCNN_loss_cls_ref = 0 RCNN_loss_cls_sens = 0 RCNN_loss_bbox = 0 if self.training: # classification loss RCNN_loss_cls = F.cross_entropy(cls_score, rois_label) RCNN_loss_cls_ref = F.cross_entropy(cls_score_ref, rois_label) RCNN_loss_cls_sens = F.cross_entropy(cls_score_sens, rois_label) # bounding box regression L1 loss RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) RCNN_loss_bbox_align = _smooth_l1_loss(bbox_align_pred, rois_align_target[:,:2], rois_inside_ws[:,:2], rois_outside_ws[:,:2]) cls_prob = cls_prob.view(batch_size, rois.size(1), -1) bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) return rois, rois_sens, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, 
RCNN_loss_cls_ref, RCNN_loss_cls_sens, RCNN_loss_bbox, RCNN_loss_bbox_align, rois_label
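For reference, a minimal sketch of how the losses returned by this forward() might be combined into a single training objective. The equal weighting and the .mean() reduction are assumptions; the original training script is not shown here.

# Hypothetical training-loss aggregation for the multispectral forward() above.
# Equal weighting of the terms is an assumption, not taken from the source.
def total_loss(rpn_loss_cls, rpn_loss_bbox,
               RCNN_loss_cls, RCNN_loss_cls_ref, RCNN_loss_cls_sens,
               RCNN_loss_bbox, RCNN_loss_bbox_align):
    return (rpn_loss_cls.mean() + rpn_loss_bbox.mean()
            + RCNN_loss_cls.mean() + RCNN_loss_cls_ref.mean()
            + RCNN_loss_cls_sens.mean()
            + RCNN_loss_bbox.mean() + RCNN_loss_bbox_align.mean())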
    if args.class_agnostic:
        # class-agnostic: only localize objects, without predicting a specific class
        # multiply by the std and add the mean to de-normalize the deltas and get accurate bboxes
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4)
    else:
        # class-aware: localize objects and also predict their class
        # multiply by the std and add the mean to de-normalize the deltas and get accurate bboxes
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        # the factor of 4 comes from the 4 regression coefficients per bbox
        # this step computes 4 regression coefficients per class for each bbox;
        # the highest-scoring class is picked later (or filtered with a mask)
        # forming a matrix with one row per bbox and 4 * num_classes columns
        box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)  # ??
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)  # clip boxes falling outside the image, using the image size info (im_info)
else:
    # case where no bbox regressor was trained
    # Simply repeat the boxes, once for each class
    # tile() repeats array elements along a dimension; here the boxes are repeated once per class ??
    pred_boxes = np.tile(boxes, (1, scores.shape[1]))
pred_boxes /= data[1][0][2]
# squeeze() removes the size-1 dimensions from the shape
scores = scores.squeeze()
pred_boxes = pred_boxes.squeeze()
det_toc = time.time()
detect_time = det_toc - det_tic
misc_tic = time.time()
if vis:
    im = cv2.imread(imdb.image_path_at(i))  # read the image with OpenCV
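The de-normalization commented above is the same small affine transform in every snippet of this collection: the network predicts normalized deltas, and they are scaled back before bbox_transform_inv. A self-contained numpy sketch, using the default normalization constants from py-faster-rcnn style configs (an assumption; check cfg.TRAIN for the actual values):

import numpy as np

# Default values from py-faster-rcnn style configs (assumed here).
BBOX_NORMALIZE_STDS = np.array([0.1, 0.1, 0.2, 0.2], dtype=np.float32)
BBOX_NORMALIZE_MEANS = np.array([0.0, 0.0, 0.0, 0.0], dtype=np.float32)

def denormalize_deltas(box_deltas, num_classes, class_agnostic):
    """box_deltas: (R, 4) or (R, 4 * num_classes) normalized regression outputs."""
    deltas = box_deltas.reshape(-1, 4) * BBOX_NORMALIZE_STDS + BBOX_NORMALIZE_MEANS
    width = 4 if class_agnostic else 4 * num_classes
    return deltas.reshape(1, -1, width)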
def __call__(self, ori_img): thresh = 0.5 allbox = [] assert isinstance(ori_img, np.ndarray), "input must be a numpy array!" if len(ori_img.shape) == 2: ori_img = ori_img[:, :, np.newaxis] ori_img = np.concatenate((ori_img, ori_img, ori_img), axis=2) blobs, im_scales = _get_image_blob(ori_img) assert len(im_scales) == 1, "Only single-image batch implemented" im_blob = blobs im_info_np = np.array( [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) im_data_pt = torch.from_numpy(im_blob) im_data_pt = im_data_pt.permute(0, 3, 1, 2) im_info_pt = torch.from_numpy(im_info_np) # initilize the tensor holder here. im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda if self.device == "cuda": im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data, volatile=True) im_info = Variable(im_info, volatile=True) num_boxes = Variable(num_boxes, volatile=True) gt_boxes = Variable(gt_boxes, volatile=True) with torch.no_grad(): im_data.resize_(im_data_pt.size()).copy_(im_data_pt) im_info.resize_(im_info_pt.size()).copy_(im_info_pt) gt_boxes.resize_(1, 1, 5).zero_() num_boxes.resize_(1).zero_() # infer rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = self.net(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if self.device == "cuda": box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_class)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= im_scales[0] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() im2show = np.copy(ori_img) for j in xrange(1, len(self.pascal_class)): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) cls_dets = cls_dets[order] keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] im2show = vis_detections(im2show, self.pascal_class[j], cls_dets.cpu().numpy(), 0.5) return im2show, pred_boxes, scores, cls_dets.cpu().numpy()
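A hypothetical usage of the callable detector above; the wrapper class name and its constructor arguments are assumptions, since only __call__ is shown in the source. Note that the function returns cls_dets from the last class processed, so it implicitly assumes at least one class produced detections above the 0.5 threshold.

# Hypothetical usage sketch; FasterRCNNWrapper and its arguments are invented names.
import cv2

detector = FasterRCNNWrapper(weights="models/faster_rcnn.pth", device="cuda")  # assumed constructor
img = cv2.imread("demo.jpg")  # uint8 ndarray, as __call__ expects
im2show, pred_boxes, scores, dets = detector(img)
cv2.imwrite("demo_det.jpg", im2show)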
def eval_result(args, logger, epoch, output_dir): if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) args.batch_size = 1 imdb, roidb, ratio_list, ratio_index = combined_roidb( args.imdbval_name, False) imdb.competition_mode(on=True) load_name = os.path.join(output_dir, 'thundernet_epoch_{}.pth'.format(epoch, )) layer = int(args.net.split("_")[1]) _RCNN = snet(imdb.classes, layer, pretrained_path=None, class_agnostic=args.class_agnostic) _RCNN.create_architecture() print("load checkpoint %s" % (load_name)) if args.cuda: checkpoint = torch.load(load_name) else: checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage ) # Load all tensors onto the CPU _RCNN.load_state_dict(checkpoint['model']) im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # hm = torch.FloatTensor(1) # reg_mask = torch.LongTensor(1) # wh = torch.FloatTensor(1) # offset = torch.FloatTensor(1) # ind = torch.LongTensor(1) # ship to cuda if args.cuda: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # hm = hm.cuda() # reg_mask = reg_mask.cuda() # wh = wh.cuda() # offset = offset.cuda() # ind = ind.cuda() # make variable with torch.no_grad(): im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) # hm = Variable(hm) # reg_mask = Variable(reg_mask) # wh = Variable(wh) # offset = Variable(offset) # ind = Variable(ind) if args.cuda: cfg.CUDA = True if args.cuda: _RCNN.cuda() start = time.time() max_per_image = 100 vis = True if vis: thresh = 0.5 else: thresh = 0.5 save_name = args.net num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(args.dataset, save_name) # dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ # imdb.num_classes, training=False, normalize=False) # dataset = roibatchLoader(roidb, imdb.num_classes, training=False) dataset = Detection(roidb, num_classes=imdb.num_classes, transform=BaseTransform(cfg.TEST.SIZE, cfg.PIXEL_MEANS), training=False) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=0, pin_memory=True) data_iter = iter(dataloader) _t = {'im_detect': time.time(), 'misc': time.time()} det_file = os.path.join(output_dir, 'detections.pkl') _RCNN.eval() empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0)) for i in range(num_images): data = next(data_iter) with torch.no_grad(): im_data.resize_(data[0].size()).copy_(data[0]) im_info.resize_(data[1].size()).copy_(data[1]) gt_boxes.resize_(data[2].size()).copy_(data[2]) num_boxes.resize_(data[3].size()).copy_(data[3]) # hm.resize_(data[4].size()).copy_(data[4]) # reg_mask.resize_(data[5].size()).copy_(data[5]) # wh.resize_(data[6].size()).copy_(data[6]) # offset.resize_(data[7].size()).copy_(data[7]) # ind.resize_(data[8].size()).copy_(data[8]) det_tic = time.time() with torch.no_grad(): time_measure, \ rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes, # hm,reg_mask,wh,offset,ind ) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize 
targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(args.batch_size, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(args.batch_size, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) # pred_boxes /= data[1][0][2].item() pred_boxes[:, :, 0::2] /= data[1][0][2].item() pred_boxes[:, :, 1::2] /= data[1][0][3].item() scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im) for j in xrange(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] #keep = gpu_nms(cls_dets.cpu().numpy(), cfg.TEST.NMS) #keep = torch.from_numpy(np.array(keep)) cls_dets_np = cls_dets.cpu().numpy() keep = cpu_soft_nms(cls_dets_np, sigma=0.7, Nt=0.5, threshold=0.4, method=0) cls_dets_np = cls_dets_np[keep] #cls_dets = cls_dets[keep.view(-1).long()] if vis: vis_detections(im2show, imdb.classes[j], color_list[j - 1].tolist(), cls_dets_np, 0.6) all_boxes[j][i] = cls_dets_np else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write( 'im_detect: {:d}/{:d} Detect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s) NMS: {:.3f}s\n' \ .format(i + 1, num_images, detect_time, time_measure[0], time_measure[1], time_measure[2], time_measure[3], nms_time)) sys.stdout.flush() if vis and i % 200 == 0 and args.use_tfboard: im2show = im2show[:, :, ::-1] logger.add_image('pred_image_{}'.format(i), trans.ToTensor()(Image.fromarray( im2show.astype('uint8'))), global_step=i) # cv2.imwrite('result.png', im2show) # pdb.set_trace() # cv2.imshow('test', im2show) # cv2.waitKey(0) with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('Evaluating detections') ap_50 = imdb.evaluate_detections(all_boxes, output_dir) logger.add_scalar("map_50", ap_50, global_step=epoch) end = time.time() print("test time: %0.4fs" % (end - start))
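eval_result calls cpu_soft_nms with sigma=0.7, Nt=0.5, threshold=0.4, method=0; in the reference Soft-NMS implementation (Bodla et al.), method 1 is linear decay, method 2 is Gaussian decay, and anything else falls back to hard suppression. A pure-Python sketch of that rescoring loop, for reference only (the repo itself uses the Cython version):

import numpy as np

def soft_nms(dets, sigma=0.7, Nt=0.5, threshold=0.4, method=2):
    """dets: (N, 5) array of [x1, y1, x2, y2, score]; returns kept original indices."""
    dets = dets.copy()
    idxs = np.arange(len(dets))
    keep = []
    while len(dets) > 0:
        top = dets[:, 4].argmax()          # highest-scoring remaining box
        keep.append(int(idxs[top]))
        best = dets[top]
        dets = np.delete(dets, top, axis=0)
        idxs = np.delete(idxs, top)
        if len(dets) == 0:
            break
        # IoU of the remaining boxes with the current best box
        xx1 = np.maximum(best[0], dets[:, 0])
        yy1 = np.maximum(best[1], dets[:, 1])
        xx2 = np.minimum(best[2], dets[:, 2])
        yy2 = np.minimum(best[3], dets[:, 3])
        inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
        area_b = (best[2] - best[0] + 1) * (best[3] - best[1] + 1)
        area_d = (dets[:, 2] - dets[:, 0] + 1) * (dets[:, 3] - dets[:, 1] + 1)
        iou = inter / (area_b + area_d - inter)
        if method == 2:    # gaussian decay
            dets[:, 4] *= np.exp(-(iou ** 2) / sigma)
        elif method == 1:  # linear decay above the overlap threshold Nt
            dets[:, 4] = np.where(iou > Nt, dets[:, 4] * (1 - iou), dets[:, 4])
        else:              # hard NMS: zero out overlapping scores
            dets[:, 4] = np.where(iou > Nt, 0.0, dets[:, 4])
        alive = dets[:, 4] >= threshold    # drop boxes whose score decayed away
        dets, idxs = dets[alive], idxs[alive]
    return keep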
def test_model_while_training(fasterRCNN, args): # args = parse_args() # args = set_dataset_args(args, test=True) # np.random.seed(cfg.RNG_SEED) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) cfg.TRAIN.USE_FLIPPED = False # args.imdbval_name = 'clipart_test' imdb, roidb, ratio_list, ratio_index = combined_roidb( args.imdbval_name_target, False) # breakpoint() imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb))) im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) if args.cuda: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True # if args.cuda: # fasterRCNN.cuda() start = time.time() max_per_image = 100 thresh = 0.0 save_name = args.load_name.split('/')[-1] num_images = len(imdb.image_index) all_boxes = [[[] for _ in range(num_images)] for _ in range(imdb.num_classes)] output_dir = get_output_dir(imdb, save_name) dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \ imdb.num_classes, training=False, normalize = False, path_return=True) dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True) data_iter = iter(dataloader) _t = {'im_detect': time.time(), 'misc': time.time()} det_file = os.path.join(output_dir, 'detections.pkl') fasterRCNN.eval() empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0)) for i in range(num_images): data = next(data_iter) im_data.data.resize_(data[0].size()).copy_(data[0]) #print(data[0].size()) im_info.data.resize_(data[1].size()).copy_(data[1]) gt_boxes.data.resize_(data[2].size()).copy_(data[2]) num_boxes.data.resize_(data[3].size()).copy_(data[3]) det_tic = time.time() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] # d_pred = d_pred.data path = data[4] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= data[1][0][2].item() scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() for j in range(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j 
* 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in range(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] # misc_toc = time.time() sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s \r' \ .format(i + 1, num_images, detect_time)) sys.stdout.flush() imdb.evaluate_detections(all_boxes, output_dir)
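Several of these evaluation loops fill the same all_boxes structure that imdb.evaluate_detections consumes; a minimal sketch of its layout, for reference:

import numpy as np

# all_boxes[class_index][image_index] is a (K, 5) array of [x1, y1, x2, y2, score];
# class 0 is the background class and stays empty.
num_classes, num_images = 3, 2
all_boxes = [[np.zeros((0, 5), dtype=np.float32) for _ in range(num_images)]
             for _ in range(num_classes)]
all_boxes[1][0] = np.array([[10., 10., 50., 60., 0.9]], dtype=np.float32)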
def test(args, model=None): if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) # Load dataset imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb( args.imdbval_name, False) imdb_vu.competition_mode(on=True) dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu, query_vu, 1, imdb_vu._classes, training=False) # initilize the network here. if not model: if args.net == 'vgg16': fasterRCNN = vgg16(imdb_vu.classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb_vu.classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb_vu.classes, 152, pretrained=False, class_agnostic=args.class_agnostic) else: print("network is not defined") fasterRCNN.create_architecture() # Load checkpoint print("load checkpoint %s" % (args.weights)) checkpoint = torch.load(args.weights) fasterRCNN.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print('load model successfully!') else: # evaluate constructed model fasterRCNN = model # initialize the tensor holder here. im_data = torch.FloatTensor(1) query = torch.FloatTensor(1) im_info = torch.FloatTensor(1) catgory = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda if args.cuda: cfg.CUDA = True fasterRCNN.cuda() im_data = im_data.cuda() query = query.cuda() im_info = im_info.cuda() catgory = catgory.cuda() gt_boxes = gt_boxes.cuda() # record time start = time.time() # visiualization vis = args.vis if hasattr(args, 'vis') else None if vis: thresh = 0.05 else: thresh = 0.0 max_per_image = 100 fasterRCNN.eval() dataset_vu.query_position = 0 test_scales = cfg.TEST.SCALES multiscale_iterators = [] for i_scale, test_scale in enumerate(test_scales): cur_dataloader_vu = torch.utils.data.DataLoader(dataset_vu, batch_size=1, shuffle=False, num_workers=0, pin_memory=True) cur_data_iter_vu = iter(cur_dataloader_vu) multiscale_iterators.append(cur_data_iter_vu) # total quantity of testing images, each images include multiple detect class num_images_vu = len(imdb_vu.image_index) num_detect = len(ratio_index_vu[0]) all_boxes = [[[] for _ in range(num_images_vu)] for _ in range(imdb_vu.num_classes)] _t = {'im_detect': time.time(), 'misc': time.time()} for i, index in enumerate(ratio_index_vu[0]): det_tic = time.time() multiscale_boxes = [] multiscale_scores = [] for i_scale, (data_iter_vu, test_scale) in enumerate( zip(multiscale_iterators, test_scales)): # need to rewrite cfg.TRAIN.SCALES - very hacky! 
BACKUP_TRAIN_SCALES = cfg.TRAIN.SCALES cfg.TRAIN.SCALES = [test_scale] data = next(data_iter_vu) cfg.TRAIN.SCALES = BACKUP_TRAIN_SCALES with torch.no_grad(): im_data.resize_(data[0].size()).copy_(data[0]) query.resize_(data[1].size()).copy_(data[1]) im_info.resize_(data[2].size()).copy_(data[2]) gt_boxes.resize_(data[3].size()).copy_(data[3]) catgory.data.resize_(data[4].size()).copy_(data[4]) # Run Testing if not hasattr(args, "class_image_augmentation" ) or not args.class_image_augmentation: queries = [query] elif args.class_image_augmentation.lower() == "rotation90": queries = [query] for _ in range(3): queries.append(queries[-1].rot90(1, [2, 3])) else: raise RuntimeError( "Unknown class_image_augmentation: {}".format( args.class_image_augmentation)) for q in queries: rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, _, RCNN_loss_bbox, \ rois_label, weight = fasterRCNN(im_data, q, im_info, gt_boxes, catgory) scores = cls_prob.data boxes = rois.data[:, :, 1:5] # Apply bounding-box regression if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view( 1, -1, 4 * len(imdb_vu.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) # Resize to original ratio pred_boxes /= data[2][0][2].item() # Remove batch_size dimension scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() multiscale_scores.append(scores) multiscale_boxes.append(pred_boxes) scores = torch.cat(multiscale_scores, dim=0) pred_boxes = torch.cat(multiscale_boxes, dim=0) # Record time det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() # Post processing inds = torch.nonzero(scores > thresh).view(-1) if inds.numel() > 0: # remove useless indices cls_scores = scores[inds] cls_boxes = pred_boxes[inds, :] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # rearrange order _, order = torch.sort(cls_scores, 0, True) cls_dets = cls_dets[order] # NMS keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] all_boxes[catgory][index] = cls_dets.cpu().numpy() # Limit to max_per_image detections *over all classes* if max_per_image > 0: try: image_scores = all_boxes[catgory][index][:, -1] if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] keep = np.where( all_boxes[catgory][index][:, -1] >= image_thresh)[0] all_boxes[catgory][index] = all_boxes[catgory][index][ keep, :] except: pass misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_detect, detect_time, nms_time)) sys.stdout.flush() # save test image if vis and i % 1 == 0: im2show = cv2.imread( dataset_vu._roidb[dataset_vu.ratio_index[i]]['image']) im2show = vis_detections(im2show, 'shot', cls_dets.cpu().numpy(), 0.3) o_query = data[1][0].permute(1, 2, 
0).contiguous().cpu().numpy() o_query *= [0.229, 0.224, 0.225] o_query += [0.485, 0.456, 0.406] o_query *= 255 o_query = o_query[:, :, ::-1] (h, w, c) = im2show.shape o_query = cv2.resize(o_query, (h, h), interpolation=cv2.INTER_LINEAR) im2show = np.concatenate((im2show, o_query), axis=1) vis_path = "./test_img" if not os.path.isdir(vis_path): os.makedirs(vis_path) cv2.imwrite(os.path.join(vis_path, "%d_d.png" % (i)), im2show) print('Evaluating detections') mAP = imdb_vu.evaluate_detections(all_boxes, None) end = time.time() print("test time: %0.4fs" % (end - start)) return mAP
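The rotation90 branch inside test() builds four query variants by repeatedly rotating the NCHW query tensor in its spatial dims; a self-contained sketch of that augmentation:

import torch

def build_rotated_queries(query):
    """query: (N, C, H, W) tensor; returns the original plus three 90-degree rotations."""
    queries = [query]
    for _ in range(3):
        queries.append(queries[-1].rot90(1, [2, 3]))  # rotate in the (H, W) plane
    return queries

q = torch.randn(1, 3, 128, 128)
assert len(build_rotated_queries(q)) == 4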
if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= data[1][0][2] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im) for j in xrange(1, imdb.num_classes):
def eval_test(fasterRCNN, args, cfg, imdb, dataloader, output_dir): # initialize the tensor holder here. im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda if args.cuda: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) start = time.time() max_per_image = 100 vis = args.vis if vis: thresh = 0.05 else: thresh = 0.0 save_name = "faster_rcnn_10" num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] data_iter = iter(dataloader) _t = {"im_detect": time.time(), "misc": time.time()} det_file = os.path.join(output_dir, "detections.pkl") fasterRCNN.eval() empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0)) for i in range(num_images): data = next(data_iter) with torch.no_grad(): im_data.resize_(data[0].size()).copy_(data[0]) im_info.resize_(data[1].size()).copy_(data[1]) gt_boxes.resize_(data[2].size()).copy_(data[2]) num_boxes.resize_(data[3].size()).copy_(data[3]) # im_data.data.resize_(data[0].size()).copy_(data[0]) # im_info.data.resize_(data[1].size()).copy_(data[1]) # gt_boxes.data.resize_(data[2].size()).copy_(data[2]) # num_boxes.data.resize_(data[3].size()).copy_(data[3]) det_tic = time.time() rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label = fasterRCNN( im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() + torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()) box_deltas = box_deltas.view(1, -1, 4) else: box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() + torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()) box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= data[1][0][2].item() scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im) for j in xrange(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if 
max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write("im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r".format( i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() if vis: cv2.imwrite("result.png", im2show) pdb.set_trace() # cv2.imshow('test', im2show) # cv2.waitKey(0) with open(det_file, "wb") as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print("Evaluating detections") imdb.evaluate_detections(all_boxes, output_dir) end = time.time() print("test time: %0.4fs" % (end - start)) if "coco" in args.dataset: return imdb.coco_eval
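The empty_array placeholder used by these loops is just a (0, 5) array, so that np.hstack over per-class scores still works when a class has no detections; an equivalent, more direct construction:

import numpy as np

empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
assert empty_array.shape == (0, 5)
# a more direct equivalent:
empty_array = np.zeros((0, 5), dtype=np.float32)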
def rcnn_im_detect(net, im, boxes, feat_list=()): """Detect object classes in an image given object proposals. Arguments: net (caffe.Net): Fast R-CNN network to use im (ndarray): color image to test (in BGR order) boxes (ndarray): R x 4 array of object proposals or None (for RPN) feat_list: a list that contains feature names you need. (SUPPORT: conv1-conv5, fc, and logit) Returns: scores (ndarray): R x K array of object class scores (K includes background as object category 0) boxes (ndarray): R x (4*K) array of predicted bounding boxes attr_scores (ndarray): R x M array of attribute class scores """ feat_dict = { "conv1": "conv1", "conv2": "res2c", "conv3": "res3b3", "conv4": "res4b22", "conv5": "res5c", "fc": "pool5_flat", "logit": "cls_score" } blobs, im_scales = _get_blobs(im, boxes) # Purpose: save computation resource for duplicated ROIs. if cfg.DEDUP_BOXES > 0: v = np.array([1, 1e3, 1e6, 1e9, 1e12]) hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v) _, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True) blobs['rois'] = blobs['rois'][index, :] boxes = boxes[index, :] im_blob = blobs['data'] blobs['im_info'] = np.array( [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]], dtype=np.float32) # reshape network inputs net.blobs['data'].reshape(*(blobs['data'].shape)) net.blobs['rois'].reshape(*(blobs['rois'].shape)) if 'im_info' in net.blobs: net.blobs['im_info'].reshape(*(blobs['im_info'].shape)) # do forward forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)} forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False) if 'im_info' in net.blobs: forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32, copy=False) blobs_out = net.forward(**forward_kwargs) feats = [] if len(feat_list) > 0: for f in feat_list: feats.append(net.blobs[feat_dict[f]]) # use softmax estimated probabilities scores = blobs_out['cls_prob'] if cfg.TEST.COMMON.BBOX_REG: # Apply bounding-box regression deltas box_deltas = blobs_out['bbox_pred'] pred_boxes = bbox_transform_inv(boxes, box_deltas) pred_boxes = clip_boxes(pred_boxes, im.shape) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) if cfg.DEDUP_BOXES > 0: # Map scores and predictions back to the original set of boxes scores = scores[inv_index, :] pred_boxes = pred_boxes[inv_index, :] if 'attr_prob' in net.blobs: attr_scores = blobs_out['attr_prob'] else: attr_scores = None if 'rel_prob' in net.blobs: rel_scores = blobs_out['rel_prob'] else: rel_scores = None return scores, pred_boxes, attr_scores, rel_scores, feats
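The DEDUP_BOXES branch above hashes quantized RoIs so that duplicated proposals are forwarded through the network only once and mapped back afterwards via inv_index. A numpy sketch of that trick; the 1/16 scale matches the usual cfg.DEDUP_BOXES default (an assumption here):

import numpy as np

def dedup_rois(rois, dedup_scale=1.0 / 16):
    """rois: (N, 5) array of [batch_idx, x1, y1, x2, y2]."""
    v = np.array([1, 1e3, 1e6, 1e9, 1e12])
    # quantize to the feature-map grid, then hash each row into a scalar
    hashes = np.round(rois * dedup_scale).dot(v)
    _, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
    return rois[index], index, inv_index

rois = np.array([[0, 0, 0, 16, 16],
                 [0, 1, 1, 17, 17],      # quantizes to the same cell at 1/16 scale
                 [0, 64, 64, 128, 128]], dtype=np.float32)
unique_rois, index, inv_index = dedup_rois(rois)
assert len(unique_rois) == 2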
def eval_frcnn(frcnn_extra, device, fasterRCNN, is_break=False): _t = {'im_detect': time.time(), 'misc': time.time()} det_file = os.path.join(frcnn_extra.output_dir, 'detections.pkl') fasterRCNN.eval() empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0)) data_iter_test = iter(frcnn_extra.dataloader_test) for i in range(frcnn_extra.num_images_test): data_test = next(data_iter_test) im_data = data_test[0].to(device) im_info = data_test[1].to(device) gt_boxes = data_test[2].to(device) num_boxes = data_test[3].to(device) det_tic = time.time() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if frcnn_extra.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4 * len(frcnn_extra.imdb_test.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= data_test[1][0][2].item() scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() for j in range(1, frcnn_extra.imdb_test.num_classes): inds = torch.nonzero(scores[:, j] > frcnn_extra.thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if frcnn_extra.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] frcnn_extra.all_boxes[j][i] = cls_dets.cpu().numpy() else: frcnn_extra.all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if frcnn_extra.max_per_image > 0: image_scores = np.hstack([frcnn_extra.all_boxes[j][i][:, -1] for j in range(1, frcnn_extra.imdb_test.num_classes)]) if len(image_scores) > frcnn_extra.max_per_image: image_thresh = np.sort(image_scores)[-frcnn_extra.max_per_image] for j in range(1, frcnn_extra.imdb_test.num_classes): keep = np.where(frcnn_extra.all_boxes[j][i][:, -1] >= image_thresh)[0] frcnn_extra.all_boxes[j][i] = frcnn_extra.all_boxes[j][i][keep, :] misc_toc = time.time() nms_time = misc_toc - misc_tic if is_break: break ap = frcnn_extra.imdb_test.evaluate_detections(frcnn_extra.all_boxes, frcnn_extra.output_dir) return ap
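clip_boxes, used throughout these snippets, just clamps box coordinates into the image; a minimal torch sketch consistent with the (height, width, scale) im_info rows built above:

import torch

def clip_boxes_sketch(boxes, im_shape):
    """boxes: (..., 4*K) tensor of [x1, y1, x2, y2] per class; im_shape: (height, width)."""
    boxes[..., 0::4].clamp_(0, im_shape[1] - 1)  # x1 within [0, width - 1]
    boxes[..., 1::4].clamp_(0, im_shape[0] - 1)  # y1 within [0, height - 1]
    boxes[..., 2::4].clamp_(0, im_shape[1] - 1)  # x2
    boxes[..., 3::4].clamp_(0, im_shape[0] - 1)  # y2
    return boxes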
def get_detections_from_im(fasterRCNN, classes, im_file, args, conf_thresh=0.2): """obtain the image_info for each image, im_file: the path of the image return: dict of {'image_id', 'image_h', 'image_w', 'num_boxes', 'boxes', 'features'} boxes: the coordinate of each box """ # initilize the tensor holder here. im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda if args.cuda > 0: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable with torch.no_grad(): im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda > 0: cfg.CUDA = True if args.cuda > 0: fasterRCNN.cuda() fasterRCNN.eval() #load images # im = cv2.imread(im_file) im_in = np.array(imread(im_file)) if len(im_in.shape) == 2: im_in = im_in[:, :, np.newaxis] im_in = np.concatenate((im_in, im_in, im_in), axis=2) # rgb -> bgr im = im_in[:, :, ::-1] vis = True blobs, im_scales = _get_image_blob(im) assert len(im_scales) == 1, "Only single-image batch implemented" im_blob = blobs im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) im_data_pt = torch.from_numpy(im_blob) im_data_pt = im_data_pt.permute(0, 3, 1, 2) im_info_pt = torch.from_numpy(im_info_np) with torch.no_grad(): im_data.resize_(im_data_pt.size()).copy_(im_data_pt) im_info.resize_(im_info_pt.size()).copy_(im_info_pt) gt_boxes.resize_(1, 1, 5).zero_() num_boxes.resize_(1).zero_() # pdb.set_trace() det_tic = time.time() # the region features[box_num * 2048] are required. rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label, pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, pool_feat = True) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: if args.cuda > 0: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4) else: if args.cuda > 0: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS) box_deltas = box_deltas.view(1, -1, 4 * len(classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= im_scales[0] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() max_conf = torch.zeros((pred_boxes.shape[0])) if args.cuda > 0: max_conf = max_conf.cuda() if vis: im2show = np.copy(im) for j in xrange(1, len(classes)): inds = torch.nonzero(scores[:, j] > conf_thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = 
torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] index = inds[order[keep]] max_conf[index] = torch.where(scores[index, j] > max_conf[index], scores[index, j], max_conf[index]) if vis: im2show = vis_detections(im2show, classes[j], cls_dets.cpu().numpy(), 0.5) if args.cuda > 0: keep_boxes = torch.where(max_conf >= conf_thresh, max_conf, torch.tensor(0.0).cuda()) else: keep_boxes = torch.where(max_conf >= conf_thresh, max_conf, torch.tensor(0.0)) keep_boxes = torch.squeeze(torch.nonzero(keep_boxes), dim=-1) if len(keep_boxes) < MIN_BOXES: keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES] elif len(keep_boxes) > MAX_BOXES: keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES] objects = torch.argmax(scores[keep_boxes][:, 1:], dim=1) box_dets = np.zeros((len(keep_boxes), 4)) boxes = pred_boxes[keep_boxes] name_list = [] box_caption_feature = np.zeros((len(keep_boxes), 300)) box_caption_mask = np.ones(len(keep_boxes)) for i in range(len(keep_boxes)): kind = objects[i] + 1 bbox = boxes[i, kind * 4:(kind + 1) * 4] tmp_dets = np.array(bbox.cpu()) if (tmp_dets[2] - tmp_dets[0]) * (tmp_dets[3] - tmp_dets[1]) <= 10: box_caption_mask[i] = 0 class_name = classes[1:][objects[i]] box_dets[i] = tmp_dets name_list.append(class_name) doc = nlp1(class_name) token_vector = nlp2(doc[0].text).vector box_caption_feature[i, :] = token_vector return { 'image_h': np.size(im, 0), 'image_w': np.size(im, 1), 'num_boxes': len(keep_boxes), #'boxes': box_dets, # region shape 4 * 36, 4 is the xy positions #'features': (pooled_feat[keep_boxes].cpu()).detach().numpy(), 'text': name_list, #'text_feature': box_caption_feature, # 'text_mask': box_caption_mask }
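get_detections_from_im clips the number of kept regions into a fixed budget, bottom-up-attention style; a sketch of that selection. MIN_BOXES and MAX_BOXES are defined outside the snippet, so the 10/100 bounds below are assumed (they are the common bottom-up-attention defaults):

import torch

MIN_BOXES, MAX_BOXES = 10, 100  # assumed defaults; defined elsewhere in the source

def select_box_budget(max_conf, conf_thresh=0.2):
    """max_conf: (R,) per-box max class confidence after NMS."""
    keep = torch.nonzero(max_conf >= conf_thresh).view(-1)
    if len(keep) < MIN_BOXES:          # pad up to the minimum budget
        keep = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
    elif len(keep) > MAX_BOXES:        # trim down to the maximum budget
        keep = torch.argsort(max_conf, descending=True)[:MAX_BOXES]
    return keep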
def inference(input_np, fasterRCNN, own_data_classes, cfg_file='cfgs/vgg16.yml', cuda=True, cfg_list=None):
    cfg_from_file(cfg_file)
    if cfg_list is not None:
        cfg_from_list(cfg_list)
    cfg.USE_GPU_NMS = cuda
    np.random.seed(cfg.RNG_SEED)
    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)
    if cuda:
        cfg.CUDA = True
    else:
        cfg.CUDA = False
    fasterRCNN.eval()
    # Load the demo image
    im_in = input_np
    # im_in = np.array(imread(im_file))
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]
    blobs, im_scales = _get_image_blob(im, cfg)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)
    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)
    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, RCNN_loss_bbox, \
    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            if cuda:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                             + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
            box_deltas = box_deltas.view(1, -1, 4 * len(own_data_classes))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    pred_boxes /= im_scales[0]
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    boxes_output = np.empty(shape=[0, 4], dtype=np.uint16)
    gt_classes_output = []
    ishards_output = np.empty(shape=[0], dtype=np.int32)
    thresh = 0.05
    # loop over the classes and collect the detections for this image
    for j in xrange(1, len(own_data_classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            for i_box in range(cls_dets.shape[0]):
                if cls_dets[i_box, 4] > thresh and filter_bndbox(cls_dets[i_box, :4], ratio=0.2):
                    boxes_output = np.append(boxes_output, np.expand_dims(cls_dets[i_box, :4], axis=0), axis=0).astype(np.uint16)
                    # ishard is 0 as default.
                    ishards_output = np.append(ishards_output, [0], axis=0)
                    gt_classes_output.append(own_data_classes[j])
    objs_info = {'boxes': boxes_output, 'classes_name': gt_classes_output, 'gt_ishard': ishards_output}
    return objs_info
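A hypothetical call to inference() for pseudo-labelling; the image path, class tuple, and preloaded fasterRCNN are assumptions. Note that the function flips channels internally (rgb -> bgr), so it expects an RGB input.

# Hypothetical usage sketch; fasterRCNN is assumed to be constructed and loaded already.
import cv2

own_data_classes = ('__background__', 'car', 'person')  # assumed class list
img_rgb = cv2.cvtColor(cv2.imread('demo.jpg'), cv2.COLOR_BGR2RGB)
objs_info = inference(img_rgb, fasterRCNN, own_data_classes)
print(objs_info['boxes'].shape, objs_info['classes_name'])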
box_delta_right = box_delta_right.view(1, -1, 4 * len(kitti_classes)) dim_orien = dim_orien.view(1, -1, 5 * len(kitti_classes)) kpts_delta = kpts_delta.view(1, -1, 1) left_delta = left_delta.view(1, -1, 1) right_delta = right_delta.view(1, -1, 1) max_prob = max_prob.view(1, -1, 1) pred_boxes_left = bbox_transform_inv(boxes_left, box_delta_left, 1) pred_boxes_right = bbox_transform_inv(boxes_right, box_delta_right, 1) pred_kpts, kpts_type = kpts_transform_inv(boxes_left, kpts_delta, cfg.KPTS_GRID) pred_left = border_transform_inv(boxes_left, left_delta, cfg.KPTS_GRID) pred_right = border_transform_inv(boxes_left, right_delta, cfg.KPTS_GRID) pred_boxes_left = clip_boxes(pred_boxes_left, im_info.data, 1) pred_boxes_right = clip_boxes(pred_boxes_right, im_info.data, 1) pred_boxes_left /= im_info[0, 2].data pred_boxes_right /= im_info[0, 2].data pred_kpts /= im_info[0, 2].data pred_left /= im_info[0, 2].data pred_right /= im_info[0, 2].data scores = scores.squeeze() pred_boxes_left = pred_boxes_left.squeeze() pred_boxes_right = pred_boxes_right.squeeze() pred_kpts = torch.cat( (pred_kpts, kpts_type, max_prob, pred_left, pred_right), 2) pred_kpts = pred_kpts.squeeze()
def main(finput, foutput, fmodel, fclass): """ Predict images from `fileinput` using `model` and saves predictions in `output`. """ if not foutput: foutput = join(dirname(finput), 'predictions.csv') fout = open(foutput, 'w') fout.write('Frame;xmin;ymin;xmax;ymax;id_class;score\n') check_files([finput, fmodel, fclass]) pascal_classes = load_classes(fclass) dic_classes = load_classes(fclass, dic=True, inverse=True) load_name = fmodel # initilize the network here. #if args.net == 'vgg16': #fasterRCNN = vgg16(pascal_classes, pretrained=False, class_agnostic=args.class_agnostic) fasterRCNN = resnet(pascal_classes, 101, pretrained=False, class_agnostic=False) fasterRCNN.create_architecture() logger.info("Load checkpoint %s" % (load_name)) checkpoint = torch.load(load_name) fasterRCNN.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] logger.info('load model successfully!') # initilize the tensor holder here. im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data, volatile=True) im_info = Variable(im_info, volatile=True) num_boxes = Variable(num_boxes, volatile=True) gt_boxes = Variable(gt_boxes, volatile=True) fasterRCNN.cuda() fasterRCNN.eval() max_per_image = 100 thresh = 0.05 imglist = load_image_paths(finput) num_images = len(imglist) logger.info('Loaded Photo: {} images.'.format(num_images)) pb = pbar.ProgressBar(num_images) for im_file in imglist: im_in = np.array(imread(im_file)) # rgb -> bgr im = im_in[:,:,::-1] im_blob, im_scales = _get_image_blob(im) im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) im_data_pt = torch.from_numpy(im_blob) im_data_pt = im_data_pt.permute(0, 3, 1, 2) im_info_pt = torch.from_numpy(im_info_np) im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt) im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt) gt_boxes.data.resize_(1, 1, 5).zero_() num_boxes.data.resize_(1).zero_() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] # Apply bounding-box regression deltas box_deltas = bbox_pred.data # Optionally normalize targets by a precomputed mean and stdev box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) pred_boxes /= im_scales[0] scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() im2show = np.copy(im) coordinates = [] for j in xrange(1, len(pascal_classes)): inds = torch.nonzero(scores[:,j]>thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:,j][inds] _, order = torch.sort(cls_scores, 0, True) cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] #im2show = vis_detections(im2show, pascal_classes[j], 
cls_dets.cpu().numpy(), 0.5) class_name = pascal_classes[j] write_detections(fout, basename(im_file)[:-4], dic_classes[class_name], cls_dets.cpu().numpy(), 0.5) #result_path = os.path.join('/home/roger/', basename(im_file)[:-4] + "_det.jpg") #logger.info('Saved file: {}'.format(result_path)) #cv2.imwrite(result_path, im2show) pb.update() fout.close()
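write_detections itself is not shown; below is a sketch consistent with its call site and with the 'Frame;xmin;ymin;xmax;ymax;id_class;score' header written by main(). The semicolon layout and rounding are inferred, so treat this as an assumption:

def write_detections(fout, frame, id_class, dets, thresh):
    """dets: (K, 5) array of [x1, y1, x2, y2, score] for one class."""
    for x1, y1, x2, y2, score in dets:
        if score > thresh:
            fout.write('%s;%d;%d;%d;%d;%d;%.4f\n'
                       % (frame, x1, y1, x2, y2, id_class, score))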
def evaluator(model, args, evl_rec=False): fasterRCNN = model np.random.seed(cfg.RNG_SEED) if args.dataset == "pascal_voc": args.imdb_name = "voc_2007_trainval" args.imdbval_name = "voc_2007_test" args.set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif args.dataset == "pascal_voc_0712": args.imdb_name = "voc_2007_trainval+voc_2012_trainval" args.imdbval_name = "voc_2007_test" args.set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] args.cfg_file = "cfgs/{}_ls.yml".format( args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) cfg.TRAIN.USE_FLIPPED = False imdb, roidb, ratio_list, ratio_index = combined_roidb( args.imdbval_name, False) imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb))) im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # ship to cuda if args.cuda: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # make variable im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) if args.cuda: cfg.CUDA = True if args.cuda: fasterRCNN.cuda() start = time.time() max_per_image = 100 vis = False if vis: thresh = 0.05 else: thresh = 0.0 save_name = 'faster_rcnn_10' num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, save_name) # These models are pytorch pretrained with RGB channel rgb = True if args.net in ('res18', 'res34', 'inception') else False dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \ imdb.num_classes, training=False, normalize = False, rgb=rgb) dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True) data_iter = iter(dataloader) _t = {'im_detect': time.time(), 'misc': time.time()} det_file = os.path.join(output_dir, 'detections.pkl') fasterRCNN.eval() empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0)) if evl_rec: true_postive, ground_truth = 0.0, 0.0 recall = AverageMeter() for i in range(num_images): data = next(data_iter) im_data.data.resize_(data[0].size()).copy_(data[0]) im_info.data.resize_(data[1].size()).copy_(data[1]) gt_boxes.data.resize_(data[2].size()).copy_(data[2]) num_boxes.data.resize_(data[3].size()).copy_(data[3]) det_tic = time.time() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = 
clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) pred_boxes /= data[1][0][2].item() if evl_rec: # evluate rpn recall only boxes_per_img = boxes.squeeze().cpu().numpy() / data[1][0][2].item( ) #pdb.set_trace() #TP, GT = evaluate_final_recall(pred_boxes.squeeze().cpu().numpy(), i, imdb, thr=0.5) TP, GT = evaluate_recall(boxes_per_img, i, imdb, thr=0.5) recall.update(TP, GT) sys.stdout.write('TP/GT: {}/{} | Recall: {:.3f} \r'.format( TP, GT, recall.avg)) sys.stdout.flush() continue scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im) for j in xrange(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() if vis: cv2.imwrite('result.png', im2show) pdb.set_trace() #cv2.imshow('test', im2show) #cv2.waitKey(0) if evl_rec: print('\r\nThe average rpn recall is: {:.4f}'.format(recall.avg)) return recall.avg with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('Evaluating detections') mAP = imdb.evaluate_detections(all_boxes, output_dir) end = time.time() print("test time: %0.4fs" % (end - start)) return mAP
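For recall.avg to equal total true positives over total ground truths, the AverageMeter used by evaluator must accumulate the raw value and the count separately. A sketch consistent with recall.update(TP, GT); this is an assumption, since the common update(val, n) variant that adds val * n would not compute recall:

class AverageMeter(object):
    """Tracks sum(val) / sum(n); here sum(TP) / sum(GT), i.e. the running recall."""
    def __init__(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, val, n=1):
        self.sum += val
        self.count += n
        self.avg = self.sum / max(self.count, 1)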
def stomata_count(fasterRCNN, image, cuda, pascal_classes):
    if cuda:
        cfg.USE_GPU_NMS = True

    im_in = image
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)

    blobs, im_scales = _get_image_blob(im_in)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    # Initialize the tensor holders here.
    im_data = torch.FloatTensor()
    im_info = torch.FloatTensor()
    num_boxes = torch.LongTensor()
    gt_boxes = torch.FloatTensor()

    # Ship to CUDA.
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    class_agnostic = False
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas.
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev.
            if class_agnostic:
                if cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class.
        pred_boxes = torch.from_numpy(
            np.tile(boxes.cpu().numpy(), (1, scores.shape[1])))
        if cuda:
            pred_boxes = pred_boxes.cuda()

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    num_stomata = 0
    label_stomata = np.copy(image)
    for j in range(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > 0.5).view(-1)
        # Only process classes that have at least one detection.
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            dets = cls_dets.cpu().numpy()
            label_stomata, num_stomata = vis_detections(
                label_stomata, pascal_classes[j], dets, 0.9)

    return num_stomata, label_stomata
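# Hedged usage sketch for stomata_count(). Everything here except the call
# itself is illustrative: the helper name, the image path suffix, and the
# class tuple ('__background__', 'stomata') are assumptions, not taken from
# this codebase.
def count_stomata_in_file(fasterRCNN, image_path, cuda=True):
    pascal_classes = ('__background__', 'stomata')  # assumed class layout
    img = cv2.imread(image_path)  # BGR uint8, the cv2 convention used above
    if img is None:
        raise FileNotFoundError(image_path)
    count, labeled = stomata_count(fasterRCNN, img, cuda, pascal_classes)
    cv2.imwrite(image_path + '.labeled.jpg', labeled)
    return count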
# (fragment: the enclosing function's header and earlier lines are not part
# of this excerpt; the indentation below is reconstructed)
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class.
        _ = torch.from_numpy(np.tile(boxes.cpu().numpy(), (1, scores.shape[1])))
        pred_boxes = _.cuda() if args.cuda > 0 else _

    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()
    if vis:
        im2show = np.copy(im)
    for j in range(1, len(pascal_classes)):
def eval_one_dataloader(save_dir_test_out, dataloader_t, fasterRCNN, device,
                        imdb, target_num=0, class_agnostic=False, thresh=0.0,
                        max_per_image=100, return_ap_class=False):
    save_name = save_dir_test_out + '_test_in_'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    data_iter = iter(dataloader_t)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    # fasterRCNN.training = False
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for i in range(num_images):
        data = next(data_iter)
        im_data = data[0].to(device)
        im_info = data[1].to(device)
        gt_boxes = data[2].to(device)
        num_boxes = data[3].to(device)

        with torch.no_grad():
            if isinstance(fasterRCNN, frcnn_htcn) or isinstance(fasterRCNN, frcnn_htcn_m):
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, RCNN_loss_bbox, \
                    rois_label, _, _, _, _ = fasterRCNN(
                        im_data, im_info, gt_boxes, num_boxes,
                        target_num=target_num)
            elif isinstance(fasterRCNN, frcnn_saito):
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, RCNN_loss_bbox, \
                    rois_label, _, _ = fasterRCNN(im_data, im_info,
                                                  gt_boxes, num_boxes)
            else:
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, RCNN_loss_bbox, \
                    rois_label = fasterRCNN(im_data, im_info,
                                            gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # d_pred = d_pred.data
        # path = data[4]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas.
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev.
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class.
            pred_boxes = torch.from_numpy(
                np.tile(boxes.cpu().numpy(), (1, scores.shape[1]))).to(device)

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()          # [1, 300, 2] -> [300, 2]
        pred_boxes = pred_boxes.squeeze()  # [1, 300, 8] -> [300, 8]
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh,
                                 as_tuple=False).view(-1)  # [300]
            # Only process classes that have at least one detection.
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]  # [300]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]  # [300, 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)  # [300, 5]
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)  # [N, 1]
                cls_dets = cls_dets[keep.view(-1).long()]  # [N, 5]
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*.
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])  # [M,]
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'
        #                  .format(i + 1, num_images, detect_time, nms_time))
        # sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    mAP, ap_per_class = imdb.evaluate_detections(all_boxes, output_dir)

    # Free the large intermediates before returning.
    del scores, boxes, all_boxes, pred_boxes
    del rois, cls_prob, bbox_pred
    del rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label

    if return_ap_class:
        return mAP, ap_per_class
    return mAP
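# Hedged usage sketch for eval_one_dataloader(). Every name below except the
# function under test is assumed to already exist in the caller's scope
# (dataloader_t, fasterRCNN, imdb), and 'run1' is a placeholder output tag.
#
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# mAP, ap_per_class = eval_one_dataloader('run1', dataloader_t, fasterRCNN,
#                                         device, imdb, return_ap_class=True)
# print('mAP: {:.4f}'.format(mAP))
# for cls_name, ap in zip(imdb.classes[1:], ap_per_class):
#     print('{:>15s}: {:.4f}'.format(cls_name, ap))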