# if there is det if inds.numel() > 0: cls_scores = scores[:,j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5) misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() if vis and webcam_num == -1: # cv2.imshow('test', im2show) # cv2.waitKey(0) result_path = os.path.join(args.image_dir, imglist[num_images][:-4] + "_det.jpg") cv2.imwrite(result_path, im2show) else:
# if there is det if inds.numel() > 0: # numel(): 返回张量中所有元素的个数 cls_scores = scores[:, j][inds] # 取出这些高于阈值的类的得分 _, order = torch.sort(cls_scores, 0, True) # 把得分排序 if args.class_agnostic: # 要在测试图片上标注每一个类 cls_boxes = pred_boxes[inds, :] # 在预测的boxes中取出所有得分高于阈值的box的位置信息 else: # 只识别不标注 cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] # ?? cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # 把box和对应的得分组合起来 # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] # 按照order排序 keep = nms(cls_dets, cfg.TEST.NMS) # 非极大值抑制 cls_dets = cls_dets[keep.view(-1).long()] # 经过NMS筛选 if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) # ?? all_boxes[j][i] = cls_dets.cpu().numpy() else: # 没有得分高于阈值的 all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* # 可能是每张图片最多识别出的box的数量 # (下面没有细看) if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: # 如果超过了 image_thresh = np.sort(image_scores)[-max_per_image] # 就重新定一个阈值 for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :]
def eval_result(args, logger, epoch, output_dir):
    """Evaluate a ThunderNet (snet) checkpoint for `epoch` on args.imdbval_name.

    Loads 'thundernet_epoch_{epoch}.pth' from `output_dir`, runs detection over
    the whole validation set one image at a time, applies per-class soft-NMS,
    dumps all detections to 'detections.pkl', and logs mAP@0.5 to `logger`.

    Args:
        args: parsed command-line namespace (cuda, net, imdbval_name, ...).
        logger: tensorboard-style writer with add_image / add_scalar.
        epoch: checkpoint epoch number to load.
        output_dir: directory holding the checkpoint (rebound below to the
            detection output directory from get_output_dir).
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    # Evaluation is single-image batches.
    args.batch_size = 1
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)
    load_name = os.path.join(output_dir,
                             'thundernet_epoch_{}.pth'.format(epoch, ))
    # Depth is encoded in the net name, e.g. "snet_146" -> 146.
    layer = int(args.net.split("_")[1])
    _RCNN = snet(imdb.classes,
                 layer,
                 pretrained_path=None,
                 class_agnostic=args.class_agnostic)
    _RCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    if args.cuda:
        checkpoint = torch.load(load_name)
    else:
        # Load all tensors onto the CPU
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
    _RCNN.load_state_dict(checkpoint['model'])

    # Tensor holders; resized and filled per image inside the loop below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True
    if args.cuda:
        _RCNN.cuda()

    start = time.time()
    max_per_image = 100
    vis = True
    # NOTE(review): both branches assign the same threshold; presumably a
    # different (lower) value was intended for the non-vis case — confirm.
    if vis:
        thresh = 0.5
    else:
        thresh = 0.5
    save_name = args.net
    num_images = len(imdb.image_index)
    # all_boxes[class][image] ends up as an N x 5 array [x1, y1, x2, y2, score].
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    output_dir = get_output_dir(args.dataset, save_name)
    dataset = Detection(roidb,
                        num_classes=imdb.num_classes,
                        transform=BaseTransform(cfg.TEST.SIZE,
                                                cfg.PIXEL_MEANS),
                        training=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    _RCNN.eval()
    # Placeholder (0 x 5) for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
        det_tic = time.time()
        with torch.no_grad():
            time_measure, \
            rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        # rois columns are [batch_idx, x1, y1, x2, y2]; drop the batch index.
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1,
                                                 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Rescale boxes back to original image space: x-coords by the width
        # scale (data[1][0][2]), y-coords by the height scale (data[1][0][3]).
        pred_boxes[:, :, 0::2] /= data[1][0][2].item()
        pred_boxes[:, :, 1::2] /= data[1][0][3].item()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class thresholding + soft-NMS; class 0 is background, skipped.
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                cls_dets_np = cls_dets.cpu().numpy()
                # CPU soft-NMS; method=0 presumably selects the classic hard
                # suppression variant of the routine — confirm against the
                # cpu_soft_nms implementation.
                keep = cpu_soft_nms(cls_dets_np,
                                    sigma=0.7,
                                    Nt=0.5,
                                    threshold=0.4,
                                    method=0)
                cls_dets_np = cls_dets_np[keep]
                if vis:
                    vis_detections(im2show, imdb.classes[j],
                                   color_list[j - 1].tolist(), cls_dets_np,
                                   0.6)
                all_boxes[j][i] = cls_dets_np
            else:
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # Raise the score cutoff so only the top max_per_image survive.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        sys.stdout.write(
            'im_detect: {:d}/{:d} Detect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s) NMS: {:.3f}s\n' \
            .format(i + 1, num_images, detect_time, time_measure[0],
                    time_measure[1], time_measure[2], time_measure[3],
                    nms_time))
        sys.stdout.flush()
        # Periodically push a BGR->RGB visualization to tensorboard.
        if vis and i % 200 == 0 and args.use_tfboard:
            im2show = im2show[:, :, ::-1]
            logger.add_image('pred_image_{}'.format(i),
                             trans.ToTensor()(Image.fromarray(
                                 im2show.astype('uint8'))),
                             global_step=i)
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    ap_50 = imdb.evaluate_detections(all_boxes, output_dir)
    logger.add_scalar("map_50", ap_50, global_step=epoch)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) # re-scale boxes to the origin img scale pred_boxes /= data[1][0][2].item() scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() thresh = 0.05 inds = torch.nonzero(scores[:, 1] > thresh).view(-1) cls_scores = scores[:, 1][inds] _, order = torch.sort(cls_scores, 0, True) cls_boxes = pred_boxes[inds, :] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) cls_dets = cls_dets[order] keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] end_time = time.time() im2show = vis_detections(im2show, ' ', cls_dets.cpu().numpy(), 0.85) output_path = os.path.join(CWD, 'output/visualization', 'tmp.jpg') cv2.imwrite(output_path, im2show[:, :, ::-1]) # print(cls_dets.size()) print(cls_dets) print(end_time - start_time)
cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) cls_dets = cls_dets[order] # if args.soft_nms: # np_dets = cls_dets.cpu().numpy().astype(np.float32) # keep = soft_nms(np_dets, method=cfg.TEST.SOFT_NMS_METHOD) # np_dets will be changed # keep = torch.from_numpy(keep).type_as(cls_dets).int() # cls_dets = torch.from_numpy(np_dets).type_as(cls_dets) # else: keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] cls_dets = cls_dets.cpu().numpy() else: cls_dets = np.array([]) if vis: im2show = vis_detections(im2show, classes[j], cls_dets, thresh=0.5) misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() if vis: # cv2.imshow('test', im2show) # cv2.waitKey(0) result_path = os.path.join(args.result_dir, imglist[i][:-4] + "_det.jpg") cv2.imwrite(result_path, im2show)
inds = np.where((scores[:, j] > thresh))[0] # if there is det if len(inds) > 0: cls_scores = scores[inds, j] top_inds = np.argsort(-cls_scores) cls_boxes = pred_boxes[inds, :] cls_dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32, copy=False) cls_dets = cls_dets[top_inds, :] _, keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets, 0.3) all_boxes[j][i] = cls_dets.copy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time()
for ap in aps: print('{:.3f}'.format(ap)) print('~~~~~~~~') print('') def evaluate_detections(self, all_boxes, output_dir): self._write_voc_results_file(all_boxes, output_dir) self._do_python_eval(output_dir) if self.config['cleanup']: for cls in self._classes: if cls == '__background__': continue filename = self._get_DALY_results_file_template(output_dir).format(cls) os.remove(filename) if __name__ == '__main__': d = DALY('DALY_RGB_1_split_1_raw_train') pdb.set_trace() res = d.roidb import cv2 from model.utils.net_utils import vis_detections for i in range(50): ri = np.random.randint(d.num_images) im = cv2.imread(d.image_path_at(ri)) gt_cls = d.classes[res[ri]['gt_classes'][0]] gt_bbox = res[ri]['boxes'] im2show = vis_detections(im, gt_cls, gt_bbox, 0.5) cv2.imwrite(str(i)+'.jpg', im2show) # from IPython import embed; embed()
if args.frame != 'fcgn': keep = nms(cls_dets, cfg.TEST.COMMON.NMS) cls_dets = cls_dets[keep.view(-1).long()] obj_index_end = obj_index_begin + ( cls_dets[:, -1] > cfg.TEST.COMMON.OBJ_DET_THRESHOLD).sum() if args.frame == 'mgn' or args.frame == 'all_in_one': cur_grasp = cur_grasp[keep.view(-1).long()] if vis: if args.frame == 'mgn' or args.frame == 'all_in_one': im2show_obj, im2show_gr = vis_detections( im2show_obj, im2show_gr, imdb.classes[j], cls_dets.cpu().numpy(), cfg.TEST.COMMON.OBJ_DET_THRESHOLD, cur_grasp.cpu().numpy(), color_dict=color_dict, index=range(obj_index_begin, obj_index_end)) else: im2show_obj, im2show_gr = vis_detections( im2show_obj, im2show_gr, imdb.classes[j], cls_dets.cpu().numpy(), cfg.TEST.COMMON.OBJ_DET_THRESHOLD, color_dict=color_dict, index=range(obj_index_begin, obj_index_end))
if args.class_agnostic: cls_boxes=pred_boxes[inds,:] else: # 得到对应类别的框坐标 cls_boxes=pred_boxes[inds][:,i*4:(i+1)*4] # [x1, y1, x2, y2, cls_prob] cls_dets=torch.cat([cls_boxes,cls_scores.unsqueeze(1)],dim=1) cls_dets=cls_dets[order] # 合并重复框 keep_inds=nms(cls_boxes[order,:],cls_scores[order],cfg.TEST.NMS) cls_dets=cls_dets[keep_inds.view(-1).long()] if vis: # max_vis_boxes设置单类别框可视化最多数目,thresh设置可视化置信度阈值 image_show=vis_detections(image_show,pascal_classes[i],cls_dets.cpu().numpy(),max_vis_boxes=10, thresh=0.5) if vis and webcam_num==-1: result_path=os.path.join(args.image_dir,images_list[num_images][:-4]+'_{}'.format(args.net)+'_det.jpg') cv2.imwrite(result_path,image_show) else: im2showRGB = cv2.cvtColor(image_show, cv2.COLOR_BGR2RGB) cv2.imshow("frame", im2showRGB) if cv2.waitKey(1) & 0xFF == ord('q'): break if webcam_num >= 0: cap.release() cv2.destroyAllWindows()
def evaluate(student_net, dataset):
    """Run `student_net` over the full `dataset` and return its mAP.

    Builds a single-image roibatchLoader over the combined roidb, runs the
    student (distillation) network on every image, collects per-class NMS'd
    detections into all_boxes, and scores them with evaluate_detections_MAP.

    Args:
        student_net: student detection network; its forward returns the six
            (mask, reg, cls) tuples unpacked below.
        dataset: imdb name string passed to combined_roidb.

    Returns:
        The mAP value produced by imdb.evaluate_detections_MAP.
    """
    imdb, roidb, ratio_list, ratio_index = combined_roidb(dataset, False)

    # Tensor holders; resized and filled per image inside the loop below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    im_data = im_data.cuda()
    im_info = im_info.cuda()
    num_boxes = num_boxes.cuda()
    gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    cfg.CUDA = True
    start = time.time()
    max_per_image = 100
    num_images = len(imdb.image_index)
    # all_boxes[class][image] ends up as an N x 5 array [x1, y1, x2, y2, score].
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)
    class_agnostic = False
    vis = False
    # thresh=0.0 keeps every scored box; pruning happens via NMS and the
    # max_per_image cap below.
    thresh = 0.0

    # compute the bounding boxes
    student_net.eval()
    # Placeholder (0 x 5) for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])
        det_tic = time.time()
        s_RPN_mask, s_RPN_reg, s_RPN_cls, s_RCN_mask, s_RCN_reg, s_RCN_cls = student_net(
            im_data, im_info, gt_boxes, num_boxes)
        # Region Proposal Network Classification
        Z_s = s_RPN_cls[0]
        fg_bg_label = s_RPN_cls[1]
        rpn_loss_cls = s_RPN_cls[2]
        # Region Proposal Network Regression
        y_reg = s_RPN_reg[0]
        R_s = s_RPN_reg[1]
        rpn_loss_box = s_RPN_reg[2]
        # Region Proposal Network Mask
        iw = s_RPN_mask[0]
        ow = s_RPN_mask[1]
        # Region Classification Network Classification
        rcn_cls_score = s_RCN_cls[0]
        cls_prob = s_RCN_cls[1]
        RCNN_loss_cls = s_RCN_cls[2]
        # Region Classification Network Regression
        rois = s_RCN_reg[0]
        rois_label = s_RCN_reg[1]
        rois_target = s_RCN_reg[2]
        bbox_pred = s_RCN_reg[3]
        RCNN_loss_bbox = s_RCN_reg[4]
        # Region Classification Network Mask
        rois_inside_ws = s_RCN_mask[0]
        rois_outside_ws = s_RCN_mask[1]

        scores = cls_prob.data
        # rois columns are [batch_idx, x1, y1, x2, y2]; drop the batch index.
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                               + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Rescale boxes back to the original image scale.
        pred_boxes /= data[1][0][2].item()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        # Per-class thresholding + NMS; class 0 is background, skipped.
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # Raise the score cutoff so only the top max_per_image survive.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()
    map = imdb.evaluate_detections_MAP(all_boxes)
    return map
cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) cls_dets_with_center = torch.cat( (cls_boxes, cls_centers, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] cls_dets_with_center = cls_dets_with_center[order] keep = torch.arange( cls_dets.shape[0]) # nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] cls_dets_with_center = cls_dets_with_center[keep.view( -1).long()] if vis: im2show = vis_detections( im2show, imdb.classes[j], cls_dets_with_center.cpu().numpy(), gt_boxes.cpu().numpy(), fixed_center.cpu().numpy(), num_boxes.cpu().numpy(), 0.8) all_boxes[j][i] = cls_dets_with_center.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([ all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes) ]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes):
cls_dets_right = cls_dets_right[keep] cls_dim_orien = cls_dim_orien[keep] cls_kpts = cls_kpts[keep] # optional operation, can check the regressed borderline keypoint using 2D box inference infered_kpts = kitti_utils.infer_boundary( im2show_left.shape, cls_dets_left.cpu().numpy()) infered_kpts = torch.from_numpy(infered_kpts).type_as( cls_dets_left) for detect_idx in range(cls_dets_left.size()[0]): if cls_kpts[detect_idx,4] - cls_kpts[detect_idx,3] < \ 0.5*(infered_kpts[detect_idx,1]-infered_kpts[detect_idx,0]): cls_kpts[detect_idx, 3:5] = infered_kpts[detect_idx] im2show_left = vis_detections(im2show_left, imdb._classes[j], \ cls_dets_left.cpu().numpy(), vis_thresh, cls_kpts.cpu().numpy()) im2show_right = vis_detections(im2show_right, imdb._classes[j], \ cls_dets_right.cpu().numpy(), vis_thresh) # read intrinsic f = calib.p2[0, 0] cx, cy = calib.p2[0, 2], calib.p2[1, 2] bl = (calib.p2[0, 3] - calib.p3[0, 3]) / f boxes_all = cls_dets_left.new(0, 5) kpts_all = cls_dets_left.new(0, 5) poses_all = cls_dets_left.new(0, 8) solve_tic = time.time() for detect_idx in range(cls_dets_left.size()[0]): if cls_dets_left[detect_idx, -1] > eval_thresh:
misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() # prepare gt if args.vis: img_with_selected_gtbox = np.copy(im) gt_dets = gt_boxes[0].cpu().numpy() gt_dets /= data[1][0][2].item() gt_dets[:, 4] = 1. gt_class_name = imdb._classes[int(gt_boxes[0, 0, 4])] img_with_selected_gtbox = vis_detections( img_with_selected_gtbox, gt_class_name, gt_dets, 0.8) img_pred = torch.from_numpy( im2show[:, :, ::-1].copy()).permute( 2, 0, 1).unsqueeze(0).float() img_gt = torch.from_numpy( img_with_selected_gtbox[:, :, ::-1].copy()).permute( 2, 0, 1).unsqueeze(0).float() support = support_ims.squeeze()[0].permute(1, 2, 0).cpu().numpy() h, w = img_pred.size(2), img_pred.size(3) support = cv2.resize(support, (w, h), interpolation=cv2.INTER_LINEAR) support = torch.from_numpy(support).permute( 2, 0, 1).unsqueeze(0).float() inv_idx = torch.arange(2, -1, -1)
def __call__(self, *args, **kwargs):
    """Run a hard-coded VGG16 Faster R-CNN PASCAL-VOC demo over `images/`.

    Loads 'faster_rcnn_1_6_10021.pth' from './mydetector/model/vgg16/imagenet',
    detects the 20 PASCAL classes in every file of `image_dir`, and either
    writes '*_det.jpg' result images (file mode) or shows frames (webcam mode).

    NOTE(review): *args/**kwargs are accepted but unused; all settings below
    are hard-coded. Post-loop cleanup (e.g. releasing a capture) may exist
    beyond this view — confirm.
    """
    net = 'vgg16'
    checksession = 1
    checkepoch = 6
    checkpoint = 10021
    load_dir = './mydetector/model'
    cfgs = 'vgg16.vml'
    set_cfgs = None
    dataset = 'imagenet'
    image_dir = 'images'
    # -1 means "read images from disk", >= 0 would mean webcam index.
    webcam_num = -1
    cfg_file = './mydetector/cfgs/vgg16.yml'
    vis = False
    cfg.CUDA = True
    cfg_from_file(cfg_file)
    if set_cfgs is not None:
        cfg_from_list(set_cfgs)
    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(1)

    # Load the pretrained model.
    input_dir = load_dir + "/" + net + "/" + dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' +
            input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(checksession, checkepoch,
                                          checkpoint))
    pascal_classes = np.asarray([
        '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
        'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
        'tvmonitor'
    ])

    # Initialize the network here.
    if net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=False,
                           class_agnostic=False)
    elif net == 'res101':
        fasterRCNN = resnet(pascal_classes,
                            101,
                            pretrained=False,
                            class_agnostic=False)
    elif net == 'res50':
        fasterRCNN = resnet(pascal_classes,
                            50,
                            pretrained=False,
                            class_agnostic=False)
    elif net == 'res152':
        fasterRCNN = resnet(pascal_classes,
                            152,
                            pretrained=False,
                            class_agnostic=False)
    else:
        print("network is not defined")
        pdb.set_trace()
    fasterRCNN.create_architecture()
    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')
    print("load checkpoint %s" % (load_name))

    # Initialize the tensor holders here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    im_data = im_data.cuda()
    im_info = im_info.cuda()
    num_boxes = num_boxes.cuda()
    gt_boxes = gt_boxes.cuda()

    # make variable
    # NOTE(review): `volatile=True` is a removed pre-0.4 Variable flag; this
    # code presumably targets an old PyTorch — confirm the installed version.
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)

    fasterRCNN.cuda()
    fasterRCNN.eval()

    start = time.time()
    max_per_image = 100
    thresh = 0.05
    vis = True

    imglist = os.listdir(image_dir)
    num_images = len(imglist)
    print('Loaded Photo: {} images.'.format(num_images))

    while (num_images >= 0):
        total_tic = time.time()
        if webcam_num == -1:
            num_images -= 1
            im_file = os.path.join(image_dir, imglist[num_images])
            im_in = np.array(imread(im_file))
        # Promote grayscale to 3 channels.
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]
        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)
        im_data_pt = torch.from_numpy(im_blob)
        # NHWC -> NCHW for the network.
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)
        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        # rois columns are [batch_idx, x1, y1, x2, y2]; drop the batch index.
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                    cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Rescale boxes back to the original image scale.
        pred_boxes /= im_scales[0]
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im2show = np.copy(im)
        # Per-class thresholding + NMS; class 0 is background, skipped.
        for j in xrange(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, pascal_classes[j],
                                             cls_dets.cpu().numpy(), 0.5)
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()
        if vis and webcam_num == -1:
            result_path = os.path.join(
                image_dir, imglist[num_images][:-4] + "_det.jpg")
            cv2.imwrite(result_path, im2show)
        else:
            # Webcam branch: show the frame and report FPS.
            im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
            cv2.imshow("frame", im2showRGB)
            total_toc = time.time()
            total_time = total_toc - total_tic
            frame_rate = 1 / total_time
            print('Frame rate:', frame_rate)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
boxes_of_i = np.array( [_[img_index] for _ in all_boxes_save_for_vis]) # filter boxes with lower score # It is 0 for batch size is 1 gt_boxes_cpu = gt_boxes.cpu().numpy()[0] gt_boxes_cpu[:, 0:4] /= float(im_info[0][2].cpu().numpy()) save_vis_root_path = './savevis/{}/{}/{}_{}_{}/'.format( model_name, args.imdbval_name, args.checksession, args.checkepoch, args.checkpoint) # show ground-truth for gt_b in gt_boxes_cpu: im2show = vis_detections( im2show, id2chn[imdb.classes[int(gt_b[-1])]], gt_b[np.newaxis, :], 0.1, (204, 0, 0)) i_row, i_c, _ = im2show.shape im2show = cv2.resize(im2show, (int(i_c / 2), int(i_row / 2))) # 1.gt未检测到 # 2. gt类别错误(TODO) for gt_b in gt_boxes_cpu: gt_cls_idx = int(gt_b[4]) # 1 && 2 if len(boxes_of_i[gt_cls_idx]) == 0: save_vis_path = save_vis_root_path + \ 'FN/' + id2chn[imdb.classes[int(gt_cls_idx)]] if not os.path.exists(save_vis_path): os.makedirs(save_vis_path)
if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if args.vis and i < 100: # save first 100 images. try : keep = nms(torch.FloatTensor(all_boxes[j][i]).cuda(), 0.3) cls_det = torch.FloatTensor(all_boxes[j][i]).cuda() cls_det = cls_det[keep.view(-1).long()] if j==0 and ('2012' not in args.dataset): im2show = vis_gts (im, imdb.image_path_at(i)) im2show = vis_detections(im, imdb.classes[j], all_boxes[j][i][keep.view(-1).long()],0.8) except : error_flag = True pdb.set_trace() # CorLoc index = np.argmax(scores[:, j]) all_boxes_corloc[j][i] = \ np.hstack((boxes[index, j*4:(j+1)*4].reshape(1, -1), np.array([[scores[index, j]]]))) if args.vis and not error_flag and i < 100: path = '{}/images/{:06d}.png'.format(output_dir, i) if i % 100 == 0 : print(path) cv2.imwrite(path, im2show)
cls_dets = cls_dets[keep.view(-1).long()] dets = cls_dets.cpu().numpy() image_number = os.path.splitext( os.path.basename(imglist[num_images]))[0] for i in range(np.minimum(10, dets.shape[0])): score = dets[i, -1] bbox = tuple(int(np.round(x)) for x in dets[i, :4]) th = 0.5 class_name = KAIST_classes[j].strip().replace(" ", "_") if score > th: fo2.write(class_name + ' ' + str(score) + ' ' + str(bbox[0]) + ' ' + str(bbox[1]) + ' ' + str(bbox[2]) + ' ' + str(bbox[3]) + '\n') if vis: im2show = vis_detections(im2show, KAIST_classes[j], cls_dets.cpu().numpy(), 0.5) fo2.close() misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() if vis and webcam_num == -1: # cv2.imshow('test', im2show) # cv2.waitKey(0) result_path = os.path.join(output_img_folder, imglist[num_images][:-4] + ".jpg") cv2.imwrite(result_path, im2show)
def raber_detection(im_in):
    """Run Faster R-CNN on a single image array and return its detections.

    Args:
        im_in: HxW (grayscale) or HxWx3 (RGB) uint8 image array.

    Returns:
        Nx5 numpy array of [x1, y1, x2, y2, score] rows. NOTE(review): as in
        the original code, the loop below overwrites ``box_results`` on every
        class that has detections, so only the last detected class's boxes are
        returned — confirm this is intended. Returns an empty (0, 5) array
        when nothing scores above the threshold.

    Relies on module-level globals: ``use_gpu``, ``args``, ``fasterRCNN``,
    ``cfg``, ``pascal_classes``, ``_get_image_blob``, ``nms``,
    ``vis_detections``.
    """
    # Initialize the tensor holders; they are resized to the real input below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Ship holders to the GPU when one is in use.
    if use_gpu:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # Wrap in Variables under no_grad: inference only, no autograd graph.
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True
    if args.cuda > 0:
        fasterRCNN.cuda()
    fasterRCNN.eval()

    thresh = 0.05  # minimum class score to keep a proposal
    vis = False    # visualization disabled in this online-detection path

    # Grayscale input -> replicate to 3 channels.
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr (the network was trained on BGR/OpenCV ordering)
    im = im_in[:, :, ::-1]

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    # NHWC numpy blob -> NCHW torch tensor.
    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas.
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally un-normalize deltas by the precomputed mean/stdev.
            if args.class_agnostic:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # No regression: simply repeat the boxes, once for each class.
        _ = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
        pred_boxes = _.cuda() if args.cuda > 0 else _

    # Undo the test-time image scaling so boxes are in original-image pixels.
    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    if vis:
        im2show = np.copy(im)

    # FIX: box_results used to be assigned only inside the loop, so an image
    # with no detection above `thresh` raised NameError at the final return.
    box_results = np.empty((0, 5), dtype=np.float32)

    for j in range(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            if vis:
                im2show = vis_detections(im2show, pascal_classes[j],
                                         cls_dets.cpu().numpy(), 0.5)
            box_results = cls_dets.cpu().numpy()

    if vis:
        result_path = os.path.join(args.result_dir, "OnlineDet.jpg")
        cv2.imwrite(result_path, im2show)

    return box_results
# cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] mat = np.matrix(cls_dets) with open( epoch_dir + '/' + imglist[num_images][:-4] + '_det_.txt', 'wb') as f: for line in mat: np.savetxt(f, line, fmt='%.4f') if vis: im2show = vis_detections( im2show, detection_classes[j], cls_dets.cpu().numpy(), np.array(args.minconfid).astype(np.float)) #im2show = vis_detections(im2show, detection_classes[j], cls_dets.cpu().numpy(), 0.5) #im2show = vis_detections(im2show, detection_classes[j], cls_dets.cpu().numpy(), args.minconfid.astype(np.float)) misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() if vis and webcam_num == -1: # cv2.imshow('test', im2show) # cv2.waitKey(0)
cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] #pdb.set_trace() #for locs in range(cls_dets.shape[0]): if vis: #pdb.set_trace() im2show = vis_detections(im2show, labels[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time()
cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] #import pdb #pdb.set_trace() #keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) keep = nms(cls_dets, 0.5, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] #pdb.set_trace() print(cls_dets.shape) if vis: im2show, res = vis_detections(im2show, j, i + 1, cls_dets.cpu().numpy(), 0) final_res += res misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() #import pdb #pdb.set_trace() #result_path = os.path.join('images/results', imglist[i][:-4] + "_det.png") #result_path = os.path.join('/cluster_home/my_data/hongyi.dong/val_results', imglist[i]) #print(result_path) #import pdb
def detect(self, dataset, foldername, filename, ch, vis, bbox_log):
    """Detect objects in one image file and optionally log/draw the boxes.

    Args:
        dataset: dataset name, used to build the output folder name.
        foldername: directory containing the image.
        filename: image file name inside ``foldername``.
        ch: channel/camera id, written into the log lines and folder name.
        vis: when truthy, draw detections and save the annotated image.
        bbox_log: when truthy, append boxes scoring > 0.5 to the global
            file handle ``fo`` as "ch,image,x1,y1,x2,y2,conf" lines.

    Uses instance state: ``self.im_data/im_info/gt_boxes/num_boxes`` tensor
    holders, ``self.fasterRCNN``, ``self.myargs``, ``self.pascal_classes``,
    ``self.thresh``.
    """
    image_num = os.path.splitext(filename)[0]
    output_folder = 'output/' + dataset + "_ch" + str(ch)
    # FIX: os.mkdir raised FileNotFoundError when the parent 'output/'
    # directory did not exist yet; makedirs creates intermediate dirs too.
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    total_tic = time.time()
    im_file = foldername + "/" + filename
    im_in = np.array(imread(im_file))
    # Grayscale -> 3 channels.
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    # NHWC numpy blob -> NCHW torch tensor.
    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        self.gt_boxes.resize_(1, 1, 5).zero_()
        self.num_boxes.resize_(1).zero_()

    det_tic = time.time()
    rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, rois_label = self.fasterRCNN(
            self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas.
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally un-normalize deltas by the precomputed mean/stdev.
            if self.myargs.class_agnostic:
                if self.myargs.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.myargs.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
    else:
        # No regression: simply repeat the boxes, once for each class.
        # NOTE(review): this branch yields a numpy array while the torch
        # indexing below expects a tensor — verify cfg.TEST.BBOX_REG is
        # always True in this deployment.
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    # Undo test-time scaling so boxes are in original-image pixels.
    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()
    if vis:
        im2show = np.copy(im)

    for j in xrange(1, len(self.pascal_classes)):
        inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if self.myargs.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            if bbox_log:
                # Append confident boxes to the global log file handle `fo`.
                bbox_list = cls_dets.cpu().numpy()
                for bb in bbox_list:
                    start_x = int(bb[0])
                    start_y = int(bb[1])
                    end_x = int(bb[2])
                    end_y = int(bb[3])
                    confidence = bb[4]
                    if confidence > 0.5:
                        fo.write(
                            str(ch) + "," + image_num + "," +
                            str(start_x) + "," + str(start_y) + "," +
                            str(end_x) + "," + str(end_y) + "," +
                            str(confidence) + "\n"
                        )

            if vis:
                im2show = vis_detections(im2show, self.pascal_classes[j],
                                         cls_dets.cpu().numpy(), 0.5)

    misc_toc = time.time()
    nms_time = misc_toc - misc_tic

    if vis:
        result_path = os.path.join(output_folder, str(image_num) + ".jpg")
        cv2.imwrite(result_path, im2show)
def run_model(support_im_paths, query_path, cnt_shot, output_path_folder):
    """Run few-shot detection of one query image against support images.

    Loads a pretrained model checkpoint, runs it on the query image with the
    sampled support set, NMS-filters the class-1 detections, zeroes the score
    of implausibly large boxes, draws the survivors and writes the annotated
    image to ``output_path_folder/result<cnt_shot>.jpg``.

    Args:
        support_im_paths: list of paths to support images (one per shot).
        query_path: path of the query image to detect on.
        cnt_shot: integer used only to name the output file.
        output_path_folder: directory the result image is written to.
    """
    # support
    # support_root_dir = 'datasets/supports'
    # class_dir = 'horse'
    # n_shot = 2
    # im_paths = list(Path(os.path.join(support_root_dir, class_dir)).glob('*.jpg'))
    CWD = os.getcwd()
    print(support_im_paths)
    n_shot = len(support_im_paths)
    # Fixed seed so the same support subset is drawn on every call.
    # (k == len(paths), so this "sample" is effectively a deterministic shuffle.)
    random.seed(0)
    im_path_list = random.sample(support_im_paths, k=n_shot)
    im_list = []
    for i, im_path in enumerate(im_path_list):
        im = Image.open(im_path)
        im_list.append(np.asarray(im))
    support_data = support_im_preprocess(im_list, cfg, 320, n_shot)

    # query: keep an unscaled RGB copy for drawing on later.
    im = np.asarray(Image.open(query_path))
    im2show = im.copy()
    query_data, im_info, gt_boxes, num_boxes = query_im_preprocess(im, cfg)

    # prepare data: copy the preprocessed blobs into the holder tensors.
    data = [query_data, im_info, gt_boxes, num_boxes, support_data]
    im_data, im_info, num_boxes, gt_boxes, support_ims = prepare_variable()
    with torch.no_grad():
        im_data.resize_(data[0].size()).copy_(data[0])
        im_info.resize_(data[1].size()).copy_(data[1])
        gt_boxes.resize_(data[2].size()).copy_(data[2])
        num_boxes.resize_(data[3].size()).copy_(data[3])
        support_ims.resize_(data[4].size()).copy_(data[4])

    # model: anchor config must match the checkpoint being loaded.
    cfg_from_list(
        ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'])
    model_dir = os.path.join(CWD, 'models')
    # Hard-coded checkpoint id (session 1, epoch 11, step 34467).
    load_path = os.path.join(model_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(1, 11, 34467))
    model = get_model('multi', load_path, n_shot)

    start_time = time.time()
    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = model(im_data, im_info, gt_boxes, num_boxes,
                           support_ims, gt_boxes)
    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    box_deltas = bbox_pred.data
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally un-normalize deltas by the precomputed mean/stdev.
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4)
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    # re-scale boxes to the origin img scale (data[1][0][2] is the im scale).
    pred_boxes /= data[1][0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    thresh = 0.05  # minimum foreground score to keep a proposal
    # Only class index 1 (the single foreground class) is considered.
    inds = torch.nonzero(scores[:, 1] > thresh).view(-1)
    cls_scores = scores[:, 1][inds]
    _, order = torch.sort(cls_scores, 0, True)
    cls_boxes = pred_boxes[inds, :]
    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
    cls_dets = cls_dets[order]
    keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
    cls_dets = cls_dets[keep.view(-1).long()]

    # Suppress boxes wider/taller than half the image by zeroing their score
    # (they are kept in the tensor but fall below the 0.5 draw threshold).
    for i in range(cls_dets.shape[0]):
        w = cls_dets[i, 2] - cls_dets[i, 0]
        h = cls_dets[i, 3] - cls_dets[i, 1]
        if w > 0.5 * im2show.shape[1] or h > 0.5 * im2show.shape[0]:
            cls_dets[i, 4] = 0
    end_time = time.time()
    im2show = vis_detections(im2show, ' ', cls_dets.cpu().numpy(), 0.5)
    output_path = os.path.join(output_path_folder,
                               'result' + str(cnt_shot) + '.jpg')
    # im2show is RGB; reverse channels to BGR for cv2.imwrite.
    cv2.imwrite(output_path, im2show[:, :, ::-1])
    print(cls_dets)
    print(end_time - start_time)
def predict1():
    """Flask endpoint: run detection over uploaded images (or a webcam feed).

    Iterates every file in ``<basedir>/upload/`` (or frames from webcam
    ``args.webcam_num``), runs the global ``fasterRCNN`` on each, collects
    per-class predictions into a response dict and returns it as JSON after
    post-processing with ``process``.

    NOTE(review): in webcam mode (``webcam_num >= 0``) ``im_file`` is never
    assigned, yet it is read below for ``filename``/``image1`` — that path
    would raise NameError; confirm webcam mode is actually used.
    """
    data = {"success": False}
    im_info1 = {}
    # Initialize the tensor holders; resized to the real input per image.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable (volatile=True is the legacy pre-0.4 inference flag;
    # deprecated in modern PyTorch — torch.no_grad() below supersedes it)
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)

    if args.cuda > 0:
        cfg.CUDA = True
    if args.cuda > 0:
        fasterRCNN.cuda()
    fasterRCNN.eval()

    start = time.time()
    max_per_image = 100
    thresh = 0.05  # minimum class score to keep a proposal
    vis = True
    file_dir = os.path.join(basedir, 'upload/')
    print('file_dir', file_dir)
    webcam_num = args.webcam_num
    # Set up webcam or get image directories
    if webcam_num >= 0:
        cap = cv2.VideoCapture(webcam_num)
        num_images = 0
    else:
        imglist = os.listdir(file_dir)
        num_images = len(imglist)
    print('Loaded Photo: {} images.'.format(num_images))

    # num_images counts down to -1 in directory mode; webcam mode keeps it 0
    # so the loop runs until the 'q' key breaks out.
    while (num_images >= 0):
        total_tic = time.time()
        if webcam_num == -1:
            num_images -= 1

        # Get image from the webcam
        if webcam_num >= 0:
            if not cap.isOpened():
                raise RuntimeError("Webcam could not open. Please check connection.")
            ret, frame = cap.read()
            im_in = np.array(frame)
        # Load the demo image
        else:
            im_file = os.path.join(file_dir, imglist[num_images])
            print("im_fileeeeeee", im_file)
            im_in = np.array(imread(im_file))

        # Grayscale -> 3 channels.
        if len(im_in.shape) == 2:
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32)

        # NHWC numpy blob -> NCHW torch tensor.
        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo test-time scaling so boxes are in original-image pixels.
        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Build the per-image response record.
        jindex = []
        info = {}
        info['predictions'] = list()
        filename = os.path.split(im_file)
        print("filename", filename[1])
        info['filename'] = filename[1]
        image1 = Image.open(im_file);
        print('image1.size', image1.size);
        info['width'] = image1.size[0]
        info['height'] = image1.size[1]

        if vis:
            im2show = np.copy(im)
        for j in range(1, len(pascal_classes)):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                print('j', j)
                cls_dets.cpu().numpy()
                jindex.append(j)
                if vis:
                    im2show = vis_detections(im2show, j, cls_dets.cpu().numpy(), 0.5)
                # Collect boxes above 0.5 for the JSON response.
                pred = vis_results(j, cls_dets.cpu().numpy(), 0.5)
                print('pred', pred)
                if (pred != []):
                    info['predictions'].append(pred)

        data["success"] = True
        im_info1[filename[1]] = info
        data['im_info'] = im_info1
        print(data)
        new_data = process(data)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
                .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            # Annotated image path computed but the write is disabled.
            result_path = os.path.join(file_dir, imglist[num_images][:-4] + "_det.jpg")
            # cv2.imwrite(result_path, im2show)
        else:
            # Webcam mode: display the frame and report the frame rate.
            im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
            cv2.imshow("frame", im2showRGB)
            total_toc = time.time()
            total_time = total_toc - total_tic
            frame_rate = 1 / total_time
            print('Frame rate:', frame_rate)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    if webcam_num >= 0:
        cap.release()
        cv2.destroyAllWindows()

    return flask.jsonify(new_data)
for j in xrange(1, len(args.classes)): inds = torch.nonzero(scores[:, j] > args.cls_thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, args.classes[j], cls_dets.cpu().numpy(), args.cls_thresh) misc_toc = time.time() nms_time = misc_toc - misc_tic print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'.format( i + 1, num_images, detect_time, nms_time)) if vis: if not os.path.exists(os.path.join(args.image_dir, 'det')): os.makedirs(os.path.join(args.image_dir, 'det')) result_path = os.path.join(args.image_dir, 'det', imglist[i][:-4] + "_det.jpg") cv2.imwrite(result_path, im2show)
cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) cls_dets = cls_dets[keep.view(-1).long()] for i_box in range(cls_dets.shape[0]): if cls_dets[i_box,4]>0.5: boxes_output = np.append(boxes_output, np.expand_dims(cls_dets[i_box,:4], axis=0), axis=0).astype(np.uint16) # ishard is 0 as default. ishards_output = np.append(ishards_output, [0], axis=0) gt_classes_output.append(own_data_classes[j]) if vis: im2show = vis_detections(im2show, own_data_classes[j], cls_dets.cpu().numpy(), 0.5) objs_info = {'boxes': boxes_output, 'gt_classes_name': gt_classes_output, 'gt_ishard': ishards_output} misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() if webcam_num == -1: # cv2.imshow('test', im2show)
+ torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(1, -1, 4 * (len(classes) - 1)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) scores = scores.squeeze() pred_boxes /= im_scale pred_boxes = pred_boxes.squeeze() nms_cfg = {'type': 'nms', 'iou_threshold': 0.5} thresh = 0.001 det_bboxes, det_labels = multiclass_nms(pred_boxes, scores, thresh, nms_cfg, 100) keep = det_bboxes[:, 4] > thresh det_bboxes = det_bboxes[keep] det_labels = det_labels[keep] for j in range(0, len(classes) - 1): inds = torch.nonzero(det_labels == j, as_tuple=False).view(-1) # if there is det if inds.numel() > 0: cls_dets = det_bboxes[inds] im2show = vis_detections(im2show, classes[j], cls_dets.cpu().numpy(), color=colors[int(j)]) if not os.path.exists('results/detection'): os.makedirs('results/detection') cv2.imwrite( os.path.join('results/detection', img_name.replace('jpg', 'png')), im2show) print("GradCAM completed")
misc_tic = time.time() if vis: im2show = np.copy(im) for j in range(1, 21): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = pred_boxes[inds, :] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: im2show = vis_detections(im2show, classes[j], cls_dets) misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() if vis: # cv2.imshow('test', im2show) # cv2.waitKey(0) result_path = os.path.join(args.image_dir, imglist[i][:-4] + "_det.jpg") cv2.imwrite(result_path, im2show)
cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5) misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() if vis and webcam_num == -1: # cv2.imshow('test', im2show) # cv2.waitKey(0) result_path = os.path.join(args.image_dir, imglist[num_images][:-4] + "_det.jpg") cv2.imwrite(result_path, im2show)
def detect(self, bbx):
    """Return True when the crop at `bbx` contains a confident class-15 hit.

    Crops ``self.img`` to the [x, y, w, h] region, runs the detector on the
    crop, and counts detections of class index 15 whose score exceeds the
    global ``cf`` and whose IOU (via ``self.a_train_set.IOU``) with the crop
    exceeds the global ``iou``.

    Args:
        bbx: [x, y, w, h] region in ``self.img`` coordinates.

    Returns:
        True when at least one qualifying detection is found, else False
        (also False for an empty crop).
    """
    with torch.no_grad():
        vis = False
        thresh = 0.05  # minimum class score to keep a proposal

        # Tensor holders on the configured device; resized below.
        im_data = torch.FloatTensor(1).to(self.device)
        im_info = torch.FloatTensor(1).to(self.device)
        num_boxes = torch.LongTensor(1).to(self.device)
        gt_boxes = torch.FloatTensor(1).to(self.device)

        # Crop the region of interest, clamping the origin to the image.
        x, y, w, h = [int(p) for p in bbx]
        x = max(x, 0)
        y = max(y, 0)
        im = self.img[y:(y + h), x:(x + w)]
        # w/h are re-read from the actual crop (may be smaller at borders).
        w, h = im.shape[1], im.shape[0]
        refine_bbx = [0, 0, w, h]
        if w * h == 0:
            # Degenerate crop: nothing to detect.
            print('What? %d * %d' % (w, h))
            return False

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        # NHWC numpy blob -> NCHW torch tensor.
        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)
        im_info_pt = torch.from_numpy(im_info_np)

        im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.data.resize_(1, 1, 5).zero_()
        num_boxes.data.resize_(1).zero_()

        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = self.fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas.
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally un-normalize deltas by the precomputed mean/stdev.
                # FIX: the STDS tensor was created with .cuda() while the rest
                # of this method uses .to(self.device) — that crashed on a
                # CPU-only device; both factors now follow self.device.
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).to(self.device) \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).to(self.device)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # No regression: simply repeat the boxes, once for each class.
            _ = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
            pred_boxes = _.to(self.device)

        # Undo test-time scaling so boxes are in crop-pixel coordinates.
        pred_boxes /= im_scales[0]

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        if vis:
            im2show = np.copy(im)

        # Only class index 15 is of interest here (presumably 'person' in the
        # PASCAL VOC ordering — TODO confirm against self.pascal_classes).
        j = 15
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        step = 0
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            dets = cls_dets.cpu().numpy()
            for i in range(dets.shape[0]):
                if dets[i, -1] > cf:
                    # dets rows are [x1, y1, x2, y2]; convert to [x, y, w, h]
                    # for the IOU helper.
                    x1, y1, w1, h1 = dets[i][:4]
                    det = [x1, y1, w1 - x1, h1 - y1]
                    ratio = self.a_train_set.IOU(det, refine_bbx)
                    if ratio[0] > iou:
                        # IOU between prediction and detection should not be limited
                        step += 1

            if vis:
                print(cls_dets)
                dets = cls_dets.cpu().numpy()
                im2show = vis_detections(im2show, self.pascal_classes[j], dets)

        if vis:
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

        if step:
            return True
        return False
inds = torch.nonzero(scores[:,j]>thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:,j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time()
def run(args):
    """Evaluate a trained Faster R-CNN checkpoint on a detection benchmark.

    Builds the backbone selected by ``args.net``, loads the checkpoint named
    by ``args.checksession``/``args.checkepoch``/``args.checkpoint``, runs
    detection over every image of the validation imdb chosen by
    ``args.dataset``, applies per-class soft-NMS, caps detections per image,
    dumps all boxes to ``detections.pkl`` and runs the imdb's official
    evaluation.

    Args:
        args: parsed command-line namespace; must provide at least
            ``dataset``, ``net``, ``cuda``, ``class_agnostic``, ``vis``,
            ``load_dir``, ``large_scale``, ``checksession``, ``checkepoch``
            and ``checkpoint``.

    Raises:
        Exception: if the checkpoint input directory does not exist.
    """
    # Python 2/3 shim: bind a local xrange on Python 3.
    try:
        xrange  # Python 2
    except NameError:
        xrange = range  # Python 3

    print('Called with args:')
    print(args)

    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    np.random.seed(cfg.RNG_SEED)

    # Pick the validation imdb and anchor configuration for the dataset.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = [
            'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]'
        ]

    args.cfg_file = "cfgs/{}_ls.yml".format(
        args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    # Evaluation never uses flipped images.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception(
            'There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(
        input_dir,
        'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch,
                                          args.checkpoint))

    # Initialize the network for the requested backbone.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(imdb.classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(imdb.classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(imdb.classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    # Some checkpoints record the pooling mode they were trained with.
    if 'pooling_mode' in checkpoint:
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    # Reusable input holders; resized/filled per batch in the loop below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Ship everything to the GPU when requested.
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        cfg.CUDA = True
        fasterRCNN.cuda()

    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    start = time.time()
    max_per_image = 100  # global cap on detections kept per image

    vis = args.vis
    # A small positive threshold keeps only boxes worth drawing; 0.0 keeps
    # everything for the quantitative evaluation.
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    # all_boxes[cls][img] -> (k, 5) array of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1,
                             imdb.num_classes, training=False,
                             normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=0,
                                             pin_memory=True)
    data_iter = iter(dataloader)

    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    # Placeholder for classes with no detections above threshold.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas.
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Un-normalize targets by the precomputed mean and stdev.
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) \
                        * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) \
                        * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class.
            # BUGFIX: the original called np.tile() on a torch tensor, which
            # hands a numpy array to the torch-only code below (and fails
            # outright on CUDA tensors); tensor.repeat() is the torch
            # equivalent of tiling the last dimension.
            pred_boxes = boxes.repeat(1, 1, scores.shape[1])

        # Undo the image scaling applied by the data loader (data[1] holds
        # im_info; index 2 is the scale factor).
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)

        # Per-class score thresholding + soft-NMS (background class 0 is
        # skipped).
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                # softnms_cpu_torch returns the surviving detections
                # directly (not keep indices).
                cls_dets = softnms_cpu_torch(cls_dets)
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*.
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # Raise the effective threshold so exactly max_per_image
                # detections survive across all classes.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(
                        all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            #cv2.imshow('test', im2show)
            #cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))