# Test parameters
conf_thresh = 0.05
nms_thresh = 0.4
match_thresh = 0.5
iou_thresh = 0.5
im_width = 640
im_height = 480

# Specify which gpus to use
torch.manual_seed(seed)
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)

# Specify the model and the loss
model = Darknet(cfgfile)
region_loss = model.loss

# Model settings
# model.load_weights(weightfile)
model.load_weights_until_last(weightfile)
model.print_network()
model.seen = 0
region_loss.iter = model.iter
region_loss.seen = model.seen
processed_batches = model.seen // batch_size
init_width = model.width
init_height = model.height
init_epoch = model.seen // nsamples

# Variable to save
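# Illustrative arithmetic for the bookkeeping above (the values are
# hypothetical, not from the original source): with model.seen = 6400 images
# already processed, batch_size = 32 and nsamples = 1600 training images,
#   processed_batches = 6400 // 32   = 200 batches
#   init_epoch        = 6400 // 1600 = 4 epochs
# so a resumed run continues counting from epoch 4.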
def valid(datacfg, cfgfile, weightfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse data configuration file
    data_options = read_data_cfg(datacfg)
    valid_images = data_options['valid']
    meshname = data_options['mesh']
    name = data_options['name']
    im_width = int(data_options['im_width'])
    im_height = int(data_options['im_height'])
    fx = float(data_options['fx'])
    fy = float(data_options['fy'])
    u0 = float(data_options['u0'])
    v0 = float(data_options['v0'])

    # Parse net configuration file
    net_options = parse_cfg(cfgfile)[0]
    loss_options = parse_cfg(cfgfile)[-1]
    conf_thresh = float(net_options['conf_thresh'])
    num_keypoints = int(net_options['num_keypoints'])
    num_classes = int(loss_options['classes'])
    num_anchors = int(loss_options['num'])
    anchors = [float(anchor) for anchor in loss_options['anchors'].split(',')]

    # Read object model information, get 3D bounding box corners, get intrinsics
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(data_options['diam'])
    intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy)  # camera parameters

    # Network I/O parameters
    num_labels = 2 * num_keypoints + 3  # +2 for width/height, +1 for object class
    errs_2d = []  # to save the 2D projection errors

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Compute-related parameters
    use_cuda = True  # whether to use CUDA or not
    kwargs = {'num_workers': 4, 'pin_memory': True}  # number of workers etc.

    # Specify the model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the dataloader for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images,
                                              shape=(model.width, model.height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              **kwargs)

    # Iterate through test batches (batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            # target = target.cuda()
        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()
        # Forward pass
        output = model(data).data
        t3 = time.time()
        # Using the confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, num_labels)
        all_boxes = get_multi_region_boxes(output,
                                           conf_thresh,
                                           num_classes,
                                           num_keypoints,
                                           anchors,
                                           num_anchors,
                                           int(trgt[0][0]),
                                           only_objectness=0)
        t4 = time.time()
        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, num_labels)
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = list()
                for j in range(1, num_labels):
                    box_gt.append(truths[k][j])
                box_gt.extend([1.0, 1.0])
                box_gt.append(truths[k][0])
                # If the prediction has the highest confidence, choose it as our prediction
                best_conf_est = -sys.maxsize
                for j in range(len(boxes)):
                    if (boxes[j][2 * num_keypoints] > best_conf_est) and \
                       (boxes[j][2 * num_keypoints + 2] == int(truths[k][0])):
                        best_conf_est = boxes[j][2 * num_keypoints]
                        box_pr = boxes[j]
                        match = corner_confidence(box_gt[:2 * num_keypoints],
                                                  torch.FloatTensor(boxes[j][:2 * num_keypoints]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:2 * num_keypoints], [-1, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:2 * num_keypoints], [-1, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                corners2D_gt_corrected = fix_corner_order(corners2D_gt)  # Fix the order of the corners

                # Compute [R|t] by PnP
                objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                                       dtype='float32')
                K = np.array(intrinsic_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, intrinsic_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, intrinsic_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, intrinsic_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

        t5 = time.time()

    # Compute 2D projection score
    eps = 1e-5
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
        # Print test statistics
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
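# --- Illustrative helper (not part of the original source): the 2D projection
# metric computed at the end of valid(), factored out on its own. A sample
# counts as correct at threshold T if the mean reprojection error of the model
# vertices is at most T pixels.
import numpy as np

def projection_accuracy(errs_2d, px_threshold, eps=1e-5):
    """Percentage of test samples whose mean 2D reprojection error
    is <= px_threshold pixels."""
    errs = np.array(errs_2d)
    return len(np.where(errs <= px_threshold)[0]) * 100. / (len(errs) + eps)

# Example: projection_accuracy([3.2, 7.8, 4.1], 5) -> ~66.67 (2 of 3 samples).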
def valid(datacfg, cfgfile, weightfile, conf_th):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    name = options['name']
    prefix = 'results'

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify the model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()
    test_width = 544
    test_height = 544

    # Get the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images,
                                              shape=(test_width, test_height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    valid_batchsize = 1

    # Specify the number of workers for multiprocessing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    # Parameters
    use_cuda = True
    num_classes = 2
    anchors = [1.4820, 2.2412, 2.0501, 3.1265, 2.3946,
               4.6891, 3.1018, 3.9910, 3.4879, 5.8851]
    num_anchors = 5
    eps = 1e-5
    conf_thresh = conf_th
    iou_thresh = 0.5

    # Parameters to save
    errs_2d = []
    edges = [[1, 2], [1, 3], [1, 5], [2, 4], [2, 6], [3, 4],
             [3, 7], [4, 8], [5, 6], [5, 7], [6, 8], [7, 8]]
    edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3],
                     [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]

    # Iterate through test batches (batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            # target = target.cuda()
        # Wrap tensors in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()
        # Forward pass
        output = model(data).data
        t3 = time.time()
        # Using the confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, 21)
        all_boxes = get_corresponding_region_boxes(output,
                                                   conf_thresh,
                                                   num_classes,
                                                   anchors,
                                                   num_anchors,
                                                   int(trgt[0][0]),
                                                   only_objectness=0)
        t4 = time.time()
        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)
            if debug_multi:
                print(type(truths))
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            if debug_multi:
                print('number of ground truths: ' + str(num_gts))
            # Iterate through each ground-truth object
            for k in range(num_gts):
                if debug_multi:
                    print('object class in label is: ' + str(truths[k][0]))
                box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                          truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                          truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                          truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                          truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]
                best_conf_est = -1
                # If the prediction has the highest confidence, choose it as our prediction
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        iou = bbox_iou(bb2d_gt, bb2d_pr)
                        match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 1280
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 720
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 1280
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 720
                # corners2D_gt_corrected = fix_corner_order(corners2D_gt)
                # Don't fix the corner order here, since it is already correct
                corners2D_gt_corrected = corners2D_gt
                if debug_multi:
                    print('2D corners ground truth:')
                    print(type(corners2D_gt_corrected))
                    print(corners2D_gt_corrected)

                # Compute [R|t] by PnP
                objpoints3D = np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                                       dtype='float32')
                # Make a correction to the 3D points for classes 2 & 3 (i.e. upperPortRed and upperPortBlue)
                correspondingclass = boxes[j][20]
                if (correspondingclass == 2 or correspondingclass == 3):
                    x_min_3d = 0
                    x_max_3d = 1.2192
                    y_min_3d = 0
                    y_max_3d = 1.1176
                    z_min_3d = 0
                    z_max_3d = 0.003302
                    centroid = [(x_min_3d + x_max_3d) / 2,
                                (y_min_3d + y_max_3d) / 2,
                                (z_min_3d + z_max_3d) / 2]
                    objpoints3D = np.array([centroid,
                                            [x_min_3d, y_min_3d, z_min_3d],
                                            [x_min_3d, y_min_3d, z_max_3d],
                                            [x_min_3d, y_max_3d, z_min_3d],
                                            [x_min_3d, y_max_3d, z_max_3d],
                                            [x_max_3d, y_min_3d, z_min_3d],
                                            [x_max_3d, y_min_3d, z_max_3d],
                                            [x_max_3d, y_max_3d, z_min_3d],
                                            [x_max_3d, y_max_3d, z_max_3d]])
                K = np.array(internal_calibration, dtype='float32')
                _, R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                _, R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

        t5 = time.time()

    # Compute 2D projection score
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
        # Print test statistics
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
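# --- Hedged usage sketch (the .data path below is hypothetical; the cfg and
# weights paths are the ones used elsewhere in this file). Since this variant
# of valid() takes the confidence threshold as an argument, a threshold sweep
# could look like:
# for conf_th in [0.05, 0.1, 0.25, 0.5]:
#     valid('cfg/multi_obj.data',
#           'multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg',
#           '../Assets/trained/multi.weights',
#           conf_th)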
    img = cv2.line(img, points[3], points[7], rgb, thickness)
    img = cv2.line(img, points[7], points[6], rgb, thickness)
    # img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1)

    if savename:
        print("save plot results to %s" % savename)
        cv2.imwrite(savename, img)
    return img


if __name__ == '__main__':
    # datacfg = 'cfg/ape.data'
    modelcfg = 'multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg'
    weightfile = '../Assets/trained/multi.weights'

    # Initialize the model
    model = Darknet(modelcfg)
    model.load_weights(weightfile)
    model = model.cuda()
    model.eval()

    # Options used for loading the model
    net_options = parse_cfg(modelcfg)[0]
    loss_options = parse_cfg(modelcfg)[-1]
    conf_thresh = float(net_options['conf_thresh'])
    num_keypoints = int(net_options['num_keypoints'])
    num_classes = int(loss_options['classes'])
    num_anchors = int(loss_options['num'])
    anchors = [float(anchor) for anchor in loss_options['anchors'].split(',')]
    test_width = 416
    test_height = 416
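    # --- Hedged sketch of the inference step that would follow the setup
    # above (illustrative only; 'frame.jpg' is an assumed input path, and the
    # class index 0 passed to get_multi_region_boxes is just an example,
    # mirroring the preprocessing used in test() elsewhere in this file):
    # img = Image.open('frame.jpg').convert('RGB').resize((test_width, test_height))
    # img = transforms.Compose([transforms.ToTensor()])(img).unsqueeze(0).cuda()
    # output = model(img).data
    # all_boxes = get_multi_region_boxes(output, conf_thresh, num_classes,
    #                                    num_keypoints, anchors, num_anchors,
    #                                    0, only_objectness=0)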
scales = [0.5, 0.5, 0.5, 0.5, 0.1, 0.1, 0.1, 0.1]
best_acc = -1

# Test parameters
conf_thresh = 0.05
nms_thresh = 0.4
match_thresh = 0.5
iou_thresh = 0.5
im_width = 640
im_height = 480

# Specify which gpus to use
torch.manual_seed(seed)

# Specify the model and the loss
model = Darknet(cfgfile)
region_loss = model.loss

# Model settings
# model.load_weights(weightfile)
if pre:
    model.load_weights_until_last(weightfile)
    # max_epochs = 200
    model.print_network()
else:
    # model.print_network()
    model.load_weights(weightfile)
    # pass
model.seen = 0
region_loss.iter = model.iter
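# --- Hedged sketch (not from the original source): in Darknet-style training
# scripts, `scales` like the list above is typically paired with a `steps`
# list to decay the learning rate at fixed batch counts. `steps`,
# `learning_rate` and `batch_size` are assumed globals here; a minimal
# version of that helper looks like:
def adjust_learning_rate(optimizer, batch):
    lr = learning_rate
    for i in range(len(steps)):
        scale = scales[i] if i < len(scales) else 1
        if batch >= steps[i]:
            # Past this step boundary: apply the corresponding decay factor
            lr = lr * scale
            if batch == steps[i]:
                break
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr / batch_size
    return lr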
def test(datacfg, cfgfile, weightfile, imgfile):
    # ******************************************#
    #           PARAMETERS PREPARATION          #
    # ******************************************#
    # Parse configuration files
    options = read_data_cfg(datacfg)
    meshname = options['mesh']
    name = options['name']

    # Parameters for the network
    seed = int(time.time())
    gpus = '0'          # define gpus to use
    test_width = 544    # define test image size
    test_height = 544
    torch.manual_seed(seed)  # seed torch random
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)  # seed cuda random
    conf_thresh = 0.1

    # Read object 3D model, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(options['diam'])

    # Now configure camera intrinsics
    internal_calibration = get_camera_intrinsic()

    # ******************************************#
    #             NETWORK CREATION              #
    # ******************************************#
    # Create the network based on the cfg file
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)

    # Pass the model to GPU
    if use_cuda:
        # model = model.cuda()
        model = torch.nn.DataParallel(model, device_ids=[0]).cuda()  # Multiple-GPU parallelism
    model.eval()

    num_classes = model.module.num_classes
    anchors = model.module.anchors
    num_anchors = model.module.num_anchors

    # ******************************************#
    #      INPUT IMAGE PREPARATION FOR NN       #
    # ******************************************#
    # Now prepare the image: convert to RGB, resize, transform to Tensor, pass to GPU
    img = Image.open(imgfile).convert('RGB')
    ori_size = img.size  # store the original size
    img = img.resize((test_width, test_height))
    t1 = time.time()
    img = transforms.Compose([transforms.ToTensor(), ])(img)  # .float()
    img = Variable(img, requires_grad=True)
    img = img.unsqueeze(0)  # add a fake batch dimension
    img = img.cuda()

    # ******************************************#
    #   PASS IT TO NETWORK AND GET PREDICTION   #
    # ******************************************#
    # Forward pass
    output = model(img).data
    # print("Output Size: {}".format(output.size(0)))
    t2 = time.time()

    # Reload the original image
    img = cv2.imread(imgfile)

    for k in range(num_classes):
        all_boxes = get_corresponding_region_boxes(output,
                                                   conf_thresh,
                                                   num_classes,
                                                   anchors,
                                                   num_anchors,
                                                   k,
                                                   only_objectness=0)
        t4 = time.time()
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            best_conf_est = -1
            # If the prediction has the highest confidence, choose it as our prediction
            for j in range(len(boxes)):
                if (boxes[j][18] > best_conf_est) and (boxes[j][20] == k):
                    best_conf_est = boxes[j][18]
                    box_pr = boxes[j]
            # bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
            bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
            # for a, b in zip(bb2d_gt, bb2d_pr):
            #     print(type(a), type(b))
            # iou = bbox_iou(bb2d_gt, bb2d_pr)
            # match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))
            corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
            corners2D_pr[:, 0] = corners2D_pr[:, 0] * ori_size[0]  # Width
            corners2D_pr[:, 1] = corners2D_pr[:, 1] * ori_size[1]  # Height
            t3 = time.time()

            # Draw each predicted 2D point
            for v, (x, y) in enumerate(corners2D_pr):
                # Get colors to draw the lines
                col1 = 28 * v
                col2 = 255 - (28 * v)
                col3 = np.random.randint(0, 256)
                # OpenCV drawing functions need integer pixel coordinates
                cv2.circle(img, (int(x), int(y)), 3, (col1, col2, col3), -1)
                cv2.putText(img, str(v), (int(x) + 5, int(y) + 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (col1, col2, col3), 1)

            # Get each predicted point and the centroid
            p1 = corners2D_pr[1]
            p2 = corners2D_pr[2]
            p3 = corners2D_pr[3]
            p4 = corners2D_pr[4]
            p5 = corners2D_pr[5]
            p6 = corners2D_pr[6]
            p7 = corners2D_pr[7]
            p8 = corners2D_pr[8]
            center = corners2D_pr[0]

            # Draw cube lines around the detected object
            line_point = 3
            # Draw the front face
            cv2.line(img, (int(p1[0]), int(p1[1])), (int(p2[0]), int(p2[1])), (0, 255, 0), line_point)
            cv2.line(img, (int(p2[0]), int(p2[1])), (int(p4[0]), int(p4[1])), (0, 255, 0), line_point)
            cv2.line(img, (int(p4[0]), int(p4[1])), (int(p3[0]), int(p3[1])), (0, 255, 0), line_point)
            cv2.line(img, (int(p3[0]), int(p3[1])), (int(p1[0]), int(p1[1])), (0, 255, 0), line_point)
            # Draw the back face
            cv2.line(img, (int(p5[0]), int(p5[1])), (int(p6[0]), int(p6[1])), (0, 255, 0), line_point)
            cv2.line(img, (int(p7[0]), int(p7[1])), (int(p8[0]), int(p8[1])), (0, 255, 0), line_point)
            cv2.line(img, (int(p6[0]), int(p6[1])), (int(p8[0]), int(p8[1])), (0, 255, 0), line_point)
            cv2.line(img, (int(p5[0]), int(p5[1])), (int(p7[0]), int(p7[1])), (0, 255, 0), line_point)
            # Draw the right face
            cv2.line(img, (int(p2[0]), int(p2[1])), (int(p6[0]), int(p6[1])), (0, 255, 0), line_point)
            cv2.line(img, (int(p1[0]), int(p1[1])), (int(p5[0]), int(p5[1])), (0, 255, 0), line_point)
            # Draw the left face
            cv2.line(img, (int(p3[0]), int(p3[1])), (int(p7[0]), int(p7[1])), (0, 255, 0), line_point)
            cv2.line(img, (int(p4[0]), int(p4[1])), (int(p8[0]), int(p8[1])), (0, 255, 0), line_point)

            # Create a window to display the image
            wname = "Prediction"
            cv2.namedWindow(wname)
            # Show the image and wait for a key press
            cv2.imshow(wname, img)
            cv2.waitKey()

    print(output.shape)
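# --- Hedged usage sketch (the .data path and image path below are
# hypothetical; the cfg and weights paths are the ones used in the __main__
# block earlier in this file):
# test('cfg/multi_obj.data',
#      'multi_obj_pose_estimation/cfg/yolo-pose-multi.cfg',
#      '../Assets/trained/multi.weights',
#      'frame.jpg')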