def train(epoch):
    global processed_batches

    # Initialize timer
    t0 = time.time()

    # Get the dataloader for the training dataset
    train_loader = torch.utils.data.DataLoader(dataset_multi.listDataset(
        trainlist,
        shape=(init_width, init_height),
        shuffle=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=True,
        seen=model.module.seen,
        batch_size=batch_size,
        num_workers=num_workers,
        bg_file_names=bg_file_names),
                                               batch_size=batch_size,
                                               shuffle=False,
                                               **kwargs)

    # TRAINING
    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d, processed %d samples, lr %f' %
            (epoch, epoch * len(train_loader.dataset), lr))

    # Start training
    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    niter = 0

    # Iterate through batches
    for batch_idx, (data, target) in enumerate(train_loader):
        t2 = time.time()
        # Adjust learning rate
        adjust_learning_rate(optimizer, processed_batches)
        processed_batches = processed_batches + 1
        # Pass the data to GPU
        if use_cuda:
            data = data.cuda()
        t3 = time.time()
        # Wrap tensors in Variable class for automatic differentiation
        data, target = Variable(data), Variable(target)
        t4 = time.time()
        # Zero the gradients before running the backward pass
        optimizer.zero_grad()
        t5 = time.time()
        # Forward pass
        output = model(data)
        t6 = time.time()
        region_loss.seen = region_loss.seen + data.data.size(0)
        # Compute loss, grow an array of losses for saving later on
        loss = region_loss(output, target)
        training_iters.append(
            epoch * math.ceil(len(train_loader.dataset) / float(batch_size)) + niter)
        training_losses.append(convert2cpu(loss.data))
        niter += 1
        t7 = time.time()
        # Backprop: compute gradient of the loss with respect to model parameters
        loss.backward()
        t8 = time.time()
        # Update weights
        optimizer.step()
        t9 = time.time()
        # Print time statistics (disabled by default)
        if False and batch_idx > 1:
            avg_time[0] = avg_time[0] + (t2 - t1)
            avg_time[1] = avg_time[1] + (t3 - t2)
            avg_time[2] = avg_time[2] + (t4 - t3)
            avg_time[3] = avg_time[3] + (t5 - t4)
            avg_time[4] = avg_time[4] + (t6 - t5)
            avg_time[5] = avg_time[5] + (t7 - t6)
            avg_time[6] = avg_time[6] + (t8 - t7)
            avg_time[7] = avg_time[7] + (t9 - t8)
            avg_time[8] = avg_time[8] + (t9 - t1)
            print('-------------------------------')
            print('       load data : %f' % (avg_time[0] / batch_idx))
            print('     cpu to cuda : %f' % (avg_time[1] / batch_idx))
            print('cuda to variable : %f' % (avg_time[2] / batch_idx))
            print('       zero_grad : %f' % (avg_time[3] / batch_idx))
            print(' forward feature : %f' % (avg_time[4] / batch_idx))
            print('    forward loss : %f' % (avg_time[5] / batch_idx))
            print('        backward : %f' % (avg_time[6] / batch_idx))
            print('            step : %f' % (avg_time[7] / batch_idx))
            print('           total : %f' % (avg_time[8] / batch_idx))
            t1 = time.time()
        t1 = time.time()

    return epoch * math.ceil(len(train_loader.dataset) / float(batch_size)) + niter - 1
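# `adjust_learning_rate` is defined elsewhere in this repo; a minimal sketch of
# the Darknet-style step schedule it is assumed to implement, using globals
# `learning_rate`, `steps`, `scales`, and `batch_size` parsed from the .cfg file:
def adjust_learning_rate(optimizer, batch):
    lr = learning_rate
    for i in range(len(steps)):
        scale = scales[i] if i < len(scales) else 1
        if batch >= steps[i]:
            lr = lr * scale
            if batch == steps[i]:
                break
        else:
            break
    # Darknet divides the configured rate by the batch size when setting it
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr / batch_size
    return lr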
def eval(niter, datacfg, cfgfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    backupdir = options['backup']
    name = options['name']
    prefix = 'results'

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Set the module in evaluation mode (the model is already built, loaded with
    # pretrained weights, and on the GPU)
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images,
                                              shape=(model.module.width, model.module.height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    valid_batchsize = 1

    # Specify the number of workers for multiprocessing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    # Parameters
    num_classes = model.module.num_classes
    anchors = model.module.anchors
    num_anchors = model.module.num_anchors
    testing_error_pixel = 0.0
    testing_samples = 0.0
    errs_2d = []

    logging("   Number of test samples: %d" % len(test_loader.dataset))

    # Iterate through test examples
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass the data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()
        data = Variable(data)
        t2 = time.time()
        # Forward pass; run under no_grad for inference mode and to use minimal memory
        with torch.no_grad():
            output = model(data).data
        t3 = time.time()
        # Using confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, 21)
        all_boxes = get_corresponding_region_boxes(output,
                                                   conf_thresh,
                                                   num_classes,
                                                   anchors,
                                                   num_anchors,
                                                   int(trgt[0][0]),
                                                   only_objectness=0)
        t4 = time.time()
        # Iterate through all batch elements
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            # For each image, get all the targets (for multiple object pose
            # estimation, there might be more than one target per image)
            truths = target[i].view(-1, 21)
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1
                # If the prediction has the highest confidence for this class, choose it as our prediction
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        #iou = bbox_iou(bb2d_gt, bb2d_pr)
                        #match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                # Fix the order of the corners in OCCLUSION
                corners2D_gt_corrected = fix_corner_order(corners2D_gt)

                # Compute [R|t] by PnP
                objpoints3D = np.array(np.transpose(
                    np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                    dtype='float32')
                K = np.array(internal_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Sum errors
                testing_error_pixel += pixel_dist
                testing_samples += 1

        t5 = time.time()

    # Compute 2D reprojection score
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))

    if True:
        logging('-----------------------------------')
        logging('  tensor to cuda : %f' % (t2 - t1))
        logging('         predict : %f' % (t3 - t2))
        logging('get_region_boxes : %f' % (t4 - t3))
        logging('            eval : %f' % (t5 - t4))
        logging('           total : %f' % (t5 - t1))
        logging('-----------------------------------')

    # Register losses and errors for saving later on
    # (acc holds the accuracy at the last, 50 px, threshold here)
    testing_iters.append(niter)
    testing_errors_pixel.append(testing_error_pixel / (float(testing_samples) + eps))
    testing_accuracies.append(acc)
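# `pnp` comes from the repo's utils; a minimal sketch of what it is assumed to
# compute, wrapping OpenCV's solvePnP with zero distortion. (The variant later
# in this file that unpacks three values presumably also returns the solver's
# retval.)
import cv2

def pnp(points_3D, points_2D, camera_matrix):
    # Solve for the pose that maps the object points onto the image points
    dist_coeffs = np.zeros((8, 1), dtype='float32')
    _, rvec, t = cv2.solvePnP(points_3D, points_2D, camera_matrix, dist_coeffs)
    # Convert the axis-angle rotation vector into a 3x3 rotation matrix
    R, _ = cv2.Rodrigues(rvec)
    return R, t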
def valid(datacfg, cfgfile, weightfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse data configuration files
    data_options = read_data_cfg(datacfg)
    valid_images = data_options['valid']
    meshname = data_options['mesh']
    name = data_options['name']
    im_width = int(data_options['im_width'])
    im_height = int(data_options['im_height'])
    fx = float(data_options['fx'])
    fy = float(data_options['fy'])
    u0 = float(data_options['u0'])
    v0 = float(data_options['v0'])

    # Parse net configuration file
    net_options = parse_cfg(cfgfile)[0]
    loss_options = parse_cfg(cfgfile)[-1]
    conf_thresh = float(net_options['conf_thresh'])
    num_keypoints = int(net_options['num_keypoints'])
    num_classes = int(loss_options['classes'])
    num_anchors = int(loss_options['num'])
    anchors = [float(anchor) for anchor in loss_options['anchors'].split(',')]

    # Read object model information, get 3D bounding box corners, get intrinsics
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(data_options['diam'])
    intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy)  # camera params

    # Network I/O params
    num_labels = 2 * num_keypoints + 3  # 2 coords per keypoint, +2 for width/height, +1 for object class
    errs_2d = []  # to save

    with open(valid_images) as fp:  # validation file names
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Compute-related parameters
    use_cuda = True  # whether to use cuda or not
    kwargs = {'num_workers': 4, 'pin_memory': True}  # number of workers etc.

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the dataloader for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images,
                                              shape=(model.width, model.height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              **kwargs)

    # Iterate through test batches (batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            # target = target.cuda()
        data = Variable(data)
        t2 = time.time()
        # Forward pass; no_grad replaces the deprecated volatile=True flag for
        # inference mode and minimal memory use
        with torch.no_grad():
            output = model(data).data
        t3 = time.time()
        # Using confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, num_labels)
        all_boxes = get_multi_region_boxes(output,
                                           conf_thresh,
                                           num_classes,
                                           num_keypoints,
                                           anchors,
                                           num_anchors,
                                           int(trgt[0][0]),
                                           only_objectness=0)
        t4 = time.time()
        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            # For each image, get all the targets (for multiple object pose
            # estimation, there might be more than one target per image)
            truths = target[i].view(-1, num_labels)
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = list()
                for j in range(1, num_labels):
                    box_gt.append(truths[k][j])
                box_gt.extend([1.0, 1.0])
                box_gt.append(truths[k][0])

                # If the prediction has the highest confidence for this class, choose it as our prediction
                best_conf_est = -sys.maxsize
                for j in range(len(boxes)):
                    if (boxes[j][2 * num_keypoints] > best_conf_est) and \
                       (boxes[j][2 * num_keypoints + 2] == int(truths[k][0])):
                        best_conf_est = boxes[j][2 * num_keypoints]
                        box_pr = boxes[j]
                        match = corner_confidence(
                            box_gt[:2 * num_keypoints],
                            torch.FloatTensor(boxes[j][:2 * num_keypoints]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:2 * num_keypoints], [-1, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:2 * num_keypoints], [-1, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                corners2D_gt_corrected = fix_corner_order(corners2D_gt)  # Fix the order of corners

                # Compute [R|t] by PnP
                objpoints3D = np.array(np.transpose(
                    np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                    dtype='float32')
                K = np.array(intrinsic_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, intrinsic_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, intrinsic_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, intrinsic_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

        t5 = time.time()

    # Compute 2D projection score
    eps = 1e-5
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
        # Print test statistics
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
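# `compute_projection` comes from the repo's utils; a minimal sketch of the
# pinhole projection it is assumed to perform, given homogeneous 3D points of
# shape (4, N), a 3x4 transformation [R|t], and a 3x3 intrinsic matrix:
def compute_projection(points_3D, transformation, internal_calibration):
    # Project with K [R|t], then divide by depth to get pixel coordinates
    camera_projection = (internal_calibration.dot(transformation)).dot(points_3D)
    projections_2d = np.zeros((2, points_3D.shape[1]), dtype='float32')
    projections_2d[0, :] = camera_projection[0, :] / camera_projection[2, :]
    projections_2d[1, :] = camera_projection[1, :] / camera_projection[2, :]
    return projections_2d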
def valid(datacfg, cfgfile, weightfile, conf_th):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    name = options['name']
    prefix = 'results'

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.load_weights(weightfile)
    model.cuda()
    model.eval()
    test_width = 544
    test_height = 544

    # Get the parser for the test dataset
    valid_dataset = dataset_multi.listDataset(valid_images,
                                              shape=(test_width, test_height),
                                              shuffle=False,
                                              objclass=name,
                                              transform=transforms.Compose([
                                                  transforms.ToTensor(),
                                              ]))
    valid_batchsize = 1

    # Specify the number of workers for multiprocessing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    # Parameters
    use_cuda = True
    num_classes = 2
    anchors = [
        1.4820, 2.2412, 2.0501, 3.1265, 2.3946, 4.6891, 3.1018, 3.9910,
        3.4879, 5.8851
    ]
    num_anchors = 5
    eps = 1e-5
    conf_thresh = conf_th
    iou_thresh = 0.5

    # Parameters to save
    errs_2d = []
    edges = [[1, 2], [1, 3], [1, 5], [2, 4], [2, 6], [3, 4], [3, 7], [4, 8],
             [5, 6], [5, 7], [6, 8], [7, 8]]
    edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3], [2, 6],
                     [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]

    # Iterate through test batches (batch size for test data is 1)
    logging('Testing {}...'.format(name))
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            # target = target.cuda()
        data = Variable(data)
        t2 = time.time()
        # Forward pass; no_grad replaces the deprecated volatile=True flag for
        # inference mode and minimal memory use
        with torch.no_grad():
            output = model(data).data
        t3 = time.time()
        # Using confidence threshold, eliminate low-confidence predictions
        trgt = target[0].view(-1, 21)
        all_boxes = get_corresponding_region_boxes(output,
                                                   conf_thresh,
                                                   num_classes,
                                                   anchors,
                                                   num_anchors,
                                                   int(trgt[0][0]),
                                                   only_objectness=0)
        t4 = time.time()
        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            # For each image, get all the targets (for multiple object pose
            # estimation, there might be more than one target per image)
            truths = target[i].view(-1, 21)
            if debug_multi:
                print(type(truths))
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            if debug_multi:
                print('number of ground truths: ' + str(num_gts))
            # Iterate through each ground-truth object
            for k in range(num_gts):
                if debug_multi:
                    print('object class in label is: ' + str(truths[k][0]))
                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1
                # If the prediction has the highest confidence for this class, choose it as our prediction
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        iou = bbox_iou(bb2d_gt, bb2d_pr)
                        match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 1280
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 720
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 1280
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 720
                # Don't fix the corner order here, since it is already correct
                # corners2D_gt_corrected = fix_corner_order(corners2D_gt)
                corners2D_gt_corrected = corners2D_gt
                if debug_multi:
                    print('2d corners ground truth: ')
                    print(type(corners2D_gt_corrected))
                    print(corners2D_gt_corrected)

                # Compute [R|t] by PnP
                objpoints3D = np.array(np.transpose(
                    np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                    dtype='float32')
                # Make a correction to the 3D points for classes 2 & 3
                # (i.e. upperPortRed and upperPortBlue)
                correspondingclass = boxes[j][20]
                if correspondingclass == 2 or correspondingclass == 3:
                    x_min_3d = 0
                    x_max_3d = 1.2192
                    y_min_3d = 0
                    y_max_3d = 1.1176
                    z_min_3d = 0
                    z_max_3d = 0.003302
                    centroid = [(x_min_3d + x_max_3d) / 2,
                                (y_min_3d + y_max_3d) / 2,
                                (z_min_3d + z_max_3d) / 2]
                    objpoints3D = np.array([centroid,
                                            [x_min_3d, y_min_3d, z_min_3d],
                                            [x_min_3d, y_min_3d, z_max_3d],
                                            [x_min_3d, y_max_3d, z_min_3d],
                                            [x_min_3d, y_max_3d, z_max_3d],
                                            [x_max_3d, y_min_3d, z_min_3d],
                                            [x_max_3d, y_min_3d, z_max_3d],
                                            [x_max_3d, y_max_3d, z_min_3d],
                                            [x_max_3d, y_max_3d, z_max_3d]])
                K = np.array(internal_calibration, dtype='float32')
                _, R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                _, R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

        t5 = time.time()

    # Compute 2D projection score
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
        # Print test statistics
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
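# `get_camera_intrinsic` is defined elsewhere; a minimal sketch of the 3x3
# intrinsic matrix it is assumed to return (the parameterless variants used
# above presumably hard-code u0, v0, fx, fy for their camera):
def get_camera_intrinsic(u0, v0, fx, fy):
    # Standard pinhole intrinsics: focal lengths on the diagonal,
    # principal point in the last column
    K = np.zeros((3, 3), dtype='float64')
    K[0, 0], K[1, 1] = fx, fy
    K[0, 2], K[1, 2] = u0, v0
    K[2, 2] = 1.
    return K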
def train(epoch):
    global processed_batches

    # Initialize timer
    t0 = time.time()

    # Get the dataloader for the training dataset
    train_loader = torch.utils.data.DataLoader(dataset.listDataset(
        trainlist,
        shape=(init_width, init_height),
        shuffle=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
        ]),
        train=True,
        seen=model.seen,
        batch_size=batch_size,
        num_workers=num_workers,
        bg_file_names=bg_file_names),
                                               batch_size=batch_size,
                                               shuffle=False,
                                               **kwargs)

    # TRAINING (the rate is adjusted per epoch here and logged scaled by 1000)
    lr = adjust_learning_rate(optimizer, epoch)
    logging('epoch %d, processed %d samples, lr %f' %
            (epoch, epoch * len(train_loader.dataset), lr * 1000))
    #log_file.write('epoch %d, processed %d samples, lr %f' % (epoch, epoch * len(train_loader.dataset), lr))

    # Start training
    model.train()
    t1 = time.time()
    avg_time = torch.zeros(9)
    niter = 0

    # Iterate through batches
    training_losses = []
    for data, target in tqdm(iter(train_loader)):
        t2 = time.time()
        # Count processed batches (the learning rate itself is adjusted once per epoch above)
        processed_batches = processed_batches + 1
        # Pass the data to GPU
        data = data.cuda()
        t3 = time.time()
        # Wrap tensors in Variable class for automatic differentiation
        data, target = Variable(data), Variable(target)
        t4 = time.time()
        # Zero the gradients before running the backward pass
        optimizer.zero_grad()
        t5 = time.time()
        # Forward pass
        output = model(data)
        t6 = time.time()
        model.seen = model.seen + data.data.size(0)
        region_loss.seen = region_loss.seen + data.data.size(0)
        # Compute loss, grow an array of losses for saving later on
        loss = region_loss(output, target)
        #training_iters.append(epoch * math.ceil(len(train_loader.dataset) / float(batch_size)) + niter)
        niter += 1
        t7 = time.time()
        # Backprop: compute gradient of the loss with respect to model parameters
        loss.backward()
        t8 = time.time()
        # Update weights
        optimizer.step()
        t9 = time.time()
        # Record the per-sample loss for this batch
        training_losses.append(float(loss.item()) / batch_size)
        t1 = time.time()

    avg = sum(training_losses) / len(training_losses)
    print('%d\t%f\t%f\n' % (epoch + 1, lr * 1000, avg))
    log_file.write('%d\t%f\t%f\n' % (epoch + 1, lr * 1000, avg))
    return epoch * math.ceil(len(train_loader.dataset) / float(batch_size)) + niter - 1, avg
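# A hypothetical driver showing how this train() and the eval() below are
# assumed to be wired together; the epoch range (`init_epoch`, `max_epochs`),
# `backupdir`, and the save cadence are assumptions, not part of the source:
if __name__ == '__main__':
    best_acc = -1
    for epoch in range(init_epoch, max_epochs):
        niter, avg_loss = train(epoch)
        acc = eval(epoch, datacfg, cfgfile)
        # Keep the weights that score best on the 2D projection metric
        if acc > best_acc:
            best_acc = acc
            model.save_weights('%s/model_best.weights' % backupdir)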
def eval(epoch, datacfg, cfgfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    #backupdir = options['backup']
    name = options['name']
    diam = float(options['diam'])
    vx_threshold = diam * 0.1
    prefix = 'results'

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Set the module in evaluation mode (the model is already built, loaded with
    # pretrained weights, and on the GPU)
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(init_width, init_height),
                                        shuffle=False,
                                        objclass=name,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 1

    # Specify the number of workers for multiprocessing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False,
                                              **kwargs)

    # Parameters
    num_classes = model.num_classes
    anchors = model.anchors
    num_anchors = model.num_anchors
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    testing_samples = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    ts = [0.0, 0.0, 0.0, 0.0, 0.0]
    count = 0

    logging("   Number of test samples: %d" % len(test_loader.dataset))

    # Iterate through test examples
    for data, target in tqdm(iter(test_loader)):
        t1 = time.time()
        # Pass the data to GPU
        if use_cuda:
            data = data.cuda()
        # Run under no_grad for inference mode and to use minimal memory
        with torch.no_grad():
            data = Variable(data)
            t2 = time.time()
            # Forward pass
            output = model(data).data.cpu()
        t3 = time.time()
        # Gather candidate predictions per anchor
        #trgt = target[0].view(-1, 21)
        #all_boxes = get_corresponding_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, int(trgt[0][0]), only_objectness=0)
        all_boxes = []
        for b in range(output.size(0)):
            boxes = {}
            for i in range(num_anchors):
                results = merge_kps_by_regions(output[b, i].squeeze())
                boxes[i] = results
            all_boxes.append(boxes)  # collect per-image boxes (missing in the original)
        t4 = time.time()  # needed below for the timing statistics
        # Iterate through all batch elements
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]
            # For each image, get all the targets (for multiple object pose
            # estimation, there might be more than one target per image)
            truths = target[i].view(-1, 21)
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [
                    truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                    truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                    truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                    truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                    truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]
                ]
                best_conf_est = -1
                # If the prediction has the highest confidence for this class, choose it as our prediction
                for j in range(len(boxes)):
                    if (boxes[j][18] > best_conf_est) and (boxes[j][20] == int(truths[k][0])):
                        best_conf_est = boxes[j][18]
                        box_pr = boxes[j]
                        bb2d_gt = get_2d_bb(box_gt[:18], output.size(3))
                        bb2d_pr = get_2d_bb(box_pr[:18], output.size(3))
                        iou = bbox_iou(bb2d_gt, bb2d_pr)
                        match = corner_confidence9(box_gt[:18], torch.FloatTensor(boxes[j][:18]))

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                # Fix the order of the corners in OCCLUSION
                corners2D_gt_corrected = fix_corner_order(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt_corrected - corners2D_pr, axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by PnP
                objpoints3D = np.array(np.transpose(
                    np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)),
                    dtype='float32')
                K = np.array(internal_calibration, dtype='float32')
                R_gt, t_gt = pnp(objpoints3D, corners2D_gt_corrected, K)
                R_pr, t_pr = pnp(objpoints3D, corners2D_pr, K)

                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                proj_corners_gt = np.transpose(compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred, axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1

        t5 = time.time()
        ts[0] += t2 - t1
        ts[1] += (t3 - t2)
        ts[2] += (t4 - t3)
        ts[3] += (t5 - t4)
        ts[4] += (t5 - t1)
        count += 1

    # Compute 2D reprojection score
    s = name + '\t'
    for px_threshold in [5, 10, 15, 20, 25, 30, 35, 40, 45, 50]:
        acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
        logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
        s += str(acc) + '\t'

    if True:
        logging('-----------------------------------')
        logging('  tensor to cuda : %f' % (t2 - t1))
        logging('         predict : %f' % (t3 - t2))
        logging('get_region_boxes : %f' % (t4 - t3))
        logging('            eval : %f' % (t5 - t4))
        logging('           total : %f' % (t5 - t1))
        logging('-----------------------------------')

    tt = ''
    for i in range(5):
        ts[i] /= count
        tt += '%f\t' % ts[i]
    print(tt)

    # Register losses and errors for saving later on
    px_threshold = 5
    acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc_50 = len(np.where(np.array(errs_2d) <= 50)[0]) * 100. / (len(errs_2d) + eps)
    acc3d = len(np.where(np.array(errs_3d) <= vx_threshold)[0]) * 100. / (len(errs_3d) + eps)
    acc5cm5deg = len(np.where((np.array(errs_trans) <= 0.05) &
                              (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans) + eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)[0]) * 100. / (len(errs_corner2D) + eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    logging('   Mean corner error is %f' % (mean_corner_err_2d))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
    logging('   Acc using {} vx 3D Transformation = {:.2f}%'.format(vx_threshold, acc3d))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging('   Translation error: %f, angle error: %f' %
            (testing_error_trans / (nts + eps), testing_error_angle / (nts + eps)))
    test_log_file.write(s + '\n')
    #test_log_file.write('%s\t%d\t%f\t%f\t%f\t%f\t%f\t%f\t%f\n' % (name,epoch+1,mean_corner_err_2d,acc,acc3d,acc5cm5deg,testing_error_trans/(nts+eps), testing_error_angle/(nts+eps),acc_50))
    #test_log_file.write(tt+'\n')

    return acc
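# `calcAngularDistance` lives in the repo's utils; a minimal sketch of the
# geodesic rotation distance (in degrees) it is assumed to compute; the clip
# against arccos's domain is an added numerical guard:
def calcAngularDistance(gt_rot, pr_rot):
    # Relative rotation between ground truth and prediction
    rot_diff = np.dot(gt_rot, np.transpose(pr_rot))
    trace = np.trace(rot_diff)
    return np.rad2deg(np.arccos(np.clip((trace - 1.0) / 2.0, -1.0, 1.0)))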