def get_iou(bb1, bb2):
    """Return the 3D IoU of two bounding boxes.

    The work is delegated to ``box3d_iou``, whose result is unpacked as a
    ``(iou3d, iou2d)`` pair; only the 3D value is handed back.
    """
    overlap_3d, _overlap_2d = box3d_iou(bb1, bb2)
    return overlap_3d
def eval_ref_one_sample(pred_bbox, gt_bbox):
    """Score a single reference prediction against its ground truth.

    Args:
        pred_bbox: 8 corners of the predicted bounding box, (8, 3)
        gt_bbox: 8 corners of the ground-truth bounding box, (8, 3)

    Returns:
        The result of ``box3d_iou(pred_bbox, gt_bbox)``, passed through
        unchanged.
    """
    return box3d_iou(pred_bbox, gt_bbox)
def align_predictions_groundtruths(batch_pred_corners_3d, batch_gt_corners_3d, end_points, iou_threshold=0.5):
    """Pair every predicted box with the ground-truth box it overlaps most.

    Args:
        batch_pred_corners_3d: ndarray (num_batch, num_proposals, 8, 3)
            predicted bounding boxes (8 corner points each)
        batch_gt_corners_3d: ndarray (num_batch, MAX_NUM_OBJ, 8, 3)
            ground-truth bounding boxes (8 corner points each)
        end_points: dict holding the tensors ``box_label_mask`` (non-zero
            entries mark valid ground-truth slots) and ``sem_cls_label``
        iou_threshold: minimum IoU with the best ground-truth box for a
            proposal to receive confidence 1

    Returns:
        batch_gt_corners_3d_aligned: float32 ndarray (num_batch, num_proposals, 8, 3)
            best-overlapping ground-truth box of each proposal (zeros when the
            sample has no valid ground-truth box)
        batch_confidence_scores: float32 ndarray (num_batch, num_proposals, 1)
            1 where the best IoU reaches ``iou_threshold``, else 0
        batch_sem_cls_labels: int64 ndarray (num_batch, num_proposals, 1)
            semantic class of the aligned ground-truth box
    """
    num_batch, num_proposal = batch_pred_corners_3d.shape[:2]
    valid_gt_mask = end_points['box_label_mask'].detach().cpu().numpy()
    gt_sem_cls = end_points['sem_cls_label'].detach().cpu().numpy()

    aligned_gt = np.zeros((num_batch, num_proposal, 8, 3), dtype=np.float32)
    confidences = np.zeros((num_batch, num_proposal, 1), dtype=np.float32)
    aligned_cls = np.zeros((num_batch, num_proposal, 1), dtype=np.int64)

    for b in range(num_batch):
        # Keep only the ground-truth slots flagged as valid for this sample.
        keep = np.nonzero(valid_gt_mask[b])
        sample_gt_boxes = batch_gt_corners_3d[b][keep]
        sample_gt_cls = gt_sem_cls[b][keep]

        for p in range(num_proposal):
            pred_box = batch_pred_corners_3d[b, p, :, :]
            overlaps = [
                iou_3d
                for iou_3d, _ in (box3d_iou(pred_box, gt_box) for gt_box in sample_gt_boxes)
            ]
            if not overlaps:
                # No valid ground truth: leave the zero-initialised outputs.
                continue

            overlaps = np.array(overlaps)
            best = np.argmax(overlaps)
            aligned_gt[b, p, :, :] = sample_gt_boxes[best]
            aligned_cls[b, p] = sample_gt_cls[best]
            if overlaps.max() >= iou_threshold:
                confidences[b, p] = 1.

    return aligned_gt, confidences, aligned_cls
def get_iou_obb(bb1, bb2):
    """Return ``box3d_iou(bb1, bb2)`` for two oriented bounding boxes.

    The result of ``box3d_iou`` is passed through unchanged.
    """
    return box3d_iou(bb1, bb2)
def dump_results(args, scanrefer, data, config):
    """Dump visualisation files for the referenced ground-truth and predicted boxes.

    For every sample of the batch this writes, under
    ``<CONF.PATH.OUTPUT>/<args.folder>/vis/<scene_id>/``:
      * ``mesh.ply`` and ``pc.ply`` (once, when the scene directory is created),
      * ``gt_<object_id>_<object_name>.ply`` (only if it does not exist yet),
      * ``pred_<object_id>_<object_name>_<ann_id>_<score>_<iou>.ply``.

    Args:
        args: namespace providing ``folder`` and ``use_color``
        scanrefer: sequence of annotation dicts indexed by ``data['scan_idx']``;
            each provides ``scene_id``, ``object_id``, ``object_name``, ``ann_id``
        data: dict of torch tensors with the network inputs, outputs and labels
        config: dataset config exposing ``param2obb``

    Returns:
        None; the function only writes files.
    """
    dump_dir = os.path.join(CONF.PATH.OUTPUT, args.folder, "vis")
    os.makedirs(dump_dir, exist_ok=True)

    # from inputs
    ids = data['scan_idx'].detach().cpu().numpy()
    point_clouds = data['point_clouds'].cpu().numpy()
    batch_size = point_clouds.shape[0]

    pcl_color = data["pcl_color"].detach().cpu().numpy()
    if args.use_color:
        pcl_color = (pcl_color * 256 + MEAN_COLOR_RGB).astype(np.int64)

    # from network outputs
    # detection
    pred_objectness = torch.argmax(data['objectness_scores'], 2).float().detach().cpu().numpy()
    pred_center = data['center'].detach().cpu().numpy()  # (B,K,3)
    pred_heading_class = torch.argmax(data['heading_scores'], -1)  # B,num_proposal
    pred_heading_residual = torch.gather(
        data['heading_residuals'], 2,
        pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
    pred_heading_class = pred_heading_class.detach().cpu().numpy()  # B,num_proposal
    pred_heading_residual = pred_heading_residual.squeeze(2).detach().cpu().numpy()  # B,num_proposal
    pred_size_class = torch.argmax(data['size_scores'], -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        data['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 1, 3))  # B,num_proposal,1,3
    # Move the class indices to NumPy like every other quantity indexed below,
    # so param2obb never receives an element of a (possibly GPU) tensor.
    pred_size_class = pred_size_class.detach().cpu().numpy()  # B,num_proposal
    pred_size_residual = pred_size_residual.squeeze(2).detach().cpu().numpy()  # B,num_proposal,3

    # reference
    pred_ref_scores = data["cluster_ref"].detach().cpu().numpy()
    pred_ref_scores_softmax = F.softmax(
        data["cluster_ref"]
        * torch.argmax(data['objectness_scores'], 2).float()
        * data['pred_mask'],
        dim=1).detach().cpu().numpy()

    # post-processing
    nms_masks = data['pred_mask'].detach().cpu().numpy()  # B,num_proposal

    # ground truth
    gt_center = data['center_label'].cpu().numpy()  # (B,MAX_NUM_OBJ,3)
    gt_heading_class = data['heading_class_label'].cpu().numpy()  # B,K2
    gt_heading_residual = data['heading_residual_label'].cpu().numpy()  # B,K2
    gt_size_class = data['size_class_label'].cpu().numpy()  # B,K2
    gt_size_residual = data['size_residual_label'].cpu().numpy()  # B,K2,3
    # reference
    gt_ref_labels = data["ref_box_label"].detach().cpu().numpy()

    for i in range(batch_size):
        # basic info
        idx = ids[i]
        scene_id = scanrefer[idx]["scene_id"]
        object_id = scanrefer[idx]["object_id"]
        object_name = scanrefer[idx]["object_name"]
        ann_id = scanrefer[idx]["ann_id"]

        # scene_output
        scene_dump_dir = os.path.join(dump_dir, scene_id)
        if not os.path.exists(scene_dump_dir):
            os.mkdir(scene_dump_dir)

            # Dump the original scene mesh and point cloud; this happens only
            # the first time the scene directory is created.
            mesh = align_mesh(scene_id)
            mesh.write(os.path.join(scene_dump_dir, 'mesh.ply'))

            write_ply_rgb(point_clouds[i], pcl_color[i],
                          os.path.join(scene_dump_dir, 'pc.ply'))

        # filter out the valid ground truth reference box
        assert gt_ref_labels[i].shape[0] == gt_center[i].shape[0]
        gt_ref_idx = np.argmax(gt_ref_labels[i], 0)

        # visualize the gt reference box
        # NOTE: for each object there should be only one gt reference box
        object_dump_dir = os.path.join(
            dump_dir, scene_id, "gt_{}_{}.ply".format(object_id, object_name))
        gt_obb = config.param2obb(gt_center[i, gt_ref_idx, 0:3],
                                  gt_heading_class[i, gt_ref_idx],
                                  gt_heading_residual[i, gt_ref_idx],
                                  gt_size_class[i, gt_ref_idx],
                                  gt_size_residual[i, gt_ref_idx])
        gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3])

        if not os.path.exists(object_dump_dir):
            write_bbox(
                gt_obb, 0,
                os.path.join(scene_dump_dir,
                             'gt_{}_{}.ply'.format(object_id, object_name)))

        # find the valid reference prediction: proposals that survived
        # post-processing AND are predicted to be objects
        pred_masks = nms_masks[i] * pred_objectness[i] == 1
        assert pred_ref_scores[i].shape[0] == pred_center[i].shape[0]
        pred_ref_idx = np.argmax(pred_ref_scores[i] * pred_masks, 0)

        # visualize the predicted reference box
        pred_obb = config.param2obb(pred_center[i, pred_ref_idx, 0:3],
                                    pred_heading_class[i, pred_ref_idx],
                                    pred_heading_residual[i, pred_ref_idx],
                                    pred_size_class[i, pred_ref_idx],
                                    pred_size_residual[i, pred_ref_idx])
        pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
        # NOTE(review): the result is formatted with "{:.5f}" below, so
        # box3d_iou is assumed to return a single number here - confirm.
        iou = box3d_iou(gt_bbox, pred_bbox)

        write_bbox(
            pred_obb, 1,
            os.path.join(
                scene_dump_dir,
                'pred_{}_{}_{}_{:.5f}_{:.5f}.ply'.format(
                    object_id, object_name, ann_id,
                    pred_ref_scores_softmax[i, pred_ref_idx], iou)))
def _accuracy_at(ious, selector, threshold):
    """Fraction of the samples picked by ``selector`` whose IoU is >= ``threshold``.

    Returns 0 when ``selector`` picks no sample, so an empty subset (or an
    empty prediction file) never triggers a division by zero.
    """
    num_selected = int(np.count_nonzero(selector))
    if num_selected == 0:
        return 0
    num_hits = int(np.count_nonzero(np.logical_and(selector, ious >= threshold)))
    return num_hits / num_selected


def evaluate(args):
    """Score the predicted boxes in ``pred.json`` against the ground truth and print a report.

    Reads ``<CONF.PATH.OUTPUT>/<args.folder>/pred.json``; every entry must
    provide ``scene_id``, ``object_id``, ``ann_id``, ``bbox``,
    ``unique_multiple`` (0 = unique, 1 = multiple) and ``others``
    (0 = not_in_others, 1 = in_others). A prediction with no matching
    ground-truth entry is counted with an IoU of 0.

    The report lists sample counts and the accuracy at IoU 0.25 / 0.5 for each
    (unique|multiple|overall) x (not_in_others|in_others|overall) subset.

    Args:
        args: namespace providing ``folder``

    Returns:
        None; results are printed. The process exits if ``pred.json`` is missing.
    """
    pred_path = os.path.join(CONF.PATH.OUTPUT, args.folder, "pred.json")
    if not os.path.isfile(pred_path):
        print(
            "please run `benchmark/predict.py` first to generate bounding boxes"
        )
        exit()

    organized_gt = organize_gt()

    with open(pred_path) as f:
        predictions = json.load(f)

    ious = []
    masks = []
    others = []
    print("evaluating...")
    for data in tqdm(predictions):
        scene_id = data["scene_id"]
        object_id = data["object_id"]
        ann_id = data["ann_id"]
        pred_bbox = np.array(data["bbox"])
        mask = data["unique_multiple"]
        other = data["others"]

        # Only the ground-truth lookup is allowed to fail with KeyError.
        try:
            gt_bbox = np.array(organized_gt[scene_id][object_id][ann_id]["bbox"])
        except KeyError:
            iou = 0
        else:
            iou = box3d_iou(pred_bbox, gt_bbox)

        ious.append(iou)
        masks.append(mask)
        others.append(other)

    ious = np.array(ious)
    masks = np.array(masks)
    others = np.array(others)

    multiple_dict = {"unique": 0, "multiple": 1}
    others_dict = {"not_in_others": 0, "in_others": 1}
    # Each metric needs its own key; sharing one key would make the 0.5
    # accuracy silently overwrite the 0.25 accuracy.
    metrics = (("acc@0.25iou", 0.25), ("acc@0.5iou", 0.5))

    def _scores_for(selector):
        return {name: _accuracy_at(ious, selector, thr) for name, thr in metrics}

    # evaluation stats
    stats = {}
    for k, v in multiple_dict.items():
        stats[k] = {
            k_o: np.sum(np.logical_and(masks == v, others == v_o))
            for k_o, v_o in others_dict.items()
        }
        stats[k]["overall"] = np.sum(masks == v)

    stats["overall"] = {k_o: np.sum(others == v_o) for k_o, v_o in others_dict.items()}
    stats["overall"]["overall"] = masks.shape[0]

    # aggregate scores
    scores = {}
    for k, v in multiple_dict.items():
        scores[k] = {
            k_o: _scores_for(np.logical_and(masks == v, others == v_o))
            for k_o, v_o in others_dict.items()
        }
        scores[k]["overall"] = _scores_for(masks == v)

    scores["overall"] = {
        k_o: _scores_for(others == v_o) for k_o, v_o in others_dict.items()
    }
    scores["overall"]["overall"] = _scores_for(np.ones_like(ious, dtype=bool))

    # report
    print("\nstats:")
    for k_s, per_split in stats.items():
        for k_o, count in per_split.items():
            print("{} | {}: {}".format(k_s, k_o, count))

    for k_s, per_split in scores.items():
        print("\n{}:".format(k_s))
        for k_m, per_metric in per_split.items():
            for metric, value in per_metric.items():
                print("{} | {} | {}: {}".format(k_s, k_m, metric, value))
def get_loss(data_dict, config, reference=False, use_lang_classifier=False, use_max_iou=False, post_processing=None):
    """Compute the total training loss and a set of monitoring statistics.

    Args:
        data_dict: dict of network outputs and labels; it is updated in place
            with every loss term and statistic computed here
        config: dataset config instance (must expose ``param2obb``)
        reference: when True, also compute the reference/language losses and
            the reference accuracy / IoU statistics
        use_lang_classifier: forwarded to ``compute_reference_loss``; also
            enables the ``lang_acc`` statistic
        use_max_iou: forwarded to ``compute_reference_loss``; the reference
            loss is weighted 0.1 when True and 0.01 otherwise
        post_processing: config dict; when truthy, ``parse_predictions`` is run
            and its ``pred_mask`` is combined with the objectness prediction

    Returns:
        loss: pytorch scalar tensor
        data_dict: the same dict that was passed in, with the new entries

    Note:
        Tensors are created with ``.cuda()``, so a CUDA device is required.
    """
    # Vote loss
    vote_loss = compute_vote_loss(data_dict)
    data_dict['vote_loss'] = vote_loss

    # Obj loss
    objectness_loss, objectness_label, objectness_mask, object_assignment = compute_objectness_loss(
        data_dict)
    data_dict['objectness_loss'] = objectness_loss
    data_dict['objectness_label'] = objectness_label
    data_dict['objectness_mask'] = objectness_mask
    data_dict['object_assignment'] = object_assignment
    total_num_proposal = objectness_label.shape[0] * objectness_label.shape[1]
    data_dict['pos_ratio'] = torch.sum(
        objectness_label.float().cuda()) / float(total_num_proposal)
    data_dict['neg_ratio'] = torch.sum(objectness_mask.float()) / float(
        total_num_proposal) - data_dict['pos_ratio']

    # Box loss and sem cls loss
    center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, size_reg_loss, sem_cls_loss = compute_box_and_sem_cls_loss(
        data_dict, config)
    data_dict['center_loss'] = center_loss
    data_dict['heading_cls_loss'] = heading_cls_loss
    data_dict['heading_reg_loss'] = heading_reg_loss
    data_dict['size_cls_loss'] = size_cls_loss
    data_dict['size_reg_loss'] = size_reg_loss
    data_dict['sem_cls_loss'] = sem_cls_loss
    box_loss = center_loss + 0.1 * heading_cls_loss + heading_reg_loss + 0.1 * size_cls_loss + size_reg_loss
    data_dict['box_loss'] = box_loss

    if reference:
        # Reference loss
        ref_loss, lang_loss, cluster_preds_scores, cluster_labels = compute_reference_loss(
            data_dict, config, use_lang_classifier, use_max_iou)
        data_dict["ref_loss"] = ref_loss
        data_dict["lang_loss"] = lang_loss

        objectness_preds_batch = torch.argmax(data_dict['objectness_scores'], 2).long()
        objectness_labels_batch = objectness_label.long()

        # construct valid mask
        if post_processing:
            _ = parse_predictions(data_dict, post_processing)
            nms_masks = torch.LongTensor(data_dict['pred_mask']).cuda()
            pred_masks = (nms_masks * objectness_preds_batch == 1).float()
        else:
            pred_masks = (objectness_preds_batch == 1).float()
        label_masks = (objectness_labels_batch == 1).float()

        data_dict["pred_mask"] = pred_masks
        data_dict["label_mask"] = label_masks

        # One-hot encode the best-scoring valid proposal of each sample.
        cluster_preds = torch.argmax(cluster_preds_scores * pred_masks, 1).long().unsqueeze(1).repeat(
            1, pred_masks.shape[1])
        preds = torch.zeros(pred_masks.shape).cuda()
        preds = preds.scatter_(1, cluster_preds, 1)
        cluster_preds = preds
        cluster_labels = cluster_labels.float()
        cluster_labels *= label_masks

        # compute classification scores
        corrects = torch.sum((cluster_preds == 1) * (cluster_labels == 1), dim=1).float()
        labels = torch.ones(corrects.shape[0]).cuda()
        ref_acc = corrects / (labels + 1e-8)

        # store
        data_dict["ref_acc"] = ref_acc.cpu().numpy().tolist()

        # compute localization metrics
        pred_ref = torch.argmax(
            data_dict['cluster_ref'] * data_dict['pred_mask'],
            1).detach().cpu().numpy()  # (B,)
        pred_center = data_dict['center'].detach().cpu().numpy()  # (B,K,3)
        pred_heading_class = torch.argmax(data_dict['heading_scores'], -1)  # B,num_proposal
        pred_heading_residual = torch.gather(
            data_dict['heading_residuals'], 2,
            pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
        pred_heading_class = pred_heading_class.detach().cpu().numpy()  # B,num_proposal
        pred_heading_residual = pred_heading_residual.squeeze(2).detach().cpu().numpy()  # B,num_proposal
        pred_size_class = torch.argmax(data_dict['size_scores'], -1)  # B,num_proposal
        pred_size_residual = torch.gather(
            data_dict['size_residuals'], 2,
            pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(1, 1, 1, 3))  # B,num_proposal,1,3
        pred_size_class = pred_size_class.detach().cpu().numpy()
        pred_size_residual = pred_size_residual.squeeze(2).detach().cpu().numpy()  # B,num_proposal,3

        gt_ref = torch.argmax(data_dict["ref_box_label"], 1).detach().cpu().numpy()
        gt_center = data_dict['center_label'].cpu().numpy()  # (B,MAX_NUM_OBJ,3)
        gt_heading_class = data_dict['heading_class_label'].cpu().numpy()  # B,K2
        gt_heading_residual = data_dict['heading_residual_label'].cpu().numpy()  # B,K2
        gt_size_class = data_dict['size_class_label'].cpu().numpy()  # B,K2
        gt_size_residual = data_dict['size_residual_label'].cpu().numpy()  # B,K2,3

        ious = []
        multiple = []
        for i in range(pred_ref.shape[0]):
            # compute the iou
            pred_ref_idx, gt_ref_idx = pred_ref[i], gt_ref[i]
            pred_obb = config.param2obb(pred_center[i, pred_ref_idx, 0:3],
                                        pred_heading_class[i, pred_ref_idx],
                                        pred_heading_residual[i, pred_ref_idx],
                                        pred_size_class[i, pred_ref_idx],
                                        pred_size_residual[i, pred_ref_idx])
            gt_obb = config.param2obb(gt_center[i, gt_ref_idx, 0:3],
                                      gt_heading_class[i, gt_ref_idx],
                                      gt_heading_residual[i, gt_ref_idx],
                                      gt_size_class[i, gt_ref_idx],
                                      gt_size_residual[i, gt_ref_idx])
            pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
            gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3])
            iou, _ = box3d_iou(pred_bbox, gt_bbox)
            ious.append(iou)

            # construct the multiple mask
            num_bbox = data_dict["num_bbox"][i]
            sem_cls_label = data_dict["sem_cls_label"][i]
            # NOTE(review): this subtracts in place, so it presumably also
            # changes data_dict["sem_cls_label"] for later readers - confirm
            # that this is intended.
            sem_cls_label[num_bbox:] -= 1
            num_choices = torch.sum(
                data_dict["object_cat"][i] == sem_cls_label)
            multiple.append(1 if num_choices > 1 else 0)

        # store
        iou_array = np.array(ious)
        data_dict["ref_iou"] = ious
        data_dict["ref_iou_rate_0.25"] = iou_array[iou_array >= 0.25].shape[0] / iou_array.shape[0]
        data_dict["ref_iou_rate_0.5"] = iou_array[iou_array >= 0.5].shape[0] / iou_array.shape[0]
        data_dict["ref_multiple_mask"] = multiple
    else:
        ref_loss = torch.zeros(1)[0].cuda()
        lang_loss = torch.zeros(1)[0].cuda()

    # Final loss function
    if use_max_iou:
        loss = vote_loss + 0.5 * objectness_loss + box_loss + 0.1 * sem_cls_loss + 0.1 * ref_loss + lang_loss
    else:
        loss = vote_loss + 0.5 * objectness_loss + box_loss + 0.1 * sem_cls_loss + 0.01 * ref_loss + lang_loss
    loss *= 10  # amplify
    data_dict['loss'] = loss

    # --------------------------------------------
    # Some other statistics
    obj_pred_val = torch.argmax(data_dict['objectness_scores'], 2)  # B,K
    obj_acc = torch.sum((obj_pred_val == objectness_label.long()).float()
                        * objectness_mask) / (torch.sum(objectness_mask) + 1e-6)
    data_dict['obj_acc'] = obj_acc

    # precision, recall, f1
    corrects = torch.sum((obj_pred_val == 1) * (objectness_label == 1), dim=1).float()
    preds = torch.sum(obj_pred_val == 1, dim=1).float()
    labels = torch.sum(objectness_label == 1, dim=1).float()
    # precision = TP / predicted positives, recall = TP / actual positives
    precisions = corrects / (preds + 1e-8)
    recalls = corrects / (labels + 1e-8)
    f1s = 2 * precisions * recalls / (precisions + recalls + 1e-8)
    data_dict["objectness_precision"] = precisions.cpu().numpy().tolist()
    data_dict["objectness_recall"] = recalls.cpu().numpy().tolist()
    data_dict["objectness_f1"] = f1s.cpu().numpy().tolist()

    # lang
    if use_lang_classifier:
        data_dict["lang_acc"] = (torch.argmax(
            data_dict['lang_scores'], 1) == data_dict["object_cat"]).float().mean()
    else:
        data_dict["lang_acc"] = torch.zeros(1)[0].cuda()

    return loss, data_dict