예제 #1
0
def get_iou(bb1, bb2):
    """Return the 3D IoU of two bounding boxes.

    Delegates to ``box3d_iou``, which is expected to return exactly two
    values here; the first one (the 3D IoU) is returned and the second one
    (the 2D IoU) is discarded.
    """
    volume_iou, _ = box3d_iou(bb1, bb2)
    return volume_iou
예제 #2
0
def eval_ref_one_sample(pred_bbox, gt_bbox):
    """Score a single reference prediction against its ground truth.

    Args:
        pred_bbox: 8 corners of the predicted bounding box, (8, 3)
        gt_bbox: 8 corners of the ground-truth bounding box, (8, 3)
    Returns:
        iou: the intersection-over-union score, i.e. the result of
            ``box3d_iou(pred_bbox, gt_bbox)`` passed through unchanged
    """
    return box3d_iou(pred_bbox, gt_bbox)
예제 #3
0
def align_predictions_groundtruths(batch_pred_corners_3d, batch_gt_corners_3d, end_points, iou_threshold=0.5):
    """Match every predicted box with the ground-truth box it overlaps most.

    Args:
        batch_pred_corners_3d: ndarray (num_batch, num_proposals, 8, 3)
            predicted bounding boxes (8 corner points each)
        batch_gt_corners_3d: ndarray (num_batch, MAX_NUM_OBJ, 8, 3)
            ground-truth bounding boxes (8 corner points each)
        end_points: dict holding the tensors ``box_label_mask`` (non-zero
            entries mark the ground-truth slots that are real boxes) and
            ``sem_cls_label`` (class id of every ground-truth slot)
        iou_threshold: minimum 3D IoU for a prediction to count as a fit
    Returns:
        batch_gt_corners_3d_aligned: ndarray (num_batch, num_proposals, 8, 3), float32
            corners of the best-overlapping ground-truth box per prediction
        batch_confidence_scores: ndarray (num_batch, num_proposals, 1), float32
            1 where the best IoU reaches ``iou_threshold``, otherwise 0
        batch_sem_cls_labels: ndarray (num_batch, num_proposals, 1), int64
            class id of the matched ground-truth box

        Rows belonging to a scene without any valid ground-truth box stay
        all-zero in the three outputs.
    """
    batch_size = batch_pred_corners_3d.shape[0]
    proposals_per_scene = batch_pred_corners_3d.shape[1]
    valid_gt_mask = end_points['box_label_mask'].detach().cpu().numpy()
    gt_class_ids = end_points['sem_cls_label'].detach().cpu().numpy()

    aligned_classes = np.zeros((batch_size, proposals_per_scene, 1), dtype=np.int64)
    confidence = np.zeros((batch_size, proposals_per_scene, 1), dtype=np.float32)
    aligned_corners = np.zeros((batch_size, proposals_per_scene, 8, 3), dtype=np.float32)

    for scene in range(batch_size):
        # Keep only the ground-truth slots flagged as real boxes.
        valid_slots = np.nonzero(valid_gt_mask[scene])
        scene_gt_corners = batch_gt_corners_3d[scene][valid_slots]
        scene_gt_classes = gt_class_ids[scene][valid_slots]

        # Nothing to align against: leave this scene's rows zeroed.
        if len(scene_gt_corners) == 0:
            continue

        for proposal in range(proposals_per_scene):
            pred_box = batch_pred_corners_3d[scene, proposal, :, :]
            # box3d_iou yields a pair; only its first element (3D IoU) is used.
            overlaps = np.array([
                iou_3d
                for iou_3d, _ in (box3d_iou(pred_box, gt_box)
                                  for gt_box in scene_gt_corners)
            ])
            best = np.argmax(overlaps)
            aligned_corners[scene, proposal, :, :] = scene_gt_corners[best]
            aligned_classes[scene, proposal] = scene_gt_classes[best]
            if overlaps.max() >= iou_threshold:
                confidence[scene, proposal] = 1.

    return aligned_corners, confidence, aligned_classes
예제 #4
0
def get_iou_obb(bb1, bb2):
    """Return the result of ``box3d_iou(bb1, bb2)`` unchanged."""
    return box3d_iou(bb1, bb2)
예제 #5
0
def dump_results(args, scanrefer, data, config):
    """Write ground-truth and predicted reference boxes of a batch as PLY files.

    For every sample in the batch the function writes, below
    ``<CONF.PATH.OUTPUT>/<args.folder>/vis/<scene_id>/``:

    * ``mesh.ply`` and ``pc.ply`` (only when the scene directory is created),
    * ``gt_<object_id>_<object_name>.ply`` (only if it does not exist yet),
    * ``pred_<object_id>_<object_name>_<ann_id>_<score>_<iou>.ply``.

    Args:
        args: namespace providing ``folder`` and ``use_color``.
        scanrefer: sequence of annotation dicts indexed by ``data['scan_idx']``;
            each needs ``scene_id``, ``object_id``, ``object_name``, ``ann_id``.
        data: dict of torch tensors holding the network inputs, outputs and
            labels read below (``point_clouds``, ``pcl_color``,
            ``objectness_scores``, ``center``, ``heading_*``, ``size_*``,
            ``cluster_ref``, ``pred_mask``, ``*_label``, ``ref_box_label``).
        config: dataset config exposing ``param2obb``.

    Returns:
        None. The function is used for its file-system side effects only.
    """
    dump_dir = os.path.join(CONF.PATH.OUTPUT, args.folder, "vis")
    os.makedirs(dump_dir, exist_ok=True)

    # from inputs
    ids = data['scan_idx'].detach().cpu().numpy()
    point_clouds = data['point_clouds'].cpu().numpy()
    batch_size = point_clouds.shape[0]

    pcl_color = data["pcl_color"].detach().cpu().numpy()
    if args.use_color:
        pcl_color = (pcl_color * 256 + MEAN_COLOR_RGB).astype(np.int64)

    # from network outputs
    # detection
    # The objectness argmax is needed both as a tensor (softmax masking) and
    # as an array (per-sample masking), so compute it once.
    objectness_pred = torch.argmax(data['objectness_scores'], 2).float()
    pred_objectness = objectness_pred.detach().cpu().numpy()
    pred_center = data['center'].detach().cpu().numpy()  # (B,K,3)
    pred_heading_class = torch.argmax(data['heading_scores'],
                                      -1)  # B,num_proposal
    pred_heading_residual = torch.gather(
        data['heading_residuals'], 2,
        pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
    pred_heading_class = pred_heading_class.detach().cpu().numpy(
    )  # B,num_proposal
    pred_heading_residual = pred_heading_residual.squeeze(
        2).detach().cpu().numpy()  # B,num_proposal
    pred_size_class = torch.argmax(data['size_scores'], -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        data['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3))  # B,num_proposal,1,3
    # Convert to numpy like every other decoded quantity so param2obb gets
    # host-side values instead of a (possibly GPU-resident) tensor element.
    pred_size_class = pred_size_class.detach().cpu().numpy()
    pred_size_residual = pred_size_residual.squeeze(
        2).detach().cpu().numpy()  # B,num_proposal,3
    # reference
    pred_ref_scores = data["cluster_ref"].detach().cpu().numpy()
    pred_ref_scores_softmax = F.softmax(
        data["cluster_ref"] * objectness_pred * data['pred_mask'],
        dim=1).detach().cpu().numpy()
    # post-processing
    nms_masks = data['pred_mask'].detach().cpu().numpy()  # B,num_proposal

    # ground truth
    gt_center = data['center_label'].cpu().numpy()  # (B,MAX_NUM_OBJ,3)
    gt_heading_class = data['heading_class_label'].cpu().numpy()  # B,K2
    gt_heading_residual = data['heading_residual_label'].cpu().numpy()  # B,K2
    gt_size_class = data['size_class_label'].cpu().numpy()  # B,K2
    gt_size_residual = data['size_residual_label'].cpu().numpy()  # B,K2,3
    # reference
    gt_ref_labels = data["ref_box_label"].detach().cpu().numpy()

    for i in range(batch_size):
        # basic info
        annotation = scanrefer[ids[i]]
        scene_id = annotation["scene_id"]
        object_id = annotation["object_id"]
        object_name = annotation["object_name"]
        ann_id = annotation["ann_id"]

        # scene_output: the mesh and point cloud are dumped only the first
        # time a scene directory is created.
        scene_dump_dir = os.path.join(dump_dir, scene_id)
        if not os.path.exists(scene_dump_dir):
            os.mkdir(scene_dump_dir)

            mesh = align_mesh(scene_id)
            mesh.write(os.path.join(scene_dump_dir, 'mesh.ply'))

            write_ply_rgb(point_clouds[i], pcl_color[i],
                          os.path.join(scene_dump_dir, 'pc.ply'))

        # filter out the valid ground truth reference box
        assert gt_ref_labels[i].shape[0] == gt_center[i].shape[0]
        gt_ref_idx = np.argmax(gt_ref_labels[i], 0)

        # visualize the gt reference box
        # NOTE: for each object there should be only one gt reference box
        gt_obb = config.param2obb(gt_center[i, gt_ref_idx, 0:3],
                                  gt_heading_class[i, gt_ref_idx],
                                  gt_heading_residual[i, gt_ref_idx],
                                  gt_size_class[i, gt_ref_idx],
                                  gt_size_residual[i, gt_ref_idx])
        gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3])

        # Several annotations can refer to the same object, so the gt box is
        # written only once per (object_id, object_name).
        gt_ply_path = os.path.join(
            scene_dump_dir, 'gt_{}_{}.ply'.format(object_id, object_name))
        if not os.path.exists(gt_ply_path):
            write_bbox(gt_obb, 0, gt_ply_path)

        # find the valid reference prediction: a proposal must both survive
        # post-processing and be predicted as an object.
        pred_masks = (nms_masks[i] * pred_objectness[i]) == 1
        assert pred_ref_scores[i].shape[0] == pred_center[i].shape[0]
        pred_ref_idx = np.argmax(pred_ref_scores[i] * pred_masks, 0)

        # visualize the predicted reference box
        pred_obb = config.param2obb(pred_center[i, pred_ref_idx, 0:3],
                                    pred_heading_class[i, pred_ref_idx],
                                    pred_heading_residual[i, pred_ref_idx],
                                    pred_size_class[i, pred_ref_idx],
                                    pred_size_residual[i, pred_ref_idx])
        pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
        # NOTE(review): the result is formatted with {:.5f} below, so
        # box3d_iou must return a single number here - confirm against the
        # imported implementation.
        iou = box3d_iou(gt_bbox, pred_bbox)

        write_bbox(
            pred_obb, 1,
            os.path.join(
                scene_dump_dir, 'pred_{}_{}_{}_{:.5f}_{:.5f}.ply'.format(
                    object_id, object_name, ann_id,
                    pred_ref_scores_softmax[i, pred_ref_idx], iou)))
예제 #6
0
def _accuracy_at(ious, subset, threshold):
    """Return the share of ``ious[subset]`` that is >= ``threshold`` (0 if the subset is empty)."""
    total = int(np.sum(subset))
    if total == 0:
        return 0
    hits = int(np.sum(np.logical_and(subset, ious >= threshold)))
    return hits / total


def evaluate(args):
    """Print localisation accuracy for the predictions stored in ``pred.json``.

    Reads ``<CONF.PATH.OUTPUT>/<args.folder>/pred.json``, computes the IoU of
    every predicted box with its ground-truth box (0 when the ground truth
    has no entry for the prediction) and prints, for every combination of
    the ``unique``/``multiple``/``overall`` and
    ``not_in_others``/``in_others``/``overall`` subsets, the sample count and
    the fraction of samples whose IoU reaches 0.25 and 0.5.

    Args:
        args: namespace providing ``folder``.

    Returns:
        None; results are printed to stdout.

    Raises:
        SystemExit: if the prediction file does not exist.
    """
    pred_path = os.path.join(CONF.PATH.OUTPUT, args.folder, "pred.json")
    if not os.path.isfile(pred_path):
        print(
            "please run `benchmark/predict.py` first to generate bounding boxes"
        )
        # Same effect as the interactive helper exit(), without relying on
        # the `site` module having installed it.
        raise SystemExit

    organized_gt = organize_gt()

    # Only the JSON load needs the file handle; release it before evaluating.
    with open(pred_path) as f:
        predictions = json.load(f)

    ious = []
    masks = []
    others = []
    print("evaluating...")
    for data in tqdm(predictions):
        scene_id = data["scene_id"]
        object_id = data["object_id"]
        ann_id = data["ann_id"]
        pred_bbox = np.array(data["bbox"])

        # Only the ground-truth lookup is allowed to miss; a KeyError raised
        # inside box3d_iou would be a real bug and must not be scored as 0.
        try:
            gt_corners = organized_gt[scene_id][object_id][ann_id]["bbox"]
        except KeyError:
            iou = 0
        else:
            # NOTE(review): the value is stored as-is and compared against
            # scalar thresholds, so box3d_iou must return one number here.
            iou = box3d_iou(pred_bbox, np.array(gt_corners))

        ious.append(iou)
        masks.append(data["unique_multiple"])
        others.append(data["others"])

    ious = np.array(ious)
    masks = np.array(masks)
    others = np.array(others)

    multiple_dict = {"unique": 0, "multiple": 1}
    others_dict = {"not_in_others": 0, "in_others": 1}

    # Boolean selectors per category; "overall" selects every sample. The
    # insertion order fixes the order of the printed report.
    everything = np.ones(masks.shape[0], dtype=bool)
    row_selectors = {k: masks == v for k, v in multiple_dict.items()}
    row_selectors["overall"] = everything
    col_selectors = {k_o: others == v_o for k_o, v_o in others_dict.items()}
    col_selectors["overall"] = everything

    stats = {}
    scores = {}
    for k, row in row_selectors.items():
        stats[k] = {}
        scores[k] = {}
        for k_o, col in col_selectors.items():
            subset = np.logical_and(row, col)
            stats[k][k_o] = np.sum(subset)
            # The two metrics need distinct keys, otherwise the second value
            # silently overwrites the first one.
            scores[k][k_o] = {
                "acc@0.25iou": _accuracy_at(ious, subset, 0.25),
                "acc@0.5iou": _accuracy_at(ious, subset, 0.5),
            }

    # report
    print("\nstats:")
    for k_s, row_stats in stats.items():
        for k_o, count in row_stats.items():
            print("{} | {}: {}".format(k_s, k_o, count))

    for k_s, row_scores in scores.items():
        print("\n{}:".format(k_s))
        for k_m, metrics in row_scores.items():
            for metric, value in metrics.items():
                print("{} | {} | {}: {}".format(k_s, k_m, metric, value))
예제 #7
0
def get_loss(data_dict,
             config,
             reference=False,
             use_lang_classifier=False,
             use_max_iou=False,
             post_processing=None):
    """ Loss functions

    Combines the vote, objectness, box, semantic-class, reference and
    language losses into one scalar and stores the individual terms plus a
    set of monitoring statistics in ``data_dict``.

    Args:
        data_dict: dict of network outputs and labels; updated in place
        config: dataset config instance (must expose ``param2obb``)
        reference: flag (False/True); when True the reference loss and the
            reference accuracy / IoU statistics are computed as well
        use_lang_classifier: flag forwarded to ``compute_reference_loss``;
            also enables the ``lang_acc`` statistic
        use_max_iou: flag forwarded to ``compute_reference_loss``; when True
            the reference loss is weighted by 0.1 instead of 0.01
        post_processing: config dict passed to ``parse_predictions``; if
            falsy, no post-processing mask is applied to the predictions
    Returns:
        loss: pytorch scalar tensor (weighted sum of all terms, times 10)
        data_dict: dict (the same object that was passed in)
    """

    # Vote loss
    vote_loss = compute_vote_loss(data_dict)
    data_dict['vote_loss'] = vote_loss

    # Obj loss
    objectness_loss, objectness_label, objectness_mask, object_assignment = compute_objectness_loss(
        data_dict)
    data_dict['objectness_loss'] = objectness_loss
    data_dict['objectness_label'] = objectness_label
    data_dict['objectness_mask'] = objectness_mask
    data_dict['object_assignment'] = object_assignment

    # Helper tensors are created on the device of the network output instead
    # of a hard-coded .cuda(), so the function also runs on CPU or on a
    # non-default GPU.
    device = data_dict['objectness_scores'].device

    total_num_proposal = objectness_label.shape[0] * objectness_label.shape[1]
    data_dict['pos_ratio'] = torch.sum(
        objectness_label.float().to(device)) / float(total_num_proposal)
    data_dict['neg_ratio'] = torch.sum(objectness_mask.float()) / float(
        total_num_proposal) - data_dict['pos_ratio']

    # Box loss and sem cls loss
    center_loss, heading_cls_loss, heading_reg_loss, size_cls_loss, size_reg_loss, sem_cls_loss = compute_box_and_sem_cls_loss(
        data_dict, config)
    data_dict['center_loss'] = center_loss
    data_dict['heading_cls_loss'] = heading_cls_loss
    data_dict['heading_reg_loss'] = heading_reg_loss
    data_dict['size_cls_loss'] = size_cls_loss
    data_dict['size_reg_loss'] = size_reg_loss
    data_dict['sem_cls_loss'] = sem_cls_loss
    box_loss = center_loss + 0.1 * heading_cls_loss + heading_reg_loss + 0.1 * size_cls_loss + size_reg_loss
    data_dict['box_loss'] = box_loss

    if reference:
        # Reference loss
        ref_loss, lang_loss, cluster_preds_scores, cluster_labels = compute_reference_loss(
            data_dict, config, use_lang_classifier, use_max_iou)
        data_dict["ref_loss"] = ref_loss
        data_dict["lang_loss"] = lang_loss

        objectness_preds_batch = torch.argmax(data_dict['objectness_scores'],
                                              2).long()
        objectness_labels_batch = objectness_label.long()

        # construct valid masks
        if post_processing:
            # parse_predictions stores the post-processing mask under
            # data_dict['pred_mask']; its return value is not needed here.
            _ = parse_predictions(data_dict, post_processing)
            nms_masks = torch.LongTensor(data_dict['pred_mask']).to(device)
            pred_masks = (nms_masks * objectness_preds_batch == 1).float()
        else:
            pred_masks = (objectness_preds_batch == 1).float()
        label_masks = (objectness_labels_batch == 1).float()

        data_dict["pred_mask"] = pred_masks
        data_dict["label_mask"] = label_masks

        # One-hot encode the highest-scoring valid proposal of every sample.
        cluster_preds = torch.argmax(cluster_preds_scores * pred_masks,
                                     1).long().unsqueeze(1).repeat(
                                         1, pred_masks.shape[1])
        cluster_preds = torch.zeros_like(pred_masks).scatter_(
            1, cluster_preds, 1)
        # Out-of-place product: the tensor returned by
        # compute_reference_loss is left untouched.
        cluster_labels = cluster_labels.float() * label_masks

        # compute classification scores
        corrects = torch.sum((cluster_preds == 1) * (cluster_labels == 1),
                             dim=1).float()
        labels = torch.ones_like(corrects)

        ref_acc = corrects / (labels + 1e-8)

        # store
        data_dict["ref_acc"] = ref_acc.cpu().numpy().tolist()

        # compute localization metrics
        pred_ref = torch.argmax(
            data_dict['cluster_ref'] * data_dict['pred_mask'],
            1).detach().cpu().numpy()  # (B,)
        pred_center = data_dict['center'].detach().cpu().numpy()  # (B,K,3)
        pred_heading_class = torch.argmax(data_dict['heading_scores'],
                                          -1)  # B,num_proposal
        pred_heading_residual = torch.gather(
            data_dict['heading_residuals'], 2,
            pred_heading_class.unsqueeze(-1))  # B,num_proposal,1
        pred_heading_class = pred_heading_class.detach().cpu().numpy(
        )  # B,num_proposal
        pred_heading_residual = pred_heading_residual.squeeze(
            2).detach().cpu().numpy()  # B,num_proposal
        pred_size_class = torch.argmax(data_dict['size_scores'],
                                       -1)  # B,num_proposal
        pred_size_residual = torch.gather(
            data_dict['size_residuals'], 2,
            pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
                1, 1, 1, 3))  # B,num_proposal,1,3
        pred_size_class = pred_size_class.detach().cpu().numpy()
        pred_size_residual = pred_size_residual.squeeze(
            2).detach().cpu().numpy()  # B,num_proposal,3

        gt_ref = torch.argmax(data_dict["ref_box_label"],
                              1).detach().cpu().numpy()
        gt_center = data_dict['center_label'].cpu().numpy(
        )  # (B,MAX_NUM_OBJ,3)
        gt_heading_class = data_dict['heading_class_label'].cpu().numpy(
        )  # B,K2
        gt_heading_residual = data_dict['heading_residual_label'].cpu().numpy(
        )  # B,K2
        gt_size_class = data_dict['size_class_label'].cpu().numpy()  # B,K2
        gt_size_residual = data_dict['size_residual_label'].cpu().numpy(
        )  # B,K2,3

        ious = []
        multiple = []
        for i in range(pred_ref.shape[0]):
            # compute the iou
            pred_ref_idx, gt_ref_idx = pred_ref[i], gt_ref[i]
            pred_obb = config.param2obb(pred_center[i, pred_ref_idx, 0:3],
                                        pred_heading_class[i, pred_ref_idx],
                                        pred_heading_residual[i, pred_ref_idx],
                                        pred_size_class[i, pred_ref_idx],
                                        pred_size_residual[i, pred_ref_idx])
            gt_obb = config.param2obb(gt_center[i, gt_ref_idx, 0:3],
                                      gt_heading_class[i, gt_ref_idx],
                                      gt_heading_residual[i, gt_ref_idx],
                                      gt_size_class[i, gt_ref_idx],
                                      gt_size_residual[i, gt_ref_idx])
            pred_bbox = get_3d_box(pred_obb[3:6], pred_obb[6], pred_obb[0:3])
            gt_bbox = get_3d_box(gt_obb[3:6], gt_obb[6], gt_obb[0:3])
            iou, _ = box3d_iou(pred_bbox, gt_bbox)
            ious.append(iou)

            # construct the multiple mask
            num_bbox = data_dict["num_bbox"][i]
            # Work on a copy: indexing returns a view, so an in-place
            # decrement would silently rewrite data_dict["sem_cls_label"].
            sem_cls_label = data_dict["sem_cls_label"][i].clone()
            # Shift the entries past num_bbox down by one so that they do
            # not count as candidates (presumably padding slots labelled 0,
            # which become -1 - TODO confirm against the data loader).
            sem_cls_label[num_bbox:] -= 1
            num_choices = torch.sum(
                data_dict["object_cat"][i] == sem_cls_label)
            multiple.append(1 if num_choices > 1 else 0)

        # store
        iou_array = np.array(ious)
        # max(..., 1) keeps the rates defined (0.0) for an empty batch.
        num_refs = max(iou_array.shape[0], 1)
        data_dict["ref_iou"] = ious
        data_dict["ref_iou_rate_0.25"] = iou_array[
            iou_array >= 0.25].shape[0] / num_refs
        data_dict["ref_iou_rate_0.5"] = iou_array[
            iou_array >= 0.5].shape[0] / num_refs
        data_dict["ref_multiple_mask"] = multiple
    else:
        ref_loss = torch.zeros((), device=device)
        lang_loss = torch.zeros((), device=device)

    # Final loss function: the reference term gets a larger weight when the
    # max-IoU variant is used.
    ref_weight = 0.1 if use_max_iou else 0.01
    loss = vote_loss + 0.5 * objectness_loss + box_loss + 0.1 * sem_cls_loss + ref_weight * ref_loss + lang_loss

    loss *= 10  # amplify

    data_dict['loss'] = loss

    # --------------------------------------------
    # Some other statistics
    obj_pred_val = torch.argmax(data_dict['objectness_scores'], 2)  # B,K
    obj_acc = torch.sum((obj_pred_val == objectness_label.long()).float() *
                        objectness_mask) / (torch.sum(objectness_mask) + 1e-6)
    data_dict['obj_acc'] = obj_acc
    # precision, recall, f1
    corrects = torch.sum((obj_pred_val == 1) * (objectness_label == 1),
                         dim=1).float()
    preds = torch.sum(obj_pred_val == 1, dim=1).float()
    labels = torch.sum(objectness_label == 1, dim=1).float()
    # precision = TP / predicted positives, recall = TP / labelled positives
    precisions = corrects / (preds + 1e-8)
    recalls = corrects / (labels + 1e-8)
    f1s = 2 * precisions * recalls / (precisions + recalls + 1e-8)
    data_dict["objectness_precision"] = precisions.cpu().numpy().tolist()
    data_dict["objectness_recall"] = recalls.cpu().numpy().tolist()
    data_dict["objectness_f1"] = f1s.cpu().numpy().tolist()
    # lang
    if use_lang_classifier:
        data_dict["lang_acc"] = (torch.argmax(
            data_dict['lang_scores'],
            1) == data_dict["object_cat"]).float().mean()
    else:
        data_dict["lang_acc"] = torch.zeros((), device=device)

    return loss, data_dict