# Example #1
# 0
def learn_size_bin(category_specific=True):
    """
        Learn the 3D box size distribution and generate size bins for regression.

        Every frame with image id 1..10335 is read through
        ``readsunrgbdframe`` and the ``coeffs`` of each of its 3D bounding
        boxes are grouped by ``classname``.  The collected sizes are clustered
        with KMeans and the cluster centers are pickled under
        ``PATH.metadata_root``:

        * ``category_specific=True``: ``size_bin_category.pickle`` (cluster
          centers per category) and ``size_avg_category.pickle`` (mean size of
          every category that is listed in ``OBJ_CATEGORY_CLEAN``).
        * ``category_specific=False``: ``size_bin_full.pickle`` (cluster
          centers over all boxes, regardless of category).

        Parameters
        ----------
        category_specific: bool
            indicates whether to learn the size conditional on the category
            (8 clusters per category) or over all boxes at once (16 clusters)

        Returns
        -------
        size_bins: dict or numpy.ndarray
            If ``category_specific`` is True, a dict
            Keys: {'category'}
            The bins (KMeans cluster centers) for each category.
            Otherwise the array of cluster centers over all boxes.
    """
    num_frames = 10335
    n_cluster = 8 if category_specific else 16

    # Group the box sizes by class name over the whole dataset.
    size_info = dict()
    for image_id in range(1, num_frames + 1):
        sequence = readsunrgbdframe(image_id=image_id)
        for bdb3d in sequence.bdb3d:
            classname = bdb3d['classname'][0]
            coeffs = bdb3d['coeffs'][0]
            size_info.setdefault(classname, []).append(coeffs)

    if not category_specific:
        size_array = np.array([coeffs
                               for sizes in size_info.values()
                               for coeffs in sizes])
        kmeans = KMeans(n_clusters=n_cluster, random_state=0).fit(size_array)
        # pickle streams are bytes: always open the file in binary mode
        with open(op.join(PATH.metadata_root, 'size_bin_full.pickle'), 'wb') as f:
            pickle.dump(kmeans.cluster_centers_, f)
        print(kmeans.cluster_centers_)
        return kmeans.cluster_centers_

    size_bins = dict()
    size_avg = dict()
    for category, sizes in size_info.items():
        size_array = np.array(sizes)
        # the average size is only stored for the cleaned category list
        if category in OBJ_CATEGORY_CLEAN:
            size_avg[category] = size_array.mean(axis=0)
        m = size_array.shape[0]
        # KMeans cannot use more clusters than samples: for rare categories
        # fall back to roughly half as many clusters as there are samples.
        n_clusters = m // 2 + 1 if m <= n_cluster else n_cluster
        kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(size_array)
        size_bins[category] = kmeans.cluster_centers_
        print('%s %s' % (category, size_bins[category]))

    with open(op.join(PATH.metadata_root, 'size_bin_category.pickle'), 'wb') as f:
        pickle.dump(size_bins, f)
    with open(op.join(PATH.metadata_root, 'size_avg_category.pickle'), 'wb') as f:
        pickle.dump(size_avg, f)
    print(size_avg)
    return size_bins
# Example #2
# 0
def prepare_data(gt_2d_bdb=False, patch_h=224, patch_w=224, shift=True, iou_threshold=0.1):
    """
        Generating the ground truth for end-to-end training

        For every frame with image id 1..10335 the 3D boxes whose class is in
        ``OBJ_CATEGORY_CLEAN`` are projected to 2D and matched (same class,
        highest IoU above ``iou_threshold``) with a 2D box.  For each frame
        that has at least one matched box, two pickles (original and
        horizontally flipped targets) are written to
        ``<PATH.metadata_root>/sunrgbd_train_test_data``.  When ``shift`` is
        False the lists of written (non-flipped) pickle paths are also saved
        as ``train.json`` / ``test.json`` under ``PATH.metadata_root``
        (image ids <= 5050 go to the test list, the rest to the train list).

        Parameters
        ----------
        gt_2d_bdb : bool
            indicates whether to use the ground truth of 2D bounding boxes;
            otherwise the detections stored in ``2dbdb/<id>.json`` are used
        patch_h: int
            the height of target resized patch (not used by this function)
        patch_w: int
            the width of target resized patch (not used by this function)
        shift : bool
            if True, every matched 2D box is passed through
            ``random_shift_2d_box`` and the output files get a ``_shift_5``
            suffix
        iou_threshold : float
            iou threshold for two 2D bounding boxes
    """
    num_frames = 10335
    last_test_id = 5050  # image ids up to this one belong to the test split
    bins = PATH.bins()
    data_root = op.join(PATH.metadata_root, 'sunrgbd_train_test_data')
    if not op.exists(data_root):
        os.mkdir(data_root)
    # The size template does not depend on the frame: load it once.
    # NOTE: pickle is only safe on trusted files such as this generated one.
    size_template = _prepare_data_load_pickle(
        op.join(PATH.metadata_root, 'size_avg_category.pickle'))
    # constant axis permutations applied around R_tilt to build R_ex
    pre_rot = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
    post_rot = np.array([[1, 0, 0], [0, 0, 1], [0, -1, 0]])

    train_path = list()
    test_path = list()
    layout_centroid = list()
    layout_coeffs = list()
    for frame_id in range(1, num_frames + 1):
        sequence = readsunrgbdframe(image_id=frame_id)
        print(frame_id)
        sequence._R_tilt = loadmat(op.join(PATH.metadata_root, 'updated_rtilt', str(frame_id) + '.mat'))['r_tilt']
        # R_ex is cam to world
        sequence._R_ex = pre_rot.dot(sequence.R_tilt).dot(post_rot)
        K = sequence.K

        # keep only the ground-truth 2D boxes accepted by check_bdb
        valid_bdb2d = []
        for bdb2d in sequence.bdb2d:
            if check_bdb(bdb2d, 2 * K[0, 2], 2 * K[1, 2]):
                valid_bdb2d.append(bdb2d)
            else:
                print('ground truth not valid')
        sequence._bdb2d = valid_bdb2d

        with open(op.join(PATH.metadata_root, '2dbdb', str(frame_id) + '.json'), 'r') as f:
            detected_bdbs = json.load(f)

        # pair every usable 3D box with its best matching 2D box
        boxes = list()
        for bdb3d in sequence.bdb3d:
            classname = bdb3d['classname'][0]
            if classname not in OBJ_CATEGORY_CLEAN:
                continue
            center = bdb3d['centroid'][0]
            coeffs = bdb3d['coeffs'][0]
            basis = bdb3d['basis'].astype('float32')
            bdb2d_from_3d = project_struct_bdb_to_2d(basis, coeffs, center, sequence.R_ex.T, K)
            projected_2d_center = project_3d_points_to_2d(center.reshape(1, 3), sequence.R_ex.T, K)
            if bdb2d_from_3d is None:
                print('%s not valid' % (classname))
                continue
            bdb2d_from_3d['classname'] = classname
            if gt_2d_bdb:
                matched = _prepare_data_best_gt_box(
                    bdb2d_from_3d, classname, sequence.bdb2d, iou_threshold)
            else:
                matched = _prepare_data_best_detected_box(
                    bdb2d_from_3d, classname, detected_bdbs, iou_threshold)
            if matched is None:
                continue
            if shift:
                matched = random_shift_2d_box(matched)
            boxes.append({'2dbdb': matched, '3dbdb': bdb3d,
                          'projected_2d_center': projected_2d_center})

        # camera targets (original and flipped)
        camera = dict()
        camera['yaw_cls'], camera['yaw_reg'], camera['roll_cls'], camera['roll_reg'] = camera_cls_reg(sequence.R_ex.T, bins)
        camera['K'] = sequence.K
        camera_flip = dict()
        camera_flip['yaw_cls'], camera_flip['yaw_reg'], camera_flip['roll_cls'], camera_flip['roll_reg'] = camera_cls_reg(sequence.R_ex.T, bins, flip=True)
        camera_flip['K'] = sequence.K

        # layout targets (original and flipped); the raw centroid / coeffs are
        # accumulated for every frame so that their means can be printed below
        layout_pts = loadmat(op.join(PATH.metadata_root, '3dlayout', str(frame_id) + '.mat'))['manhattan_layout'].T
        l_centroid, l_basis, l_coeffs = get_bdb_from_corners(layout_pts)
        layout_centroid.append(l_centroid)
        layout_coeffs.append(l_coeffs)
        layout = dict()
        layout['centroid_reg'] = layout_centroid_avg_residual(l_centroid, bins['layout_centroid_avg'], bins['layout_normalize'])
        layout['coeffs_reg'] = layout_size_avg_residual(l_coeffs, bins['layout_coeffs_avg'])
        layout['ori_cls'], layout['ori_reg'] = ori_cls_reg(l_basis[1, :], bins, layout=True)
        layout_flip = dict()
        layout_flip['centroid_reg'] = layout_centroid_avg_residual(l_centroid, bins['layout_centroid_avg'], bins['layout_normalize'], flip=True)
        layout_flip['coeffs_reg'] = layout_size_avg_residual(l_coeffs, bins['layout_coeffs_avg'])
        layout_flip['ori_cls'], layout_flip['ori_reg'] = ori_cls_reg(l_basis[1, :], bins, layout=True, flip=True)

        # per-box targets (original and flipped)
        boxes_out = list()
        boxes_out_flip = list()
        for box in boxes:
            box_set, box_set_flip = _prepare_data_box_targets(box, bins, size_template, K)
            boxes_out.append(box_set)
            boxes_out_flip.append(box_set_flip)
        if not boxes_out:
            # nothing to train on in this frame
            continue

        data = dict()
        data['rgb_path'] = op.join(PATH.metadata_root, 'images', '%06d.jpg' % frame_id)
        data['boxes'] = list_of_dict_to_dict_of_list(boxes_out)
        data['camera'] = camera
        data['layout'] = layout
        data['sequence_id'] = frame_id
        # flipped data; the flipped image itself is not generated here
        data_flip = dict()
        data_flip['rgb_path'] = op.join(PATH.metadata_root, 'images', '%06d_flip.jpg' % frame_id)
        data_flip['boxes'] = list_of_dict_to_dict_of_list(boxes_out_flip)
        data_flip['camera'] = camera_flip
        data_flip['layout'] = layout_flip
        data_flip['sequence_id'] = frame_id

        suffix = '_shift_5' if shift else ''
        save_path = op.join(data_root, str(frame_id) + suffix + '.pickle')
        save_path_flip = op.join(data_root, str(frame_id) + '_flip' + suffix + '.pickle')
        if frame_id <= last_test_id:
            test_path.append(save_path)
        else:
            train_path.append(save_path)
        _prepare_data_dump_pickle(data, save_path)
        _prepare_data_dump_pickle(data_flip, save_path_flip)

    print(np.array(layout_centroid).mean(axis=0))
    print(np.array(layout_coeffs).mean(axis=0))
    if not shift:
        with open(op.join(PATH.metadata_root, 'train.json'), 'w') as f:
            json.dump(train_path, f)
        with open(op.join(PATH.metadata_root, 'test.json'), 'w') as f:
            json.dump(test_path, f)


def _prepare_data_load_pickle(path):
    """Return the object stored in the pickle file ``path``."""
    with open(path, 'rb') as f:
        return pickle.load(f)


def _prepare_data_dump_pickle(obj, path):
    """Pickle ``obj`` into the file ``path`` (binary mode)."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def _prepare_data_best_gt_box(bdb2d_from_3d, classname, gt_bdbs, iou_threshold):
    """Return the same-class ground-truth 2D box with the highest IoU above the threshold, or None."""
    best = None
    max_iou = 0
    for bdb2d in gt_bdbs:
        if bdb2d['classname'] == classname:
            iou = get_iou(bdb2d_from_3d, bdb2d)
            if iou > iou_threshold and iou > max_iou:
                best = bdb2d
                max_iou = iou
    return best


def _prepare_data_best_detected_box(bdb2d_from_3d, classname, detected_bdbs, iou_threshold):
    """Return the same-class detection (as an x1/y1/x2/y2/score/classname dict) with the highest IoU above the threshold, or None."""
    best = None
    max_iou = 0
    for detection in detected_bdbs:
        if detection['class'] == classname:
            bbox = detection['bbox']
            box = {'x1': bbox[0], 'y1': bbox[1], 'x2': bbox[2], 'y2': bbox[3]}
            iou = get_iou(bdb2d_from_3d, box)
            if iou > iou_threshold and iou > max_iou:
                box['score'] = detection['score']
                box['classname'] = detection['class']
                best = box
                max_iou = iou
    return best


def _prepare_data_box_targets(box, bins, size_template, K):
    """Build the training targets of one matched box; returns (targets, horizontally flipped targets)."""
    bdb3d = box['3dbdb']
    basis = bdb3d['basis']
    coeffs = bdb3d['coeffs'][0]
    centroid = bdb3d['centroid'][0]
    classname = box['2dbdb']['classname']
    x1, y1 = box['2dbdb']['x1'], box['2dbdb']['y1']
    x2, y2 = box['2dbdb']['x2'], box['2dbdb']['y2']
    center_2d = box['projected_2d_center']

    box_set = dict()
    box_set['ori_cls'], box_set['ori_reg'] = ori_cls_reg(basis[1, :], bins)
    box_set['size_reg'] = size_avg_residual(coeffs, size_template, classname)
    box_set['bdb3d'] = get_corners_of_bb3d_no_index(basis, coeffs, centroid)
    box_set['x_cls'], box_set['x_reg'], box_set['y_cls'], box_set['y_reg'], box_set['z_cls'], box_set['z_reg'] = centroid_cls_reg(centroid, bins)
    box_set['bdb_pos'] = [x1, y1, x2, y2]
    # 2D box normalised by the principal point taken from K
    box_set['bdb2d'] = [x1 / float(K[0, 2]), y1 / float(K[1, 2]), x2 / float(K[0, 2]), y2 / float(K[1, 2])]
    box_set['centroid_cls'], box_set['centroid_reg'] = bin_cls_reg(bins['centroid_bin'], np.linalg.norm(centroid))
    # offset between the 2D box center and the projected 3D center, relative
    # to the box width / height.
    # NOTE(review): under Python 2 the "/ 2" truncates when the box
    # coordinates are ints; kept as in the original - confirm it is intended.
    delta_2d = [((x1 + x2) / 2 - center_2d[0][0]) / (x2 - x1),
                ((y1 + y2) / 2 - center_2d[1][0]) / (y2 - y1)]
    box_set['delta_2d'] = delta_2d
    box_set['size_cls'] = OBJ_CATEGORY_CLEAN.index(classname)

    # flip the box: x coordinates are mirrored about 2 * K[0, 2]
    mirror = 2 * K[0, 2]
    flip_x1 = int(mirror - x2)
    flip_x2 = int(mirror - x1)
    box_set_flip = dict()
    box_set_flip['ori_cls'], box_set_flip['ori_reg'] = ori_cls_reg(basis[1, :], bins, flip=True)
    box_set_flip['size_reg'] = size_avg_residual(coeffs, size_template, classname)
    box_set_flip['x_cls'], box_set_flip['x_reg'], box_set_flip['y_cls'], box_set_flip['y_reg'], box_set_flip['z_cls'], box_set_flip['z_reg'] = centroid_cls_reg(centroid, bins, flip=True)
    box_set_flip['centroid_cls'], box_set_flip['centroid_reg'] = bin_cls_reg(bins['centroid_bin'], np.linalg.norm(centroid))
    box_set_flip['bdb_pos'] = [flip_x1, y1, flip_x2, y2]
    box_set_flip['bdb2d'] = [flip_x1 / float(K[0, 2]), y1 / float(K[1, 2]),
                             flip_x2 / float(K[0, 2]), y2 / float(K[1, 2])]
    box_set_flip['size_cls'] = OBJ_CATEGORY_CLEAN.index(classname)
    # the flipped 3D corners are rebuilt from the flipped targets themselves
    coeffs_flip = size_from_template(box_set_flip['size_reg'], size_template, OBJ_CATEGORY_CLEAN[box_set_flip['size_cls']])
    centroid_flip = np.array([num_from_bins(bins['x_bin'], box_set_flip['x_cls'], box_set_flip['x_reg']),
                              num_from_bins(bins['y_bin'], box_set_flip['y_cls'], box_set_flip['y_reg']),
                              num_from_bins(bins['z_bin'], box_set_flip['z_cls'], box_set_flip['z_reg'])])
    basis_flip = basis_from_ori(num_from_bins(bins['ori_bin'], box_set_flip['ori_cls'], box_set_flip['ori_reg']))
    box_set_flip['bdb3d'] = get_corners_of_bb3d(basis_flip, coeffs_flip, centroid_flip)
    box_set_flip['delta_2d'] = [- delta_2d[0], delta_2d[1]]
    return box_set, box_set_flip