def learn_size_bin(category_specific=True): """ Learn the size distribution and generating size bins for regression Parameters ---------- category_specific: bool indicates whether learn the size conditional on the category Returns ------- size_bins: dict Keys: {'category'} The bins for each category """ if category_specific: N_CLUSTER = 8 else: N_CLUSTER = 16 size_bins = dict() size_info = dict() for i in range(0, 10335): sequence = readsunrgbdframe(image_id=i + 1) for bdb3d in sequence.bdb3d: classname = bdb3d['classname'][0] coeffs = bdb3d['coeffs'][0] if classname not in size_info: size_info[classname] = list() size_info[classname].append(coeffs) if category_specific: size_avg = dict() for category in size_info.keys(): size_array = np.array(size_info[category]) mean_size = size_array.mean(axis=0) if category in OBJ_CATEGORY_CLEAN: size_avg[category] = mean_size m = size_array.shape[0] if m <= N_CLUSTER: kmeans = KMeans(n_clusters=int(m/2)+1, random_state=0).fit(size_array) else: kmeans = KMeans(n_clusters=N_CLUSTER, random_state=0).fit(size_info[category]) size_bins[category] = kmeans.cluster_centers_ print category, size_bins[category] with open(op.join(PATH.metadata_root, 'size_bin_category.pickle'), 'w') as f: pickle.dump(size_bins, f) f.close() with open(op.join(PATH.metadata_root, 'size_avg_category.pickle'), 'w') as f: pickle.dump(size_avg, f) f.close() print size_avg else: size_array = list() for key, value in size_info.items(): size_array.extend(value) size_array = np.array(size_array) kmeans = KMeans(n_clusters=N_CLUSTER, random_state=0).fit(size_array) with open(op.join(PATH.metadata_root, 'size_bin_full.pickle'), 'w') as f: pickle.dump(kmeans.cluster_centers_, f) print kmeans.cluster_centers_
def prepare_data(gt_2d_bdb=False, patch_h=224, patch_w=224, shift=True, iou_threshold=0.1):
    """
    Generating the ground truth for end-to-end training

    For every SUN-RGBD frame: matches each 3D box to a 2D box (ground-truth
    or detected, by IoU), encodes camera / layout / object parameters into
    classification bins plus regression residuals, builds a horizontally
    flipped copy of every target, and pickles one data dict (and one flipped
    dict) per frame under PATH.metadata_root/sunrgbd_train_test_data.

    Parameters
    ----------
    gt_2d_bdb : bool
        indicates whether to use the ground truth of 2D bounding boxes
        (True) or the detections loaded from the per-frame '2dbdb' json
        files (False)
    patch_h: int
        the height of target resized patch
        # NOTE(review): patch_h/patch_w are not used anywhere in this body
    patch_w: int
        the width of target resized potch
    shift : bool
        if True, randomly shift the matched 2D box (augmentation) and save
        under '*_shift_5.pickle' names; train/test json lists are only
        written when shift is False
    iou_threshold : float
        iou threshold for two 2D bounding boxes
    """
    bin = PATH.bins()
    data_root = op.join(PATH.metadata_root, 'sunrgbd_train_test_data')
    train_path = list()
    test_path = list()
    layout_centroid = list()
    layout_coeffs = list()
    # obj_category = dict()
    if not op.exists(data_root):
        os.mkdir(data_root)
    for i in range(10335):
        sequence = readsunrgbdframe(image_id=i+1)
        print i+1
        # Per-frame refined tilt matrix, precomputed offline.
        sequence._R_tilt = loadmat(op.join(PATH.metadata_root, 'updated_rtilt', str(i+1) + '.mat'))['r_tilt']
        # R_ex is cam to world
        sequence._R_ex = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]).dot(sequence.R_tilt).dot(np.array([[1, 0, 0], [0, 0, 1], [0, -1, 0]]))
        K = sequence.K
        # Drop ground-truth 2D boxes that fall outside the image
        # (image size inferred as 2*principal point).
        result = []
        for bdb2d in sequence.bdb2d:
            if check_bdb(bdb2d, 2*sequence.K[0, 2], 2*sequence.K[1, 2]):
                result.append(bdb2d)
            else:
                print 'ground truth not valid'
        sequence._bdb2d = result
        bdb2d_from_3d_list = []
        # Detected 2D boxes for this frame (used when gt_2d_bdb is False).
        with open(op.join(PATH.metadata_root, '2dbdb', str(i + 1) + '.json'), 'r') as f:
            detected_bdbs = json.load(f)
        f.close()  # redundant: the with-statement already closed f
        boxes = list()
        for bdb3d in sequence.bdb3d:
            center = bdb3d['centroid'][0]
            coeffs = bdb3d['coeffs'][0]
            basis = bdb3d['basis'].astype('float32')
            # Only keep objects from the cleaned category list.
            if bdb3d['classname'][0] not in OBJ_CATEGORY_CLEAN:
                continue
            # Project the 3D box (and its centroid) into the image plane.
            bdb2d_from_3d = project_struct_bdb_to_2d(basis, coeffs, center, sequence.R_ex.T, K)
            projected_2d_center = project_3d_points_to_2d(center.reshape(1, 3), sequence.R_ex.T, K)
            if bdb2d_from_3d is None:
                print '%s not valid' % (bdb3d['classname'][0])
                continue
            bdb2d_from_3d['classname'] = bdb3d['classname'][0]
            bdb2d_from_3d_list.append(bdb2d_from_3d)
            if gt_2d_bdb is True:
                # Match the projected 3D box to the best-overlapping
                # ground-truth 2D box of the same class.
                max_iou = 0
                iou_ind = -1
                for j, bdb2d in enumerate(sequence.bdb2d):
                    if bdb2d['classname'] == bdb3d['classname'][0]:
                        iou = get_iou(bdb2d_from_3d, bdb2d)
                        if iou > iou_threshold and iou > max_iou:
                            iou_ind = j
                            max_iou = iou
                if iou_ind >= 0:
                    if shift:
                        # Random jitter on the matched box for augmentation.
                        shifted_box = random_shift_2d_box(sequence.bdb2d[iou_ind])
                        boxes.append({'2dbdb': shifted_box, '3dbdb': bdb3d, 'projected_2d_center': projected_2d_center})
                    else:
                        boxes.append({'2dbdb': sequence.bdb2d[iou_ind], '3dbdb': bdb3d, 'projected_2d_center': projected_2d_center})
            else:
                # Same matching, but against detector outputs.
                max_iou = 0
                iou_ind = -1
                max_bdb = dict()
                for j, bdb2d in enumerate(detected_bdbs):
                    if bdb2d['class'] == bdb3d['classname'][0]:
                        box = bdb2d['bbox']
                        box = {'x1': box[0], 'y1': box[1], 'x2': box[2], 'y2': box[3]}
                        iou = get_iou(bdb2d_from_3d, box)
                        if iou > iou_threshold and iou > max_iou:
                            iou_ind = j
                            max_iou = iou
                            box['score'] = bdb2d['score']
                            box['classname'] = bdb2d['class']
                            max_bdb = box
                if iou_ind >= 0:
                    # print max_iou, bdb2d_from_3d, detected_bdbs[iou_ind]
                    if shift:
                        shifted_box = random_shift_2d_box(max_bdb)
                        boxes.append({'2dbdb': shifted_box, '3dbdb': bdb3d, 'projected_2d_center': projected_2d_center})
                    else:
                        boxes.append({'2dbdb': max_bdb, '3dbdb': bdb3d, 'projected_2d_center': projected_2d_center})
        # print boxes
        # Camera pose targets: yaw/roll as bin class + residual.
        camera = dict()
        camera_flip = dict()
        camera['yaw_cls'], camera['yaw_reg'], camera['roll_cls'], camera['roll_reg'] = camera_cls_reg(sequence.R_ex.T, bin)
        camera['K'] = sequence.K
        # flip the camera
        camera_flip['yaw_cls'], camera_flip['yaw_reg'], camera_flip['roll_cls'], camera_flip['roll_reg'] = camera_cls_reg(sequence.R_ex.T, bin, flip=True)
        camera_flip['K'] = sequence.K
        template_path = op.join(PATH.metadata_root, 'size_avg_category.pickle')
        # Room layout: fit a 3D box to the annotated Manhattan layout points.
        layout_pts = loadmat(op.join(PATH.metadata_root, '3dlayout', str(i+1) + '.mat'))['manhattan_layout'].T
        l_centroid, l_basis, l_coeffs = get_bdb_from_corners(layout_pts)
        # print l_centroid
        layout_centroid.append(l_centroid)
        layout_coeffs.append(l_coeffs)
        layout = dict()
        layout['centroid_reg'] = layout_centroid_avg_residual(l_centroid, bin['layout_centroid_avg'], bin['layout_normalize'])
        layout['coeffs_reg'] = layout_size_avg_residual(l_coeffs, bin['layout_coeffs_avg'])
        layout['ori_cls'], layout['ori_reg'] = ori_cls_reg(l_basis[1, :], bin, layout=True)
        layout_flip = dict()
        layout_flip['centroid_reg'] = layout_centroid_avg_residual(l_centroid, bin['layout_centroid_avg'], bin['layout_normalize'], flip=True)
        layout_flip['coeffs_reg'] = layout_size_avg_residual(l_coeffs, bin['layout_coeffs_avg'])
        layout_flip['ori_cls'], layout_flip['ori_reg'] = ori_cls_reg(l_basis[1, :], bin, layout=True, flip=True)
        # print layout['ori_cls'], layout_flip['ori_cls']
        # clean the ground truth
        # NOTE(review): the size template is re-read from disk on every
        # frame iteration; it could be loaded once before the loop.
        with open(template_path, 'r') as f:
            size_template = pickle.load(f)
        f.close()  # redundant: the with-statement already closed f
        boxes_out = list()
        boxes_out_flip = list()
        for box in boxes:
            # Encode one matched (2D box, 3D box) pair into training targets.
            box_set = dict()
            # box_set['ori_cls'], box_set['ori_reg'] = ori_cls_reg(box['3dbdb']['orientation'])
            box_set['ori_cls'], box_set['ori_reg'] = ori_cls_reg(box['3dbdb']['basis'][1, :], bin)
            # print box['3dbdb']['basis']
            # print basis_from_ori(num_from_bins(bin['ori_bin'], box_set['ori_cls'], box_set['ori_reg']))
            # Size as residual from the per-category average template.
            box_set['size_reg'] = size_avg_residual(box['3dbdb']['coeffs'][0], size_template, box['2dbdb']['classname'])
            box_set['bdb3d'] = get_corners_of_bb3d_no_index(box['3dbdb']['basis'], box['3dbdb']['coeffs'][0], box['3dbdb']['centroid'][0])
            box_set['x_cls'], box_set['x_reg'], box_set['y_cls'], box_set['y_reg'], box_set['z_cls'], box_set['z_reg'] = centroid_cls_reg(box['3dbdb']['centroid'][0], bin)
            # Pixel coordinates, and the same box normalized by the
            # principal point.
            box_set['bdb_pos'] = [box['2dbdb']['x1'], box['2dbdb']['y1'], box['2dbdb']['x2'], box['2dbdb']['y2']]
            box_set['bdb2d'] = [box['2dbdb']['x1'] / float(K[0, 2]), box['2dbdb']['y1'] / float(K[1, 2]), box['2dbdb']['x2'] / float(K[0, 2]), box['2dbdb']['y2'] / float(K[1, 2])]
            # Distance of the centroid from the camera, binned.
            box_set['centroid_cls'], box_set['centroid_reg'] = bin_cls_reg(bin['centroid_bin'], np.linalg.norm(box['3dbdb']['centroid'][0]))
            # Offset of the projected 3D centroid from the 2D box center,
            # normalized by the box width/height.
            delta_2d = list()
            delta_2d.append(((box_set['bdb_pos'][0] + box_set['bdb_pos'][2]) / 2 - box['projected_2d_center'][0][0]) / (box_set['bdb_pos'][2] - box_set['bdb_pos'][0]))
            delta_2d.append(((box_set['bdb_pos'][1] + box_set['bdb_pos'][3]) / 2 - box['projected_2d_center'][1][0]) / (box_set['bdb_pos'][3] - box_set['bdb_pos'][1]))
            box_set['delta_2d'] = delta_2d
            box_set['size_cls'] = OBJ_CATEGORY_CLEAN.index(box['2dbdb']['classname'])
            # print box_set['size_cls']
            # print box['2dbdb']['classname']
            boxes_out.append(box_set)
            # print box_set['3dbdb']['classname'], box_set['ori_cls'], box_set['ori_reg'], box_set['size_reg'], box_set['size_cls'], box_set['size_reg']
            # flip the boxes
            box_set_flip = dict()
            # box_set_flip['ori_cls'], box_set_flip['ori_reg'] = ori_cls_reg(box['3dbdb']['orientation'], flip=True)
            box_set_flip['ori_cls'], box_set_flip['ori_reg'] = ori_cls_reg(box['3dbdb']['basis'][1, :], bin, flip=True)
            box_set_flip['size_reg'] = size_avg_residual(box['3dbdb']['coeffs'][0], size_template, box['2dbdb']['classname'])
            box_set_flip['x_cls'], box_set_flip['x_reg'], box_set_flip['y_cls'], box_set_flip['y_reg'], box_set_flip['z_cls'], box_set_flip['z_reg'] = centroid_cls_reg(box['3dbdb']['centroid'][0], bin, flip=True)
            box_set_flip['centroid_cls'], box_set_flip['centroid_reg'] = bin_cls_reg(bin['centroid_bin'], np.linalg.norm(box['3dbdb']['centroid'][0]))
            # Mirror the 2D box about the vertical image centerline
            # (x -> 2*cx - x, with x1/x2 swapped to stay ordered).
            box_set_flip['bdb_pos'] = [int(2 * K[0, 2] - box['2dbdb']['x2']), box['2dbdb']['y1'], int(2 * K[0, 2] - box['2dbdb']['x1']), box['2dbdb']['y2']]
            box_set_flip['bdb2d'] = [int(2 * K[0, 2] - box['2dbdb']['x2']) / float(K[0, 2]), box['2dbdb']['y1'] / float(K[1, 2]), int(2 * K[0, 2] - box['2dbdb']['x1']) / float(K[0, 2]), box['2dbdb']['y2'] / float(K[1, 2])]
            box_set_flip['size_cls'] = OBJ_CATEGORY_CLEAN.index(box['2dbdb']['classname'])
            # Reconstruct the flipped 3D box from the flipped bin targets.
            coeffs_flip = size_from_template(box_set_flip['size_reg'], size_template, OBJ_CATEGORY_CLEAN[box_set_flip['size_cls']])
            centroid_flip = np.array([num_from_bins(bin['x_bin'], box_set_flip['x_cls'], box_set_flip['x_reg']), num_from_bins(bin['y_bin'], box_set_flip['y_cls'], box_set_flip['y_reg']), num_from_bins(bin['z_bin'], box_set_flip['z_cls'], box_set_flip['z_reg'])])
            basis_flip = basis_from_ori(num_from_bins(bin['ori_bin'], box_set_flip['ori_cls'], box_set_flip['ori_reg']))
            box_set_flip['bdb3d'] = get_corners_of_bb3d(basis_flip, coeffs_flip, centroid_flip)
            # Horizontal flip negates the x offset only.
            delta_2d_flip = [- delta_2d[0], delta_2d[1]]
            box_set_flip['delta_2d'] = delta_2d_flip
            # print box_set['delta_2d'], box_set_flip['delta_2d']
            boxes_out_flip.append(box_set_flip)
        # Skip frames with no valid matched boxes.
        if len(boxes_out) == 0:
            continue
        data = dict()
        data['rgb_path'] = op.join(PATH.metadata_root, 'images', '%06d.jpg' % (i+1))
        data['boxes'] = list_of_dict_to_dict_of_list(boxes_out)
        data['camera'] = camera
        data['layout'] = layout
        data['sequence_id'] = i + 1
        # fliped data
        data_flip = dict()
        data_flip['rgb_path'] = op.join(PATH.metadata_root, 'images', '%06d_flip.jpg' % (i+1))
        # img_flip = Image.open(data['rgb_path']).transpose(Image.FLIP_LEFT_RIGHT)
        # img_flip.save(data_flip['rgb_path'])
        data_flip['boxes'] = list_of_dict_to_dict_of_list(boxes_out_flip)
        data_flip['camera'] = camera_flip
        data_flip['layout'] = layout_flip
        data_flip['sequence_id'] = i + 1
        if shift:
            save_path = op.join(PATH.metadata_root, 'sunrgbd_train_test_data', str(i+1) + '_shift_5' + '.pickle')
            save_path_flip = op.join(PATH.metadata_root, 'sunrgbd_train_test_data', str(i+1) + '_flip' + '_shift_5' + '.pickle')
        else:
            save_path = op.join(PATH.metadata_root, 'sunrgbd_train_test_data', str(i + 1) + '.pickle')
            save_path_flip = op.join(PATH.metadata_root, 'sunrgbd_train_test_data', str(i + 1) + '_flip' + '.pickle')
        # Standard SUN-RGBD split: first 5050 frames are test, rest train.
        if (i + 1) <= 5050:
            test_path.append(save_path)
        else:
            train_path.append(save_path)
        with open(save_path, 'w') as f:
            pickle.dump(data, f)
        f.close()  # redundant: the with-statement already closed f
        with open(save_path_flip, 'w') as f:
            pickle.dump(data_flip, f)
        f.close()  # redundant: the with-statement already closed f
    # Dataset-wide layout statistics (diagnostic output).
    print np.array(layout_centroid).mean(axis=0)
    print np.array(layout_coeffs).mean(axis=0)
    # Split lists are only written for the un-shifted dataset.
    if not shift:
        with open(op.join(PATH.metadata_root, 'train.json'), 'w') as f:
            json.dump(train_path, f)
        f.close()  # redundant: the with-statement already closed f
        with open(op.join(PATH.metadata_root, 'test.json'), 'w') as f:
            json.dump(test_path, f)
        f.close()  # redundant: the with-statement already closed f