# NOTE: these fragments assume the PoseCNN-style repository layout; the
# repo-local imports below (libsynthesizer, the transforms3d helpers) are
# assumptions based on that layout.
import os
import json

import cv2
import h5py
import numpy as np
from transforms3d.quaternions import quat2mat
from transforms3d.euler import quat2euler

import libsynthesizer

# fx was truncated before this excerpt; the value below is the standard
# published YCB-Video intrinsic and is an assumption.
fx = 1066.778
fy = 1067.487
px = 312.9869
py = 241.3109
zfar = 6.0
znear = 0.25
tnear = 0.5
tfar = 2.0
num_classes = 22
factor_depth = 10000.0
intrinsic_matrix = np.array([[fx, 0, px],
                             [0, fy, py],
                             [0,  0,  1]])

root = '/capri/YCB_Video_Dataset/data_syn/'
if not os.path.exists(root):
    os.makedirs(root)

# args, width, height, and num_images are defined earlier in the original
# script (elided from this excerpt)
synthesizer_ = libsynthesizer.Synthesizer(args.cad_name, args.pose_name)
synthesizer_.setup(width, height)
synthesizer_.init_rand(1200)

# pack the camera intrinsics and clipping/translation ranges for the renderer
parameters = np.zeros((8, ), dtype=np.float32)
parameters[0] = fx
parameters[1] = fy
parameters[2] = px
parameters[3] = py
parameters[4] = znear
parameters[5] = zfar
parameters[6] = tnear
parameters[7] = tfar

i = 0
while i < num_images:
    # rendering loop body elided in this excerpt
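# Illustrative sketch (not part of the original script): how the intrinsic
# matrix above maps a camera-space 3D point to pixel coordinates. This is the
# same pinhole projection render_one() below applies to the model points when
# computing bounding boxes. All names here are local to this example.
def project_point(K, xyz):
    """Project a 3D point in camera coordinates to (u, v) pixel coordinates."""
    uvw = K.dot(xyz)                      # homogeneous image coordinates
    return uvw[0] / uvw[2], uvw[1] / uvw[2]

# A point on the optical axis, 1 m in front of the camera, lands at the
# principal point:
# project_point(intrinsic_matrix, np.array([0.0, 0.0, 1.0]))
# -> (312.9869, 241.3109)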
def run_network(sess, net, imdb, images, meta_data):
    """
    :param sess: TensorFlow session.
    :param net: pretrained network to run the model over.
    :param imdb: TODO: find out the essential features of this object.
    :param images: [(rgb_image[0], depth_image[0]), ...]
    :param meta_data: dictionary with the camera intrinsics under
        'intrinsic_matrix' and the depth scale factor under 'factor_depth'
        (default is 10,000).
    """
    n_images = len(images)
    segmentations = [[] for _ in range(n_images)]

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)

    # construct colors
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    perm = list(range(n_images))

    if (cfg.TEST.VERTEX_REG_2D and cfg.TEST.POSE_REFINE) or \
            (cfg.TEST.VERTEX_REG_3D and cfg.TEST.POSE_REG):
        import libsynthesizer
        synthesizer = libsynthesizer.Synthesizer(cfg.CAD, cfg.POSE)
        synthesizer.setup(cfg.TRAIN.SYN_WIDTH, cfg.TRAIN.SYN_HEIGHT)

    batched_detections = []
    for i in perm:
        raw_rgb, raw_depth = images[i]

        # read color image; zero out pixels that are fully transparent
        rgba = pad_im(raw_rgb, 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        im_depth = pad_im(raw_depth, 16)

        _t['im_segment'].tic()
        labels, probs, vertex_pred, rois, poses = im_segment_single_frame(
            sess, net, im, im_depth, meta_data, voxelizer, imdb._extents,
            imdb._points_all, imdb._symmetry, imdb.num_classes)

        detections = []
        for j in range(rois.shape[0]):
            cls_idx = int(rois[j, 1])
            if cls_idx > 0:
                # projection
                # RT = np.zeros((3, 4), dtype=np.float32)
                # RT[:3, :3] = quat2mat(poses[j, :4])
                # RT[:, 3] = poses[j, 4:7]

                # transform to world pose
                pose_t = np.zeros((6, ), dtype=np.float32)
                pose_t[:3] = poses[j, 4:7]
                # pose_t[[0, 2]] = pose_t[[2, 0]]
                # flip z-axis to match renderer
                pose_t[2] = -pose_t[2]
                poses[j, [1, 2]] = -poses[j, [1, 2]]
                pose_t[3:] = quat2euler(poses[j, :4], axes='sxyz')
                cls = imdb._classes[cls_idx]
                detections.append((cls, pose_t))

        batched_detections.append(detections)

        labels = unpad_im(labels, 16)
        im_scale = cfg.TEST.SCALES_BASE[0]

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        poses_new = []
        poses_icp = []
        if cfg.TEST.VERTEX_REG_2D:
            if cfg.TEST.POSE_REG:
                # pose refinement
                fx = meta_data['intrinsic_matrix'][0, 0] * im_scale
                fy = meta_data['intrinsic_matrix'][1, 1] * im_scale
                px = meta_data['intrinsic_matrix'][0, 2] * im_scale
                py = meta_data['intrinsic_matrix'][1, 2] * im_scale
                factor = meta_data['factor_depth']
                znear = 0.25
                zfar = 6.0
                poses_new = np.zeros((poses.shape[0], 7), dtype=np.float32)
                poses_icp = np.zeros((poses.shape[0], 7), dtype=np.float32)
                error_threshold = 0.01
                if cfg.TEST.POSE_REFINE:
                    labels_icp = labels.copy()
                    rois_icp = rois
                    if imdb.num_classes == 2:
                        I = np.where(labels_icp > 0)
                        labels_icp[I[0], I[1]] = imdb._cls_index
                        rois_icp = rois.copy()
                        rois_icp[:, 1] = imdb._cls_index
                    im_depth = cv2.resize(im_depth, None, None, fx=im_scale, fy=im_scale,
                                          interpolation=cv2.INTER_LINEAR)

                    parameters = np.zeros((7, ), dtype=np.float32)
                    parameters[0] = fx
                    parameters[1] = fy
                    parameters[2] = px
                    parameters[3] = py
                    parameters[4] = znear
                    parameters[5] = zfar
                    parameters[6] = factor

                    height = labels_icp.shape[0]
                    width = labels_icp.shape[1]
                    num_roi = rois_icp.shape[0]
                    channel_roi = rois_icp.shape[1]
                    synthesizer.icp_python(labels_icp, im_depth, parameters,
                                           height, width, num_roi, channel_roi,
                                           rois_icp, poses, poses_new, poses_icp,
                                           error_threshold)

        _t['im_segment'].toc()

        _t['misc'].tic()
        labels_new = cv2.resize(labels, None, None, fx=1.0 / im_scale, fy=1.0 / im_scale,
                                interpolation=cv2.INTER_NEAREST)
        seg = {'labels': labels_new, 'rois': rois, 'poses': poses,
               'poses_refined': poses_new, 'poses_icp': poses_icp}
        segmentations[i] = seg
        _t['misc'].toc()

        print('im_segment: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i, n_images, _t['im_segment'].diff, _t['misc'].diff))

        if cfg.TEST.VISUALIZE:
            img_dir = os.path.join("output", "vis")
            os.makedirs(img_dir, exist_ok=True)
            vertmap = _extract_vertmap(labels, vertex_pred, imdb._extents, imdb.num_classes)
            vis_segmentations_vertmaps_detection(
                im, im_depth, im_label, imdb._class_colors, vertmap, labels,
                rois, poses, poses_icp, meta_data['intrinsic_matrix'],
                imdb.num_classes, imdb._classes, imdb._points_all,
                f_name=os.path.join(img_dir, "%i.png" % i))

    return batched_detections
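# Minimal, self-contained sketch (not in the original source) of the pose
# conversion run_network() applies per detection: a [qw, qx, qy, qz, tx, ty, tz]
# row becomes a 6-DoF [x, y, z, roll, pitch, yaw] vector, with the z-axis and
# the qx/qy components flipped to match the renderer's convention. The sample
# pose below is made up for illustration.
import numpy as np
from transforms3d.euler import quat2euler

pose_row = np.array([1.0, 0.0, 0.0, 0.0, 0.1, -0.05, 0.8], dtype=np.float32)
pose_t = np.zeros((6, ), dtype=np.float32)
pose_t[:3] = pose_row[4:7]
pose_t[2] = -pose_t[2]                  # flip z to the renderer's convention
pose_row[[1, 2]] = -pose_row[[1, 2]]    # negate qx, qy as in run_network()
pose_t[3:] = quat2euler(pose_row[:4], axes='sxyz')
print(pose_t)  # -> [0.1, -0.05, -0.8, 0., 0., 0.] for the identity quaternion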
def render_one(intrinsic_matrix, extents, points, nb_img):
    synthesizer = libsynthesizer.Synthesizer(args.model_path, args.pose_path)
    synthesizer.setup(args.width, args.height)

    which_class = 0
    width = args.width
    height = args.height
    fx = intrinsic_matrix[0, 0]
    fy = intrinsic_matrix[1, 1]
    px = intrinsic_matrix[0, 2]
    py = intrinsic_matrix[1, 2]
    zfar = 6.0
    znear = 0.25
    factor_depth = 1000.0

    dataset = dict()
    print('Entering the render loop')
    n = 0
    while n < nb_img:
        print(n)

        # render a synthetic image
        im_syn = np.zeros((height, width, 4), dtype=np.float32)
        depth_syn = np.zeros((height, width, 3), dtype=np.float32)
        vertmap_syn = np.zeros((height, width, 3), dtype=np.float32)
        poses = np.zeros((1, 7), dtype=np.float32)
        centers = np.zeros((1, 2), dtype=np.float32)
        synthesizer.render_one_python(int(which_class), int(width), int(height),
                                      fx, fy, px, py, znear, zfar,
                                      im_syn, depth_syn, vertmap_syn, poses,
                                      centers, extents)

        # convert images
        im_syn = np.clip(255 * im_syn, 0, 255)
        im_syn = im_syn.astype(np.uint8)
        depth_syn = depth_syn[:, :, 0]

        # convert depth: invert the perspective z-buffer mapping to metric
        # depth, then scale by factor_depth; depth_syn == 1 marks background
        im_depth_raw = factor_depth * 2 * zfar * znear / \
            (zfar + znear - (zfar - znear) * (2 * depth_syn - 1))
        I = np.where(depth_syn == 1)
        im_depth_raw[I[0], I[1]] = 0

        # compute labels from vertmap
        label = np.round(vertmap_syn[:, :, 0]) + 1
        label[np.isnan(label)] = 0
        I = np.where(label != which_class + 1)
        label[I[0], I[1]] = 0

        # discard renders where the object covers too few pixels
        I = np.where(label == which_class + 1)
        if len(I[0]) < 800:
            continue

        # convert pose
        qt = np.zeros((3, 4, 1), dtype=np.float32)
        qt[:, :3, 0] = quat2mat(poses[0, :4])
        qt[:, 3, 0] = poses[0, 4:]

        # process the vertmap
        vertmap_syn[:, :, 0] = vertmap_syn[:, :, 0] - np.round(vertmap_syn[:, :, 0])
        vertmap_syn[np.isnan(vertmap_syn)] = 0

        # compute box: project the model points and take their 2D extremes
        x3d = np.ones((4, points.shape[1]), dtype=np.float32)
        cls = 1
        x3d[0, :] = points[cls, :, 0]
        x3d[1, :] = points[cls, :, 1]
        x3d[2, :] = points[cls, :, 2]
        RT = qt[:, :, 0]
        x2d = np.matmul(intrinsic_matrix, np.matmul(RT, x3d))
        x2d[0, :] = np.divide(x2d[0, :], x2d[2, :])
        x2d[1, :] = np.divide(x2d[1, :], x2d[2, :])

        box = np.zeros((1, 4), dtype=np.float32)
        box[0, 0] = np.min(x2d[0, :])
        box[0, 1] = np.min(x2d[1, :])
        box[0, 2] = np.max(x2d[0, :])
        box[0, 3] = np.max(x2d[1, :])

        # metadata
        metadata = {'poses': qt, 'center': centers, 'box': box,
                    'cls_indexes': np.array([which_class + 1]),
                    'intrinsic_matrix': intrinsic_matrix,
                    'factor_depth': factor_depth}

        # construct data
        data = {'image': im_syn,
                'depth': im_depth_raw.astype(np.uint16),
                'label': label.astype(np.uint8),
                'meta_data': metadata}
        dataset[n] = data
        n += 1

    output_path = os.path.abspath(os.path.join(args.output_dir, 'dataset.h5'))
    with h5py.File(output_path, 'w') as hdf:
        hdf.create_dataset('data', data=json.dumps(dataset, cls=NumpyEncoder))
    print('Successfully saved the synthesized dataset to:', output_path)
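# Illustrative read-back sketch (assumption, not in the original source):
# render_one() stores the whole dataset as a single JSON string inside the
# HDF5 file, so loading is a json.loads() on that one dataset. The path is a
# hypothetical placeholder, JSON keys come back as strings rather than ints,
# and arrays come back as plain lists assuming NumpyEncoder serialized them
# via tolist().
import json
import h5py
import numpy as np

with h5py.File('dataset.h5', 'r') as hdf:
    raw = hdf['data'][()]              # the JSON string written by render_one()
dataset = json.loads(raw)
sample = dataset['0']                  # keys are strings after the JSON round-trip
image = np.asarray(sample['image'], dtype=np.uint8)
depth = np.asarray(sample['depth'], dtype=np.uint16)
print(image.shape, depth.shape)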