# Imports assumed by this section; the module paths are assumptions,
# adjust them to the repository layout if they differ.
import os
import cPickle
import cv2
import numpy as np
import scipy.io
from fcn.config import cfg, get_output_dir
from utils.timer import Timer
from utils.blob import pad_im, unpad_im
from utils.voxelizer import Voxelizer
from utils.se3 import se3_mul, se3_inverse
import kfusion
import ransac


def test_net_single_frame(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database, one frame at a time."""
    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # if segmentations are cached, evaluate them and return
    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # kinect fusion
    if is_kfusion:
        KF = kfusion.PyKinectFusion(rig_filename)

    # pose estimation
    if cfg.TEST.VERTEX_REG and cfg.TEST.RANSAC:
        RANSAC = ransac.PyRansac3D()

    # construct colors: one flat RGB triple per class
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        # perm = np.random.permutation(np.arange(num_images))
        perm = xrange(0, num_images, 5)
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    for i in perm:
        # parse image name: the prefix before '/' identifies the video
        image_index = imdb.image_index[i]
        pos = image_index.find('/')
        if video_index == '':
            video_index = image_index[:pos]
            have_prediction = False
        else:
            if video_index != image_index[:pos]:
                have_prediction = False
                video_index = image_index[:pos]
                print 'start video {}'.format(video_index)

        # read color image; zero out pixels with zero alpha
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED), 16)

        # load meta data
        meta_data = scipy.io.loadmat(imdb.metadata_path_at(i))

        # read label image (single-channel indices or BGR color image)
        labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        if len(labels_gt.shape) == 2:
            im_label_gt = imdb.labels_to_image(im, labels_gt)
        else:
            im_label_gt = np.copy(labels_gt[:, :, :3])
            im_label_gt[:, :, 0] = labels_gt[:, :, 2]
            im_label_gt[:, :, 2] = labels_gt[:, :, 0]

        _t['im_segment'].tic()
        labels, probs, vertex_pred = im_segment_single_frame(sess, net, im, im_depth, meta_data, imdb.num_classes)
        if cfg.TEST.VERTEX_REG:
            vertmap = _extract_vertmap(labels, vertex_pred, imdb._extents, imdb.num_classes)
            if cfg.TEST.RANSAC:
                # pose estimation using RANSAC
                fx = meta_data['intrinsic_matrix'][0, 0]
                fy = meta_data['intrinsic_matrix'][1, 1]
                px = meta_data['intrinsic_matrix'][0, 2]
                py = meta_data['intrinsic_matrix'][1, 2]
                depth_factor = meta_data['factor_depth'][0, 0]
                poses = RANSAC.estimate_pose(im_depth, probs, vertex_pred[0, :, :, :] / cfg.TRAIN.VERTEX_W,
                                             imdb._extents, fx, fy, px, py, depth_factor)

                # print gt poses
                # cls_indexes = meta_data['cls_indexes']
                # poses_gt = meta_data['poses']
                # for j in xrange(len(cls_indexes)):
                #     print 'object {}'.format(cls_indexes[j])
                #     print poses_gt[:, :, j]
            else:
                poses = []
        _t['im_segment'].toc()

        _t['misc'].tic()
        labels = unpad_im(labels, 16)

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        # reset the voxel grid at the start of each video
        if not have_prediction:
            if is_kfusion:
                KF.set_voxel_grid(-3, -3, -3, 6, 6, 7)

        # run kinect fusion
        if is_kfusion:
            height = im.shape[0]
            width = im.shape[1]
            labels_kfusion = np.zeros((height, width), dtype=np.int32)

            # convert BGR to RGB for kinect fusion
            im_rgb = np.copy(im)
            im_rgb[:, :, 0] = im[:, :, 2]
            im_rgb[:, :, 2] = im[:, :, 0]
            KF.feed_data(im_depth, im_rgb, im.shape[1], im.shape[0], float(meta_data['factor_depth']))
            KF.back_project()
            if have_prediction:
                pose_world2live, pose_live2world = KF.solve_pose()

            KF.feed_label(im_label, probs, colors)
            KF.fuse_depth()
            labels_kfusion = KF.extract_surface(labels_kfusion)
            im_label_kfusion = imdb.labels_to_image(im, labels_kfusion)
            KF.render()
            filename = os.path.join(output_dir, 'images', '{:04d}'.format(i))
            KF.draw(filename, 0)
        have_prediction = True

        if is_kfusion:
            seg = {'labels': labels_kfusion}
        else:
            seg = {'labels': labels}
        segmentations[i] = seg
        _t['misc'].toc()

        print 'im_segment {}: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(video_index, i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

        if cfg.TEST.VISUALIZE:
            if cfg.TEST.VERTEX_REG:
                # centers_gt = _vote_centers(labels_gt, meta_data['cls_indexes'], meta_data['center'], imdb.num_classes)
                vertmap_gt = pad_im(cv2.imread(imdb.vertmap_path_at(i), cv2.IMREAD_UNCHANGED), 16)
                vertmap_gt = vertmap_gt[:, :, (2, 1, 0)]
                vertmap_gt = vertmap_gt.astype(np.float32) / 255.0
                vertmap_gt = _unscale_vertmap(vertmap_gt, imdb._process_label_image(labels_gt),
                                              imdb._extents, imdb.num_classes)
                print 'visualization'
                vis_segmentations_vertmaps(im, im_depth, im_label, im_label_gt, imdb._class_colors,
                                           vertmap_gt, vertmap, labels, labels_gt, poses,
                                           meta_data['intrinsic_matrix'])
            else:
                vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
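
# For reference: the intrinsics unpacked above (fx, fy, px, py, depth_factor)
# follow the standard pinhole model, which is the convention the RANSAC pose
# estimator consumes depth in. A minimal, self-contained sketch of that
# backprojection (backproject_depth is a hypothetical helper, not part of
# this module):
def backproject_depth(im_depth, fx, fy, px, py, depth_factor):
    """Map a raw depth image to an H x W x 3 array of camera-frame points."""
    depth = im_depth.astype(np.float32) / depth_factor  # raw units -> meters
    v, u = np.indices(depth.shape)                      # pixel row/col grids
    X = (u - px) * depth / fx
    Y = (v - py) * depth / fy
    return np.dstack((X, Y, depth))
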
args = parse_args()

is_save = 0

print('Called with args:')
print(args)

# imdb
imdb = get_imdb(args.imdb_name)

# test a FCN on an image database
num_images = len(imdb.image_index)

# voxel labels
labels_voxel = np.zeros((128, 128, 128), dtype=np.int32)

# kinect fusion
KF = kfusion.PyKinectFusion(args.rig_name)

# construct colors: one column per class; class 0 (background) is white
colors = np.zeros((3, imdb.num_classes), dtype=np.uint8)
for i in range(imdb.num_classes):
    colors[0, i] = imdb._class_colors[i][0]
    colors[1, i] = imdb._class_colors[i][1]
    colors[2, i] = imdb._class_colors[i][2]
colors[:, 0] = 255

video_index = ''
have_prediction = False
for i in xrange(num_images):
    print i
    # parse image name
    image_index = imdb.image_index[i]
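
# Illustrative only: a per-class color table laid out like `colors` above
# (one column per class) can be applied to a label map as follows.
# colorize_labels is a hypothetical helper, not the imdb.labels_to_image
# used elsewhere in this file.
def colorize_labels(labels, colors):
    """Map an H x W integer label image to an H x W x 3 uint8 color image."""
    im_color = np.zeros((labels.shape[0], labels.shape[1], 3), dtype=np.uint8)
    for c in range(colors.shape[1]):
        I = np.where(labels == c)
        im_color[I[0], I[1], :] = colors[:, c]
    return im_color
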
def test_net(sess, net, imdb, weights_filename, rig_filename, is_kfusion):
    """Test a FCN on an image database."""
    output_dir = get_output_dir(imdb, weights_filename)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # if segmentations are cached, evaluate them and return
    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    print imdb.name
    if os.path.exists(seg_file):
        with open(seg_file, 'rb') as fid:
            segmentations = cPickle.load(fid)
        imdb.evaluate_segmentations(segmentations, output_dir)
        return

    num_images = len(imdb.image_index)
    segmentations = [[] for _ in xrange(num_images)]

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)
    # voxelizer.setup(-2, -2, -2, 2, 2, 2)

    # kinect fusion
    if is_kfusion:
        KF = kfusion.PyKinectFusion(rig_filename)

    # construct colors: one flat RGB triple per class
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    if cfg.TEST.VISUALIZE:
        perm = np.random.permutation(np.arange(num_images))
    else:
        perm = xrange(num_images)

    video_index = ''
    have_prediction = False
    for i in perm:
        rgba = pad_im(cv2.imread(imdb.image_path_at(i), cv2.IMREAD_UNCHANGED), 16)
        height = rgba.shape[0]
        width = rgba.shape[1]

        # parse image name: the prefix before '/' identifies the video;
        # reset the network state whenever a new video starts
        image_index = imdb.image_index[i]
        pos = image_index.find('/')
        if video_index == '':
            video_index = image_index[:pos]
            have_prediction = False
            state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
            points = np.zeros((1, height, width, 3), dtype=np.float32)
        else:
            if video_index != image_index[:pos]:
                have_prediction = False
                video_index = image_index[:pos]
                state = np.zeros((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
                weights = np.ones((1, height, width, cfg.TRAIN.NUM_UNITS), dtype=np.float32)
                points = np.zeros((1, height, width, 3), dtype=np.float32)
                print 'start video {}'.format(video_index)

        # read color image; zero out pixels with zero alpha
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        # read depth image
        im_depth = pad_im(cv2.imread(imdb.depth_path_at(i), cv2.IMREAD_UNCHANGED), 16)

        # load meta data
        meta_data = scipy.io.loadmat(imdb.metadata_path_at(i))

        # backprojection for the first frame
        if not have_prediction:
            if is_kfusion:
                # KF.set_voxel_grid(-3, -3, -3, 6, 6, 7)
                KF.set_voxel_grid(voxelizer.min_x, voxelizer.min_y, voxelizer.min_z,
                                  voxelizer.max_x - voxelizer.min_x,
                                  voxelizer.max_y - voxelizer.min_y,
                                  voxelizer.max_z - voxelizer.min_z)
                # identity transformation
                RT_world = np.zeros((3, 4), dtype=np.float32)
                RT_world[0, 0] = 1
                RT_world[1, 1] = 1
                RT_world[2, 2] = 1
            else:
                # store the RT for the first frame
                RT_world = meta_data['rotation_translation_matrix']

        # run kinect fusion
        if is_kfusion:
            # convert BGR to RGB for kinect fusion
            im_rgb = np.copy(im)
            im_rgb[:, :, 0] = im[:, :, 2]
            im_rgb[:, :, 2] = im[:, :, 0]
            KF.feed_data(im_depth, im_rgb, im.shape[1], im.shape[0], float(meta_data['factor_depth']))
            KF.back_project()
            if have_prediction:
                pose_world2live, pose_live2world = KF.solve_pose()
                RT_live = pose_world2live
            else:
                RT_live = RT_world
        else:
            # read the camera pose from the meta data
            RT_live = meta_data['rotation_translation_matrix']

        # compute camera poses relative to the previous frame
        pose_world2live = se3_mul(RT_live, se3_inverse(RT_world))
        pose_live2world = se3_inverse(pose_world2live)

        _t['im_segment'].tic()
        labels, probs, state, weights, points = im_segment(sess, net, im, im_depth, state, weights, points,
                                                           meta_data, voxelizer, pose_world2live, pose_live2world)
        _t['im_segment'].toc()
        # time.sleep(3)

        _t['misc'].tic()
        labels = unpad_im(labels, 16)

        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        if is_kfusion:
            labels_kfusion = np.zeros((height, width), dtype=np.int32)
            # kinect fusion expects at least 10 probability channels
            if probs.shape[2] < 10:
                probs_new = np.zeros((probs.shape[0], probs.shape[1], 10), dtype=np.float32)
                probs_new[:, :, :imdb.num_classes] = probs
                probs = probs_new
            KF.feed_label(im_label, probs, colors)
            KF.fuse_depth()
            labels_kfusion = KF.extract_surface(labels_kfusion)
            im_label_kfusion = imdb.labels_to_image(im, labels_kfusion)
            KF.render()
            filename = os.path.join(output_dir, 'images', '{:04d}'.format(i))
            KF.draw(filename, 0)
        have_prediction = True

        # the current pose becomes the reference for the next frame's delta
        RT_world = RT_live

        if is_kfusion:
            seg = {'labels': labels_kfusion}
        else:
            seg = {'labels': labels}
        segmentations[i] = seg
        _t['misc'].toc()

        if cfg.TEST.VISUALIZE:
            # read label image (single-channel indices or BGR color image)
            labels_gt = pad_im(cv2.imread(imdb.label_path_at(i), cv2.IMREAD_UNCHANGED), 16)
            if len(labels_gt.shape) == 2:
                im_label_gt = imdb.labels_to_image(im, labels_gt)
            else:
                im_label_gt = np.copy(labels_gt[:, :, :3])
                im_label_gt[:, :, 0] = labels_gt[:, :, 2]
                im_label_gt[:, :, 2] = labels_gt[:, :, 0]
            vis_segmentations(im, im_depth, im_label, im_label_gt, imdb._class_colors)

        print 'im_segment: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, _t['im_segment'].diff, _t['misc'].diff)

    if is_kfusion:
        KF.draw(filename, 1)

    seg_file = os.path.join(output_dir, 'segmentations.pkl')
    with open(seg_file, 'wb') as f:
        cPickle.dump(segmentations, f, cPickle.HIGHEST_PROTOCOL)

    # evaluation
    imdb.evaluate_segmentations(segmentations, output_dir)
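
# For reference: the pose algebra above treats a camera pose as a 3x4 [R|t]
# matrix. Minimal sketches of the two helpers, assuming standard SE(3)
# semantics (the actual implementations are imported from the se3 utilities):
def se3_inverse_sketch(RT):
    """Invert a 3x4 rigid transform: [R|t]^-1 = [R^T | -R^T t]."""
    RT_inv = np.zeros((3, 4), dtype=np.float32)
    RT_inv[:, :3] = RT[:, :3].T
    RT_inv[:, 3] = -np.dot(RT[:, :3].T, RT[:, 3])
    return RT_inv

def se3_mul_sketch(RT1, RT2):
    """Compose two 3x4 rigid transforms so that (RT1 * RT2) x = RT1(RT2(x))."""
    RT = np.zeros((3, 4), dtype=np.float32)
    RT[:, :3] = np.dot(RT1[:, :3], RT2[:, :3])
    RT[:, 3] = np.dot(RT1[:, :3], RT2[:, 3]) + RT1[:, 3]
    return RT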