    fy = 1067.487
    px = 312.9869
    py = 241.3109
    zfar = 6.0
    znear = 0.25
    tnear = 0.5
    tfar = 2.0
    num_classes = 22
    factor_depth = 10000.0
    intrinsic_matrix = np.array([[fx, 0, px], [0, fy, py], [0, 0, 1]])
    root = '/capri/YCB_Video_Dataset/data_syn/'

    if not os.path.exists(root):
        os.makedirs(root)

    synthesizer_ = libsynthesizer.Synthesizer(args.cad_name, args.pose_name)
    synthesizer_.setup(width, height)
    synthesizer_.init_rand(1200)

    # pack camera intrinsics, clipping planes, and near/far ranges for the synthesizer
    parameters = np.zeros((8, ), dtype=np.float32)
    parameters[0] = fx
    parameters[1] = fy
    parameters[2] = px
    parameters[3] = py
    parameters[4] = znear
    parameters[5] = zfar
    parameters[6] = tnear
    parameters[7] = tfar

    i = 0
    while i < num_images:
Example #2
File: run.py  Project: probcomp/PoseCNN
def run_network(sess, net, imdb, images, meta_data):
    """
    :param sess: TensorFlow session
    :param net: Pretrained neural network to run model over.
    :param imdb: TODO: Find out essential features of this object.
    :param images: [(rgb_image[0], depth_image[0]), ...]
    :param meta_data: Dictionary including camera intrinsics under 'intrinsic_matrix',
                      and scale factor under 'factor_depth' (default is 10,000).
    """

    n_images = len(images)
    segmentations = [[] for _ in range(n_images)]

    # timers
    _t = {'im_segment': Timer(), 'misc': Timer()}

    # voxelizer
    voxelizer = Voxelizer(cfg.TEST.GRID_SIZE, imdb.num_classes)
    voxelizer.setup(-3, -3, -3, 3, 3, 4)
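    # (the six setup arguments are presumably the grid's min and max corners:
    # x and y spanning [-3, 3] m and z spanning [-3, 4] m)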

    # flatten per-class RGB colors into a single interleaved uint8 buffer
    colors = np.zeros((3 * imdb.num_classes), dtype=np.uint8)
    for i in range(imdb.num_classes):
        colors[i * 3 + 0] = imdb._class_colors[i][0]
        colors[i * 3 + 1] = imdb._class_colors[i][1]
        colors[i * 3 + 2] = imdb._class_colors[i][2]

    perm = list(range(n_images))

    if (cfg.TEST.VERTEX_REG_2D
            and cfg.TEST.POSE_REFINE) or (cfg.TEST.VERTEX_REG_3D
                                          and cfg.TEST.POSE_REG):
        import libsynthesizer
        synthesizer = libsynthesizer.Synthesizer(cfg.CAD, cfg.POSE)
        synthesizer.setup(cfg.TRAIN.SYN_WIDTH, cfg.TRAIN.SYN_HEIGHT)

    batched_detections = []

    for i in perm:

        raw_rgb, raw_depth = images[i]

        # read color image; zero out fully transparent pixels when an alpha
        # channel is present
        rgba = pad_im(raw_rgb, 16)
        if rgba.shape[2] == 4:
            im = np.copy(rgba[:, :, :3])
            alpha = rgba[:, :, 3]
            I = np.where(alpha == 0)
            im[I[0], I[1], :] = 0
        else:
            im = rgba

        im_depth = pad_im(raw_depth, 16)

        _t['im_segment'].tic()

        labels, probs, vertex_pred, rois, poses = im_segment_single_frame(
            sess, net, im, im_depth, meta_data, voxelizer, imdb._extents,
            imdb._points_all, imdb._symmetry, imdb.num_classes)

        detections = []

        for j in range(rois.shape[0]):
            cls_idx = int(rois[j, 1])
            if cls_idx > 0:  # skip background (class 0)
                # projection
                # RT = np.zeros((3, 4), dtype=np.float32)
                # RT[:3, :3] = quat2mat(poses[j, :4])
                # RT[:, 3] = poses[j, 4:7]

                # assemble the 6-DoF world pose: translation + Euler angles
                pose_t = np.zeros((6, ), dtype=np.float32)
                pose_t[:3] = poses[j, 4:7]
                # pose_t[[0,2]] = pose_t[[2,0]]

                # flip z-axis to match renderer
                pose_t[2] = -pose_t[2]
                poses[j, [1, 2]] = -poses[j, [1, 2]]

                pose_t[3:] = quat2euler(poses[j, :4], axes='sxyz')
                cls = imdb._classes[cls_idx]
                detections.append((cls, pose_t))

        batched_detections.append(detections)

        labels = unpad_im(labels, 16)
        im_scale = cfg.TEST.SCALES_BASE[0]
        # build the label image
        im_label = imdb.labels_to_image(im, labels)

        poses_new = []
        poses_icp = []
        if cfg.TEST.VERTEX_REG_2D:
            if cfg.TEST.POSE_REG:
                # pose refinement
                fx = meta_data['intrinsic_matrix'][0, 0] * im_scale
                fy = meta_data['intrinsic_matrix'][1, 1] * im_scale
                px = meta_data['intrinsic_matrix'][0, 2] * im_scale
                py = meta_data['intrinsic_matrix'][1, 2] * im_scale
                factor = meta_data['factor_depth']
                znear = 0.25
                zfar = 6.0
                poses_new = np.zeros((poses.shape[0], 7), dtype=np.float32)
                poses_icp = np.zeros((poses.shape[0], 7), dtype=np.float32)
                error_threshold = 0.01
                if cfg.TEST.POSE_REFINE:
                    labels_icp = labels.copy()
                    rois_icp = rois
                    if imdb.num_classes == 2:
                        I = np.where(labels_icp > 0)
                        labels_icp[I[0], I[1]] = imdb._cls_index
                        rois_icp = rois.copy()
                        rois_icp[:, 1] = imdb._cls_index
                    im_depth = cv2.resize(im_depth,
                                          None,
                                          None,
                                          fx=im_scale,
                                          fy=im_scale,
                                          interpolation=cv2.INTER_LINEAR)

                    parameters = np.zeros((7, ), dtype=np.float32)
                    parameters[0] = fx
                    parameters[1] = fy
                    parameters[2] = px
                    parameters[3] = py
                    parameters[4] = znear
                    parameters[5] = zfar
                    parameters[6] = factor

                    height = labels_icp.shape[0]
                    width = labels_icp.shape[1]
                    num_roi = rois_icp.shape[0]
                    channel_roi = rois_icp.shape[1]
                    synthesizer.icp_python(labels_icp, im_depth, parameters, height, width, num_roi, channel_roi, \
                                           rois_icp, poses, poses_new, poses_icp, error_threshold)

        _t['im_segment'].toc()

        _t['misc'].tic()
        labels_new = cv2.resize(labels,
                                None,
                                None,
                                fx=1.0 / im_scale,
                                fy=1.0 / im_scale,
                                interpolation=cv2.INTER_NEAREST)
        seg = {
            'labels': labels_new,
            'rois': rois,
            'poses': poses,
            'poses_refined': poses_new,
            'poses_icp': poses_icp
        }

        segmentations[i] = seg
        _t['misc'].toc()

        print('im_segment: {:d}/{:d} {:.3f}s {:.3f}s'
              .format(i, n_images, _t['im_segment'].diff, _t['misc'].diff))

        if cfg.TEST.VISUALIZE:
            img_dir = os.path.join("output", "vis")
            os.makedirs(img_dir, exist_ok=True)
            vertmap = _extract_vertmap(labels, vertex_pred, imdb._extents,
                                       imdb.num_classes)
            vis_segmentations_vertmaps_detection(
                im,
                im_depth,
                im_label,
                imdb._class_colors,
                vertmap,
                labels,
                rois,
                poses,
                poses_icp,
                meta_data['intrinsic_matrix'],
                imdb.num_classes,
                imdb._classes,
                imdb._points_all,
                f_name=os.path.join(img_dir, "%i.png" % i))

    return batched_detections
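
A hedged sketch of consuming the return value (variable names are
illustrative; each frame yields a list of (class_name, pose) tuples, where
pose[:3] is the translation and pose[3:] the sxyz Euler angles, as built
above):

detections = run_network(sess, net, imdb, images, meta_data)
for frame_idx, frame_detections in enumerate(detections):
    for cls, pose_t in frame_detections:
        print(frame_idx, cls, pose_t[:3], pose_t[3:])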
Example #3
def render_one(intrinsic_matrix, extents, points, nb_img):
    synthesizer = libsynthesizer.Synthesizer(args.model_path, args.pose_path)
    synthesizer.setup(args.width, args.height)

    which_class = 0
    width = args.width
    height = args.height

    fx = intrinsic_matrix[0, 0]
    fy = intrinsic_matrix[1, 1]
    px = intrinsic_matrix[0, 2]
    py = intrinsic_matrix[1, 2]
    zfar = 6.0
    znear = 0.25
    factor_depth = 1000.0
    dataset = dict()
    print('Entering the render loop')
    n = 0
    while n < nb_img:
        print(n)

        # render a synthetic image
        im_syn = np.zeros((height, width, 4), dtype=np.float32)
        depth_syn = np.zeros((height, width, 3), dtype=np.float32)
        vertmap_syn = np.zeros((height, width, 3), dtype=np.float32)
        poses = np.zeros((1, 7), dtype=np.float32)
        centers = np.zeros((1, 2), dtype=np.float32)

        synthesizer.render_one_python(int(which_class), int(width), int(height), fx, fy, px, py, znear, zfar, \
            im_syn, depth_syn, vertmap_syn, poses, centers, extents)
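        # render_one_python fills the preallocated outputs in place
        # (im_syn, depth_syn, vertmap_syn, poses, centers) rather than
        # returning new arrays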

        # convert images
        im_syn = np.clip(255 * im_syn, 0, 255)
        im_syn = im_syn.astype(np.uint8)
        depth_syn = depth_syn[:, :, 0]

        # convert the normalized depth buffer (values in [0, 1]) back to
        # metric depth by inverting the perspective z-mapping, then scale by
        # factor_depth for integer storage; depth 1.0 marks empty background
        im_depth_raw = factor_depth * 2 * zfar * znear / (zfar + znear - (zfar - znear) * (2 * depth_syn - 1))
        I = np.where(depth_syn == 1)
        im_depth_raw[I[0], I[1]] = 0

        # recover the class label from the vertmap: the integer part of the
        # first channel encodes the class index (NaN marks background)
        label = np.round(vertmap_syn[:, :, 0]) + 1
        label[np.isnan(label)] = 0

        I = np.where(label != which_class + 1)
        label[I[0], I[1]] = 0

        # require at least 800 visible object pixels; otherwise re-render
        I = np.where(label == which_class + 1)
        if len(I[0]) < 800:
            continue

        # convert pose: pack the quaternion rotation and translation into a
        # 3x4 [R|t] matrix
        qt = np.zeros((3, 4, 1), dtype=np.float32)
        qt[:, :3, 0] = quat2mat(poses[0, :4])
        qt[:, 3, 0] = poses[0, 4:]

        # strip the class index from the vertmap, keeping only the fractional
        # coordinate part
        vertmap_syn[:, :, 0] = vertmap_syn[:, :, 0] - np.round(vertmap_syn[:, :, 0])
        vertmap_syn[np.isnan(vertmap_syn)] = 0

        # compute the 2D bounding box: project the 3D model points with the
        # estimated pose and take the extremes
        x3d = np.ones((4, points.shape[1]), dtype=np.float32)
        cls = which_class + 1  # equals 1 here; row 0 of points is presumably background
        x3d[0, :] = points[cls, :, 0]
        x3d[1, :] = points[cls, :, 1]
        x3d[2, :] = points[cls, :, 2]
        RT = qt[:, :, 0]
        x2d = np.matmul(intrinsic_matrix, np.matmul(RT, x3d))
        x2d[0, :] = np.divide(x2d[0, :], x2d[2, :])
        x2d[1, :] = np.divide(x2d[1, :], x2d[2, :])
        box = np.zeros((1, 4), dtype=np.float32)
        box[0, 0] = np.min(x2d[0, :])
        box[0, 1] = np.min(x2d[1, :])
        box[0, 2] = np.max(x2d[0, :])
        box[0, 3] = np.max(x2d[1, :])

        # metadata
        metadata = {'poses': qt, 'center': centers, 'box': box, \
                    'cls_indexes': np.array([which_class + 1]), 'intrinsic_matrix': intrinsic_matrix, 'factor_depth': factor_depth}

        # construct data
        data = {'image': im_syn, 'depth': im_depth_raw.astype(np.uint16), 'label': label.astype(np.uint8), 'meta_data': metadata}

        dataset[n] = data
        n += 1
    
    output_path = os.path.abspath(os.path.join(args.output_dir, 'dataset.h5'))
    with h5py.File(output_path, 'w') as hdf:
        hdf.create_dataset('data', data=json.dumps(dataset, cls=NumpyEncoder))
    print('Successfully saved the synthesized dataset to: ', output_path)
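
A minimal read-back sketch, assuming the JSON-in-HDF5 layout written above
(the 'data' dataset holds a single JSON string, with NumpyEncoder presumed to
have serialized the arrays as nested lists):

import json
import h5py

with h5py.File('dataset.h5', 'r') as hdf:
    dataset = json.loads(hdf['data'][()])

for n, sample in dataset.items():
    # keys mirror the per-sample dicts constructed above
    print(n, sample['meta_data']['factor_depth'])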