Example No. 1
def downsample_point_clouds():
    cfg = parse_arguments()

    vox_size = cfg.downsample_voxel_size
    synth_set = cfg.synth_set

    inp_dir = os.path.join(cfg.inp_dir, synth_set)
    files = glob.glob('{}/*.mat'.format(inp_dir))

    out_dir = cfg.out_dir
    out_synthset = os.path.join(out_dir, cfg.synth_set)
    mkdir_if_missing(out_synthset)

    for k, model_file in enumerate(files):
        print("{}/{}".format(k, len(files)))

        file_name = os.path.basename(model_file)
        sample_name, _ = os.path.splitext(file_name)

        obj = scipy.io.loadmat(model_file)

        out_filename = "{}/{}.mat".format(out_synthset, sample_name)
        if os.path.isfile(out_filename):
            print("already exists:", sample_name)
            continue

        Vgt = obj["points"]

        pcd = open3d.PointCloud()
        pcd.points = open3d.Vector3dVector(Vgt)
        downpcd = open3d.voxel_down_sample(pcd, voxel_size=vox_size)
        down_xyz = np.asarray(downpcd.points)
        scipy.io.savemat(out_filename, {"points": down_xyz})
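
The snippet above uses the legacy Open3D interface (open3d.PointCloud, open3d.Vector3dVector, open3d.voxel_down_sample), which later releases removed. A minimal sketch of the same downsampling step with the current geometry/utility API, assuming Open3D 0.10 or newer:

import numpy as np
import open3d as o3d

def voxel_downsample(points, voxel_size):
    # points: (N, 3) float array; returns the voxel-downsampled (M, 3) array.
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    downpcd = pcd.voxel_down_sample(voxel_size=voxel_size)
    return np.asarray(downpcd.points)
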
Example No. 2
def train():
    cfg = app_config

    setup_environment(cfg)

    train_dir = cfg.checkpoint_dir
    mkdir_if_missing(train_dir)

    # tf.logging.set_verbosity(tf.logging.INFO)

    split_name = "val"  #train
    dataset_file = os.path.join(cfg.inp_dir,
                                f"{cfg.synth_set}_{split_name}.pkl")
    dataset = Chair_dataset(dataset_file, cfg)
    if cfg.shuffle_dataset:
        torch.manual_seed(7000)
    print("*" * 30)
    print('creating dataloader')
    train_loader = DataLoader(dataset=dataset,
                              batch_size=cfg.batch_size,
                              num_workers=8,
                              shuffle=cfg.shuffle_dataset)
    for epoch in tqdm(range(cfg.max_number_of_steps), desc='Epoch', ncols=100):
        train_size = len(train_loader)
        ts = time.time()
        print_now = 0
        for batch_idx, train_data in tqdm(enumerate(train_loader),
                                          desc='Batch',
                                          total=train_size,
                                          ncols=100):
            # global_step = tf.train.get_or_create_global_step()
            # model = model_pc_pytorch.ModelPointCloud(cfg, global_step)
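            # Note: the model is re-created here for every batch, so parameters
            # learned in one iteration are not reused in the next unless
            # ModelPointCloud reloads them internally.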
            model = model_pc_pytorch.ModelPointCloud(cfg)
            inputs = preprocess(cfg, train_data)
            # print('inputs shape')
            # for i in inputs:
            #     print(i, inputs[i].shape)
            # Call Forward of model
            # outputs = model(inputs)
            # task_loss = model.get_loss(inputs, outputs)
            #             print(inputs.keys())
            loss = model.optimize_parameters(inputs)
            if print_now % 200 == 0:
                print("Epoch: %d, Step: %d, Loss: %f" %
                      (epoch, print_now, loss.item()))
            print_now += 1
            #reg_loss = regularization_loss(train_scopes, cfg)
            #loss = task_loss + reg_loss

            # break
        # break
    print("Training Complete!")
Example No. 3
def main(_):
    cfg = app_config

    exp_dir = cfg.checkpoint_dir
    out_dir = os.path.join(exp_dir, 'render')
    mkdir_if_missing(out_dir)
    inp_dir = os.path.join(exp_dir, cfg.save_predictions_dir)

    if cfg.models_list:
        models = parse_lines(cfg.models_list)
    else:
        dataset = Dataset3D(cfg)
        models = [sample.name for sample in dataset.data]

    for model_name in models:
        in_file = "{}/{}_pc.mat".format(inp_dir, model_name)
        if not os.path.isfile(in_file):
            in_file = "{}/{}_pc.npz".format(inp_dir, model_name)
            assert os.path.isfile(
                in_file), "no input file with saved point cloud"

        out_file = "{}/{}.png".format(out_dir, model_name)

        if os.path.isfile(out_file):
            print("{} already rendered".format(model_name))
            continue

        args = build_command_line_args(
            [["in_file", in_file], ["out_file", out_file],
             ["vis_azimuth", cfg.vis_azimuth],
             ["vis_elevation", cfg.vis_elevation], ["vis_dist", cfg.vis_dist],
             ["cycles_samples", cfg.render_cycles_samples], ["voxels", False],
             ["colored_subsets", cfg.render_colored_subsets],
             ["image_size", cfg.render_image_size]])
        render_cmd = "{} --background  -P {} -- {}".format(
            blender_exec, python_script, args)

        os.system(render_cmd)
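
build_command_line_args is not defined in this excerpt. A plausible sketch of such a helper, assuming each [name, value] pair simply becomes a "--name value" pair passed to the Blender-side script (hypothetical; the project's actual helper may encode values differently):

def build_command_line_args(pairs):
    # Turn [["name", value], ...] into "--name value --name value ...".
    parts = []
    for name, value in pairs:
        parts.append("--{}".format(name))
        parts.append(str(value))
    return " ".join(parts)
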
Example No. 4
def create_record(synth_set, split_name, models):
    im_size = args.image_size
    num_views = args.num_views
    num_models = len(models)

    mkdir_if_missing(args.out_dir)

    # address to save the data
    train_filename = "{}/{}_{}.pkl".format(args.out_dir, synth_set, split_name)

    render_dir = os.path.join(args.inp_dir_renders, synth_set)
    voxel_dir = os.path.join(args.inp_dir_voxels, synth_set)
    imagel, maskl, namel, voxl, extl, cam_posl, depthl = [], [], [], [], [], [], []
    for j, model in enumerate(models):
        print("{}/{}".format(j, num_models))

        if args.store_voxels:
            voxels_file = os.path.join(voxel_dir, "{}.mat".format(model))
            voxels = loadmat(voxels_file)["Volume"].astype(np.float32)

            # this needed to be compatible with the
            # PTN projections
            voxels = np.transpose(voxels, (1, 0, 2))
            voxels = np.flip(voxels, axis=1)

        im_dir = os.path.join(render_dir, model)
        images = sorted(glob.glob("{}/render_*.png".format(im_dir)))

        rgbs = np.zeros((num_views, im_size, im_size, 3), dtype=np.float32)
        masks = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)
        cameras = np.zeros((num_views, 4, 4), dtype=np.float32)
        cam_pos = np.zeros((num_views, 3), dtype=np.float32)
        depths = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)

        assert (len(images) >= num_views)

        for k in range(num_views):
            im_file = images[k]
            img = imread(im_file)
            rgb = img[:, :, 0:3]
            mask = img[:, :, [3]]
            mask = mask / 255.0
            if True:  # white background
                mask_fg = np.repeat(mask, 3, 2)
                mask_bg = 1.0 - mask_fg
                rgb = rgb * mask_fg + np.ones(rgb.shape) * 255.0 * mask_bg
            # plt.imshow(rgb.astype(np.uint8))
            # plt.show()
            rgb = rgb / 255.0
            actual_size = rgb.shape[0]
            if im_size != actual_size:
                rgb = im_resize(rgb, (im_size, im_size), order=3)
                mask = im_resize(mask, (im_size, im_size), order=3)
            rgbs[k, :, :, :] = rgb
            masks[k, :, :, :] = mask

            fn = os.path.basename(im_file)
            img_idx = int(re.search(r'\d+', fn).group())

            if args.store_camera:
                cam_file = "{}/camera_{}.mat".format(im_dir, img_idx)
                cam_extr, pos = read_camera(cam_file)
                cameras[k, :, :] = cam_extr
                cam_pos[k, :] = pos

            if args.store_depth:
                depth_file = "{}/depth_{}.png".format(im_dir, img_idx)
                depth = loadDepth(depth_file)
                d_max = 10.0
                d_min = 0.0
                depth = (depth - d_min) / d_max
                depth_r = im_resize(depth, (im_size, im_size), order=0)
                depth_r = depth_r * d_max + d_min
                depths[k, :, :] = np.expand_dims(depth_r, -1)
        imagel.append(_dtype_feature(rgbs))
        maskl.append(_dtype_feature(masks))
        namel.append(_string_feature(model))
        if args.store_voxels:
            voxl.append(_dtype_feature(voxels))
        if args.store_camera:
            extl.append(_dtype_feature(cameras))
            cam_posl.append(_dtype_feature(cam_pos))
        if args.store_depth:
            depthl.append(_dtype_feature(depths))
        """
        plt.imshow(np.squeeze(img[:,:,0:3]))
        plt.show()
        plt.imshow(np.squeeze(img[:,:,3]).astype(np.float32)/255.0)
        plt.show()
        """
        feature = {
            "image": imagel,
            "mask": maskl,
            "name": namel,
            "vox": voxl,
            "extrinsic": extl,
            "cam_pos": cam_posl,
            "depth": depthl
        }

    with open(train_filename, 'wb') as fp:
        #json.dump(feature, fp)
        pickle.dump(feature, fp)
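
For reference, the pickle written above can be read back into the same dictionary of per-model feature lists (a minimal sketch using the train_filename built in this example):

import pickle

with open(train_filename, 'rb') as fp:
    feature = pickle.load(fp)
# feature["image"][j], feature["mask"][j], feature["name"][j] line up with models[j].
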
Example No. 5
def create_record(synth_set, split_name, models):
    im_size = FLAGS.image_size
    num_views = FLAGS.num_views
    num_models = len(models)

    mkdir_if_missing(FLAGS.out_dir)

    #     # address to save the TFRecords file
    #     train_filename = "{}/{}_{}.tfrecords".format(FLAGS.out_dir, synth_set, split_name)
    #     # open the TFRecords file
    #     options = tf_record_options(FLAGS)
    #     writer = tf.python_io.TFRecordWriter(train_filename, options=options)

    render_dir = os.path.join(FLAGS.inp_dir_renders, synth_set)
    voxel_dir = os.path.join(FLAGS.inp_dir_voxels, synth_set)
    for j, model in enumerate(models):
        print("{}/{}".format(j, num_models))

        #         if FLAGS.store_voxels:
        #             voxels_file = os.path.join(voxel_dir, "{}.mat".format(model))
        #             voxels = loadmat(voxels_file)["Volume"].astype(np.float32)

        #             # this needed to be compatible with the
        #             # PTN projections
        #             voxels = np.transpose(voxels, (1, 0, 2))
        #             voxels = np.flip(voxels, axis=1)

        im_dir = os.path.join(render_dir, model, 'imgs')
        images = sorted(glob.glob("{}/*.jpg".format(im_dir)))

        rgbs = np.zeros((num_views, im_size, im_size, 3), dtype=np.float32)
        masks = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)
        cameras = np.zeros((num_views, 4, 4), dtype=np.float32)
        cam_pos = np.zeros((num_views, 3), dtype=np.float32)
        depths = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)

        assert (len(images) >= num_views)

        for k in range(num_views):
            im_file = images[k]
            img = imread(im_file)
            mask = imread(im_file.replace('imgs', 'masks'))
            rgb = img[:, :, 0:3]
            mask = mask.reshape(mask.shape[0], mask.shape[1], 1)
            mask = mask / 255.0
            if True:  # white background
                mask_fg = np.repeat(mask, 3, 2)
                mask_bg = 1.0 - mask_fg
                rgb = rgb * mask_fg + np.ones(rgb.shape) * 255.0 * mask_bg
            # plt.imshow(rgb.astype(np.uint8))
            # plt.show()
            rgb = rgb / 255.0
            actual_size = rgb.shape[0]
            if im_size != actual_size:
                rgb = im_resize(rgb, (im_size, im_size), order=3)
                mask = im_resize(mask, (im_size, im_size), order=3)
            rgbs[k, :, :, :] = rgb
            masks[k, :, :, :] = mask

            fn = os.path.basename(im_file)
            img_idx = int(re.search(r'\d+', fn).group())

            if FLAGS.store_camera:
                cam_file = "{}/camera_{}.mat".format(im_dir, img_idx)
                cam_extr, pos = read_camera(cam_file)
                cameras[k, :, :] = cam_extr
                cam_pos[k, :] = pos

            if FLAGS.store_depth:
                depth_file = "{}/depth_{}.png".format(im_dir, img_idx)
                depth = loadDepth(depth_file)
                d_max = 10.0
                d_min = 0.0
                depth = (depth - d_min) / d_max
                depth_r = im_resize(depth, (im_size, im_size), order=0)
                depth_r = depth_r * d_max + d_min
                depths[k, :, :] = np.expand_dims(depth_r, -1)

        # Create a feature
        feature = {"image": rgbs, "mask": masks, "name": model}
        #         if FLAGS.store_voxels:
        #             feature["vox"] = voxels

        if FLAGS.store_camera:
            # feature["extrinsic"] = _dtype_feature(extrinsic)
            feature["extrinsic"] = cameras
            feature["cam_pos"] = cam_pos

        if FLAGS.store_depth:
            feature["depth"] = depths

        feature_file = "{}/{}_features.p".format(FLAGS.out_dir, model)
        with open(feature_file, 'wb') as f:
            pickle.dump(feature, f)
Example No. 6
def compute_predictions():
    cfg = app_config

    setup_environment(cfg)

    exp_dir = cfg.checkpoint_dir

    cfg.batch_size = 1
    cfg.step_size = 1

    pc_num_points = cfg.pc_num_points
    vox_size = cfg.vox_size
    save_pred = cfg.save_predictions
    save_voxels = cfg.save_voxels
    fast_conversion = True

    pose_student = cfg.pose_predictor_student and cfg.predict_pose

    g = tf.Graph()
    with g.as_default():
        model = model_pc.ModelPointCloud(cfg)

        out = build_model(model)
        input_image = out["inputs"]
        cam_matrix = out["camera_extr_src"]
        cam_quaternion = out["cam_quaternion"]
        point_cloud = out["points_1"]
        rgb = out["rgb_1"] if cfg.pc_rgb else tf.no_op()
        projs = out["projs"]
        projs_rgb = out["projs_rgb"]
        projs_depth = out["projs_depth"]
        cam_transform = out["cam_transform"]
        z_latent = out["z_latent"]

        if pose_student:
            proj_student, camera_pose_student = model_student(
                input_image, model)

        input_pc = tf.placeholder(tf.float32, [cfg.batch_size, None, 3])
        if save_voxels:
            if fast_conversion:
                voxels, _ = pointcloud2voxels3d_fast(cfg, input_pc, None)
                voxels = tf.expand_dims(voxels, axis=-1)
                voxels = smoothen_voxels3d(cfg, voxels, model.gauss_kernel())
            else:
                voxels = pointcloud2voxels(cfg, input_pc, model.gauss_sigma())

        q_inp = tf.placeholder(tf.float32, [1, 4])
        q_matrix = as_rotation_matrix(q_inp)

        input_pc, pred_quat, gt_quat, pc_unrot = model_unrotate_points(cfg)
        pc_rot = quaternion_rotate(input_pc, pred_quat)

        config = tf.ConfigProto(device_count={'GPU': 1})
        config.gpu_options.per_process_gpu_memory_fraction = cfg.per_process_gpu_memory_fraction

        sess = tf.Session(config=config)
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        variables_to_restore = slim.get_variables_to_restore(exclude=["meta"])

    restorer = tf.train.Saver(variables_to_restore)
    checkpoint_file = tf.train.latest_checkpoint(exp_dir)
    print("restoring checkpoint", checkpoint_file)
    restorer.restore(sess, checkpoint_file)

    save_dir = os.path.join(exp_dir,
                            '{}_vis_proj'.format(cfg.save_predictions_dir))
    mkdir_if_missing(save_dir)
    save_pred_dir = os.path.join(exp_dir, cfg.save_predictions_dir)
    mkdir_if_missing(save_pred_dir)

    vis_size = cfg.vis_size

    dataset = Dataset3D(cfg)

    pose_num_candidates = cfg.pose_predict_num_candidates
    num_views = cfg.num_views
    plot_h = 4
    plot_w = 6
    num_views = int(min(num_views, plot_h * plot_w / 2))

    if cfg.models_list:
        model_names = parse_lines(cfg.models_list)
    else:
        model_names = [sample.name for sample in dataset.data]

    num_models = len(model_names)
    for k in range(num_models):
        model_name = model_names[k]
        sample = dataset.sample_by_name(model_name)

        images = sample.image
        masks = sample.mask
        if cfg.saved_camera:
            cameras = sample.camera
            cam_pos = sample.cam_pos
        if cfg.vis_depth_projs:
            depths = sample.depth
        if cfg.variable_num_views:
            num_views = sample.num_views

        print("{}/{} {}".format(k, num_models, model_name))

        if pose_num_candidates == 1:
            grid = np.empty((plot_h, plot_w), dtype=object)
        else:
            plot_w = pose_num_candidates + 1
            if pose_student:
                plot_w += 1
            grid = np.empty((num_views, plot_w), dtype=object)

        if save_pred:
            all_pcs = np.zeros((num_views, pc_num_points, 3))
            all_cameras = np.zeros((num_views, 4))
            all_voxels = np.zeros((num_views, vox_size, vox_size, vox_size))
            all_z_latent = np.zeros((num_views, cfg.fc_dim))

        for view_idx in range(num_views):
            input_image_np = images[[view_idx], :, :, :]
            gt_mask_np = masks[[view_idx], :, :, :]
            if cfg.saved_camera:
                extr_mtr = cameras[view_idx, :, :]
                cam_quaternion_np = quaternion_from_campos(
                    cam_pos[view_idx, :])
                cam_quaternion_np = np.expand_dims(cam_quaternion_np, axis=0)
            else:
                extr_mtr = np.zeros((4, 4))

            if cfg.pc_rgb:
                proj_tensor = projs_rgb
            elif cfg.vis_depth_projs:
                proj_tensor = projs_depth
            else:
                proj_tensor = projs
            (pc_np, rgb_np, proj_np, cam_transf_np, z_latent_np) = sess.run(
                [point_cloud, rgb, proj_tensor, cam_transform, z_latent],
                feed_dict={
                    input_image: input_image_np,
                    cam_matrix: extr_mtr,
                    cam_quaternion: cam_quaternion_np
                })

            if pose_student:
                (proj_student_np, camera_student_np) = sess.run(
                    [proj_student, camera_pose_student],
                    feed_dict={input_image: input_image_np})
                predicted_camera = camera_student_np
            else:
                predicted_camera = cam_transf_np

            if cfg.vis_depth_projs:
                proj_np = normalise_depthmap(proj_np)
                if depths is not None:
                    depth_np = depths[view_idx, :, :, :]
                    depth_np = normalise_depthmap(depth_np)
                else:
                    depth_np = 1.0 - np.squeeze(gt_mask_np)
                if pose_student:
                    proj_student_np = normalise_depthmap(proj_student_np)

            if cfg.predict_pose:
                if cfg.save_rotated_points:
                    ref_rot = scipy.io.loadmat(
                        "{}/final_reference_rotation.mat".format(exp_dir))
                    ref_rot = ref_rot["rotation"]
                    pc_np_unrot = sess.run(pc_rot,
                                           feed_dict={
                                               input_pc: pc_np,
                                               pred_quat: ref_rot
                                           })
                    pc_np = pc_np_unrot

            if cfg.pc_rgb:
                gt_image = input_image_np
            elif cfg.vis_depth_projs:
                gt_image = depth_np
            else:
                gt_image = gt_mask_np

            if pose_num_candidates == 1:
                view_j = view_idx * 2 // plot_w
                view_i = view_idx * 2 % plot_w

                gt_image = np.squeeze(gt_image)
                grid[view_j, view_i] = mask4vis(cfg, gt_image, vis_size)

                curr_img = np.squeeze(proj_np)
                grid[view_j, view_i + 1] = mask4vis(cfg, curr_img, vis_size)

                if cfg.save_individual_images:
                    curr_dir = os.path.join(save_dir, sample.name)
                    if not os.path.exists(curr_dir):
                        os.makedirs(curr_dir)
                    imageio.imwrite(
                        os.path.join(curr_dir,
                                     '{}_{}.png'.format(view_idx, 'rgb_gt')),
                        mask4vis(cfg, np.squeeze(input_image_np), vis_size))
                    imageio.imwrite(
                        os.path.join(curr_dir,
                                     '{}_{}.png'.format(view_idx,
                                                        'mask_pred')),
                        mask4vis(cfg, np.squeeze(proj_np), vis_size))
            else:
                view_j = view_idx

                gt_image = np.squeeze(gt_image)
                grid[view_j, 0] = mask4vis(cfg, gt_image, vis_size)

                for kk in range(pose_num_candidates):
                    curr_img = np.squeeze(proj_np[kk, :, :, :])
                    grid[view_j, kk + 1] = mask4vis(cfg, curr_img, vis_size)

                    if cfg.save_individual_images:
                        curr_dir = os.path.join(save_dir, sample.name)
                        if not os.path.exists(curr_dir):
                            os.makedirs(curr_dir)
                        imageio.imwrite(
                            os.path.join(
                                curr_dir,
                                '{}_{}_{}.png'.format(view_idx, kk,
                                                      'mask_pred')),
                            mask4vis(cfg, np.squeeze(curr_img), vis_size))

                if cfg.save_individual_images:
                    imageio.imwrite(
                        os.path.join(curr_dir,
                                     '{}_{}.png'.format(view_idx, 'mask_gt')),
                        mask4vis(cfg, np.squeeze(gt_mask_np), vis_size))

                if pose_student:
                    grid[view_j,
                         -1] = mask4vis(cfg, np.squeeze(proj_student_np),
                                        vis_size)

            if save_pred:
                all_pcs[view_idx, :, :] = np.squeeze(pc_np)
                all_z_latent[view_idx] = z_latent_np
                if cfg.predict_pose:
                    all_cameras[view_idx, :] = predicted_camera
                if save_voxels:
                    # multiplying by two is necessary because
                    # pc->voxel conversion expects points in [-1, 1] range
                    pc_np_range = pc_np
                    if not fast_conversion:
                        pc_np_range *= 2.0
                    voxels_np = sess.run(voxels,
                                         feed_dict={input_pc: pc_np_range})
                    all_voxels[view_idx, :, :, :] = np.squeeze(voxels_np)

            vis_view = view_idx == 0 or cfg.vis_all_views
            if cfg.vis_voxels and vis_view:
                rgb_np = np.squeeze(rgb_np) if cfg.pc_rgb else None
                vis_pc(np.squeeze(pc_np), rgb=rgb_np)

        grid_merged = merge_grid(cfg, grid)
        imageio.imwrite("{}/{}_proj.png".format(save_dir, sample.name),
                        grid_merged)

        if save_pred:
            if cfg.save_as_mat:
                save_dict = {"points": all_pcs, "z_latent": all_z_latent}
                if cfg.predict_pose:
                    save_dict["camera_pose"] = all_cameras
                scipy.io.savemat("{}/{}_pc".format(save_pred_dir, sample.name),
                                 mdict=save_dict)
            else:
                np.savez("{}/{}_pc".format(save_pred_dir, sample.name),
                         all_pcs)

            if save_voxels:
                np.savez("{}/{}_vox".format(save_pred_dir, sample.name),
                         all_voxels)

    sess.close()
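
The predictions saved above can be loaded back as follows (a minimal sketch; the paths assume the save_pred_dir and sample names from this run):

import numpy as np
import scipy.io

pred = scipy.io.loadmat("{}/{}_pc.mat".format(save_pred_dir, sample.name))
points = pred["points"]            # (num_views, pc_num_points, 3)
cameras = pred.get("camera_pose")  # present only if cfg.predict_pose

# When cfg.save_as_mat is False, np.savez was used instead:
points = np.load("{}/{}_pc.npz".format(save_pred_dir, sample.name))["arr_0"]
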
Example No. 7
def train():
    cfg = app_config

    setup_environment(cfg)

    train_dir = cfg.checkpoint_dir
    mkdir_if_missing(train_dir)

    tf.logging.set_verbosity(tf.logging.INFO)

    split_name = "train"
    dataset_file = os.path.join(cfg.inp_dir, f"{cfg.synth_set}_{split_name}.tfrecords")

    dataset = tf.data.TFRecordDataset(dataset_file, compression_type=tf_record_compression(cfg))
    if cfg.shuffle_dataset:
        dataset = dataset.shuffle(7000)
    dataset = dataset.map(lambda rec: parse_tf_records(cfg, rec), num_parallel_calls=3) \
        .batch(cfg.batch_size) \
        .prefetch(buffer_size=100) \
        .repeat()

    iterator = dataset.make_one_shot_iterator()
    train_data = iterator.get_next()

    summary_writer = tfsum.create_file_writer(train_dir, flush_millis=10000)

    with summary_writer.as_default(), tfsum.record_summaries_every_n_global_steps(10):
        global_step = tf.train.get_or_create_global_step()
        print("global step: ", global_step)
        model = model_pc.ModelPointCloud(cfg, global_step)
        inputs = model.preprocess(train_data, cfg.step_size)

        model_fn = model.get_model_fn(
            is_training=True, reuse=False, run_projection=True)
        outputs = model_fn(inputs)

        # train_scopes
        train_scopes = ["decoder/point_cloud"]
        # train_scopes = ["decoder"]

        # loss
        task_loss = model.get_loss(inputs, outputs)
        reg_loss = regularization_loss(train_scopes, cfg)
        loss = task_loss + reg_loss

        # summary op
        summary_op = tfsum.all_summary_ops()

        # optimizer
        var_list = get_trainable_variables(train_scopes)
        optimizer = tf.train.AdamOptimizer(get_learning_rate(cfg, global_step))
        train_op = optimizer.minimize(loss, global_step, var_list)

    # saver
    max_to_keep = 2
    saver = tf.train.Saver(max_to_keep=max_to_keep)

    session_config = tf.ConfigProto(
        log_device_placement=False)
    session_config.gpu_options.allow_growth = cfg.gpu_allow_growth
    session_config.gpu_options.per_process_gpu_memory_fraction = cfg.per_process_gpu_memory_fraction

    sess = tf.Session(config=session_config)
    with sess, summary_writer.as_default():
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()
        tfsum.initialize(graph=tf.get_default_graph())
        ## TODO load pretrain
        variables_to_restore = slim.get_variables_to_restore(exclude=["meta"])
        # (optionally also exclude "decoder/point_cloud")
        print(variables_to_restore)
        restorer = tf.train.Saver(variables_to_restore)
        checkpoint_file = "model-800000"
        # checkpoint_file = "model-665000"
        # checkpoint_file = "model-600000"
        # checkpoint_file = tf.train.latest_checkpoint(cfg.checkpoint_dir)
        print("restoring checkpoint", checkpoint_file)
        restorer.restore(sess, checkpoint_file)

        global_step_val = 0
        while global_step_val < cfg.max_number_of_steps:
            t0 = time.perf_counter()
            _, loss_val, global_step_val, summary = sess.run([train_op, loss, global_step, summary_op])
            t1 = time.perf_counter()
            dt = t1 - t0
            print(f"step: {global_step_val}, loss = {loss_val:.4f} ({dt:.3f} sec/step), lr = {sess.run(optimizer._lr)}")
            if global_step_val % 5000 == 0:
                saver.save(sess, f"{train_dir}/model", global_step=global_step_val)
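
get_trainable_variables and regularization_loss are project helpers not shown here. A minimal sketch of the first, assuming it simply gathers the trainable variables under the listed scopes (hypothetical, TF1-style):

def get_trainable_variables(scopes):
    # Collect trainable variables whose names fall under one of the scopes.
    var_list = []
    for scope in scopes:
        var_list += tf.trainable_variables(scope=scope)
    return var_list
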
Example No. 8
def compute_predictions():
    cfg = app_config

    setup_environment(cfg)

    exp_dir = cfg.checkpoint_dir

    cfg.batch_size = 1
    cfg.step_size = 1

    pc_num_points = cfg.pc_num_points
    vox_size = cfg.vox_size
    save_pred = cfg.save_predictions
    save_voxels = cfg.save_voxels
    fast_conversion = True

    pose_student = cfg.pose_predictor_student and cfg.predict_pose

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    model = model_pc.ModelPointCloud(cfg)
    model = model.to(device)

    log_dir = '../../dpc/run/model_run_data/'
    learning_rate = 1e-4
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=cfg.weight_decay)
    global_step = 100000
    if global_step > 0:
        checkpoint_path = os.path.join(log_dir,
                                       'model.ckpt_{}.pth'.format(global_step))
        print("Loading from path:", checkpoint_path)
        checkpoint = torch.load(checkpoint_path)
        global_step_val = checkpoint['global_step']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        loss = checkpoint['loss']
        print('Restored checkpoint at {} with loss {}'.format(global_step, loss))
    else:
        global_step_val = global_step

    save_dir = os.path.join(exp_dir, '{}_vis_proj'.format(cfg.save_predictions_dir))
    mkdir_if_missing(save_dir)
    save_pred_dir = os.path.join(exp_dir, cfg.save_predictions_dir)
    mkdir_if_missing(save_pred_dir)

    vis_size = cfg.vis_size

    split_name = "val"
    dataset_folder = cfg.inp_dir

    dataset = ShapeRecords(dataset_folder, cfg, split_name)
    dataset_loader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=cfg.batch_size,
                                                 shuffle=cfg.shuffle_dataset,
                                                 num_workers=4,
                                                 drop_last=True)
    pose_num_candidates = cfg.pose_predict_num_candidates
    num_views = cfg.num_views
    plot_h = 4
    plot_w = 6
    num_views = int(min(num_views, plot_h * plot_w / 2))

    if cfg.models_list:
        model_names = parse_lines(cfg.models_list)
    else:
        model_names = dataset.file_names
    num_models = len(model_names)
    
    for k in range(num_models):
        model_name = model_names[k]
        sample = dataset[k]
        images = sample['image']
        masks = sample['mask']
        if cfg.saved_camera:
            cameras = sample['extrinsic']
            cam_pos = sample['cam_pos']
        if cfg.vis_depth_projs:
            depths = sample['depth']
        if cfg.variable_num_views:
            num_views = sample['num_views']

        print("{}/{} {}".format(k, num_models, model_name))

        if pose_num_candidates == 1:
            grid = np.empty((plot_h, plot_w), dtype=object)
        else:
            plot_w = pose_num_candidates + 1
            if pose_student:
                plot_w += 1
            grid = np.empty((num_views, plot_w), dtype=object)

        if save_pred:
            all_pcs = np.zeros((num_views, pc_num_points, 3))
            all_cameras = np.zeros((num_views, 4))
            #all_voxels = np.zeros((num_views, vox_size, vox_size, vox_size))
            #all_z_latent = np.zeros((num_views, cfg.fc_dim))
        
      
        for view_idx in range(num_views):
            input_image_np = images[[view_idx], :, :, :]
            gt_mask_np = masks[[view_idx], :, :, :]
            if cfg.saved_camera:
                extr_mtr = cameras[view_idx, :, :]
                cam_quaternion_np = quaternion_from_campos(cam_pos[view_idx, :])
                cam_quaternion_np = np.expand_dims(cam_quaternion_np, axis=0)
            else:
                extr_mtr = np.zeros((4, 4))

            code = 'images' if cfg.predict_pose else 'images_1'
            model_input = {code: input_image_np,
                           'matrices': extr_mtr,
                           'camera_quaternion': cam_quaternion_np}

            out = build_model(model, model_input, global_step)
            input_image = out["inputs"]
            cam_matrix = out["camera_extr_src"]
            cam_quaternion = out["cam_quaternion"]
            point_cloud = out["points_1"]
            # rgb = out["rgb_1"] if cfg.pc_rgb else None
            # projs = out["projs"]
            # projs_rgb = out["projs_rgb"]
            # projs_depth = out["projs_depth"]
            cam_transform = out["cam_transform"]
            # z_latent = out["z_latent"]

            #if cfg.pc_rgb:
            #    proj_tensor = projs_rgb
            #elif cfg.vis_depth_projs:
            #    proj_tensor = projs_depth
            #else:
            #    proj_tensor = projs

            if pose_student:
                camera_student_np = out["pose_student"]
                predicted_camera = camera_student_np
            else:
                predicted_camera = cam_transform

            #if cfg.vis_depth_projs:
            #    proj_np = normalise_depthmap(out["projs"])
            #    if depths is not None:
            #        depth_np = depths[view_idx, :, :, :]
            #        depth_np = normalise_depthmap(depth_np)
            #    else:
            #        depth_np = 1.0 - np.squeeze(gt_mask_np)
            #    if pose_student:
            #        proj_student_np = normalise_depthmap(proj_student_np)


            #if save_voxels:
            #    if fast_conversion:
            #        voxels, _ = pointcloud2voxels3d_fast(cfg, input_pc, None)
            #        voxels = tf.expand_dims(voxels, axis=-1)
            #        voxels = smoothen_voxels3d(cfg, voxels, model.gauss_kernel())
            #    else:
            #        voxels = pointcloud2voxels(cfg, input_pc, model.gauss_sigma())
            if cfg.predict_pose:
                if cfg.save_rotated_points:
                    ref_rot = scipy.io.loadmat(
                        "{}/final_reference_rotation.mat".format(exp_dir))
                    ref_rot = ref_rot["rotation"]

                    # rotate the predicted points by the reference rotation
                    pc_unrot = quaternion_rotate(point_cloud, ref_rot)
                    point_cloud = pc_unrot


            if cfg.pc_rgb:
                gt_image = input_image_np
            elif cfg.vis_depth_projs:
                gt_image = depth_np
            else:
                gt_image = gt_mask_np

#             if pose_num_candidates == 1:
#                 view_j = view_idx * 2 // plot_w
#                 view_i = view_idx * 2 % plot_w

#                 gt_image = np.squeeze(gt_image)
#                 grid[view_j, view_i] = mask4vis(cfg, gt_image, vis_size)

#                 curr_img = np.squeeze(out[projs])
#                 grid[view_j, view_i + 1] = mask4vis(cfg, curr_img, vis_size)

#                 if cfg.save_individual_images:
#                     curr_dir = os.path.join(save_dir, model_names[k])
#                     if not os.path.exists(curr_dir):
#                         os.makedirs(curr_dir)
#                     imageio.imwrite(os.path.join(curr_dir, '{}_{}.png'.format(view_idx, 'rgb_gt')),
#                                     mask4vis(cfg, np.squeeze(input_image_np), vis_size))
#                     imageio.imwrite(os.path.join(curr_dir, '{}_{}.png'.format(view_idx, 'mask_pred')),
#                                     mask4vis(cfg, np.squeeze(proj_np), vis_size))
#             else:
#                 view_j = view_idx

#                 gt_image = np.squeeze(gt_image)
#                 grid[view_j, 0] = mask4vis(cfg, gt_image, vis_size)

#                 for kk in range(pose_num_candidates):
#                     curr_img = np.squeeze(out["projs"][kk, :, :, :].detach().cpu())
#                     grid[view_j, kk + 1] = mask4vis(cfg, curr_img, vis_size)

#                     if cfg.save_individual_images:
#                         curr_dir = os.path.join(save_dir, model_names[k])
#                         if not os.path.exists(curr_dir):
#                             os.makedirs(curr_dir)
#                         imageio.imwrite(os.path.join(curr_dir, '{}_{}_{}.png'.format(view_idx, kk, 'mask_pred')),
#                                         mask4vis(cfg, np.squeeze(curr_img), vis_size))

#                 if cfg.save_individual_images:
#                     imageio.imwrite(os.path.join(curr_dir, '{}_{}.png'.format(view_idx, 'mask_gt')),
#                                     mask4vis(cfg, np.squeeze(gt_mask_np), vis_size))

#                 if pose_student:
#                     grid[view_j, -1] = mask4vis(cfg, np.squeeze(proj_student_np.detach().cpu()), vis_size)

            if save_pred:
                #pc_np = pc_np.detach().cpu().numpy()
                all_pcs[view_idx, :, :] = np.squeeze(point_cloud.detach().cpu())
                #all_z_latent[view_idx] = z_latent.detach().cpu()
                if cfg.predict_pose:
                    all_cameras[view_idx, :] = predicted_camera.detach().cpu()
#                 if save_voxels:
#                     # multiplying by two is necessary because
#                     # pc->voxel conversion expects points in [-1, 1] range
#                     pc_np_range = pc_np
#                     if not fast_conversion:
#                         pc_np_range *= 2.0
#                     voxels_np = sess.run(voxels, feed_dict={input_pc: pc_np_range})
#                     all_voxels[view_idx, :, :, :] = np.squeeze(voxels_np)

#             vis_view = view_idx == 0 or cfg.vis_all_views
#             if cfg.vis_voxels and vis_view:
#                 rgb_np = np.squeeze(rgb_np) if cfg.pc_rgb else None
#                 vis_pc(np.squeeze(pc_np), rgb=rgb_np)

        #grid_merged = merge_grid(cfg, grid)
        #imageio.imwrite("{}/{}_proj.png".format(save_dir, sample.file_names), grid_merged)
        
        if save_pred:
            save_dict = {"points": all_pcs}
            if cfg.predict_pose:
                save_dict["camera_pose"] = all_cameras
            # alternatively, save as .mat:
            # scipy.io.savemat("{}/{}_pc.mat".format(save_pred_dir, model_names[k]),
            #                  mdict=save_dict)
            with open("{}/{}_pc.pkl".format(save_pred_dir, model_names[k]), 'wb') as handle:
                pickle.dump(save_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
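
The per-model prediction pickles written above can be read back like this (a minimal sketch):

import pickle

with open("{}/{}_pc.pkl".format(save_pred_dir, model_names[k]), 'rb') as handle:
    save_dict = pickle.load(handle)
points = save_dict["points"]            # (num_views, pc_num_points, 3)
cameras = save_dict.get("camera_pose")  # only present when cfg.predict_pose
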
Example No. 9
def train():
    cfg = app_config

    setup_environment(cfg)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    train_dir = cfg.checkpoint_dir
    mkdir_if_missing(train_dir)

    split_name = "train"
    dataset_folder = cfg.inp_dir

    dataset = ShapeRecords(dataset_folder, cfg, split_name)
    dataset_loader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=cfg.batch_size,
                                                 shuffle=cfg.shuffle_dataset,
                                                 num_workers=4,
                                                 drop_last=True)
    summary_writer = SummaryWriter(log_dir=train_dir, flush_secs=10)

    ckpt_count = 1000
    summary_count = 100

    # loading pre existing model

    # creating a new model
    model = model_pc.ModelPointCloud(cfg)
    model = model.to(device)
    print(model.parameters)
    log_dir = '../../dpc/run/model_run_data_lamp/'
    mkdir_if_missing(log_dir)
    learning_rate = 1e-4
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 weight_decay=cfg.weight_decay)

    # training steps

    global_step = 38000
    if global_step > 0:
        checkpoint_path = os.path.join(log_dir,
                                       'model.ckpt_{}.pth'.format(global_step))
        print("Loading from path:", checkpoint_path)
        checkpoint = torch.load(checkpoint_path)
        global_step_val = checkpoint['global_step']
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    else:
        global_step_val = global_step
    model.train()
    while global_step_val < cfg.max_number_of_steps:

        step_loss = 0.0
        for i, train_data in enumerate(dataset_loader, 0):

            t9 = time.perf_counter()
            for k in train_data.keys():
                try:
                    train_data[k] = train_data[k].to(device)
                except AttributeError:
                    pass
            # get inputs by data processing

            t0 = time.perf_counter()
            inputs = model.preprocess(train_data, cfg.step_size)

            t1 = time.perf_counter()
            # zero the parameter gradients
            optimizer.zero_grad()
            t2 = time.perf_counter()
            # dummy loss function
            if global_step_val % summary_count == 0:
                outputs = model(inputs,
                                global_step_val,
                                is_training=True,
                                run_projection=True,
                                summary_writer=summary_writer)
                loss, min_loss = model.get_loss(inputs,
                                                outputs,
                                                summary_writer,
                                                add_summary=True,
                                                global_step=global_step_val)
                summary_writer.add_image(
                    'prediction', outputs['projs'].detach().cpu().numpy()[
                        min_loss[0]].transpose(2, 0, 1), global_step_val)
                summary_writer.add_image(
                    'actual',
                    inputs['masks'].detach().cpu().numpy()[0].transpose(
                        2, 0, 1), global_step_val)
                #print(chamfer_distance( outputs['projs'].detach().cpu().numpy()[min_loss[0]].transpose(2, 0, 1), inputs['masks'].detach().cpu().numpy()[0].transpose(2, 0, 1))
            else:
                outputs = model(inputs,
                                global_step_val,
                                is_training=True,
                                run_projection=True)
                loss, _ = model.get_loss(inputs, outputs, add_summary=False)
            loss.backward()
            optimizer.step()
            del inputs
            del outputs
            t3 = time.perf_counter()
            dt = t3 - t9

            #print('Cuda {}'.format(t0-t9))
            #print('Preprocess {}'.format(t1-t0))
            #print('Forward {}'.format(t2-t1))
            #print('Backward {}'.format(t3-t2))
            step_loss += loss.item()
            loss_avg = step_loss / (i + 1)
            print(
                f"step: {global_step_val}, loss= {loss.item():.5f}, loss_average = {loss_avg:.4f} ({dt:.3f} sec/step)"
            )
            if global_step_val % ckpt_count == 0:  # save configuration

                checkpoint_path = os.path.join(
                    log_dir, 'model.ckpt_{}.pth'.format(global_step_val))
                print("PATH:", checkpoint_path)
                torch.save(
                    {
                        'global_step': global_step_val,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': loss_avg
                    }, checkpoint_path)
            global_step_val += 1
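
The scalar and image summaries written through SummaryWriter above land in train_dir (cfg.checkpoint_dir) and can be inspected with TensorBoard, e.g. "tensorboard --logdir <checkpoint_dir>".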
Example No. 10
def create_record(synth_set, split_name, models):
    im_size = FLAGS.image_size
    num_views = FLAGS.num_views
    num_models = len(models)

    mkdir_if_missing(FLAGS.out_dir)

    # address to save the TFRecords file
    train_filename = "{}/{}_{}.tfrecords".format(FLAGS.out_dir, synth_set,
                                                 split_name)
    # open the TFRecords file
    options = tf_record_options(FLAGS)
    writer = tf.python_io.TFRecordWriter(train_filename, options=options)

    render_dir = os.path.join(FLAGS.inp_dir_renders, synth_set)
    voxel_dir = os.path.join(FLAGS.inp_dir_voxels, synth_set)
    for j, model in enumerate(models):
        print("{}/{}".format(j, num_models))

        if FLAGS.store_voxels:
            voxels_file = os.path.join(voxel_dir, "{}.mat".format(model))
            voxels = loadmat(voxels_file)["Volume"].astype(np.float32)

            # this needed to be compatible with the
            # PTN projections
            voxels = np.transpose(voxels, (1, 0, 2))
            voxels = np.flip(voxels, axis=1)

        im_dir = os.path.join(render_dir, model)
        images = sorted(glob.glob("{}/render_*.png".format(im_dir)))

        rgbs = np.zeros((num_views, im_size, im_size, 3), dtype=np.float32)
        masks = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)
        cameras = np.zeros((num_views, 4, 4), dtype=np.float32)
        cam_pos = np.zeros((num_views, 3), dtype=np.float32)
        depths = np.zeros((num_views, im_size, im_size, 1), dtype=np.float32)

        assert (len(images) >= num_views)

        for k in range(num_views):
            im_file = images[k]
            img = imread(im_file)
            rgb = img[:, :, 0:3]
            mask = img[:, :, [3]]
            mask = mask / 255.0
            if True:  # white background
                mask_fg = np.repeat(mask, 3, 2)
                mask_bg = 1.0 - mask_fg
                rgb = rgb * mask_fg + np.ones(rgb.shape) * 255.0 * mask_bg
            # plt.imshow(rgb.astype(np.uint8))
            # plt.show()
            rgb = rgb / 255.0
            actual_size = rgb.shape[0]
            if im_size != actual_size:
                rgb = im_resize(rgb, (im_size, im_size), order=3)
                mask = im_resize(mask, (im_size, im_size), order=3)
            rgbs[k, :, :, :] = rgb
            masks[k, :, :, :] = mask

            fn = os.path.basename(im_file)
            img_idx = int(re.search(r'\d+', fn).group())

            if FLAGS.store_camera:
                cam_file = "{}/camera_{}.mat".format(im_dir, img_idx)
                cam_extr, pos = read_camera(cam_file)
                cameras[k, :, :] = cam_extr
                cam_pos[k, :] = pos

            if FLAGS.store_depth:
                depth_file = "{}/depth_{}.png".format(im_dir, img_idx)
                depth = loadDepth(depth_file)
                d_max = 10.0
                d_min = 0.0
                depth = (depth - d_min) / d_max
                depth_r = im_resize(depth, (im_size, im_size), order=0)
                depth_r = depth_r * d_max + d_min
                depths[k, :, :] = np.expand_dims(depth_r, -1)

        # Create a feature
        feature = {
            "image": _dtype_feature(rgbs),
            "mask": _dtype_feature(masks),
            "name": _string_feature(model)
        }
        if FLAGS.store_voxels:
            feature["vox"] = _dtype_feature(voxels)

        if FLAGS.store_camera:
            # feature["extrinsic"] = _dtype_feature(extrinsic)
            feature["extrinsic"] = _dtype_feature(cameras)
            feature["cam_pos"] = _dtype_feature(cam_pos)

        if FLAGS.store_depth:
            feature["depth"] = _dtype_feature(depths)

        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))
        # Serialize to string and write on the file
        writer.write(example.SerializeToString())
        """
        plt.imshow(np.squeeze(img[:,:,0:3]))
        plt.show()
        plt.imshow(np.squeeze(img[:,:,3]).astype(np.float32)/255.0)
        plt.show()
        """

    writer.close()
    sys.stdout.flush()
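
The _dtype_feature and _string_feature helpers used to build each example are not included in this excerpt. Plausible sketches, assuming float arrays are flattened into a FloatList and model names stored as a BytesList (the project's actual helpers may differ):

def _dtype_feature(arr):
    # Flatten a float numpy array into a tf.train.Feature of floats.
    return tf.train.Feature(
        float_list=tf.train.FloatList(value=arr.reshape(-1)))

def _string_feature(s):
    # Store a Python string as a single-element bytes feature.
    return tf.train.Feature(
        bytes_list=tf.train.BytesList(value=[s.encode("utf-8")]))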