Example #1
                # fig2 = tensor2disp(outputs[("disp", 0)] * (1 - SSIMMask), ind=1, vmax=0.1)
                # fig_combined = np.concatenate([np.array(fig1), np.array(fig2)], axis=0)
                # pil.fromarray(fig_combined).show()
                real_scale_disp = real_scale_disp * (1 - SSIMMask)
                stored_disp = real_scale_disp / 960  # normalize by the (assumed) 960-px image width
                save_loss(stored_disp[0, 0, :, :].cpu().numpy(), pathl)
                save_loss(stored_disp[1, 0, :, :].cpu().numpy(), pathr)

                duration = time.time() - start
                tottime = tottime + duration
                print("left time %f hours" %
                      (tottime / count * (len(filenames) - count) / 60 / 60))


if __name__ == "__main__":
    options = MonodepthOptions()
    parsed_command = options.parse()
    if parsed_command.load_weights_folders is not None:
        folders_to_eval = glob(parsed_command.load_weights_folders + '*/')
        # sort the weight folders by their numeric suffix (e.g. "weights_12")
        to_order = np.array(
            [int(f.split('/')[-2].split('_')[1]) for f in folders_to_eval])
        to_order_index = np.argsort(to_order)
        for i in to_order_index:
            print(folders_to_eval[i])
            parsed_command.load_weights_folder = folders_to_eval[i]
            evaluate(parsed_command)
    else:
        # alpha_distance_weight = np.arange(0.1, 2, 0.1)
        # assumed fallback: evaluate the single configured folder
        evaluate(parsed_command)
Example #2
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]), gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)


if __name__ == "__main__":
    options = MonodepthOptions()
    evaluate(options.parse())
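
# For reference: dump_xyz and compute_ate used above are assumed to be the
# helpers from Monodepth2's evaluate_pose.py; minimal sketches under that
# assumption:
import numpy as np


def dump_xyz(source_to_target_transformations):
    """Chain local poses and collect the camera translation at each step."""
    xyzs = []
    cam_to_world = np.eye(4)
    xyzs.append(cam_to_world[:3, 3])
    for source_to_target in source_to_target_transformations:
        cam_to_world = np.dot(cam_to_world, source_to_target)
        xyzs.append(cam_to_world[:3, 3])
    return xyzs


def compute_ate(gtruth_xyz, pred_xyz_o):
    """Absolute trajectory error with origin alignment and optimal scale."""
    offset = gtruth_xyz[0] - pred_xyz_o[0]
    pred_xyz = pred_xyz_o + offset[None, :]
    # solve for the best scale factor, then report translational RMSE
    scale = np.sum(gtruth_xyz * pred_xyz) / np.sum(pred_xyz ** 2)
    alignment_error = pred_xyz * scale - gtruth_xyz
    rmse = np.sqrt(np.sum(alignment_error ** 2)) / gtruth_xyz.shape[0]
    return rmse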
Example #3
        for _ in range(self.opt.num_epochs):
            do_train_one_step(self.cfg, self.models['encoder'].maskrcnn,
                              self.data_loader_maskrcnn,
                              self.optimizer_maskrcnn, self.scheduler_maskrcnn,
                              self.device, self.arguments, self.opt)
            self.run_one_step_simvodis()
            if self.arguments["iteration"] % self.opt.save_frequency == 0:
                self.save_model()
                self.epoch += 1


if __name__ == "__main__":
    from options import MonodepthOptions

    options = MonodepthOptions(withMaskRCNN=True)
    args = options.parse()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)
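
# For reference: synchronize() above is assumed to be the distributed barrier
# helper from maskrcnn_benchmark.utils.comm; a minimal sketch under that
# assumption:
import torch.distributed as dist


def synchronize():
    """No-op unless torch.distributed is initialized with more than one process."""
    if not dist.is_available() or not dist.is_initialized():
        return
    if dist.get_world_size() == 1:
        return
    dist.barrier()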
Example #4
# Copyright Niantic 2019. Patent Pending. All rights reserved.
#
# This software is licensed under the terms of the Monodepth2 licence
# which allows for non-commercial use only, the full terms of which are made
# available in the LICENSE file.

from __future__ import absolute_import, division, print_function

from trainer import Trainer
from options import MonodepthOptions
import warnings

warnings.filterwarnings("ignore")


options = MonodepthOptions()
opts = options.parse()


if __name__ == "__main__":
    trainer = Trainer(opts)
    trainer.train()
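
# Typical invocation, per the Monodepth2 README:
#   python train.py --model_name mono_model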
Example #5
            pred_depth *= ratio

        pred_depth[pred_depth < MIN_DEPTH] = MIN_DEPTH
        pred_depth[pred_depth > MAX_DEPTH] = MAX_DEPTH

        errors.append(compute_errors(gt_depth, pred_depth))

    if not opt.disable_median_scaling:
        ratios = np.array(ratios)
        med = np.median(ratios)
        print(" Scaling ratios | med: {:0.3f} | std: {:0.3f}".format(
            med, np.std(ratios / med)))

    mean_errors = np.array(errors).mean(0)

    print("\n  " +
          ("{:>8} | " * 7
           ).format("abs_rel", "sq_rel", "rmse", "rmse_log", "a1", "a2", "a3"))
    print(("&{: 8.3f}  " * 7).format(*mean_errors.tolist()) + "\\\\")
    print("\n-> Done!")


if __name__ == "__main__":
    options = MonodepthOptions()
    # evaluate(options.parse())
    opts, rest = options.parse()
    time_start = time.time()
    evaluate(opts)

    time_end = time.time()
    print("Time elapsed:", time_end - time_start)
                        fStr1 = "frame-{:06d}.pose.txt".format(inputs["index"].item())
                        gtPosePath1 = os.path.join("/content/drive/My Drive/monodepth2/splits/7scenes/chess/seq-01", "poses", fStr1)
                        gtPose1 = np.loadtxt(gtPosePath1).reshape(4, 4)
                        fStr2 = "frame-{:06d}.pose.txt".format(inputs["index"].item()+opt.frame_ids[1])
                        print("fStr1 = {}".format(fStr1))
                        print("fStr2 = {}".format(fStr2))
                        gtPosePath2 = os.path.join("/content/drive/My Drive/monodepth2/splits/7scenes/chess/seq-01", "poses", fStr2)
                        gtPose2 = np.loadtxt(gtPosePath2).reshape(4, 4)
                        gtRelativePose = calRelativePose(gtPose1, gtPose2)

                        outputs[("cam_T_cam", 0, f_i)] = torch.from_numpy(gtRelativePose.reshape(1, 4, 4).astype(np.float32)).cuda()


                self.generate_images_pred(inputs, outputs)
                pred = outputs[("color", opt.frame_ids[1], opt.scales[0])]
                target = inputs[("color", 0, opt.scales[0])]
                reprojection_losses.append(self.compute_reprojection_loss(pred, target, inputs["depth_gt"]))
                img_2 = transforms.ToPILImage()(outputs[("color", opt.frame_ids[1], 0)].squeeze().cpu()).convert('RGB')
                img_2.save("/content/drive/My Drive/code/monodepth2-1/assets/generate_gt_{}to{}.jpg".format(opt.frame_ids[1],0)) 


        print("-> Predictions saved to")
        print(("/content/drive/My Drive/code/monodepth2-1/assets/generate_gt_{}to{}.jpg".format(opt.frame_ids[1],0)))

        


if __name__ == "__main__":
    options = MonodepthOptions()
    evaluation = Evaluation(options.parse())
    evaluation.evaluate()
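
# calRelativePose is not shown in this snippet. Assuming the 7-Scenes pose
# files store camera-to-world matrices, one plausible definition (an
# assumption, not necessarily the author's) is:
import numpy as np


def calRelativePose(pose1, pose2):
    """Relative transform taking frame-2 camera coordinates into frame 1."""
    return np.dot(np.linalg.inv(pose1), pose2)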
Example #7
def main():
    options = MonodepthOptions()

    opts, rest = options.parse()

    if opts.server == "mcity":
        datapath_dict = {
            "kitti": os.path.join(script_path, "kitti_data"),
            "kitti_odom": None,
            "kitti_depth": os.path.join(script_path, "kitti_data"),
            "TUM": None,
            "lyft_1024":
            "/mnt/storage8t/minghanz/Datasets/lyft_kitti_seq/train"
        }
    elif opts.server == "sunny":
        datapath_dict = {
            "kitti": "/media/sda1/minghanz/datasets/kitti/kitti_data",
            "kitti_odom": None,
            "kitti_depth": "/media/sda1/minghanz/datasets/kitti/kitti_data",
            "TUM": None,
            "lyft_1024": "/media/sda1/minghanz/datasets/lyft_kitti/train",
            "vkitti": "/media/sda1/minghanz/datasets/vkitti2"
        }
        #  "lyft_1024": os.path.join(script_path, "data_download/train")} # ZMH: kitti_depth originally not shown as an option here
    elif opts.server == "home":
        datapath_dict = {
            "kitti": os.path.join(script_path, "kitti_data"),
            "kitti_odom": None,
            "kitti_depth": os.path.join(script_path, "kitti_data"),
            "TUM": None,
            "lyft_1024": None
        }
    else:
        raise ValueError("server {} not recognized.".format(opts.server))

    width_dict = {
        "kitti": 640,
        "kitti_odom": None,
        "kitti_depth": 640,
        "TUM": None,
        "lyft_1024": 512,
        "vkitti": 640
    }  # ZMH: kitti_depth originally not shown as an option here
    height_dict = {
        "kitti": 192,
        "kitti_odom": None,
        "kitti_depth": 192,
        "TUM": None,
        "lyft_1024": 224,
        "vkitti": 192
    }  # ZMH: kitti_depth originally not shown as an option here # change lyft height from 256 to 192 to 224

    # data_path = datapath_dict["kitti"]
    # width = width_dict["kitti"]
    # height = height_dict["kitti"]
    data_path = datapath_dict[opts.dataset_val[0]]
    width = width_dict[opts.dataset_val[0]]
    height = height_dict[opts.dataset_val[0]]

    if opts.ext_disp_to_eval is None:
        encoder, depth_decoder, dataloader, filenames = network_define(
            opts, data_path, height, width)

        if opts.save_pred_disps:
            output_path = os.path.join(
                opts.load_weights_folder,
                "disps_{}_split.npy".format(opts.eval_split))
            # print("-> Saving predicted disparities to ", output_path)
            disps = []
    else:
        filenames = readlines(
            os.path.join(splits_dir, opts.eval_split, split_file))
        # Load predictions from file
        print("-> Loading predictions from {}".format(opts.ext_disp_to_eval))
        disps = np.load(opts.ext_disp_to_eval)

    losses_train = {}
    losses_eval = {}
    for item in depth_metric_names:
        losses_train[item] = 0
        losses_eval[item] = 0
    total_n_sp = 0

    if opts.ext_disp_to_eval is None:
        with torch.no_grad():
            for i, data in enumerate(dataloader):
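                # note: GPU index 1 is hardcoded here (and as "cuda:1" below);
                # adjust for single-GPU machines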
                input_color = data[("color", 0, 0)].cuda(1)
                output = depth_decoder(encoder(input_color))
                disp = output[("disp", 0)]

                disp_np = disp.cpu().numpy()

                if opts.save_pred_disps:
                    disps.append(disp_np)

                line = filenames[i]
                gt_depth_train = gt_depth_from_line(line,
                                                    opts,
                                                    data_path,
                                                    mode="train").cuda(1)
                gt_depth_eval = gt_depth_from_line(line,
                                                   opts,
                                                   data_path,
                                                   mode="eval")

                # ## visualize to check the process is correct, can also be used for qualitative analysis (VKITTI2)
                # if i == 0:
                #     disp_im = (disp_np[0,0,:,:]*255).astype(np.uint8)
                #     # print(disp_np.shape, disp_np.max(), disp_np.min())
                #     img = pil.fromarray(disp_im, mode="L")
                #     img.save(os.path.join(opts.load_weights_folder, "{}.png".format(i)))

                #     gt_disp = depth_to_disp(gt_depth_train, opts.min_depth, opts.max_depth, opts.ref_depth, opts.depth_ref_mode )
                #     disp_np = gt_disp.cpu().numpy()
                #     disp_im = (disp_np[0,0,:,:]*255).astype(np.uint8)
                #     # print(disp_np.shape, disp_np.max(), disp_np.min())
                #     img = pil.fromarray(disp_im, mode="L")
                #     img.save(os.path.join(opts.load_weights_folder, "{}_gt.png".format(i)))

                #     rgb = input_color.cpu().detach().numpy().transpose(0,2,3,1)
                #     rgb = (rgb*255).astype(np.uint8)
                #     rgb_im = pil.fromarray(rgb[0], mode="RGB")
                #     rgb_im.save(os.path.join(opts.load_weights_folder, "{}_rgb.png".format(i)))

                loss_train = err_train.error_disp(disp, gt_depth_train, opts,
                                                  height, width)
                loss_eval = err_eval.error_disp(disp, gt_depth_eval, opts)

                for item in depth_metric_names:
                    losses_train[item] += loss_train[item]
                    losses_eval[item] += loss_eval[item]
                total_n_sp += 1

                # if i == 10:
                #     break
        if opts.save_pred_disps:
            disps_stack = np.stack(disps)
            np.save(output_path, disps_stack)
            print("-> Saved predicted disparities to ", output_path)

    else:
        for i, disp in enumerate(disps):
            # ## visualize to check the process is correct, can also be used for qualitative analysis (VKITTI2)
            # if i == 0:
            #     disp_im = (disp[0,0,:,:]*255).astype(np.uint8)
            #     print(disp_im.shape, disp_im.max(), disp_im.min())
            #     img = pil.fromarray(disp_im, mode="L")
            #     img.save(os.path.join(opts.load_weights_folder, "{}.png".format(i)))

            disp = torch.from_numpy(disp).to(device="cuda:1",
                                             dtype=torch.float32)

            if opts.ext_depth:
                disp = disp.unsqueeze(0).unsqueeze(0)

            line = filenames[i]
            gt_depth_train = gt_depth_from_line(line,
                                                opts,
                                                data_path,
                                                mode="train").cuda(1)
            gt_depth_eval = gt_depth_from_line(line,
                                               opts,
                                               data_path,
                                               mode="eval")

            loss_train = err_train.error_disp(disp, gt_depth_train, opts,
                                              height, width, opts.ext_depth)
            loss_eval = err_eval.error_disp(disp, gt_depth_eval, opts,
                                            opts.ext_depth)

            for item in depth_metric_names:
                losses_train[item] += loss_train[item]
                losses_eval[item] += loss_eval[item]
            total_n_sp += 1

    for item in depth_metric_names:
        losses_train[item] = losses_train[item] / total_n_sp
        losses_eval[item] = losses_eval[item] / total_n_sp
        print(item, "train:", losses_train[item], ", eval:", losses_eval[item])
    print("total # of samples:", total_n_sp)
Example #8
    def __init__(self, _host_frame, _target_frame):
        '''
        Initialize the randpattern-based photometric residual wrapper.
        :param _host_frame: numpy ndarray, H x W x 3 image.
        :param _target_frame: numpy ndarray image with the same dimensions.
        '''
        # load options
        options = MonodepthOptions()
        opts = options.parse()
        self.opt = opts
        self.num_input_frames = len(self.opt.frame_ids)
        # init model
        self.model_name = "mono_1024x320"

        download_model_if_doesnt_exist(self.model_name)
        self.encoder_path = os.path.join("models", self.model_name,
                                         "encoder.pth")
        self.depth_decoder_path = os.path.join("models", self.model_name,
                                               "depth.pth")
        self.pose_encoder_path = os.path.join("models", self.model_name,
                                              "pose_encoder.pth")
        self.pose_decoder_path = os.path.join("models", self.model_name,
                                              "pose.pth")

        # LOADING PRETRAINED MODEL
        self.encoder = networks.ResnetEncoder(18, False)
        self.depth_decoder = networks.DepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc, scales=range(4))
        self.pose_encoder = networks.ResnetEncoder(self.opt.num_layers, False,
                                                   2)
        # self.pose_encoder = networks.PoseCNN(self.num_input_frames if self.opt.pose_model_input == "all" else 2)
        self.pose_decoder = networks.PoseDecoder(self.pose_encoder.num_ch_enc,
                                                 1, 2)
        # self.pose_decoder = networks.PoseDecoder(self.pose_encoder.num_ch_enc, num_input_features=1,
        #                                          num_frames_to_predict_for=2)

        self.loaded_dict_enc = torch.load(self.encoder_path,
                                          map_location='cpu')
        self.filtered_dict_enc = {
            k: v
            for k, v in self.loaded_dict_enc.items()
            if k in self.encoder.state_dict()
        }
        self.encoder.load_state_dict(self.filtered_dict_enc)

        self.loaded_dict_pose_enc = torch.load(self.pose_encoder_path,
                                               map_location='cpu')
        self.filtered_dict_pose_enc = {
            k: v
            for k, v in self.loaded_dict_pose_enc.items()
            if k in self.pose_encoder.state_dict()
        }
        self.pose_encoder.load_state_dict(self.filtered_dict_pose_enc)

        self.loaded_dict = torch.load(self.depth_decoder_path,
                                      map_location='cpu')
        self.depth_decoder.load_state_dict(self.loaded_dict)

        self.loaded_dict_pose = torch.load(self.pose_decoder_path,
                                           map_location='cpu')
        self.pose_decoder.load_state_dict(self.loaded_dict_pose)

        self.encoder.eval()
        self.depth_decoder.eval()

        self.pose_encoder.eval()
        self.pose_decoder.eval()
        self.isgood = []

        # define frames
        self.host_frame = _host_frame
        self.target_frame = _target_frame
        self.host_frame_dx, self.host_frame_dy = image_gradients(
            self.host_frame)
        self.target_frame_dx, self.target_frame_dy = image_gradients(
            self.target_frame)

        # dso's pattern:
        self.residual_pattern = np.array([
            [0, 0],
            [-2, 0],
            [2, 0],
            [-1, -1],
            [1, 1],
            [-1, 1],
            [1, -1],
            [0, 2],
            [0, -2],
        ])
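
# image_gradients is not shown in this snippet; it is assumed to return
# horizontal and vertical finite differences of an H x W x 3 numpy image.
# A minimal sketch under that assumption:
import numpy as np


def image_gradients(img):
    """Forward-difference gradients; the last row/column are left at zero."""
    img = img.astype(np.float32)
    dx = np.zeros_like(img)
    dy = np.zeros_like(img)
    dx[:, :-1] = img[:, 1:] - img[:, :-1]
    dy[:-1, :] = img[1:] - img[:-1]
    return dx, dy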
Example #9
            #     z[selector_pos] = z[selector_pos] / pos_bar / 2
            #
            # if np.sum(selector_neg) > 1:
            #     neg_bar = -bar
            #     z[selector_neg] = -z[selector_neg] / neg_bar / 2
            #
            # znormed = z + 0.5
            # colorMap = cm(znormed)[:, 0:3]
            #
            # plt.figure(figsize=(12, 9), dpi=120, facecolor='w', edgecolor='k')
            # plt.imshow(tensor2rgb(rgbi, ind=0))
            # plt.scatter(xx[valmask], yy[valmask], c=colorMap, s=8)
            # plt.savefig(os.path.join('/media/shengjie/c9c81c9f-511c-41c6-bfe0-2fc19666fb32/Visualizations/Project_SemanDepth/vls_shapeErrType', str(count) + '.png'))
            # plt.close()

            # hthetad, vthetad = localgeomDict[acckey].get_theta(depthmap=preddepthi)
            # ratiohd, ratiohld, ratiovd, ratiovld = localgeomDict[acckey].get_ratio(htheta=hthetad, vtheta=vthetad)
            # logdepthd = torch.log(preddepthi)
            # valindic = preddepthi > 0
            # lossrec = torch.zeros_like(logdepthd)
            # inplaceShapeLoss_cuda.inplaceShapeLoss_integration(logdepthd, ratiohld, ratiovld, valindic.int(), lossrec, 1, 1)

    totloss = totloss / len(filenames)
    print(totloss)

if __name__ == "__main__":
    options = MonodepthOptions()
    args = options.parse()
    evaluate(args)