def save_network(self, dcn, optimizer, iteration, logging_dict=None):
        """
        Saves network parameters to logging directory.

        Writes <iteration padded to 6 digits>.pth (network state dict) and
        the matching .pth.opt (optimizer state dict). If logging_dict is
        given, a per-iteration "_log_history.yaml" and the running
        "loss.yaml" are written as well.
        :return:
        :rtype: None
        """

        padded = utils.getPaddedString(iteration, width=6)
        network_param_file = os.path.join(self._logging_dir, padded + ".pth")
        torch.save(dcn.state_dict(), network_param_file)
        torch.save(optimizer.state_dict(), network_param_file + ".opt")

        if logging_dict is None:
            return

        # also save loss history stuff
        history_file = os.path.join(self._logging_dir,
                                    padded + "_log_history.yaml")
        utils.saveToYaml(logging_dict, history_file)

        loss_file = os.path.join(self._logging_dir, 'loss.yaml')
        utils.saveToYaml(self._get_current_loss(logging_dict), loss_file)
# --- Exemplo n.º 2 (code-scrape separator; the stray "0" was a vote count, not code) ---
def extract_descriptor_images_for_scene(dcn,
                                        dataset,
                                        scene_name,
                                        save_dir,
                                        overwrite=False):
    """
    Save the descriptor images for a scene at the given directory.

    Every image in the scene is pushed through the dense correspondence
    network and the resulting descriptor image is written as
    <padded idx>_descriptor.npy inside save_dir.

    :param dcn: dense correspondence network exposing
        forward_single_image_tensor
    :type dcn:
    :param dataset: dataset providing pose data and rgb images for the scene
    :type dataset:
    :param scene_name: name of the scene to process
    :type scene_name: str
    :param save_dir: Absolute path of where to save images
    :type save_dir: str
    :param overwrite: if True an existing save_dir is removed first;
        otherwise a ValueError is raised
    :type overwrite: bool
    :return:
    :rtype: None
    """

    pose_data = dataset.get_pose_data(scene_name)
    # BUGFIX: on Python 3 dict.keys() returns a view with no .sort();
    # sorted() works on both Python 2 lists and Python 3 views
    image_idxs = sorted(pose_data.keys())

    num_images = len(pose_data)

    logging_frequency = 50
    start_time = time.time()

    # refuse to clobber an existing save_dir unless overwrite was requested
    if os.path.exists(save_dir):
        if not overwrite:
            raise ValueError(
                "save_dir %s already exists and overwrite is False" %
                (save_dir))
        else:
            shutil.rmtree(save_dir)

    os.makedirs(save_dir)

    for counter, img_idx in enumerate(image_idxs):

        if (counter % logging_frequency) == 0:
            print("processing image %d of %d" % (counter, num_images))

        rgb_img = dataset.get_rgb_image_from_scene_name_and_idx(
            scene_name, img_idx)

        # note that this has already been normalized
        rgb_img_tensor = dataset.rgb_image_to_tensor(rgb_img)
        res = dcn.forward_single_image_tensor(rgb_img_tensor).data
        descriptor_image_filename = utils.getPaddedString(
            img_idx,
            width=SpartanDataset.PADDED_STRING_WIDTH) + "_descriptor.npy"

        full_filepath = os.path.join(save_dir, descriptor_image_filename)
        # move to CPU before saving so this works for GPU tensors too
        np.save(full_filepath, res.cpu())

    elapsed_time = time.time() - start_time
    print("computing descriptor images took %d seconds" % (elapsed_time))
 def mesh_cells_image_filename(self, img_idx):
     """
     Returns the full filename for the cell labels image, i.e.
     <rendered_images_dir>/<padded idx>_mesh_cells.png.
     :param img_idx:
     :type img_idx:
     :return:
     :rtype:
     """
     basename = "%s_mesh_cells.png" % utils.getPaddedString(img_idx)
     return os.path.join(self.rendered_images_dir, basename)
    def render_depth_images(self, output_dir=None, rendered_images_dir=None):
        """
        Note: This is actually rendering against whatever is saved as
        self.foreground_reconstruction.

        Renders a depth image for every camera pose in the log and writes
        it to processed/rendered_images/000000_depth.png (padded index).

        :param output_dir: defaults to <data_dir>/image_masks
            (NOTE(review): computed but not used by this method — confirm)
        :param rendered_images_dir: defaults to <data_dir>/rendered_images
        :return: None
        """

        if output_dir is None:
            output_dir = os.path.join(self.foreground_reconstruction.data_dir,
                                      'image_masks')

        if rendered_images_dir is None:
            rendered_images_dir = os.path.join(
                self.foreground_reconstruction.data_dir, 'rendered_images')

        # FIX: start_time was assigned twice; once is enough
        start_time = time.time()

        # read in each image in the log
        camera_pose_data = self.foreground_reconstruction.kinematics_pose_data
        img_file_extension = 'png'

        num_poses = camera_pose_data.num_poses()

        logging_rate = 50

        # FIX: dict.iteritems() and print statements are Python-2 only;
        # items()/print() behave identically here on both major versions
        for counter, (idx, value) in enumerate(
                camera_pose_data.pose_dict.items()):
            if (counter % logging_rate) == 0:
                print("Rendering depth image for pose %d of %d" %
                      (counter + 1, num_poses))

            camera_to_world = \
                self.foreground_reconstruction.get_camera_to_world(idx)
            self.setCameraTransform(camera_to_world)
            depth_img = self.depthScanners[
                'foreground'].getDepthImageAsNumpyArray()
            depth_img_filename = os.path.join(
                rendered_images_dir,
                utils.getPaddedString(idx) + '_depth' + "." +
                img_file_extension)

            cv2.imwrite(depth_img_filename, depth_img)

        end_time = time.time()

        print("rendering depth images took %d seconds" % (end_time -
                                                          start_time))
    def save_network(self, dcn, optimizer, iteration, logging_dict=None):
        """
        Persists the network and optimizer parameters (and, optionally,
        the loss history) to the logging directory.
        :return:
        :rtype: None
        """

        prefix = utils.getPaddedString(iteration, width=6)
        network_param_file = os.path.join(self._logging_dir, prefix + ".pth")
        torch.save(dcn.state_dict(), network_param_file)
        torch.save(optimizer.state_dict(), network_param_file + ".opt")

        # also save loss history stuff
        if logging_dict is not None:
            history_path = os.path.join(self._logging_dir,
                                        prefix + "_log_history.yaml")
            utils.saveToYaml(logging_dict, history_path)

            loss_path = os.path.join(self._logging_dir, 'loss.yaml')
            utils.saveToYaml(self._get_current_loss(logging_dict), loss_path)
    def load_pretrained(self, model_folder, iteration=None):
        """
        Loads network and optimizer parameters from a previous training run.

        Note: It is up to the user to ensure that the model parameters match.
        e.g. width, height, descriptor dimension etc.

        :param model_folder: location of the folder containing the param files 001000.pth. Can be absolute or relative path. If relative then it is relative to pdc/trained_models/
        :type model_folder:
        :param iteration: which index to use, e.g. 3500, if None it loads the latest one
        :type iteration:
        :return: iteration
        :rtype:
        """

        # resolve relative paths against pdc/trained_models/
        if not os.path.isdir(model_folder):
            model_folder = os.path.join(utils.getPdcPath(), "trained_models",
                                        model_folder)

        if iteration is not None:
            prefix = utils.getPaddedString(iteration, width=6)
            model_param_file = prefix + ".pth"
            optim_param_file = prefix + ".pth.opt"
        else:
            # latest checkpoint wins: zero-padded names sort lexicographically
            files = os.listdir(model_folder)
            model_param_file = sorted(fnmatch.filter(files, '*.pth'))[-1]
            optim_param_file = sorted(fnmatch.filter(files, '*.pth.opt'))[-1]
            iteration = int(model_param_file.split(".")[0])

        print("model_param_file", model_param_file)
        model_param_file = os.path.join(model_folder, model_param_file)
        optim_param_file = os.path.join(model_folder, optim_param_file)

        # rebuild the network/optimizer, then restore their state dicts
        self._dcn = self.build_network()
        self._dcn.load_state_dict(torch.load(model_param_file))
        self._dcn.cuda()
        self._dcn.train()

        self._optimizer = self._construct_optimizer(self._dcn.parameters())
        self._optimizer.load_state_dict(torch.load(optim_param_file))

        return iteration
    def load_pretrained(self, model_folder, iteration=None):
        """
        Loads network and optimizer parameters from a previous training run.

        Note: It is up to the user to ensure that the model parameters match.
        e.g. width, height, descriptor dimension etc.

        :param model_folder: location of the folder containing the param files 001000.pth. Can be absolute or relative path. If relative then it is relative to pdc/trained_models/
        :type model_folder:
        :param iteration: which index to use, e.g. 3500, if None it loads the latest one
        :type iteration:
        :return: iteration
        :rtype:
        """

        if not os.path.isdir(model_folder):
            pdc_path = utils.getPdcPath()
            model_folder = os.path.join(pdc_path, "trained_models",
                                        model_folder)

        # find idx.pth and idx.pth.opt files
        if iteration is None:
            files = os.listdir(model_folder)
            model_param_file = sorted(fnmatch.filter(files, '*.pth'))[-1]
            iteration = int(model_param_file.split(".")[0])
            optim_param_file = sorted(fnmatch.filter(files, '*.pth.opt'))[-1]
        else:
            prefix = utils.getPaddedString(iteration, width=6)
            model_param_file = prefix + ".pth"
            optim_param_file = prefix + ".pth.opt"

        # FIX: was a Python-2-only print statement; use the function form
        # for consistency with the rest of the file
        print("model_param_file", model_param_file)
        model_param_file = os.path.join(model_folder, model_param_file)
        optim_param_file = os.path.join(model_folder, optim_param_file)

        self._dcn = self.build_network()
        self._dcn.load_state_dict(torch.load(model_param_file))
        self._dcn.cuda()
        self._dcn.train()

        self._optimizer = self._construct_optimizer(self._dcn.parameters())
        self._optimizer.load_state_dict(torch.load(optim_param_file))

        return iteration
    def mesh_descriptors_filename(self, network_name, img_idx):
        """
        Returns the full filename for the .npz file that contains two arrays.

        .npz reference https://docs.scipy.org/doc/numpy-1.14.0/reference/generated/numpy.savez.html#numpy.savez

        D = descriptor dimension

        - cell_ids: np.array of size N, dtype=np.int64
        - cell_descriptors: np.array with shape [N, D], dtype=np.float64

        :param img_idx:
        :type img_idx:
        :return:
        :rtype:
        """

        basename = "%s_mesh_descriptors.npz" % utils.getPaddedString(img_idx)
        return os.path.join(self.mesh_descriptors_dir(network_name), basename)
    def render_depth_images(self, output_dir=None, rendered_images_dir=None):
        """
        Renders a depth image for every camera pose in the log and writes it
        to rendered_images_dir as <padded idx>_depth.png.

        FIX: the previous docstring ("Run the mask generation algorithm")
        described a different method.

        :param output_dir: defaults to <data_dir>/image_masks
            (NOTE(review): computed but not used by this method — confirm)
        :param rendered_images_dir: defaults to <data_dir>/rendered_images
        :return: None
        """

        if output_dir is None:
            output_dir = os.path.join(
                self.foreground_reconstruction.data_dir, 'image_masks')

        if rendered_images_dir is None:
            rendered_images_dir = os.path.join(
                self.foreground_reconstruction.data_dir, 'rendered_images')

        start_time = time.time()

        # read in each image in the log
        camera_pose_data = self.foreground_reconstruction.kinematics_pose_data
        img_file_extension = 'png'

        num_poses = camera_pose_data.num_poses()

        logging_rate = 50

        # FIX: dict.iteritems() and print statements are Python-2 only;
        # items()/print() behave identically here on both major versions
        for counter, (idx, value) in enumerate(
                camera_pose_data.pose_dict.items()):
            if (counter % logging_rate) == 0:
                print("Rendering depth image for pose %d of %d" %
                      (counter + 1, num_poses))

            camera_to_world = \
                self.foreground_reconstruction.get_camera_to_world(idx)
            self.setCameraTransform(camera_to_world)
            depth_img = self.depthScanners[
                'foreground'].getDepthImageAsNumpyArray()
            depth_img_filename = os.path.join(
                rendered_images_dir,
                utils.getPaddedString(idx) + '_depth' + "." +
                img_file_extension)

            cv2.imwrite(depth_img_filename, depth_img)

        end_time = time.time()

        print("rendering depth images took %d seconds" % (end_time -
                                                          start_time))
    def run(self, output_dir=None, rendered_images_dir=None):
        """
        Run the mask generation algorithm.

        For every camera pose in the log this renders a foreground mask,
        a visible (0/255) mask, and a cropped depth image, writing them to
        output_dir and rendered_images_dir respectively.

        :param output_dir: defaults to <data_dir>/image_masks
        :param rendered_images_dir: defaults to <data_dir>/rendered_images
        :return: None
        """

        if output_dir is None:
            output_dir = os.path.join(self.foreground_reconstruction.data_dir,
                                      'image_masks')

        if rendered_images_dir is None:
            rendered_images_dir = os.path.join(
                self.foreground_reconstruction.data_dir, 'rendered_images')

        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        if not os.path.isdir(rendered_images_dir):
            os.makedirs(rendered_images_dir)

        start_time = time.time()

        # read in each image in the log
        camera_pose_data = self.foreground_reconstruction.kinematics_pose_data
        img_file_extension = 'png'

        num_poses = camera_pose_data.num_poses()

        logging_rate = 50

        # FIX: dict.iteritems() and print statements are Python-2 only;
        # items()/print() behave identically here on both major versions
        for counter, (idx, value) in enumerate(
                camera_pose_data.pose_dict.items()):
            if (counter % logging_rate) == 0:
                print("Rendering mask for pose %d of %d" % (counter + 1,
                                                            num_poses))

            mask_image_filename = utils.getPaddedString(
                idx) + "_mask" + "." + img_file_extension
            mask_image_full_filename = os.path.join(output_dir,
                                                    mask_image_filename)

            camera_to_world = \
                self.foreground_reconstruction.get_camera_to_world(idx)
            self.setCameraTransform(camera_to_world)
            d = self.computeForegroundMaskUsingCropStrategy(visualize=False)

            mask = d['mask']
            visible_mask = mask * 255

            visible_mask_filename = os.path.join(
                output_dir,
                utils.getPaddedString(idx) + '_visible_mask' + "." +
                img_file_extension)

            depth_img_filename = os.path.join(
                rendered_images_dir,
                utils.getPaddedString(idx) + '_depth_cropped' + "." +
                img_file_extension)

            # save the images
            cv2.imwrite(mask_image_full_filename, mask)
            cv2.imwrite(visible_mask_filename, visible_mask)

            # make sure to save this as uint16
            depth_img = d['depth_img_foreground_raw']
            cv2.imwrite(depth_img_filename, depth_img)

        end_time = time.time()

        print("rendering masks took %d seconds" % (end_time - start_time))
    def run(self, output_dir=None, rendered_images_dir=None):
        """
        Run the mask generation algorithm.

        Renders, for each camera pose in the log: a foreground mask
        (<idx>_mask.png), a visible 0/255 mask (<idx>_visible_mask.png)
        in output_dir, and a cropped uint16 depth image
        (<idx>_depth_cropped.png) in rendered_images_dir.

        :param output_dir: defaults to <data_dir>/image_masks
        :param rendered_images_dir: defaults to <data_dir>/rendered_images
        :return: None
        """

        if output_dir is None:
            output_dir = os.path.join(
                self.foreground_reconstruction.data_dir, 'image_masks')

        if rendered_images_dir is None:
            rendered_images_dir = os.path.join(
                self.foreground_reconstruction.data_dir, 'rendered_images')

        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

        if not os.path.isdir(rendered_images_dir):
            os.makedirs(rendered_images_dir)

        start_time = time.time()

        # read in each image in the log
        camera_pose_data = self.foreground_reconstruction.kinematics_pose_data
        img_file_extension = 'png'

        num_poses = camera_pose_data.num_poses()

        logging_rate = 50

        # FIX: dict.iteritems() and print statements are Python-2 only;
        # items()/print() behave identically here on both major versions
        for counter, (idx, value) in enumerate(
                camera_pose_data.pose_dict.items()):
            if (counter % logging_rate) == 0:
                print("Rendering mask for pose %d of %d" % (counter + 1,
                                                            num_poses))

            mask_image_filename = (utils.getPaddedString(idx) + "_mask" +
                                   "." + img_file_extension)
            mask_image_full_filename = os.path.join(output_dir,
                                                    mask_image_filename)

            camera_to_world = \
                self.foreground_reconstruction.get_camera_to_world(idx)
            self.setCameraTransform(camera_to_world)
            d = self.computeForegroundMaskUsingCropStrategy(visualize=False)

            mask = d['mask']
            visible_mask = mask * 255

            visible_mask_filename = os.path.join(
                output_dir,
                utils.getPaddedString(idx) + '_visible_mask' + "." +
                img_file_extension)

            depth_img_filename = os.path.join(
                rendered_images_dir,
                utils.getPaddedString(idx) + '_depth_cropped' + "." +
                img_file_extension)

            # save the images
            cv2.imwrite(mask_image_full_filename, mask)
            cv2.imwrite(visible_mask_filename, visible_mask)

            # make sure to save this as uint16
            depth_img = d['depth_img_foreground_raw']
            cv2.imwrite(depth_img_filename, depth_img)

        end_time = time.time()

        print("rendering masks took %d seconds" % (end_time - start_time))
 def descriptor_image_filename(img_idx):
     """
     Returns the basename of the descriptor-image .npy file for img_idx,
     i.e. "<padded idx>_descriptor_image.npy".

     NOTE(review): takes no self despite method-level indentation —
     presumably a @staticmethod whose decorator was lost; confirm against
     the original class.
     """
     return utils.getPaddedString(img_idx) + "_descriptor_image.npy"