def load_dataset(self):
        """
        Loads a dataset and constructs a DataLoader for the training data.
        Additionally creates a dataset and DataLoader for the test data.
        :return:
        :rtype:
        """

        batch_size = self._config['training']['batch_size']
        num_workers = self._config['training']['num_workers']

        if self._dataset is None:
            self._dataset = SpartanDataset.make_default_10_scenes_drill()

        
        # self._dataset.load_all_pose_data()
        self._dataset.load_all_knots_info()
        self._dataset.set_parameters_from_training_config(self._config)

        self._data_loader = torch.utils.data.DataLoader(self._dataset, batch_size=batch_size,
                                          shuffle=True, num_workers=num_workers, drop_last=True)

        # create a test dataset
        if self._config["training"]["compute_test_loss"]:
            if self._dataset_test is None:
                self._dataset_test = SpartanDataset(mode="test", config=self._dataset.config)

            
            self._dataset_test.load_all_pose_data()
            self._dataset_test.set_parameters_from_training_config(self._config)

            self._data_loader_test = torch.utils.data.DataLoader(self._dataset_test, batch_size=batch_size,
                                          shuffle=True, num_workers=2, drop_last=True)
    def load_dataset(self):
        """
        Loads a dataset and constructs a DataLoader for the training data.
        Additionally creates a dataset and DataLoader for the test data.
        :return:
        :rtype:
        """

        batch_size = self._config['training']['batch_size']
        num_workers = self._config['training']['num_workers']

        if self._dataset is None:
            self._dataset = SpartanDataset.make_default_10_scenes_drill()

        
        self._dataset.load_all_pose_data()
        self._dataset.set_parameters_from_training_config(self._config)

        self._data_loader = torch.utils.data.DataLoader(self._dataset, batch_size=batch_size,
                                          shuffle=True, num_workers=num_workers, drop_last=True)

        # create a test dataset
        if self._config["training"]["compute_test_loss"]:
            if self._dataset_test is None:
                self._dataset_test = SpartanDataset(mode="test", config=self._dataset.config)

            
            self._dataset_test.load_all_pose_data()
            self._dataset_test.set_parameters_from_training_config(self._config)

            self._data_loader_test = torch.utils.data.DataLoader(self._dataset_test, batch_size=batch_size,
                                          shuffle=True, num_workers=2, drop_last=True)
Example No. 3
    def load_specific_dataset(self):
        dataset_config_filename = os.path.join(
            utils.getDenseCorrespondenceSourceDir(), 'config',
            'dense_correspondence', 'dataset', 'composite',
            'rope_nonrigid_412vert_only.yaml')

        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)
    def load_specific_dataset(self):
        dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
                                            'dataset', 'composite', 'caterpillar_only_9.yaml')

        # dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
        #                                        'dense_correspondence',
        #                                        'dataset', 'composite', '4_shoes_all.yaml')
        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)
    def load_dataset_from_config(self, config):
        """
        Loads train and test datasets from the given config
        :param config: dict loaded from a YAML file
        :type config: dict
        :return: None
        :rtype:
        """
        self._dataset = SpartanDataset(mode="train", config=config)
        self._dataset_test = SpartanDataset(mode="test", config=config)
        self.load_dataset()
def pdc_train(dataset_config, train_config, dataset_name, logging_dir, num_iterations, dimension):

    # print("training args")
    # print(dataset_config)
    # print(train_config)
    # print(dataset_name)
    # print(logging_dir)
    # print(num_iterations)
    # print(dimension)
    print('dataset_name: %s' % dataset_name)

    dataset = SpartanDataset(config=dataset_config)

    d = dimension # the descriptor dimension
    name = dataset_name.split('/')[-1] + "_%d" %(d)
    train_config["training"]["logging_dir_name"] = name

    print('logging dir name: %s' % name)

    train_config["training"]["logging_dir"] = logging_dir
    train_config["dense_correspondence_network"]["descriptor_dimension"] = d
    train_config["training"]["num_iterations"] = num_iterations
    print "training descriptor of dimension %d" %(d)
    start_time = time.time()
    train = DenseCorrespondenceTraining(dataset=dataset, config=train_config)
    train.run()
    end_time = time.time()
    print "finished training descriptor of dimension %d using time %.2f seconds" %(d, end_time-start_time)
Example No. 7
def get_different_object_loss(pixelwise_contrastive_loss, image_a_pred,
                              image_b_pred, blind_non_matches_a,
                              blind_non_matches_b):
    """
    Simple wrapper for pixelwise_contrastive_loss functions.  Args and return args documented above in get_loss()
    """

    scale_by_hard_negatives = pixelwise_contrastive_loss.config[
        "scale_by_hard_negatives_DIFFERENT_OBJECT"]
    blind_non_match_loss = zero_loss()
    if not (SpartanDataset.is_empty(blind_non_matches_a.data)):
        M_descriptor = pixelwise_contrastive_loss.config["M_background"]

        blind_non_match_loss, num_hard_negatives =\
            pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred,
                                                                    blind_non_matches_a, blind_non_matches_b,
                                                                    M_descriptor=M_descriptor)

        if scale_by_hard_negatives:
            scale_factor = max(num_hard_negatives, 1)
        else:
            scale_factor = max(len(blind_non_matches_a), 1)

        blind_non_match_loss = 1.0 / scale_factor * blind_non_match_loss
    loss = blind_non_match_loss
    return loss, zero_loss(), zero_loss(), zero_loss(), blind_non_match_loss
    def load_specific_dataset(self):
        # dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
        #                                        'dataset', 'composite', 'hats_3_demo_composite.yaml')

        dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                               'dense_correspondence',
                                               'dataset', 'composite', '4_shoes_all.yaml')

        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)
    def load_dataset_from_config(self, config):
        """
        Loads train and test datasets from the given config
        :param config: dict loaded from a YAML file
        :type config: dict
        :return: None
        :rtype:
        """
        self._dataset = SpartanDataset(mode="train", config=config)
        self._dataset_test = SpartanDataset(mode="test", config=config)
        self.load_dataset()
    def load_training_dataset(self):
        """
        Loads the dataset that this was trained on
        :return: a dataset object, loaded with the config as set in the dataset.yaml
        :rtype: SpartanDataset
        """

        network_params_folder = self.path_to_network_params_folder
        network_params_folder = utils.convert_to_absolute_path(network_params_folder)
        dataset_config_file = os.path.join(network_params_folder, 'dataset.yaml')
        config = utils.getDictFromYamlFilename(dataset_config_file)
        return SpartanDataset(config_expanded=config)
Example No. 11
    def __init__(self, config_filename='shoes_all.yaml'):

        with HiddenPrints():

            self.config_filename = os.path.join(
                utils.getDenseCorrespondenceSourceDir(), 'config',
                'dense_correspondence', 'dataset', 'composite',
                config_filename)
            self.train_config_filename = os.path.join(
                utils.getDenseCorrespondenceSourceDir(), 'config',
                'dense_correspondence', 'training', 'training.yaml')

            self.config = utils.getDictFromYamlFilename(self.config_filename)
            self.train_config = utils.getDictFromYamlFilename(
                self.train_config_filename)

            self.dataset = SpartanDataset(config=self.config)
            self.dataset.set_parameters_from_training_config(self.train_config)

        # holds centroid and radius for each scene
        # these are for min and max z values currently. maybe include x, y, and z in the future.
        # self.centroid_and_radius[scene_name]["centroid"] or self.centroid_and_radius[scene_name]["radius"]
        self.centroid_and_radius = {}
def evaluate_model(model_lst, output_dir=None, num_image_pairs=100, gt_dataset_config=None):
    if gt_dataset_config is not None:
        gt_dataset = SpartanDataset(config_expanded=gt_dataset_config)
    else:
        gt_dataset = None
    
    DCE = DenseCorrespondenceEvaluation

    for subdir in model_lst:
        print("evaluate model {}".format(subdir))
        start_time = time.time()
        output_subdir = os.path.join(utils.get_data_dir(), output_dir, subdir.split('/')[-1])
        DCE.run_evaluation_on_network(model_folder=subdir, compute_descriptor_statistics=True, cross_scene=False,
            output_dir=output_subdir, num_image_pairs=num_image_pairs, dataset=gt_dataset)
        end_time = time.time()
        print("evaluation takes %.2f seconds" %(end_time - start_time))
def get_same_object_across_scene_loss(pixelwise_contrastive_loss, image_a_pred, image_b_pred,
                              blind_non_matches_a, blind_non_matches_b):
    """
    Simple wrapper for pixelwise_contrastive_loss functions.  Args and return args documented above in get_loss()
    """
    blind_non_match_loss = zero_loss()
    loss = zero_loss()
    if not SpartanDataset.is_empty(blind_non_matches_a.data):
        blind_non_match_loss, num_hard_negatives = \
            pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred,
                                                                    blind_non_matches_a, blind_non_matches_b,
                                                                    M_descriptor=pixelwise_contrastive_loss.config["M_masked"],
                                                                    invert=True)

        # scale by the number of hard negatives (or the number of non-matches) so the
        # loss is comparable across samples, mirroring get_different_object_loss() above
        if pixelwise_contrastive_loss.config["scale_by_hard_negatives"]:
            scale_factor = max(num_hard_negatives, 1)
        else:
            scale_factor = max(len(blind_non_matches_a), 1)

        loss = 1.0 / scale_factor * blind_non_match_loss

    return loss, zero_loss(), zero_loss(), zero_loss(), blind_non_match_loss
    def load_configuration(self):
        # config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
        config_filename = os.path.join(DIR_PROJ, 'config',
                                       'dense_correspondence', 'dataset',
                                       'composite', 'caterpillar_only_9.yaml')
        config = utils.getDictFromYamlFilename(config_filename)
        # train_config_file = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
        train_config_file = os.path.join(DIR_PROJ, 'config',
                                         'dense_correspondence', 'training',
                                         'training.yaml')
        self.train_config = utils.getDictFromYamlFilename(train_config_file)
        self.dataset = SpartanDataset(config=config)

        logging_dir = "code/data_volume/pdc/trained_models/tutorials"
        num_iterations = 3500
        descr_dim = 3  # the descriptor dimension
        self.train_config["training"][
            "logging_dir_name"] = "caterpillar_%d" % (descr_dim)
        self.train_config["training"]["logging_dir"] = logging_dir
        self.train_config["dense_correspondence_network"][
            "descriptor_dimension"] = descr_dim
        self.train_config["training"]["num_iterations"] = num_iterations
Example No. 15
import dense_correspondence_manipulation.utils.utils as utils
dc_source_dir = utils.getDenseCorrespondenceSourceDir()
sys.path.append(dc_source_dir)
sys.path.append(
    os.path.join(dc_source_dir, "dense_correspondence",
                 "correspondence_tools"))
from dense_correspondence.dataset.spartan_dataset_masked import SpartanDataset, ImageType

from dense_correspondence_manipulation.simple_pixel_correspondence_labeler.annotate_correspondences import label_colors, draw_reticle, pil_image_to_cv2, drawing_scale_config

config_filename = os.path.join(
    utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
    'dataset', 'composite', 'caterpillar_baymax_starbot_onlymulti_front.yaml')
config = utils.getDictFromYamlFilename(config_filename)
sd = SpartanDataset(config=config)
sd.set_train_mode()

annotated_data_yaml_filename = os.path.join(os.getcwd(),
                                            "new_annotated_pairs.yaml")
annotated_data = utils.getDictFromYamlFilename(annotated_data_yaml_filename)

index_of_pair_to_display = 0


def draw_points(img, img_points_picked):
    for index, img_point in enumerate(img_points_picked):
        color = label_colors[index % len(label_colors)]
        draw_reticle(img, int(img_point["u"]), int(img_point["v"]), color)

class HeatmapVisualization(object):

    def __init__(self, config):
        self._config = config
        self._dce = DenseCorrespondenceEvaluation(EVAL_CONFIG)
        self._load_networks()
        self._reticle_color = COLOR_GREEN
        # self.load_specific_dataset() # uncomment if you want to load a specific dataset

    def _load_networks(self):
        # we will use the dataset for the first network in the series
        self._dcn_dict = dict()

        self._dataset = None
        self._network_reticle_color = dict()

        for idx, network_name in enumerate(self._config["networks"]):
            dcn = self._dce.load_network_from_config(network_name)
            dcn.eval()
            self._dcn_dict[network_name] = dcn
            # self._network_reticle_color[network_name] = label_colors[idx]

            if len(self._config["networks"]) == 1:
                self._network_reticle_color[network_name] = COLOR_RED
            else:
                self._network_reticle_color[network_name] = label_colors[idx]

            if self._dataset is None:
                self._dataset = dcn.load_training_dataset()

    def load_specific_dataset(self):
        # dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
        #                                        'dataset', 'composite', 'hats_3_demo_composite.yaml')

        dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                               'dense_correspondence',
                                               'dataset', 'composite', '4_shoes_all.yaml')

        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)

    def get_random_image_pair(self):
        object_id = self._dataset.get_random_object_id()
        scene_name_a = self._dataset.get_random_single_object_scene_name(object_id)
        scene_name_b = self._dataset.get_different_scene_for_object(object_id, scene_name_a)

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        # image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def get_random_image_pair_across_object(self):
        """
        Gets cross object image pairs
        :param randomize:
        :type randomize:
        :return:
        :rtype:
        """

        object_id_a, object_id_b = self._dataset.get_two_different_object_ids()
        # object_id_a = "shoe_red_nike.yaml"
        # object_id_b = "shoe_gray_nike"
        # object_id_b = "shoe_green_nike"
        scene_name_a = self._dataset.get_random_single_object_scene_name(object_id_a)
        scene_name_b = self._dataset.get_random_single_object_scene_name(object_id_b)

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def get_random_image_pair_multi_object_scenes(self):
        """
        Gets cross object image pairs
        :param randomize:
        :type randomize:
        :return:
        :rtype:
        """

        scene_name_a = self._dataset.get_random_multi_object_scene_name()
        scene_name_b = self._dataset.get_random_multi_object_scene_name()

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def _get_new_images(self):
        """
        Gets a new pair of images
        :return:
        :rtype:
        """

        if random.random() < 0.5:
            self._dataset.set_train_mode()
        else:
            self._dataset.set_test_mode()

        if self._config["same_object"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair()
        elif self._config["different_objects"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_across_object()
        elif self._config["multiple_object"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_multi_object_scenes()
        else:
            raise ValueError("At least one of the image types must be set to True")


        self.img1_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(scene_name_1, image_1_idx)
        self.img2_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(scene_name_2, image_2_idx)

        self._compute_descriptors()

        # self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(img1_pil)
        # self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(img2_pil)


    def _compute_descriptors(self):
        """
        Computes the descriptors for image 1 and image 2 for each network
        :return:
        :rtype:
        """
        self.img1 = pil_image_to_cv2(self.img1_pil)
        self.img2 = pil_image_to_cv2(self.img2_pil)
        self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(self.img1_pil)
        self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(self.img2_pil)
        self.img1_gray = cv2.cvtColor(self.img1, cv2.COLOR_RGB2GRAY) / 255.0
        self.img2_gray = cv2.cvtColor(self.img2, cv2.COLOR_RGB2GRAY) / 255.0

        cv2.imshow('source', self.img1)
        cv2.imshow('target', self.img2)

        self._res_a = dict()
        self._res_b = dict()
        for network_name, dcn in self._dcn_dict.items():
            self._res_a[network_name] = dcn.forward_single_image_tensor(self.rgb_1_tensor).data.cpu().numpy()
            self._res_b[network_name] = dcn.forward_single_image_tensor(self.rgb_2_tensor).data.cpu().numpy()


        self.find_best_match(None, 0, 0, None, None)

    def scale_norm_diffs_to_make_heatmap(self, norm_diffs, threshold):
        """
        Scales the norm diffs to make a heatmap. This will be scaled between 0 and 1.
        0 corresponds to a match, 1 to non-match

        :param norm_diffs: The norm diffs
        :type norm_diffs: numpy.array [H,W]
        :param threshold: norm diff above which a pixel is treated as a non-match
        :type threshold: float
        :return:
        :rtype:
        """


        heatmap = np.copy(norm_diffs)
        greater_than_threshold = np.where(norm_diffs > threshold)
        heatmap = heatmap / threshold * self._config["heatmap_vis_upper_bound"] # linearly scale [0, threshold] to [0, heatmap_vis_upper_bound]
        heatmap[greater_than_threshold] = 1 # entries greater than the threshold are set to 1
        heatmap = heatmap.astype(self.img1_gray.dtype)
        return heatmap
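
    # Worked example of the scaling above, assuming heatmap_vis_upper_bound = 0.5
    # (the actual value comes from the config): with threshold = 0.25, a norm diff
    # of 0.1 maps to 0.1 / 0.25 * 0.5 = 0.2, while a norm diff of 0.3 exceeds the
    # threshold and is set to 1, so good matches stay dark in the blended heatmap.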


    def find_best_match(self, event, u, v, flags, param):

        """
        For each network, find the best match in the target image to point highlighted
        with reticle in the source image. Displays the result
        :return:
        :rtype:
        """

        img_1_with_reticle = np.copy(self.img1)
        draw_reticle(img_1_with_reticle, u, v, self._reticle_color)
        cv2.imshow("source", img_1_with_reticle)

        alpha = self._config["blend_weight_original_image"]
        beta = 1 - alpha

        img_2_with_reticle = np.copy(self.img2)


        print "\n\n"

        self._res_uv = dict()

        # self._res_a_uv = dict()
        # self._res_b_uv = dict()

        for network_name in self._dcn_dict:
            res_a = self._res_a[network_name]
            res_b = self._res_b[network_name]
            best_match_uv, best_match_diff, norm_diffs = \
                DenseCorrespondenceNetwork.find_best_match((u, v), res_a, res_b)
            print "\n\n"
            print "network_name:", network_name
            self._res_uv[network_name] = dict()
            self._res_uv[network_name]['source'] = res_a[v, u, :].tolist()
            self._res_uv[network_name]['target'] = res_b[v, u, :].tolist()

            # print "res_a[v, u, :]:", res_a[v, u, :]
            # print "res_b[v, u, :]:", res_b[v, u, :]

            print "%s best match diff: %.3f" %(network_name, best_match_diff)

            threshold = self._config["norm_diff_threshold"]
            if network_name in self._config["norm_diff_threshold_dict"]:
                threshold = self._config["norm_diff_threshold_dict"][network_name]

            heatmap = self.scale_norm_diffs_to_make_heatmap(norm_diffs, threshold)

            reticle_color = self._network_reticle_color[network_name]
            draw_reticle(heatmap, best_match_uv[0], best_match_uv[1], reticle_color)
            draw_reticle(img_2_with_reticle, best_match_uv[0], best_match_uv[1], reticle_color)
            blended = cv2.addWeighted(self.img2_gray, alpha, heatmap, beta, 0)
            cv2.imshow(network_name, blended)

        cv2.imshow("target", img_2_with_reticle)
        if event == cv2.EVENT_LBUTTONDOWN:
            utils.saveToYaml(self._res_uv, 'clicked_point.yaml')

    def run(self):
        self._get_new_images()
        cv2.namedWindow('target')
        cv2.setMouseCallback('source', self.find_best_match)

        while True:
            k = cv2.waitKey(20) & 0xFF
            if k == 27:
                break
            elif k == ord('n'):
                print "HEY"
                self._get_new_images()
            elif k == ord('s'):
                print "HEY"
                img1_pil = self.img1_pil
                img2_pil = self.img2_pil
                self.img1_pil = img2_pil
                self.img2_pil = img1_pil
                self._compute_descriptors()
Example No. 17
class HeatmapVisualization(object):
    """
    Launches a live interactive heatmap visualization.
    Edit config/dense_correspondence/heatmap_vis/heatmap.yaml to specify which networks
    to visualize. Specifically add the network you want to visualize to the "networks" list.
    Make sure that this network appears in the file pointed to by EVAL_CONFIG
    Usage: launch this file with python after sourcing the environment with
    `use_pytorch_dense_correspondence`, then run `python live_heatmap_visualization.py`.
    """
    def __init__(self, config):
        self._config = config
        self._dce = DenseCorrespondenceEvaluation(EVAL_CONFIG)
        self._load_networks()
        self._reticle_color = COLOR_GREEN
        self.load_specific_dataset()  # comment this out to use the first network's training dataset instead

    def _load_networks(self):
        # we will use the dataset for the first network in the series
        self._dcn_dict = dict()

        self._dataset = None
        self._network_reticle_color = dict()

        for idx, network_name in enumerate(self._config["networks"]):
            dcn = self._dce.load_network_from_config(network_name)
            dcn.eval()
            self._dcn_dict[network_name] = dcn

            if len(self._config["networks"]) == 1:
                self._network_reticle_color[network_name] = COLOR_RED
            else:
                self._network_reticle_color[network_name] = label_colors[idx]

            if self._dataset is None:
                self._dataset = dcn.load_training_dataset()

    def load_specific_dataset(self):
        dataset_config_filename = os.path.join(
            utils.getDenseCorrespondenceSourceDir(), 'config',
            'dense_correspondence', 'dataset', 'composite',
            'rope_nonrigid_412vert_only.yaml')

        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)
        #self._dataset.get_knots_info('rope_nonrigid_412vert')

    def get_random_image_pair(self):
        object_id = self._dataset.get_random_object_id()
        scene_name_a = self._dataset.get_random_single_object_scene_name(
            object_id)
        scene_name_b = self._dataset.get_random_single_object_scene_name(
            object_id)

        image_a_idx = self._dataset.get_random_image_index(scene_name_a)
        image_b_idx = self._dataset.get_random_image_index(scene_name_b)

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def _get_new_images(self):
        """
        Gets a new pair of images
        :return:
        :rtype:
        """
        if random.random() <= 1.0:  # always true; this example always uses the train set
            self._dataset.set_train_mode()
        else:
            self._dataset.set_test_mode()

        if self._config["same_object"]:
            print "getting random image pair"
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair(
            )
        else:
            raise ValueError(
                "At least one of the image types must be set tot True")

        print "got pair"
        self.img1_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(
            scene_name_1, image_1_idx)
        print "got img1"
        self.img1_knots = self._dataset._knots_info[scene_name_1][image_1_idx]
        print "got img1 knots"
        self.img2_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(
            scene_name_2, image_2_idx)
        self.img2_knots = self._dataset._knots_info[scene_name_2][image_2_idx]

    def _get_task_images(self):
        self.img1_pil = self._dataset.get_rgb_image('./images/000025_rgb.png')
        img1_mask = self._dataset.get_mask_image(
            './image_masks/000025_mask.png')
        self.img2_pil = self._dataset.get_rgb_image('./images/000018_rgb.png')
        pixs = correspondence_finder.random_sample_from_masked_image_torch(
            np.asarray(img1_mask), 25)
        self.img1_knots = list(zip(pixs[0], pixs[1]))
        #self.img1_knots = utils.getDictFromJSONFilename('./images_start/knots_info.json')["0"][0]
        #self.img2_knots = utils.getDictFromJSONFilename('./images_goal/knots_info.json')["0"][0]

    def _compute_descriptors(self, knot_idx):
        """
        Computes the descriptors for image 1 and image 2 for each network
        :return:
        :rtype:
        """
        self.img1 = pil_image_to_cv2(self.img1_pil)
        self.img2 = pil_image_to_cv2(self.img2_pil)
        self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(self.img1_pil)
        self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(self.img2_pil)
        self.img1_gray = cv2.cvtColor(self.img1, cv2.COLOR_RGB2GRAY) / 255.0
        self.img2_gray = cv2.cvtColor(self.img2, cv2.COLOR_RGB2GRAY) / 255.0

        self._res_a = dict()
        self._res_b = dict()
        for network_name, dcn in self._dcn_dict.items():
            self._res_a[network_name] = dcn.forward_single_image_tensor(
                self.rgb_1_tensor).data.cpu().numpy()
            self._res_b[network_name] = dcn.forward_single_image_tensor(
                self.rgb_2_tensor).data.cpu().numpy()
        print(self.img1_knots)
        u, v = self.img1_knots[knot_idx]
        source, blended, target, p = self.find_best_match(
            None, u, v, None, None)
        return (source, blended, target, p)

    def scale_norm_diffs_to_make_heatmap(self, norm_diffs, threshold):
        """
        Scales the norm diffs to make a heatmap. This will be scaled between 0 and 1.
        0 corresponds to a match, 1 to non-match
        :param norm_diffs: The norm diffs
        :type norm_diffs: numpy.array [H,W]
        :param threshold: norm diff above which a pixel is treated as a non-match
        :type threshold: float
        :return:
        :rtype:
        """

        heatmap = np.copy(norm_diffs)
        greater_than_threshold = np.where(norm_diffs > threshold)
        heatmap = heatmap / threshold * self._config["heatmap_vis_upper_bound"]  # linearly scale [0, threshold] to [0, heatmap_vis_upper_bound]
        heatmap[greater_than_threshold] = 1  # entries greater than the threshold are set to 1
        heatmap = heatmap.astype(self.img1_gray.dtype)
        return heatmap

    def find_best_match(self, event, u, v, flags, param):
        """
        For each network, find the best match in the target image to point highlighted
        with reticle in the source image. Displays the result
        :return:
        :rtype:
        """

        img_1_with_reticle = np.copy(self.img1)
        draw_reticle(img_1_with_reticle, u, v, self._reticle_color)
        source = img_1_with_reticle

        alpha = self._config["blend_weight_original_image"]
        beta = 1 - alpha

        img_2_with_reticle = np.copy(self.img2)

        self._res_uv = dict()

        for network_name in self._dcn_dict:
            res_a = self._res_a[network_name]
            res_b = self._res_b[network_name]
            best_match_uv, best_match_diff, norm_diffs = \
                DenseCorrespondenceNetwork.find_best_match((u, v), res_a, res_b)
            print "network_name:", network_name
            self._res_uv[network_name] = dict()
            self._res_uv[network_name]['source'] = res_a[v, u, :].tolist()
            self._res_uv[network_name]['target'] = res_b[v, u, :].tolist()

            print "%s best match diff: %.3f" % (network_name, best_match_diff)

            threshold = self._config["norm_diff_threshold"]
            if network_name in self._config["norm_diff_threshold_dict"]:
                threshold = self._config["norm_diff_threshold_dict"][
                    network_name]

            heatmap = self.scale_norm_diffs_to_make_heatmap(
                norm_diffs, threshold)

            reticle_color = self._network_reticle_color[network_name]
            draw_reticle(heatmap, best_match_uv[0], best_match_uv[1],
                         reticle_color)
            draw_reticle(img_2_with_reticle, best_match_uv[0],
                         best_match_uv[1], reticle_color)
            blended = cv2.addWeighted(self.img2_gray, alpha, heatmap, beta, 0)

        target = img_2_with_reticle
        return (source, blended, target, [best_match_uv[0], best_match_uv[1]])

    def run(self):
        self._get_task_images()
        pixels = []
        #        for i in range(len(utils.getDictFromJSONFilename('./images_start/knots_info.json')["0"][0])):
        for i in range(25):
            print "computing descriptors"
            source, blended, target, p = self._compute_descriptors(i)
            pixels.append(p)
            print "computed descriptors"
            vis = np.concatenate((source, target), axis=1)
            print "concatenated, writing image"
            cv2.imwrite("/home/priya/code/data_volume/annotated/%06d.png" % i,
                        vis)
        np.savetxt('pixels_pred.txt', pixels)
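
A minimal sketch of driving the class above, assuming a heatmap config such as config/dense_correspondence/heatmap_vis/heatmap.yaml (the path comes from the class docstring; the exact keys it must contain are not shown in this example):

config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                               'dense_correspondence', 'heatmap_vis', 'heatmap.yaml')
config = utils.getDictFromYamlFilename(config_filename)
heatmap_vis = HeatmapVisualization(config)
heatmap_vis.run()
cv2.destroyAllWindows()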
Example No. 18
from dense_correspondence.training.training import DenseCorrespondenceTraining
from dense_correspondence.dataset.spartan_dataset_masked import SpartanDataset
logging.basicConfig(level=logging.INFO)

from dense_correspondence.evaluation.evaluation import DenseCorrespondenceEvaluation

config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
                               'dataset', 'composite', 'toy.yaml')
config = utils.getDictFromYamlFilename(config_filename)

train_config_file = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
                               'training', 'toy_training.yaml')

train_config = utils.getDictFromYamlFilename(train_config_file)
dataset = SpartanDataset(config=config)

logging_dir = "/home/zhouxian/git/pytorch-dense-correspondence/pdc/trained_models/tutorials"
d = 3 # the descriptor dimension
name = "toy_hacker_%d" %(d)
train_config["training"]["logging_dir_name"] = name
train_config["training"]["logging_dir"] = logging_dir
train_config["dense_correspondence_network"]["descriptor_dimension"] = d

TRAIN = True
EVALUATE = True

if TRAIN:
    print "training descriptor of dimension %d" %(d)
    train = DenseCorrespondenceTraining(dataset=dataset, config=train_config)
    train.run()
import sys
import os
import cv2
import numpy as np
import copy

import dense_correspondence_manipulation.utils.utils as utils
dc_source_dir = utils.getDenseCorrespondenceSourceDir()
sys.path.append(dc_source_dir)
sys.path.append(os.path.join(dc_source_dir, "dense_correspondence", "correspondence_tools"))
from dense_correspondence.dataset.spartan_dataset_masked import SpartanDataset, ImageType

config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence', 
                               'dataset', 'composite', 'star_bot_front_only.yaml')
config = utils.getDictFromYamlFilename(config_filename)
sd = SpartanDataset(config=config)
sd.set_train_mode()


USE_FIRST_IMAGE = False # force using first image in each log
RANDOMIZE_TEST_TRAIN = False # randomize selecting between the train and test sets

def numpy_to_cv2(numpy_img):
    return numpy_img[:, :, ::-1].copy() # reverse the channel order to convert between RGB and BGR

def pil_image_to_cv2(pil_image):
    return np.array(pil_image)[:, :, ::-1].copy() # reverse the channel order to convert between RGB and BGR

def get_cv2_img_pair_from_spartan():
    scene_name_a = sd.get_random_scene_name()
    num_attempts = 50
    for i in range(num_attempts):
def make_default():
    dataset = SpartanDataset.make_default_caterpillar()
    return DenseCorrespondenceTraining(dataset=dataset)

class DenseCorrespondenceTraining(object):

    def __init__(self, config=None, dataset=None, dataset_test=None):
        if config is None:
            config = DenseCorrespondenceTraining.load_default_config()

        self._config = config
        self._dataset = dataset
        self._dataset_test = dataset_test

        self._dcn = None
        self._optimizer = None

    def setup(self):
        """
        Initializes the object
        :return:
        :rtype:
        """
        self.load_dataset()
        self.setup_logging_dir()
        self.setup_visdom()
        self.setup_tensorboard()


    @property
    def dataset(self):
        return self._dataset

    @dataset.setter
    def dataset(self, value):
        self._dataset = value

    def load_dataset(self):
        """
        Loads a dataset and constructs a DataLoader for the training data.
        Additionally creates a dataset and DataLoader for the test data.
        :return:
        :rtype:
        """

        batch_size = self._config['training']['batch_size']
        num_workers = self._config['training']['num_workers']

        if self._dataset is None:
            self._dataset = SpartanDataset.make_default_10_scenes_drill()

        
        self._dataset.load_all_pose_data()
        self._dataset.set_parameters_from_training_config(self._config)

        self._data_loader = torch.utils.data.DataLoader(self._dataset, batch_size=batch_size,
                                          shuffle=True, num_workers=num_workers, drop_last=True)

        # create a test dataset
        if self._config["training"]["compute_test_loss"]:
            if self._dataset_test is None:
                self._dataset_test = SpartanDataset(mode="test", config=self._dataset.config)

            
            self._dataset_test.load_all_pose_data()
            self._dataset_test.set_parameters_from_training_config(self._config)

            self._data_loader_test = torch.utils.data.DataLoader(self._dataset_test, batch_size=batch_size,
                                          shuffle=True, num_workers=2, drop_last=True)

    def load_dataset_from_config(self, config):
        """
        Loads train and test datasets from the given config
        :param config: dict loaded from a YAML file
        :type config: dict
        :return: None
        :rtype:
        """
        self._dataset = SpartanDataset(mode="train", config=config)
        self._dataset_test = SpartanDataset(mode="test", config=config)
        self.load_dataset()

    def build_network(self):
        """
        Builds the DenseCorrespondenceNetwork
        :return:
        :rtype: DenseCorrespondenceNetwork
        """

        return DenseCorrespondenceNetwork.from_config(self._config['dense_correspondence_network'],
                                                      load_stored_params=False)

    def _construct_optimizer(self, parameters):
        """
        Constructs the optimizer
        :param parameters: Parameters to adjust in the optimizer
        :type parameters:
        :return: Adam Optimizer with params from the config
        :rtype: torch.optim
        """

        learning_rate = float(self._config['training']['learning_rate'])
        weight_decay = float(self._config['training']['weight_decay'])
        optimizer = optim.Adam(parameters, lr=learning_rate, weight_decay=weight_decay)
        return optimizer
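
    # For reference, a training YAML consumed above is assumed to contain keys like
    # the following (values illustrative; only the key names are implied by the code):
    #
    #   training:
    #     learning_rate: 1.0e-4
    #     weight_decay: 1.0e-4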

    def _get_current_loss(self, logging_dict):
        """
        Gets the current loss for both test and train
        :return:
        :rtype: dict
        """
        d = dict()
        d['train'] = dict()
        d['test'] = dict()

        for key, val in d.items():
            for field in logging_dict[key].keys():
                vec = logging_dict[key][field]

                if len(vec) > 0:
                    val[field] = vec[-1]
                else:
                    val[field] = -1 # placeholder


        return d

    def load_pretrained(self, model_folder, iteration=None):
        """
        Loads network and optimizer parameters from a previous training run.

        Note: It is up to the user to ensure that the model parameters match.
        e.g. width, height, descriptor dimension etc.

        :param model_folder: location of the folder containing the param files 001000.pth. Can be absolute or relative path. If relative then it is relative to pdc/trained_models/
        :type model_folder:
        :param iteration: which index to use, e.g. 3500, if None it loads the latest one
        :type iteration:
        :return: iteration
        :rtype:
        """

        if not os.path.isdir(model_folder):
            pdc_path = utils.getPdcPath()
            model_folder = os.path.join(pdc_path, "trained_models", model_folder)

        # find idx.pth and idx.pth.opt files
        if iteration is None:
            files = os.listdir(model_folder)
            model_param_file = sorted(fnmatch.filter(files, '*.pth'))[-1]
            iteration = int(model_param_file.split(".")[0])
            optim_param_file = sorted(fnmatch.filter(files, '*.pth.opt'))[-1]
        else:
            prefix = utils.getPaddedString(iteration, width=6)
            model_param_file = prefix + ".pth"
            optim_param_file = prefix + ".pth.opt"

        print "model_param_file", model_param_file
        model_param_file = os.path.join(model_folder, model_param_file)
        optim_param_file = os.path.join(model_folder, optim_param_file)


        self._dcn = self.build_network()
        self._dcn.load_state_dict(torch.load(model_param_file))
        self._dcn.cuda()
        self._dcn.train()

        self._optimizer = self._construct_optimizer(self._dcn.parameters())
        self._optimizer.load_state_dict(torch.load(optim_param_file))

        return iteration
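
    # A hedged usage sketch (the folder name and iteration below are illustrative):
    #
    #   train = DenseCorrespondenceTraining(config=train_config, dataset=dataset)
    #   iteration = train.load_pretrained("caterpillar_3", iteration=3500)
    #
    # A relative model_folder is resolved against pdc/trained_models/, and the
    # returned iteration is what run_from_pretrained() below feeds back into run().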

    def run_from_pretrained(self, model_folder, iteration=None, learning_rate=None):
        """
        Wrapper for load_pretrained(), then run()
        """
        iteration = self.load_pretrained(model_folder, iteration)
        if iteration is None:
            iteration = 0

        if learning_rate is not None:
            self._config["training"]["learning_rate_starting_from_pretrained"] = learning_rate
            self.set_learning_rate(self._optimizer, learning_rate)

        self.run(loss_current_iteration=iteration, use_pretrained=True)

    def run(self, loss_current_iteration=0, use_pretrained=False):
        """
        Runs the training
        :return:
        :rtype:
        """

        start_iteration = copy.copy(loss_current_iteration)

        DCE = DenseCorrespondenceEvaluation

        self.setup()
        self.save_configs()

        if not use_pretrained:
            # create new network and optimizer
            self._dcn = self.build_network()
            self._optimizer = self._construct_optimizer(self._dcn.parameters())
        else:
            logging.info("using pretrained model")
            if (self._dcn is None):
                raise ValueError("you must set self._dcn if use_pretrained=True")
            if (self._optimizer is None):
                raise ValueError("you must set self._optimizer if use_pretrained=True")

        # make sure network is using cuda and is in train mode
        dcn = self._dcn
        dcn.cuda()
        dcn.train()

        optimizer = self._optimizer
        batch_size = self._data_loader.batch_size

        pixelwise_contrastive_loss = PixelwiseContrastiveLoss(image_shape=dcn.image_shape, config=self._config['loss_function'])
        pixelwise_contrastive_loss.debug = True

        loss = match_loss = non_match_loss = 0

        max_num_iterations = self._config['training']['num_iterations'] + start_iteration
        logging_rate = self._config['training']['logging_rate']
        save_rate = self._config['training']['save_rate']
        compute_test_loss_rate = self._config['training']['compute_test_loss_rate']

        # logging
        self._logging_dict = dict()
        self._logging_dict['train'] = {"iteration": [], "loss": [], "match_loss": [],
                                           "masked_non_match_loss": [], 
                                           "background_non_match_loss": [],
                                           "blind_non_match_loss": [],
                                           "learning_rate": [],
                                           "different_object_non_match_loss": []}

        self._logging_dict['test'] = {"iteration": [], "loss": [], "match_loss": [],
                                           "non_match_loss": []}

        # save network before starting
        if not use_pretrained:
            self.save_network(dcn, optimizer, 0)

        for epoch in range(50):  # loop over the dataset multiple times

            for i, data in enumerate(self._data_loader, 0):
                loss_current_iteration += 1
                start_iter = time.time()

                match_type, \
                img_a, img_b, \
                matches_a, matches_b, \
                masked_non_matches_a, masked_non_matches_b, \
                background_non_matches_a, background_non_matches_b, \
                blind_non_matches_a, blind_non_matches_b, \
                metadata = data

                if (match_type == -1).all():
                    print "\n empty data, continuing \n"
                    continue


                data_type = metadata["type"][0]
                
                img_a = Variable(img_a.cuda(), requires_grad=False)
                img_b = Variable(img_b.cuda(), requires_grad=False)

                matches_a = Variable(matches_a.cuda().squeeze(0), requires_grad=False)
                matches_b = Variable(matches_b.cuda().squeeze(0), requires_grad=False)
                masked_non_matches_a = Variable(masked_non_matches_a.cuda().squeeze(0), requires_grad=False)
                masked_non_matches_b = Variable(masked_non_matches_b.cuda().squeeze(0), requires_grad=False)

                background_non_matches_a = Variable(background_non_matches_a.cuda().squeeze(0), requires_grad=False)
                background_non_matches_b = Variable(background_non_matches_b.cuda().squeeze(0), requires_grad=False)

                blind_non_matches_a = Variable(blind_non_matches_a.cuda().squeeze(0), requires_grad=False)
                blind_non_matches_b = Variable(blind_non_matches_b.cuda().squeeze(0), requires_grad=False)

                optimizer.zero_grad()
                self.adjust_learning_rate(optimizer, loss_current_iteration)

                # run both images through the network
                image_a_pred = dcn.forward(img_a)
                image_a_pred = dcn.process_network_output(image_a_pred, batch_size)

                image_b_pred = dcn.forward(img_b)
                image_b_pred = dcn.process_network_output(image_b_pred, batch_size)

                # get loss
                loss, match_loss, masked_non_match_loss, \
                background_non_match_loss, blind_non_match_loss = loss_composer.get_loss(pixelwise_contrastive_loss, match_type,
                                                                                image_a_pred, image_b_pred,
                                                                                matches_a,     matches_b,
                                                                                masked_non_matches_a, masked_non_matches_b,
                                                                                background_non_matches_a, background_non_matches_b,
                                                                                blind_non_matches_a, blind_non_matches_b)
                

                loss.backward()
                optimizer.step()

                elapsed = time.time() - start_iter

                print "single iteration took %.3f seconds" %(elapsed)


                def update_visdom_plots(loss, match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss):
                    """
                    Updates the visdom plots with current loss function information
                    :return:
                    :rtype:
                    """

                    learning_rate = DenseCorrespondenceTraining.get_learning_rate(optimizer)
                    self._logging_dict['train']['learning_rate'].append(learning_rate)
                    self._visdom_plots['learning_rate'].log(loss_current_iteration, learning_rate)
                    self._tensorboard_logger.log_value("learning rate", learning_rate, loss_current_iteration)


                    # Don't update any plots if the entry corresponding to that term
                    # is a zero loss
                    if not loss_composer.is_zero_loss(match_loss):
                        self._logging_dict['train']['match_loss'].append(match_loss.data[0])
                        self._visdom_plots['train']['match_loss'].log(loss_current_iteration, match_loss.data[0])
                        self._tensorboard_logger.log_value("train match loss", match_loss.data[0], loss_current_iteration)

                    if not loss_composer.is_zero_loss(masked_non_match_loss):
                        self._logging_dict['train']['masked_non_match_loss'].append(masked_non_match_loss.data[0])
                        self._visdom_plots['train']['masked_non_match_loss'].log(loss_current_iteration,
                                                             masked_non_match_loss.data[0])
                        self._tensorboard_logger.log_value("train masked non match loss", masked_non_match_loss.data[0], loss_current_iteration)

                    if not loss_composer.is_zero_loss(background_non_match_loss):
                        self._logging_dict['train']['background_non_match_loss'].append(background_non_match_loss.data[0])
                        self._visdom_plots['train']['background_non_match_loss'].log(loss_current_iteration,
                                                             background_non_match_loss.data[0])
                        self._tensorboard_logger.log_value("train background non match loss", background_non_match_loss.data[0], loss_current_iteration)

                    if not loss_composer.is_zero_loss(blind_non_match_loss):

                        if data_type == SpartanDatasetDataType.SINGLE_OBJECT_WITHIN_SCENE:
                            self._tensorboard_logger.log_value("train blind SINGLE_OBJECT_WITHIN_SCENE", blind_non_match_loss.data[0], loss_current_iteration)

                        if data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                            self._tensorboard_logger.log_value("train blind DIFFERENT_OBJECT", blind_non_match_loss.data[0], loss_current_iteration)


                    # loss is never zero
                    if data_type == SpartanDatasetDataType.SINGLE_OBJECT_WITHIN_SCENE:
                        self._tensorboard_logger.log_value("train loss SINGLE_OBJECT_WITHIN_SCENE", loss.data[0], loss_current_iteration)

                    elif data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                        self._tensorboard_logger.log_value("train loss DIFFERENT_OBJECT", loss.data[0], loss_current_iteration)

                    elif data_type == SpartanDatasetDataType.SINGLE_OBJECT_ACROSS_SCENE:
                        self._tensorboard_logger.log_value("train loss SINGLE_OBJECT_ACROSS_SCENE", loss.data[0], loss_current_iteration)

                    elif data_type == SpartanDatasetDataType.MULTI_OBJECT:
                        self._tensorboard_logger.log_value("train loss MULTI_OBJECT", loss.data[0], loss_current_iteration)
                    
                    elif data_type == SpartanDatasetDataType.SYNTHETIC_MULTI_OBJECT:
                        self._tensorboard_logger.log_value("train loss SYNTHETIC_MULTI_OBJECT", loss.data[0], loss_current_iteration)
                    else:
                        raise ValueError("unknown data type")


                    if data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                        self._tensorboard_logger.log_value("train different object", loss.data[0], loss_current_iteration)

                    # #non_match_type = metadata['non_match_type'][0]
                    # fraction_hard_negatives = pixelwise_contrastive_loss.debug_data['fraction_hard_negatives']

                    # if pixelwise_contrastive_loss.debug:
                    #     if non_match_type == "masked":
                    #         self._visdom_plots['masked_hard_negative_rate'].log(loss_current_iteration, fraction_hard_negatives)
                    #         self._tensorboard_logger.log_value("masked hard negative rate", fraction_hard_negatives, loss_current_iteration)
                    #     elif non_match_type == "non_masked":
                    #         self._visdom_plots['non_masked_hard_negative_rate'].log(loss_current_iteration,
                    #                                                             fraction_hard_negatives)

                    #         self._tensorboard_logger.log_value("non-masked hard negative rate", fraction_hard_negatives,
                    #                                      loss_current_iteration)
                    #     else:
                    #         raise ValueError("uknown non_match_type %s" %(non_match_type))


                def update_visdom_test_loss_plots(test_loss, test_match_loss, test_non_match_loss):
                    """
                    Log data about test loss and update the visdom plots
                    :return:
                    :rtype:
                    """

                    self._logging_dict['test']['loss'].append(test_loss)
                    self._logging_dict['test']['match_loss'].append(test_match_loss)
                    self._logging_dict['test']['non_match_loss'].append(test_non_match_loss)
                    self._logging_dict['test']['iteration'].append(loss_current_iteration)

                    self._visdom_plots['test']['loss'].log(loss_current_iteration, test_loss)
                    self._visdom_plots['test']['match_loss'].log(loss_current_iteration, test_match_loss)
                    self._visdom_plots['test']['non_match_loss'].log(loss_current_iteration, test_non_match_loss)

                    self._tensorboard_logger.log_value('test loss', test_loss, loss_current_iteration)
                    self._tensorboard_logger.log_value('test match loss', test_match_loss, loss_current_iteration)
                    self._tensorboard_logger.log_value('test non-match loss', test_non_match_loss, loss_current_iteration)


                update_visdom_plots(loss, match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss)

                if loss_current_iteration % save_rate == 0:
                    self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict)

                if loss_current_iteration % logging_rate == 0:
                    logging.info("Training on iteration %d of %d" %(loss_current_iteration, max_num_iterations))

                    logging.info("single iteration took %.3f seconds" %(elapsed))

                    percent_complete = loss_current_iteration * 100.0/(max_num_iterations - start_iteration)
                    logging.info("Training is %d percent complete\n" %(percent_complete))


                # don't compute the test loss on the first few times through the loop
                if self._config["training"]["compute_test_loss"] and (loss_current_iteration % compute_test_loss_rate == 0) and loss_current_iteration > 5:
                    logging.info("Computing test loss")

                    # delete the loss, match_loss, non_match_loss variables so that
                    # pytorch can use that GPU memory
                    del loss, match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss
                    gc.collect()

                    dcn.eval()
                    test_loss, test_match_loss, test_non_match_loss = DCE.compute_loss_on_dataset(dcn,
                        self._data_loader_test, self._config['loss_function'],
                        num_iterations=self._config['training']['test_loss_num_iterations'])

                    update_visdom_test_loss_plots(test_loss, test_match_loss, test_non_match_loss)

                    # delete these variables so we can free GPU memory
                    del test_loss, test_match_loss, test_non_match_loss

                    # make sure to set the network back to train mode
                    dcn.train()

                if loss_current_iteration % self._config['training']['garbage_collect_rate'] == 0:
                    logging.debug("running garbage collection")
                    gc_start = time.time()
                    gc.collect()
                    gc_elapsed = time.time() - gc_start
                    logging.debug("garbage collection took %.2d seconds" %(gc_elapsed))

                if loss_current_iteration > max_num_iterations:
                    logging.info("Finished testing after %d iterations" % (max_num_iterations))
                    self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict)
                    return


    def setup_logging_dir(self):
        """
        Sets up the directory where logs will be stored and config
        files written
        :return: full path of logging dir
        :rtype: str
        """

        if 'logging_dir_name' in self._config['training']:
            dir_name = self._config['training']['logging_dir_name']
        else:
            dir_name = utils.get_current_time_unique_name() +"_" + str(self._config['dense_correspondence_network']['descriptor_dimension']) + "d"

        self._logging_dir_name = dir_name

        self._logging_dir = os.path.join(utils.convert_to_absolute_path(self._config['training']['logging_dir']), dir_name)



        # remove any stale logging directory with the same name, then recreate it
        if os.path.isdir(self._logging_dir):
            shutil.rmtree(self._logging_dir)

        if not os.path.isdir(self._logging_dir):
            os.makedirs(self._logging_dir)

        # make the tensorboard log directory
        self._tensorboard_log_dir = os.path.join(self._logging_dir, "tensorboard")
        if not os.path.isdir(self._tensorboard_log_dir):
            os.makedirs(self._tensorboard_log_dir)

        return self._logging_dir

    def save_network(self, dcn, optimizer, iteration, logging_dict=None):
        """
        Saves network parameters to logging directory
        :return:
        :rtype: None
        """

        network_param_file = os.path.join(self._logging_dir, utils.getPaddedString(iteration, width=6) + ".pth")
        optimizer_param_file = network_param_file + ".opt"
        torch.save(dcn.state_dict(), network_param_file)
        torch.save(optimizer.state_dict(), optimizer_param_file)

        # also save loss history stuff
        if logging_dict is not None:
            log_history_file = os.path.join(self._logging_dir, utils.getPaddedString(iteration, width=6) + "_log_history.yaml")
            utils.saveToYaml(logging_dict, log_history_file)

            current_loss_file = os.path.join(self._logging_dir, 'loss.yaml')
            current_loss_data = self._get_current_loss(logging_dict)

            utils.saveToYaml(current_loss_data, current_loss_file)
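
        # Hedged reload sketch (added for illustration; not part of the original
        # source): each checkpoint pairs a network file with an optimizer file,
        # e.g. 001000.pth and 001000.pth.opt, and can be restored with only the
        # standard torch API:
        #
        #   dcn.load_state_dict(torch.load("001000.pth"))
        #   optimizer.load_state_dict(torch.load("001000.pth.opt"))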



    def save_configs(self):
        """
        Saves config files to the logging directory
        :return:
        :rtype: None
        """
        training_params_file = os.path.join(self._logging_dir, 'training.yaml')
        utils.saveToYaml(self._config, training_params_file)

        dataset_params_file = os.path.join(self._logging_dir, 'dataset.yaml')
        utils.saveToYaml(self._dataset.config, dataset_params_file)        

    def adjust_learning_rate(self, optimizer, iteration):
        """
        Adjusts the learning rate according to the schedule
        :param optimizer:
        :type optimizer:
        :param iteration:
        :type iteration:
        :return:
        :rtype:
        """

        steps_between_learning_rate_decay = self._config['training']['steps_between_learning_rate_decay']
        if iteration % steps_between_learning_rate_decay == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] * self._config["training"]["learning_rate_decay"]

    @staticmethod
    def set_learning_rate(optimizer, learning_rate):
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    @staticmethod
    def get_learning_rate(optimizer):
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
            break

        return lr
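
    # Illustrative sketch (not from the original source): adjust_learning_rate
    # implements step decay, lr <- lr * learning_rate_decay once every
    # steps_between_learning_rate_decay iterations, so after k decay events
    # lr_k = lr_0 * decay**k. A minimal self-contained check, assuming only the
    # standard torch API:
    #
    #   import torch
    #   p = torch.nn.Parameter(torch.zeros(1))
    #   opt = torch.optim.Adam([p], lr=1.0)
    #   for _ in range(2):  # two decay events with decay = 0.9
    #       for g in opt.param_groups:
    #           g['lr'] *= 0.9
    #   assert abs(DenseCorrespondenceTraining.get_learning_rate(opt) - 0.81) < 1e-9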

    def setup_visdom(self):
        """
        Sets up visdom visualizer
        :return:
        :rtype:
        """
        self.start_visdom()
        self._visdom_env = self._logging_dir_name
        self._vis = visdom.Visdom(env=self._visdom_env)

        self._port = 8097
        self._visdom_plots = dict()

        self._visdom_plots["train"] = dict()
        self._visdom_plots['train']['loss'] = VisdomPlotLogger(
        'line', port=self._port, opts={'title': 'Train Loss'}, env=self._visdom_env)

        self._visdom_plots['learning_rate'] = VisdomPlotLogger(
        'line', port=self._port, opts={'title': 'Learning Rate'}, env=self._visdom_env)

        self._visdom_plots['train']['match_loss'] = VisdomPlotLogger(
        'line', port=self._port, opts={'title': 'Train Match Loss'}, env=self._visdom_env)

        self._visdom_plots['train']['masked_non_match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Train Masked Non Match Loss'}, env=self._visdom_env)

        self._visdom_plots['train']['background_non_match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Train Background Non Match Loss'}, env=self._visdom_env)

        self._visdom_plots['train']['blind_non_match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Train Blind Non Match Loss'}, env=self._visdom_env)


        self._visdom_plots["test"] = dict()
        self._visdom_plots['test']['loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Test Loss'}, env=self._visdom_env)

        self._visdom_plots['test']['match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Test Match Loss'}, env=self._visdom_env)

        self._visdom_plots['test']['non_match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Test Non Match Loss'}, env=self._visdom_env)

        self._visdom_plots['masked_hard_negative_rate'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Masked Matches Hard Negative Rate'}, env=self._visdom_env)

        self._visdom_plots['non_masked_hard_negative_rate'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Non-Masked Hard Negative Rate'}, env=self._visdom_env)
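
    # Usage note (illustration only): each VisdomPlotLogger above is a torchnet
    # line plot, and the training loop feeds it one point at a time, e.g.
    #
    #   self._visdom_plots['train']['loss'].log(iteration, loss_value)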

    def setup_tensorboard(self):
        """
        Starts the tensorboard server and sets up the plotting
        :return:
        :rtype:
        """

        # start tensorboard
        # cmd = "python -m tensorboard.main"
        logging.info("setting up tensorboard_logger")
        cmd = "tensorboard --logdir=%s" %(self._tensorboard_log_dir)
        self._tensorboard_logger = tensorboard_logger.Logger(self._tensorboard_log_dir)
        logging.info("tensorboard logger started")


    @staticmethod
    def load_default_config():
        dc_source_dir = utils.getDenseCorrespondenceSourceDir()
        config_file = os.path.join(dc_source_dir, 'config', 'dense_correspondence',
                                   'training', 'training.yaml')

        config = utils.getDictFromYamlFilename(config_file)
        return config

    @staticmethod
    def make_default():
        dataset = SpartanDataset.make_default_caterpillar()
        return DenseCorrespondenceTraining(dataset=dataset)


    @staticmethod
    def start_visdom():
        """
        Starts visdom if it's not already running
        :return:
        :rtype:
        """

        vis = visdom.Visdom()

        if vis.check_connection():
            logging.info("Visdom already running, returning")
            return


        logging.info("Starting visdom")
        cmd = "python -m visdom.server"
        subprocess.Popen(cmd, shell=True)
Example no. 22
class DonDataLoader(object):
    """
    Data loader class that takes from the pytorch-dense-correspondence dataset.
    """
    def __init__(self, config_filename='shoes_all.yaml'):

        with HiddenPrints():

            self.config_filename = os.path.join(
                utils.getDenseCorrespondenceSourceDir(), 'config',
                'dense_correspondence', 'dataset', 'composite',
                config_filename)
            self.train_config_filename = os.path.join(
                utils.getDenseCorrespondenceSourceDir(), 'config',
                'dense_correspondence', 'training', 'training.yaml')

            self.config = utils.getDictFromYamlFilename(self.config_filename)
            self.train_config = utils.getDictFromYamlFilename(
                self.train_config_filename)

            self.dataset = SpartanDataset(config=self.config)
            self.dataset.set_parameters_from_training_config(self.train_config)

        # holds centroid and radius for each scene
        # these are for min and max z values currently. maybe include x, y, and z in the future.
        # self.centroid_and_radius[scene_name]["centroid"] or self.centroid_and_radius[scene_name]["radius"]
        self.centroid_and_radius = {}

    def get_random_scene_from_object_id(self, object_id=None):
        # set to first object_id if not specified
        if object_id is None:
            object_id = list(self.dataset._single_object_scene_dict.keys())[0]
        # list of scenes from training set
        scenes = self.dataset._single_object_scene_dict[object_id]["train"]
        scene = scenes[random.randint(0, len(scenes) - 1)]
        print("scene: {}".format(scene))
        return scene

    def get_frame_idx_pair_from_scene_name(self, scene_name):
        """Returns the indices of two frames in the scene."""
        frames = list(self.dataset.get_pose_data(scene_name).keys())
        frame_idx_a = frames[random.randint(0, len(frames) - 1)]
        frame_idx_b = frames[random.randint(0, len(frames) - 1)]
        print("frame_idx_a: {}, frame_idx_b: {}".format(
            frame_idx_a, frame_idx_b))
        return (frame_idx_a, frame_idx_b)

    def get_camera_intrinsics_matrix(self, scene_name):
        intrinsics = self.dataset.get_camera_intrinsics(scene_name)
        K = intrinsics.get_camera_matrix()
        return K
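
    # For reference (standard pinhole convention, not specific to this codebase):
    # K is the 3x3 matrix [[fx, 0, cx], [0, fy, cy], [0, 0, 1]], so a camera-frame
    # point (X, Y, Z) projects to pixel (u, v) = (fx * X / Z + cx, fy * Y / Z + cy).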

    def mask_is_contained(self, mask):
        """ Return True if the mask is fully contained in the image, False otherwise.

        :param mask: mask as a numpy array [H, W]
        """

        y_max, x_max = mask.shape  # (height, width)

        # check top and bottom rows
        for i in range(x_max):
            if mask[0, i] != 0.0 or mask[y_max - 1, i] != 0.0:
                return False
        # check left and right cols
        for i in range(y_max):
            if mask[i, 0] != 0.0 or mask[i, x_max - 1] != 0.0:
                return False
        return True
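
    # Equivalent vectorized form (a sketch, assuming only numpy; intended to
    # match the loop-based check above): the mask touches the image border iff
    # any border pixel is nonzero.
    #
    #   def mask_is_contained_fast(self, mask):
    #       border = np.concatenate(
    #           [mask[0, :], mask[-1, :], mask[:, 0], mask[:, -1]])
    #       return not np.any(border)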

    def set_centroid_and_radius_for_scene(self, scene_name):
        """ sets the centroid and radius for scene with the min and max z value in the scene
        """

        all_frames = list(self.dataset.get_pose_data(scene_name).keys())
        global_min_depth = float("inf")
        global_max_depth = 0.0
        for frame in all_frames:
            # ethan: this might be fragile, so come back to it
            try:
                rgb_a, depth_a, mask_a, pose_a = self.dataset.get_rgbd_mask_pose(
                    scene_name, frame)
                masked_depth = np.array(mask_a) * np.array(depth_a)
                min_depth = masked_depth[masked_depth > 0].min() / 1000.0
                max_depth = masked_depth[masked_depth > 0].max() / 1000.0

                global_min_depth = min(global_min_depth, min_depth)
                global_max_depth = max(global_max_depth, max_depth)
            except Exception:
                # skip frames with missing data or an empty mask
                pass
        z_min = global_min_depth
        z_max = global_max_depth

        radius = (z_max - z_min) / 2.0
        centroid = radius + z_min

        self.centroid_and_radius[scene_name] = {}
        self.centroid_and_radius[scene_name]["centroid"] = centroid
        self.centroid_and_radius[scene_name]["radius"] = radius

    def get_random_data_pair(self):
        # this will return a random data pair

        found = False
        while not found:
            # choose data from one frame
            scene_name = self.get_random_scene_from_object_id()

            # cache the centroid and radius if this scene hasn't been seen before
            if scene_name not in self.centroid_and_radius:
                self.set_centroid_and_radius_for_scene(scene_name)

            # choose two frames from the scene
            frame_idx_a, frame_idx_b = self.get_frame_idx_pair_from_scene_name(
                scene_name)

            K = self.get_camera_intrinsics_matrix(scene_name)

            rgb_a, depth_a, mask_a, pose_a = self.dataset.get_rgbd_mask_pose(
                scene_name, frame_idx_a)
            rgb_b, depth_b, mask_b, pose_b = self.dataset.get_rgbd_mask_pose(
                scene_name, frame_idx_b)

            # check that both masks are fully visible
            found = self.mask_is_contained(
                np.array(mask_a)) and self.mask_is_contained(np.array(mask_b))

        a_image_data = [rgb_a, depth_a, mask_a, pose_a]
        b_image_data = [rgb_b, depth_b, mask_b, pose_b]

        return K, a_image_data, b_image_data, scene_name
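
    # Hedged usage sketch (not from the original source); assumes the YAML
    # config files referenced in __init__ exist on disk:
    #
    #   loader = DonDataLoader(config_filename='shoes_all.yaml')
    #   K, a_image_data, b_image_data, scene_name = loader.get_random_data_pair()
    #   rgb_a, depth_a, mask_a, pose_a = a_image_data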
Example no. 23
class HeatmapVisualization(object):
    def __init__(self, config):
        self._config = config
        self._dce = DenseCorrespondenceEvaluation(EVAL_CONFIG)
        self._load_networks()
        self._reticle_color = COLOR_GREEN
        # self.load_specific_dataset() # uncomment if you want to load a specific dataset

    def _load_networks(self):
        # we will use the dataset for the first network in the series
        self._dcn_dict = dict()

        self._dataset = None
        self._network_reticle_color = dict()

        for idx, network_name in enumerate(self._config["networks"]):
            dcn = self._dce.load_network_from_config(network_name)
            dcn.eval()
            self._dcn_dict[network_name] = dcn
            # self._network_reticle_color[network_name] = label_colors[idx]

            if len(self._config["networks"]) == 1:
                self._network_reticle_color[network_name] = COLOR_RED
            else:
                self._network_reticle_color[network_name] = label_colors[idx]

            if self._dataset is None:
                self._dataset = dcn.load_training_dataset()

    def load_specific_dataset(self):
        dataset_config_filename = os.path.join(
            utils.getDenseCorrespondenceSourceDir(), 'config',
            'dense_correspondence', 'dataset', 'composite',
            'hats_3_demo_composite.yaml')

        dataset_config_filename = os.path.join(
            utils.getDenseCorrespondenceSourceDir(), 'config',
            'dense_correspondence', 'dataset', 'composite', '4_shoes_all.yaml')

        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)

    def get_random_image_pair(self):
        object_id = self._dataset.get_random_object_id()
        scene_name_a = self._dataset.get_random_single_object_scene_name(
            object_id)
        scene_name_b = self._dataset.get_different_scene_for_object(
            object_id, scene_name_a)

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        # image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def get_random_image_pair_across_object(self):
        """
        Gets cross object image pairs
        :param randomize:
        :type randomize:
        :return:
        :rtype:
        """

        object_id_a, object_id_b = self._dataset.get_two_different_object_ids()
        # object_id_a = "shoe_red_nike.yaml"
        # object_id_b = "shoe_gray_nike"
        # object_id_b = "shoe_green_nike"
        scene_name_a = self._dataset.get_random_single_object_scene_name(
            object_id_a)
        scene_name_b = self._dataset.get_random_single_object_scene_name(
            object_id_b)

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def get_random_image_pair_multi_object_scenes(self):
        """
        Gets cross object image pairs
        :param randomize:
        :type randomize:
        :return:
        :rtype:
        """

        scene_name_a = self._dataset.get_random_multi_object_scene_name()
        scene_name_b = self._dataset.get_random_multi_object_scene_name()

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def _get_new_images(self):
        """
        Gets a new pair of images
        :return:
        :rtype:
        """

        if random.random() < 0.5:
            self._dataset.set_train_mode()
        else:
            self._dataset.set_test_mode()

        if self._config["same_object"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair(
            )
        elif self._config["different_objects"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_across_object(
            )
        elif self._config["multiple_object"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_multi_object_scenes(
            )
        else:
            raise ValueError(
                "At least one of the image types must be set tot True")

        self.img1_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(
            scene_name_1, image_1_idx)
        self.img2_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(
            scene_name_2, image_2_idx)

        self._compute_descriptors()

        # self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(img1_pil)
        # self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(img2_pil)

    def _compute_descriptors(self):
        """
        Computes the descriptors for image 1 and image 2 for each network
        :return:
        :rtype:
        """
        self.img1 = pil_image_to_cv2(self.img1_pil)
        self.img2 = pil_image_to_cv2(self.img2_pil)
        self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(self.img1_pil)
        self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(self.img2_pil)
        self.img1_gray = cv2.cvtColor(self.img1, cv2.COLOR_RGB2GRAY) / 255.0
        self.img2_gray = cv2.cvtColor(self.img2, cv2.COLOR_RGB2GRAY) / 255.0

        cv2.imshow('source', self.img1)
        cv2.imshow('target', self.img2)

        self._res_a = dict()
        self._res_b = dict()
        for network_name, dcn in self._dcn_dict.items():
            self._res_a[network_name] = dcn.forward_single_image_tensor(
                self.rgb_1_tensor).data.cpu().numpy()
            self._res_b[network_name] = dcn.forward_single_image_tensor(
                self.rgb_2_tensor).data.cpu().numpy()

        self.find_best_match(None, 0, 0, None, None)

    def scale_norm_diffs_to_make_heatmap(self, norm_diffs, threshold):
        """
        Scales the norm diffs to make a heatmap. This will be scaled between 0 and 1.
        0 corresponds to a match, 1 to non-match

        :param norm_diffs: The norm diffs
        :type norm_diffs: numpy.array [H,W]
        :return:
        :rtype:
        """

        heatmap = np.copy(norm_diffs)
        greater_than_threshold = np.where(norm_diffs > threshold)
        # linearly scale [0, threshold] to [0, heatmap_vis_upper_bound]
        heatmap = heatmap / threshold * self._config["heatmap_vis_upper_bound"]
        heatmap[greater_than_threshold] = 1  # anything above the threshold saturates to 1
        heatmap = heatmap.astype(self.img1_gray.dtype)
        return heatmap
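
    # Worked example (illustration only): with threshold = 0.25 and
    # heatmap_vis_upper_bound = 0.5, a norm diff of 0.125 maps to
    # 0.125 / 0.25 * 0.5 = 0.25, a diff of exactly 0.25 maps to 0.5, and
    # anything above the threshold saturates to 1.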

    def find_best_match(self, event, u, v, flags, param):
        """
        For each network, find the best match in the target image to point highlighted
        with reticle in the source image. Displays the result
        :return:
        :rtype:
        """

        img_1_with_reticle = np.copy(self.img1)
        draw_reticle(img_1_with_reticle, u, v, self._reticle_color)
        cv2.imshow("source", img_1_with_reticle)

        alpha = self._config["blend_weight_original_image"]
        beta = 1 - alpha

        img_2_with_reticle = np.copy(self.img2)

        print "\n\n"

        self._res_uv = dict()

        # self._res_a_uv = dict()
        # self._res_b_uv = dict()

        for network_name in self._dcn_dict:
            res_a = self._res_a[network_name]
            res_b = self._res_b[network_name]
            best_match_uv, best_match_diff, norm_diffs = \
                DenseCorrespondenceNetwork.find_best_match((u, v), res_a, res_b)
            print "\n\n"
            print "network_name:", network_name
            self._res_uv[network_name] = dict()
            self._res_uv[network_name]['source'] = res_a[v, u, :].tolist()
            self._res_uv[network_name]['target'] = res_b[v, u, :].tolist()

            # print "res_a[v, u, :]:", res_a[v, u, :]
            # print "res_b[v, u, :]:", res_b[v, u, :]

            print "%s best match diff: %.3f" % (network_name, best_match_diff)

            threshold = self._config["norm_diff_threshold"]
            if network_name in self._config["norm_diff_threshold_dict"]:
                threshold = self._config["norm_diff_threshold_dict"][
                    network_name]

            heatmap = self.scale_norm_diffs_to_make_heatmap(
                norm_diffs, threshold)

            reticle_color = self._network_reticle_color[network_name]
            draw_reticle(heatmap, best_match_uv[0], best_match_uv[1],
                         reticle_color)
            draw_reticle(img_2_with_reticle, best_match_uv[0],
                         best_match_uv[1], reticle_color)
            blended = cv2.addWeighted(self.img2_gray, alpha, heatmap, beta, 0)
            cv2.imshow(network_name, blended)

        cv2.imshow("target", img_2_with_reticle)
        if event == cv2.EVENT_LBUTTONDOWN:
            utils.saveToYaml(self._res_uv, 'clicked_point.yaml')

    def run(self):
        self._get_new_images()
        cv2.namedWindow('target')
        cv2.setMouseCallback('source', self.find_best_match)

        self._get_new_images()

        while True:
            k = cv2.waitKey(20) & 0xFF
            if k == 27:
                break
            elif k == ord('n'):
                self._get_new_images()
            elif k == ord('s'):
                img1_pil = self.img1_pil
                img2_pil = self.img2_pil
                self.img1_pil = img2_pil
                self.img2_pil = img1_pil
                self._compute_descriptors()
        print("descriptor_filename", descriptor_filename)
        print("processing image %d of %d" % (counter, num_images))
        counter += 1


if __name__ == "__main__":
    dc_source_dir = utils.getDenseCorrespondenceSourceDir()
    config_filename = os.path.join(dc_source_dir, 'config',
                                   'dense_correspondence', 'evaluation',
                                   'lucas_evaluation.yaml')
    eval_config = utils.getDictFromYamlFilename(config_filename)
    default_config = utils.get_defaults_config()
    utils.set_cuda_visible_devices(default_config['cuda_visible_devices'])

    dce = DenseCorrespondenceEvaluation(eval_config)
    network_name = "caterpillar_M_background_0.500_3"
    dcn = dce.load_network_from_config(network_name)

    dataset_config_file = os.path.join(dc_source_dir, 'config',
                                       'dense_correspondence', 'dataset',
                                       'composite', 'caterpillar_only_9.yaml')
    dataset_config = utils.getDictFromYamlFilename(dataset_config_file)
    dataset = SpartanDataset(config=dataset_config)

    scene_name = SCENE_NAME
    save_dir = SAVE_DIR
    compute_descriptor_images_for_single_scene(dataset, scene_name, dcn,
                                               save_dir)

    print("finished cleanly")
Example no. 25
class HeatmapVisualization(object):
    """
    Launches a live interactive heatmap visualization.

    Edit config/dense_correspondence/heatmap_vis/heatmap.yaml to specify which networks
    to visualize. Specifically add the network you want to visualize to the "networks" list.
    Make sure that this network appears in the file pointed to by EVAL_CONFIG

    Usage: Launch this file with python after sourcing the environment with
    `use_pytorch_dense_correspondence`

    Then `python live_heatmap_visualization.py`.

    Keypresses:
        n: new set of images
        s: swap images
        p: pause/un-pause
    """
    def __init__(self, config, eval_config):
        self._config = config
        self._dce = DenseCorrespondenceEvaluation(eval_config)
        self._load_networks()
        self._reticle_color = COLOR_GREEN
        self._paused = False
        if LOAD_SPECIFIC_DATASET:
            # set LOAD_SPECIFIC_DATASET = True to load a specific dataset
            self.load_specific_dataset()

    def _load_networks(self):
        # we will use the dataset for the first network in the series
        self._dcn_dict = dict()

        self._dataset = None
        self._network_reticle_color = dict()

        for idx, network_name in enumerate(self._config["networks"]):
            dcn = self._dce.load_network_from_config(network_name)
            dcn.eval()
            self._dcn_dict[network_name] = dcn
            # self._network_reticle_color[network_name] = label_colors[idx]

            if len(self._config["networks"]) == 1:
                self._network_reticle_color[network_name] = COLOR_RED
            else:
                self._network_reticle_color[network_name] = label_colors[idx]
            if self._dataset is None:
                self._dataset = dcn.load_training_dataset()

    def load_specific_dataset(self):
        dataset_config_filename = os.path.join(
            utils.getDenseCorrespondenceSourceDir(), 'config',
            'dense_correspondence', 'dataset', 'composite',
            'hats_3_demo_composite.yaml')

        # dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
        #                                        'dense_correspondence',
        #                                        'dataset', 'composite', '4_shoes_all.yaml')

        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)

    def get_random_image_pair(self):
        """
        Gets a pair of random images for different scenes of the same object
        """
        object_id = self._dataset.get_random_object_id()
        # scene_name_a = "2018-04-10-16-02-59"
        # scene_name_b = scene_name_a

        scene_name_a = self._dataset.get_random_single_object_scene_name(
            object_id)
        scene_name_b = self._dataset.get_different_scene_for_object(
            object_id, scene_name_a)

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def get_random_image_pair_across_object(self):
        """
        Gets cross object image pairs
        :param randomize:
        :type randomize:
        :return:
        :rtype:
        """

        object_id_a, object_id_b = self._dataset.get_two_different_object_ids()
        # object_id_a = "shoe_red_nike.yaml"
        # object_id_b = "shoe_gray_nike"
        # object_id_b = "shoe_green_nike"
        scene_name_a = self._dataset.get_random_single_object_scene_name(
            object_id_a)
        scene_name_b = self._dataset.get_random_single_object_scene_name(
            object_id_b)

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def get_random_image_pair_multi_object_scenes(self):
        """
        Gets cross object image pairs
        :param randomize:
        :type randomize:
        :return:
        :rtype:
        """

        scene_name_a = self._dataset.get_random_multi_object_scene_name()
        scene_name_b = self._dataset.get_random_multi_object_scene_name()

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def _get_new_images(self):
        """
        Gets a new pair of images
        :return:
        :rtype:
        """

        if random.random() < 0.5:
            self._dataset.set_train_mode()
        else:
            self._dataset.set_test_mode()

        if self._config["same_object"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair(
            )
        elif self._config["different_objects"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_across_object(
            )
        elif self._config["multiple_object"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_multi_object_scenes(
            )
        else:
            raise ValueError(
                "At least one of the image types must be set tot True")

        # caterpillar
        # scene_name_1 = "2018-04-16-14-42-26"
        # scene_name_2 = "2018-04-16-14-25-19"

        # hats
        # scene_name_1 = "2018-05-15-22-01-44"
        # scene_name_2 = "2018-05-15-22-04-17"

        self.img1_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(
            scene_name_1, image_1_idx)
        self.img2_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(
            scene_name_2, image_2_idx)

        self._scene_name_1 = scene_name_1
        self._scene_name_2 = scene_name_2
        self._image_1_idx = image_1_idx
        self._image_2_idx = image_2_idx

        self._compute_descriptors()

        # self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(img1_pil)
        # self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(img2_pil)

    def _compute_descriptors(self):
        """
        Computes the descriptors for image 1 and image 2 for each network
        :return:
        :rtype:
        """
        self.img1 = pil_image_to_cv2(self.img1_pil)
        self.img2 = pil_image_to_cv2(self.img2_pil)
        self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(self.img1_pil)
        self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(self.img2_pil)
        self.img1_gray = cv2.cvtColor(self.img1, cv2.COLOR_RGB2GRAY) / 255.0
        self.img2_gray = cv2.cvtColor(self.img2, cv2.COLOR_RGB2GRAY) / 255.0

        cv2.imshow('source', self.img1)
        cv2.imshow('target', self.img2)

        self._res_a = dict()
        self._res_b = dict()
        for network_name, dcn in self._dcn_dict.items():
            self._res_a[network_name] = dcn.forward_single_image_tensor(
                self.rgb_1_tensor).data.cpu().numpy()
            self._res_b[network_name] = dcn.forward_single_image_tensor(
                self.rgb_2_tensor).data.cpu().numpy()

        self.find_best_match(None, 0, 0, None, None)

    def scale_norm_diffs_to_make_heatmap(self, norm_diffs, threshold):
        """
        TODO (@manuelli) scale with Gaussian kernel instead of linear

        Scales the norm diffs to make a heatmap. This will be scaled between 0 and 1.
        0 corresponds to a match, 1 to non-match

        :param norm_diffs: The norm diffs
        :type norm_diffs: numpy.array [H,W]
        :return:
        :rtype:
        """

        heatmap = np.copy(norm_diffs)
        greater_than_threshold = np.where(norm_diffs > threshold)
        # linearly scale [0, threshold] to [0, heatmap_vis_upper_bound]
        heatmap = heatmap / threshold * self._config["heatmap_vis_upper_bound"]
        heatmap[greater_than_threshold] = 1  # anything above the threshold saturates to 1
        heatmap = heatmap.astype(self.img1_gray.dtype)
        return heatmap
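
    # Sketch of the Gaussian-kernel scaling mentioned in the TODO above
    # (illustration only; the actual implementation used at visualization time
    # is vis_utils.compute_gaussian_kernel_heatmap_from_norm_diffs):
    #
    #   heatmap = np.exp(-np.square(norm_diffs) / kernel_variance)
    #
    # Note the flipped convention: this decays smoothly from 1 at a perfect
    # match toward 0, whereas the linear version above maps matches to 0.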

    def find_best_match(self, event, u, v, flags, param):
        """
        For each network, find the best match in the target image to point highlighted
        with reticle in the source image. Displays the result
        :return:
        :rtype:
        """

        if self._paused:
            return

        img_1_with_reticle = np.copy(self.img1)
        draw_reticle(img_1_with_reticle, u, v, self._reticle_color)
        cv2.imshow("source", img_1_with_reticle)

        alpha = self._config["blend_weight_original_image"]
        beta = 1 - alpha

        img_2_with_reticle = np.copy(self.img2)

        print("\n\n")

        self._res_uv = dict()

        # self._res_a_uv = dict()
        # self._res_b_uv = dict()

        for network_name in self._dcn_dict:
            res_a = self._res_a[network_name]
            res_b = self._res_b[network_name]
            best_match_uv, best_match_diff, norm_diffs = \
                DenseCorrespondenceNetwork.find_best_match((u, v), res_a, res_b)
            print("\n\n")
            print("network_name:", network_name)
            print("scene_name_1", self._scene_name_1)
            print("image_1_idx", self._image_1_idx)
            print("scene_name_2", self._scene_name_2)
            print("image_2_idx", self._image_2_idx)

            d = dict()
            d['scene_name'] = self._scene_name_1
            d['image_idx'] = self._image_1_idx
            d['descriptor'] = res_a[v, u, :].tolist()
            d['u'] = u
            d['v'] = v

            print("\n-------keypoint info\n", d)
            print("\n--------\n")

            self._res_uv[network_name] = dict()
            self._res_uv[network_name]['source'] = res_a[v, u, :].tolist()
            self._res_uv[network_name]['target'] = res_b[v, u, :].tolist()

            print("res_a[v, u, :]:", res_a[v, u, :])
            print("res_b[v, u, :]:", res_b[best_match_uv[1],
                                           best_match_uv[0], :])

            print("%s best match diff: %.3f" % (network_name, best_match_diff))
            print("res_a", self._res_uv[network_name]['source'])
            print("res_b", self._res_uv[network_name]['target'])

            threshold = self._config["norm_diff_threshold"]
            if network_name in self._config["norm_diff_threshold_dict"]:
                threshold = self._config["norm_diff_threshold_dict"][
                    network_name]

            heatmap_color = vis_utils.compute_gaussian_kernel_heatmap_from_norm_diffs(
                norm_diffs, self._config['kernel_variance'])

            reticle_color = self._network_reticle_color[network_name]

            draw_reticle(heatmap_color, best_match_uv[0], best_match_uv[1],
                         reticle_color)
            draw_reticle(img_2_with_reticle, best_match_uv[0],
                         best_match_uv[1], reticle_color)
            blended = cv2.addWeighted(self.img2, alpha, heatmap_color, beta, 0)
            cv2.imshow(network_name, blended)

        cv2.imshow("target", img_2_with_reticle)
        if event == cv2.EVENT_LBUTTONDOWN:
            utils.saveToYaml(self._res_uv, 'clicked_point.yaml')

    def run(self):
        self._get_new_images()
        cv2.namedWindow('target')
        cv2.setMouseCallback('source', self.find_best_match)

        self._get_new_images()

        while True:
            k = cv2.waitKey(20) & 0xFF
            if k == 27:
                break
            elif k == ord('n'):
                self._get_new_images()
            elif k == ord('s'):
                img1_pil = self.img1_pil
                img2_pil = self.img2_pil
                self.img1_pil = img2_pil
                self.img2_pil = img1_pil
                self._compute_descriptors()
            elif k == ord('p'):
                if self._paused:
                    print("unpausing")
                    self._paused = False
                else:
                    print("pausing")
                    self._paused = True
Example no. 27
    def run(self, loss_current_iteration=0, use_pretrained=False):
        """
        Runs the training
        :return:
        :rtype:
        """

        start_iteration = copy.copy(loss_current_iteration)

        DCE = DenseCorrespondenceEvaluation

        self.setup()
        self.save_configs()

        if not use_pretrained:
            # create new network and optimizer
            self._dcn = self.build_network()
            self._optimizer = self._construct_optimizer(self._dcn.parameters())
        else:
            logging.info("using pretrained model")
            if (self._dcn is None):
                raise ValueError("you must set self._dcn if use_pretrained=True")
            if (self._optimizer is None):
                raise ValueError("you must set self._optimizer if use_pretrained=True")

        # make sure network is using cuda and is in train mode
        dcn = self._dcn
        dcn.cuda()
        dcn.train()

        optimizer = self._optimizer
        batch_size = self._data_loader.batch_size

        pixelwise_contrastive_loss = PixelwiseContrastiveLoss(image_shape=dcn.image_shape, config=self._config['loss_function'])
        pixelwise_contrastive_loss.debug = True

        # Repeat M for background and masked
        pixelwise_contrastive_loss._config['M_background'] = pixelwise_contrastive_loss._config['M_descriptor']
        pixelwise_contrastive_loss._config['M_masked'] = pixelwise_contrastive_loss._config['M_descriptor']

        loss = match_loss = non_match_loss = 0

        num_epochs = self._config['training']['num_epochs']
        logging_rate = self._config['training']['logging_rate']
        save_rate = self._config['training']['save_rate']
        compute_test_loss_rate = self._config['training']['compute_test_loss_rate']

        # logging
        self._logging_dict = dict()
        self._logging_dict['train'] = {"iteration": [], "loss": [], "match_loss": [],
                                           "masked_non_match_loss": [], 
                                           "background_non_match_loss": [],
                                           "blind_non_match_loss": [],
                                           "learning_rate": [],
                                           "different_object_non_match_loss": []}

        self._logging_dict['test'] = {"iteration": [], "loss": [], "match_loss": [],
                                           "non_match_loss": []}

        # save network before starting
        if not use_pretrained:
            self.save_network(dcn, optimizer, 0)

        t_start = time.time()
        loss_vec = []
        match_loss_vec = []
        non_match_loss_vec = []
        for epoch in range(num_epochs):  # loop over the dataset multiple times
            for i, data in enumerate(self._data_loader, 0):
                loss_current_iteration += 1
                start_iter = time.time()

                match_type, img_a, img_b, matches_a, matches_b, non_matches_a, non_matches_b = data
                
                img_a = Variable(img_a.cuda(), requires_grad=False)
                img_b = Variable(img_b.cuda(), requires_grad=False)

                # Note: the same non-matches are reused for both the masked and
                # background terms, and blind non-matches are faked with an empty
                # tensor, for compatibility with the loss computation
                matches_a = Variable(matches_a.cuda().squeeze(0), requires_grad=False)
                matches_b = Variable(matches_b.cuda().squeeze(0), requires_grad=False)
                non_matches_a = Variable(non_matches_a.cuda().squeeze(0), requires_grad=False)
                non_matches_b = Variable(non_matches_b.cuda().squeeze(0), requires_grad=False)
                blind_non_matches_a = Variable(SpartanDataset.empty_tensor().cuda().squeeze(0), requires_grad=False)
                blind_non_matches_b = Variable(SpartanDataset.empty_tensor().cuda().squeeze(0), requires_grad=False)

                optimizer.zero_grad()
                self.adjust_learning_rate(optimizer, loss_current_iteration)

                # run both images through the network
                image_a_pred = dcn.forward(img_a)
                image_a_pred = dcn.process_network_output(image_a_pred, batch_size)

                image_b_pred = dcn.forward(img_b)
                image_b_pred = dcn.process_network_output(image_b_pred, batch_size)

                # get loss.
                loss, match_loss, non_match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss \
                    = loss_composer.get_loss(pixelwise_contrastive_loss, match_type,
                                            image_a_pred, image_b_pred,
                                            matches_a,     matches_b,
                                            non_matches_a, non_matches_b,
                                            non_matches_a, non_matches_b,
                                            blind_non_matches_a, blind_non_matches_b)
                

                loss.backward()
                optimizer.step()
                elapsed = time.time() - start_iter

                # print "single iteration took %.3f seconds" %(elapsed)

                if loss_current_iteration % save_rate == 0:
                    self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict)

                sys.stdout.write('Epoch %d/%d, Image %d/%d, total_itr: %d, loss: %.4f, match_loss: %.4f, non_match_loss: %.4f, total_time: %s \r' % \
                    (epoch+1, num_epochs, i+1, len(self._dataset), loss_current_iteration, loss.item(), match_loss.item(), non_match_loss.item(), str(timedelta(seconds=time.time()-t_start))[:-4]))
                sys.stdout.flush()

                loss_vec.append(loss.item())
                match_loss_vec.append(match_loss.item())
                non_match_loss_vec.append(non_match_loss.item())

                if self._config["training"]["compute_test_loss"] and (loss_current_iteration % compute_test_loss_rate == 0):
                    print()
                    # logging.info("Computing test loss")

                    # delete the loss, match_loss, non_match_loss variables so that
                    # pytorch can use that GPU memory
                    del loss, match_loss, non_match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss
                    gc.collect()

                    print('\tTraining average: loss: %.4f, match_loss: %.4f, non_match_loss: %.4f' %
                          (np.mean(loss_vec), np.mean(match_loss_vec), np.mean(non_match_loss_vec)))
                    loss_vec = []
                    match_loss_vec = []
                    non_match_loss_vec = []
                    
                    dcn.eval()
                    test_loss, test_match_loss, test_non_match_loss = DCE.compute_loss_on_salad_dataset(
                        dcn, self._data_loader_test, self._config['loss_function'],
                        num_iterations=self._config['training']['test_loss_num_iterations'])

                    print('\tTesting results: loss: %.4f, match_loss: %.4f, non_match_loss: %.4f' %
                          (test_loss, test_match_loss, test_non_match_loss))

                    # delete these variables so we can free GPU memory
                    del test_loss, test_match_loss, test_non_match_loss

                    # make sure to set the network back to train mode
                    dcn.train()

                if loss_current_iteration % self._config['training']['garbage_collect_rate'] == 0:
                    logging.debug("running garbage collection")
                    gc_start = time.time()
                    gc.collect()
                    gc_elapsed = time.time() - gc_start
                    logging.debug("garbage collection took %.2d seconds" %(gc_elapsed))

        logging.info("Finished training.")
        self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict)
        return


class DenseCorrespondenceTraining(object):
    def __init__(self, config=None, dataset=None, dataset_test=None):
        if config is None:
            config = DenseCorrespondenceTraining.load_default_config()

        self._config = config
        self._dataset = dataset
        self._dataset_test = dataset_test

        self._dcn = None
        self._optimizer = None

    def setup(self):
        """
        Initializes the object
        :return:
        :rtype:
        """
        self.load_dataset()
        self.setup_logging_dir()
        self.setup_tensorboard()

    @property
    def dataset(self):
        return self._dataset

    @dataset.setter
    def dataset(self, value):
        self._dataset = value

    def load_dataset(self):
        """
        Loads a dataset, construct a trainloader.
        Additionally creates a dataset and DataLoader for the test data
        :return:
        :rtype:
        """

        batch_size = self._config['training']['batch_size']
        num_workers = self._config['training']['num_workers']

        if self._dataset is None:
            self._dataset = SpartanDataset.make_default_10_scenes_drill()

        self._dataset.load_all_pose_data()
        self._dataset.set_parameters_from_training_config(self._config)

        self._data_loader = torch.utils.data.DataLoader(
            self._dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            drop_last=True)

        # create a test dataset
        if self._config["training"]["compute_test_loss"]:
            if self._dataset_test is None:
                self._dataset_test = SpartanDataset(
                    mode="test", config=self._dataset.config)

            self._dataset_test.load_all_pose_data()
            self._dataset_test.set_parameters_from_training_config(
                self._config)

            self._data_loader_test = torch.utils.data.DataLoader(
                self._dataset_test,
                batch_size=batch_size,
                shuffle=True,
                num_workers=2,
                drop_last=True)

    def load_dataset_from_config(self, config):
        """
        Loads train and test datasets from the given config
        :param config: Dict gotten from a YAML file
        :type config:
        :return: None
        :rtype:
        """
        self._dataset = SpartanDataset(mode="train", config=config)
        self._dataset_test = SpartanDataset(mode="test", config=config)
        self.load_dataset()

    def build_network(self):
        """
        Builds the DenseCorrespondenceNetwork
        :return:
        :rtype: DenseCorrespondenceNetwork
        """

        return DenseCorrespondenceNetwork.from_config(
            self._config['dense_correspondence_network'],
            load_stored_params=False)

    def _construct_optimizer(self, parameters):
        """
        Constructs the optimizer
        :param parameters: Parameters to adjust in the optimizer
        :type parameters:
        :return: Adam Optimizer with params from the config
        :rtype: torch.optim
        """

        learning_rate = float(self._config['training']['learning_rate'])
        weight_decay = float(self._config['training']['weight_decay'])
        optimizer = optim.Adam(parameters,
                               lr=learning_rate,
                               weight_decay=weight_decay)
        return optimizer

    def _get_current_loss(self, logging_dict):
        """
        Gets the current loss for both test and train
        :return:
        :rtype: dict
        """
        d = dict()
        d['train'] = dict()
        d['test'] = dict()

        for key, val in d.items():
            for field in list(logging_dict[key].keys()):
                vec = logging_dict[key][field]

                if len(vec) > 0:
                    val[field] = vec[-1]
                else:
                    val[field] = -1  # placeholder

        return d

    def load_pretrained(self, model_folder, iteration=None):
        """
        Loads network and optimizer parameters from a previous training run.

        Note: It is up to the user to ensure that the model parameters match.
        e.g. width, height, descriptor dimension etc.

        :param model_folder: location of the folder containing the param files 001000.pth. Can be absolute or relative path. If relative then it is relative to pdc/trained_models/
        :type model_folder:
        :param iteration: which index to use, e.g. 3500, if None it loads the latest one
        :type iteration:
        :return: iteration
        :rtype:
        """

        if not os.path.isdir(model_folder):
            pdc_path = utils.getPdcPath()
            model_folder = os.path.join(pdc_path, "trained_models",
                                        model_folder)

        # find idx.pth and idx.pth.opt files
        if iteration is None:
            files = os.listdir(model_folder)
            model_param_file = sorted(fnmatch.filter(files, '*.pth'))[-1]
            iteration = int(model_param_file.split(".")[0])
            optim_param_file = sorted(fnmatch.filter(files, '*.pth.opt'))[-1]
        else:
            prefix = utils.getPaddedString(iteration, width=6)
            model_param_file = prefix + ".pth"
            optim_param_file = prefix + ".pth.opt"

        print("model_param_file", model_param_file)
        model_param_file = os.path.join(model_folder, model_param_file)
        optim_param_file = os.path.join(model_folder, optim_param_file)

        self._dcn = self.build_network()
        self._dcn.load_state_dict(torch.load(model_param_file))
        self._dcn.cuda()
        self._dcn.train()

        self._optimizer = self._construct_optimizer(self._dcn.parameters())
        self._optimizer.load_state_dict(torch.load(optim_param_file))

        return iteration
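
    # Hedged usage sketch (not from the original source); "caterpillar_3" is a
    # hypothetical folder name under pdc/trained_models/:
    #
    #   trainer = DenseCorrespondenceTraining(config=config, dataset=dataset)
    #   iteration = trainer.load_pretrained("caterpillar_3")
    #   trainer.run(loss_current_iteration=iteration, use_pretrained=True)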

    def run_from_pretrained(self,
                            model_folder,
                            iteration=None,
                            learning_rate=None):
        """
        Wrapper for load_pretrained(), then run()
        """
        iteration = self.load_pretrained(model_folder, iteration)
        if iteration is None:
            iteration = 0

        if learning_rate is not None:
            self._config["training"][
                "learning_rate_starting_from_pretrained"] = learning_rate
            self.set_learning_rate(self._optimizer, learning_rate)

        self.run(loss_current_iteration=iteration, use_pretrained=True)

    def run(self, loss_current_iteration=0, use_pretrained=False):
        """
        Runs the training
        :return:
        :rtype:
        """

        start_iteration = copy.copy(loss_current_iteration)

        DCE = DenseCorrespondenceEvaluation

        self.setup()
        self.save_configs()

        if not use_pretrained:
            # create new network and optimizer
            self._dcn = self.build_network()
            self._optimizer = self._construct_optimizer(self._dcn.parameters())
        else:
            logging.info("using pretrained model")
            if (self._dcn is None):
                raise ValueError(
                    "you must set self._dcn if use_pretrained=True")
            if (self._optimizer is None):
                raise ValueError(
                    "you must set self._optimizer if use_pretrained=True")

        # make sure network is using cuda and is in train mode
        dcn = self._dcn
        dcn.cuda()
        dcn.train()

        optimizer = self._optimizer
        batch_size = self._data_loader.batch_size

        pixelwise_contrastive_loss = PixelwiseContrastiveLoss(
            image_shape=dcn.image_shape, config=self._config['loss_function'])
        pixelwise_contrastive_loss.debug = True

        loss = match_loss = non_match_loss = 0

        max_num_iterations = self._config['training'][
            'num_iterations'] + start_iteration
        logging_rate = self._config['training']['logging_rate']
        save_rate = self._config['training']['save_rate']
        compute_test_loss_rate = self._config['training'][
            'compute_test_loss_rate']

        # logging
        self._logging_dict = dict()
        self._logging_dict['train'] = {
            "iteration": [],
            "loss": [],
            "match_loss": [],
            "masked_non_match_loss": [],
            "background_non_match_loss": [],
            "blind_non_match_loss": [],
            "learning_rate": [],
            "different_object_non_match_loss": []
        }

        self._logging_dict['test'] = {
            "iteration": [],
            "loss": [],
            "match_loss": [],
            "non_match_loss": []
        }

        # save network before starting
        if not use_pretrained:
            self.save_network(dcn, optimizer, 0)

        # from training_progress_visualizer import TrainingProgressVisualizer
        # TPV = TrainingProgressVisualizer()

        for epoch in range(50):  # loop over the dataset multiple times

            for i, data in enumerate(self._data_loader, 0):
                loss_current_iteration += 1
                start_iter = time.time()

                match_type, \
                img_a, img_b, \
                matches_a, matches_b, \
                masked_non_matches_a, masked_non_matches_b, \
                background_non_matches_a, background_non_matches_b, \
                blind_non_matches_a, blind_non_matches_b, \
                metadata = data

                if (match_type == -1).all():
                    print("\n empty data, continuing \n")
                    continue

                data_type = metadata["type"][0]

                img_a = Variable(img_a.cuda(), requires_grad=False)
                img_b = Variable(img_b.cuda(), requires_grad=False)

                matches_a = Variable(matches_a.cuda().squeeze(0),
                                     requires_grad=False)
                matches_b = Variable(matches_b.cuda().squeeze(0),
                                     requires_grad=False)
                masked_non_matches_a = Variable(
                    masked_non_matches_a.cuda().squeeze(0),
                    requires_grad=False)
                masked_non_matches_b = Variable(
                    masked_non_matches_b.cuda().squeeze(0),
                    requires_grad=False)

                background_non_matches_a = Variable(
                    background_non_matches_a.cuda().squeeze(0),
                    requires_grad=False)
                background_non_matches_b = Variable(
                    background_non_matches_b.cuda().squeeze(0),
                    requires_grad=False)

                blind_non_matches_a = Variable(
                    blind_non_matches_a.cuda().squeeze(0), requires_grad=False)
                blind_non_matches_b = Variable(
                    blind_non_matches_b.cuda().squeeze(0), requires_grad=False)

                optimizer.zero_grad()
                self.adjust_learning_rate(optimizer, loss_current_iteration)

                # run both images through the network
                image_a_pred = dcn.forward(img_a)
                image_a_pred = dcn.process_network_output(
                    image_a_pred, batch_size)

                image_b_pred = dcn.forward(img_b)
                image_b_pred = dcn.process_network_output(
                    image_b_pred, batch_size)

                # get loss
                loss, match_loss, masked_non_match_loss, \
                    background_non_match_loss, blind_non_match_loss = \
                    loss_composer.get_loss(
                        pixelwise_contrastive_loss, match_type,
                        image_a_pred, image_b_pred,
                        matches_a, matches_b,
                        masked_non_matches_a, masked_non_matches_b,
                        background_non_matches_a, background_non_matches_b,
                        blind_non_matches_a, blind_non_matches_b)

                loss.backward()
                optimizer.step()

                #if i % 10 == 0:
                # TPV.update(self._dataset, dcn, loss_current_iteration, now_training_object_id=metadata["object_id"])

                elapsed = time.time() - start_iter

                def update_plots(loss, match_loss, masked_non_match_loss,
                                 background_non_match_loss,
                                 blind_non_match_loss):
                    """
                    Updates the tensorboard plots with current loss function information
                    :return:
                    :rtype:
                    """

                    learning_rate = DenseCorrespondenceTraining.get_learning_rate(
                        optimizer)
                    self._logging_dict['train']['learning_rate'].append(
                        learning_rate)
                    self._tensorboard_logger.log_value("learning rate",
                                                       learning_rate,
                                                       loss_current_iteration)

                    # Don't update any plots if the entry corresponding to that term
                    # is a zero loss
                    if not loss_composer.is_zero_loss(match_loss):
                        self._logging_dict['train']['match_loss'].append(
                            match_loss.item())
                        self._tensorboard_logger.log_value(
                            "train match loss", match_loss.item(),
                            loss_current_iteration)

                    if not loss_composer.is_zero_loss(masked_non_match_loss):
                        self._logging_dict['train'][
                            'masked_non_match_loss'].append(
                                masked_non_match_loss.item())

                        self._tensorboard_logger.log_value(
                            "train masked non match loss",
                            masked_non_match_loss.item(),
                            loss_current_iteration)

                    if not loss_composer.is_zero_loss(
                            background_non_match_loss):
                        self._logging_dict['train'][
                            'background_non_match_loss'].append(
                                background_non_match_loss.item())
                        self._tensorboard_logger.log_value(
                            "train background non match loss",
                            background_non_match_loss.item(),
                            loss_current_iteration)

                    if not loss_composer.is_zero_loss(blind_non_match_loss):

                        if data_type == SpartanDatasetDataType.SINGLE_OBJECT_WITHIN_SCENE:
                            self._tensorboard_logger.log_value(
                                "train blind SINGLE_OBJECT_WITHIN_SCENE",
                                blind_non_match_loss.item(),
                                loss_current_iteration)

                        if data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                            self._tensorboard_logger.log_value(
                                "train blind DIFFERENT_OBJECT",
                                blind_non_match_loss.item(),
                                loss_current_iteration)

                    # loss is never zero
                    if data_type == SpartanDatasetDataType.SINGLE_OBJECT_WITHIN_SCENE:
                        self._tensorboard_logger.log_value(
                            "train loss SINGLE_OBJECT_WITHIN_SCENE",
                            loss.item(), loss_current_iteration)

                    elif data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                        self._tensorboard_logger.log_value(
                            "train loss DIFFERENT_OBJECT", loss.item(),
                            loss_current_iteration)

                    elif data_type == SpartanDatasetDataType.SINGLE_OBJECT_ACROSS_SCENE:
                        self._tensorboard_logger.log_value(
                            "train loss SINGLE_OBJECT_ACROSS_SCENE",
                            loss.item(), loss_current_iteration)

                    elif data_type == SpartanDatasetDataType.MULTI_OBJECT:
                        self._tensorboard_logger.log_value(
                            "train loss MULTI_OBJECT", loss.item(),
                            loss_current_iteration)

                    elif data_type == SpartanDatasetDataType.SYNTHETIC_MULTI_OBJECT:
                        self._tensorboard_logger.log_value(
                            "train loss SYNTHETIC_MULTI_OBJECT", loss.item(),
                            loss_current_iteration)
                    else:
                        raise ValueError("unknown data type")

                    if data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                        self._tensorboard_logger.log_value(
                            "train different object", loss.item(),
                            loss_current_iteration)

                update_plots(loss, match_loss, masked_non_match_loss,
                             background_non_match_loss, blind_non_match_loss)

                if loss_current_iteration % save_rate == 0:
                    self.save_network(dcn,
                                      optimizer,
                                      loss_current_iteration,
                                      logging_dict=self._logging_dict)

                if loss_current_iteration % logging_rate == 0:
                    logging.info("Training on iteration %d of %d" %
                                 (loss_current_iteration, max_num_iterations))

                    logging.info("single iteration took %.3f seconds" %
                                 (elapsed))

                    percent_complete = loss_current_iteration * 100.0 / (
                        max_num_iterations - start_iteration)
                    logging.info("Training is %d percent complete\n" %
                                 (percent_complete))

                # don't compute the test loss on the first few times through the loop
                if self._config["training"]["compute_test_loss"] and (
                        loss_current_iteration % compute_test_loss_rate
                        == 0) and loss_current_iteration > 5:
                    logging.info("Computing test loss")

                    # delete the loss, match_loss, non_match_loss variables so that
                    # pytorch can use that GPU memory
                    del loss, match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss
                    gc.collect()

                    dcn.eval()
                    test_loss, test_match_loss, test_non_match_loss = DCE.compute_loss_on_dataset(
                        dcn,
                        self._data_loader_test,
                        self._config['loss_function'],
                        num_iterations=self._config['training']
                        ['test_loss_num_iterations'])

                    # delete these variables so we can free GPU memory
                    del test_loss, test_match_loss, test_non_match_loss

                    # make sure to set the network back to train mode
                    dcn.train()

                if loss_current_iteration % self._config['training'][
                        'garbage_collect_rate'] == 0:
                    logging.debug("running garbage collection")
                    gc_start = time.time()
                    gc.collect()
                    gc_elapsed = time.time() - gc_start
                    logging.debug("garbage collection took %.2d seconds" %
                                  (gc_elapsed))

                if loss_current_iteration > max_num_iterations:
                    logging.info("Finished testing after %d iterations" %
                                 (max_num_iterations))
                    self.save_network(dcn,
                                      optimizer,
                                      loss_current_iteration,
                                      logging_dict=self._logging_dict)
                    return

    def setup_logging_dir(self):
        """
        Sets up the directory where logs will be stored and config
        files written
        :return: full path of logging dir
        :rtype: str
        """

        if 'logging_dir_name' in self._config['training']:
            dir_name = self._config['training']['logging_dir_name']
        else:
            dir_name = utils.get_current_time_unique_name() + "_" + str(
                self._config['dense_correspondence_network']
                ['descriptor_dimension']) + "d"

        self._logging_dir_name = dir_name

        self._logging_dir = os.path.join(
            utils.convert_data_relative_path_to_absolute_path(
                self._config['training']['logging_dir']), dir_name)

        print("logging_dir:", self._logging_dir)

        if os.path.isdir(self._logging_dir):
            shutil.rmtree(self._logging_dir)

        if not os.path.isdir(self._logging_dir):
            os.makedirs(self._logging_dir)

        # make the tensorboard log directory
        self._tensorboard_log_dir = os.path.join(self._logging_dir,
                                                 "tensorboard")
        if not os.path.isdir(self._tensorboard_log_dir):
            os.makedirs(self._tensorboard_log_dir)

        return self._logging_dir

    @property
    def logging_dir(self):
        """
        The directory where logs are stored and config
        files are written
        :return: full path of logging dir
        :rtype: str
        """
        return self._logging_dir

    def save_network(self, dcn, optimizer, iteration, logging_dict=None):
        """
        Saves network parameters to logging directory
        :return:
        :rtype: None
        """

        network_param_file = os.path.join(
            self._logging_dir,
            utils.getPaddedString(iteration, width=6) + ".pth")
        optimizer_param_file = network_param_file + ".opt"
        torch.save(dcn.state_dict(), network_param_file)
        torch.save(optimizer.state_dict(), optimizer_param_file)

        # also save loss history stuff
        if logging_dict is not None:
            log_history_file = os.path.join(
                self._logging_dir,
                utils.getPaddedString(iteration, width=6) +
                "_log_history.yaml")
            utils.saveToYaml(logging_dict, log_history_file)

            current_loss_file = os.path.join(self._logging_dir, 'loss.yaml')
            current_loss_data = self._get_current_loss(logging_dict)

            utils.saveToYaml(current_loss_data, current_loss_file)
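
        # A hedged restore sketch (mirrors the save above; not part of the
        # original method):
        #     dcn.load_state_dict(torch.load(network_param_file))
        #     optimizer.load_state_dict(torch.load(optimizer_param_file))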

    def save_configs(self):
        """
        Saves config files to the logging directory
        :return:
        :rtype: None
        """
        training_params_file = os.path.join(self._logging_dir, 'training.yaml')
        utils.saveToYaml(self._config, training_params_file)

        dataset_params_file = os.path.join(self._logging_dir, 'dataset.yaml')
        utils.saveToYaml(self._dataset.config, dataset_params_file)

        # make unique identifier
        identifier_file = os.path.join(self._logging_dir, 'identifier.yaml')
        identifier_dict = dict()
        identifier_dict['id'] = utils.get_unique_string()
        utils.saveToYaml(identifier_dict, identifier_file)

    def adjust_learning_rate(self, optimizer, iteration):
        """
        Adjusts the learning rate according to the schedule
        :param optimizer:
        :type optimizer:
        :param iteration:
        :type iteration:
        :return:
        :rtype:
        """

        steps_between_learning_rate_decay = self._config['training'][
            'steps_between_learning_rate_decay']
        if iteration % steps_between_learning_rate_decay == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] * self._config[
                    "training"]["learning_rate_decay"]

    @staticmethod
    def set_learning_rate(optimizer, learning_rate):
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    @staticmethod
    def get_learning_rate(optimizer):
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
            break

        return lr

    def setup_tensorboard(self):
        """
        Starts the tensorboard server and sets up the plotting
        :return:
        :rtype:
        """

        # start tensorboard
        # cmd = "python -m tensorboard.main"
        logging.info("setting up tensorboard_logger")
        cmd = "tensorboard --logdir=%s" % (self._tensorboard_log_dir)
        self._tensorboard_logger = tensorboard_logger.Logger(
            self._tensorboard_log_dir)
        logging.info("tensorboard logger started")

    @staticmethod
    def load_default_config():
        dc_source_dir = utils.getDenseCorrespondenceSourceDir()
        config_file = os.path.join(dc_source_dir, 'config',
                                   'dense_correspondence', 'training',
                                   'training.yaml')

        config = utils.getDictFromYamlFilename(config_file)
        return config

    @staticmethod
    def make_default():
        dataset = SpartanDataset.make_default_caterpillar()
        return DenseCorrespondenceTraining(dataset=dataset)
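
# A hedged usage sketch (assumed workflow; `run` is the training entry point
# defined elsewhere in this class and not shown in this excerpt):
#
#     trainer = DenseCorrespondenceTraining.make_default()
#     trainer.run()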
Example #29
def get_within_scene_loss(pixelwise_contrastive_loss, image_a_pred,
                          image_b_pred, matches_a, matches_b,
                          masked_non_matches_a, masked_non_matches_b,
                          background_non_matches_a, background_non_matches_b,
                          blind_non_matches_a, blind_non_matches_b):
    """
    Simple wrapper for pixelwise_contrastive_loss functions.  Args and return args documented above in get_loss()
    """
    pcl = pixelwise_contrastive_loss

    match_loss, masked_non_match_loss, num_masked_hard_negatives = \
        pixelwise_contrastive_loss.get_loss_matched_and_non_matched_with_l2(
            image_a_pred, image_b_pred,
            matches_a, matches_b,
            masked_non_matches_a, masked_non_matches_b,
            M_descriptor=pcl._config["M_masked"])

    if pcl._config["use_l2_pixel_loss_on_background_non_matches"]:
        background_non_match_loss, num_background_hard_negatives = \
            pixelwise_contrastive_loss.non_match_loss_with_l2_pixel_norm(
                image_a_pred, image_b_pred, matches_b,
                background_non_matches_a, background_non_matches_b,
                M_descriptor=pcl._config["M_background"])
    else:
        background_non_match_loss, num_background_hard_negatives = \
            pixelwise_contrastive_loss.non_match_loss_descriptor_only(
                image_a_pred, image_b_pred,
                background_non_matches_a, background_non_matches_b,
                M_descriptor=pcl._config["M_background"])

    blind_non_match_loss = zero_loss()
    num_blind_hard_negatives = 1
    if not SpartanDataset.is_empty(blind_non_matches_a.data):
        blind_non_match_loss, num_blind_hard_negatives = \
            pixelwise_contrastive_loss.non_match_loss_descriptor_only(
                image_a_pred, image_b_pred,
                blind_non_matches_a, blind_non_matches_b,
                M_descriptor=pcl._config["M_masked"])

    total_num_hard_negatives = num_masked_hard_negatives + num_background_hard_negatives
    total_num_hard_negatives = max(total_num_hard_negatives, 1)

    if pcl._config["scale_by_hard_negatives"]:
        scale_factor = total_num_hard_negatives

        masked_non_match_loss_scaled = masked_non_match_loss * 1.0 / max(
            num_masked_hard_negatives, 1)

        background_non_match_loss_scaled = background_non_match_loss * 1.0 / max(
            num_background_hard_negatives, 1)

        blind_non_match_loss_scaled = blind_non_match_loss * 1.0 / max(
            num_blind_hard_negatives, 1)
    else:
        # we are not currently using blind non-matches
        num_masked_non_matches = max(len(masked_non_matches_a), 1)
        num_background_non_matches = max(len(background_non_matches_a), 1)
        num_blind_non_matches = max(len(blind_non_matches_a), 1)
        scale_factor = num_masked_non_matches + num_background_non_matches

        masked_non_match_loss_scaled = masked_non_match_loss * 1.0 / num_masked_non_matches

        background_non_match_loss_scaled = background_non_match_loss * 1.0 / num_background_non_matches

        blind_non_match_loss_scaled = blind_non_match_loss * 1.0 / num_blind_non_matches

    non_match_loss = 1.0 / scale_factor * (masked_non_match_loss +
                                           background_non_match_loss)

    loss = pcl._config["match_loss_weight"] * match_loss + \
        pcl._config["non_match_loss_weight"] * non_match_loss

    return loss, match_loss, masked_non_match_loss_scaled, background_non_match_loss_scaled, blind_non_match_loss_scaled
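
# A minimal numeric sketch (illustrative values only, not from the original
# code) of the hard-negative scaling above: each non-match term is averaged
# over its own hard-negative count, while the combined non_match_loss is
# averaged over the masked + background total.
def _demo_hard_negative_scaling():
    masked_loss, background_loss = 12.0, 6.0      # summed per-pair losses
    num_masked_hard, num_background_hard = 4, 2   # hard-negative counts
    scale_factor = max(num_masked_hard + num_background_hard, 1)
    masked_scaled = masked_loss / max(num_masked_hard, 1)              # 3.0
    background_scaled = background_loss / max(num_background_hard, 1)  # 3.0
    non_match_loss = (masked_loss + background_loss) / scale_factor    # 3.0
    return masked_scaled, background_scaled, non_match_loss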
Example #30
    parser.add_argument("--data_name", type=str, default="caterpillar_upright.yaml")
    parser.add_argument("--run_prefix", type=str, default="caterpillar")
    parser.add_argument("--training_yaml", type=str, default="training.yaml")

    args = parser.parse_args()


    config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence', 
                                   'dataset', 'composite', args.data_name)
    config = utils.getDictFromYamlFilename(config_filename)

    train_config_file = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence', 
                                   'training', args.training_yaml)

    train_config = utils.getDictFromYamlFilename(train_config_file)
    dataset = SpartanDataset(config=config)
    
    dataset_test = None
    if train_config["training"]["compute_test_loss"]:
        dataset_test = SpartanDataset(mode="test", config=config)

    logging_dir = "trained_models/tutorials"
    #num_iterations = 3500
    d = 3  # the descriptor dimension
    name = f"{args.run_prefix}_{d}"
    train_config["training"]["logging_dir_name"] = name
    train_config["training"]["logging_dir"] = logging_dir
    train_config["dense_correspondence_network"]["descriptor_dimension"] = d
    #train_config["training"]["num_iterations"] = num_iterations
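
    # A hedged completion (assumed API; the excerpt is truncated here): with
    # the config fields set above, training is typically launched as
    #
    #     train = DenseCorrespondenceTraining(dataset=dataset,
    #                                         dataset_test=dataset_test,
    #                                         config=train_config)
    #     train.run()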