def load_dataset(self):
    """
    Loads a dataset and constructs a DataLoader for training.
    Additionally creates a dataset and DataLoader for the test data
    :return:
    :rtype:
    """
    batch_size = self._config['training']['batch_size']
    num_workers = self._config['training']['num_workers']

    if self._dataset is None:
        self._dataset = SpartanDataset.make_default_10_scenes_drill()

    # self._dataset.load_all_pose_data()
    self._dataset.load_all_knots_info()
    self._dataset.set_parameters_from_training_config(self._config)

    self._data_loader = torch.utils.data.DataLoader(self._dataset, batch_size=batch_size,
                                                    shuffle=True, num_workers=num_workers,
                                                    drop_last=True)

    # create a test dataset
    if self._config["training"]["compute_test_loss"]:
        if self._dataset_test is None:
            self._dataset_test = SpartanDataset(mode="test", config=self._dataset.config)

        self._dataset_test.load_all_pose_data()
        self._dataset_test.set_parameters_from_training_config(self._config)

        self._data_loader_test = torch.utils.data.DataLoader(self._dataset_test, batch_size=batch_size,
                                                             shuffle=True, num_workers=2,
                                                             drop_last=True)
def load_specific_dataset(self):
    dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                           'dense_correspondence', 'dataset', 'composite',
                                           'rope_nonrigid_412vert_only.yaml')
    dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
    self._dataset = SpartanDataset(config=dataset_config)
def load_specific_dataset(self):
    dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                           'dense_correspondence', 'dataset', 'composite',
                                           'caterpillar_only_9.yaml')
    # dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
    #                                        'dense_correspondence', 'dataset', 'composite',
    #                                        '4_shoes_all.yaml')
    dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
    self._dataset = SpartanDataset(config=dataset_config)
def pdc_train(dataset_config, train_config, dataset_name, logging_dir, num_iterations, dimension):
    print('dataset_name')
    print(dataset_name)
    dataset = SpartanDataset(config=dataset_config)

    d = dimension  # the descriptor dimension
    name = dataset_name.split('/')[-1] + "_%d" % (d)
    train_config["training"]["logging_dir_name"] = name
    print('logging dir name')
    print(name)
    train_config["training"]["logging_dir"] = logging_dir
    train_config["dense_correspondence_network"]["descriptor_dimension"] = d
    train_config["training"]["num_iterations"] = num_iterations

    print "training descriptor of dimension %d" % (d)
    start_time = time.time()
    train = DenseCorrespondenceTraining(dataset=dataset, config=train_config)
    train.run()
    end_time = time.time()
    print "finished training descriptor of dimension %d using time %.2f seconds" % (d, end_time - start_time)
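# A minimal sketch of how pdc_train might be invoked, following the config-loading
# pattern used elsewhere in these snippets. The dataset config name and logging
# directory are illustrative assumptions, not fixed by the codebase.
import os
import dense_correspondence_manipulation.utils.utils as utils

dataset_name = 'caterpillar_only_9.yaml'  # assumed composite dataset config
dataset_config = utils.getDictFromYamlFilename(
    os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                 'dense_correspondence', 'dataset', 'composite', dataset_name))
train_config = utils.getDictFromYamlFilename(
    os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                 'dense_correspondence', 'training', 'training.yaml'))

pdc_train(dataset_config, train_config, dataset_name,
          logging_dir="code/data_volume/pdc/trained_models/tutorials",  # illustrative
          num_iterations=3500, dimension=3)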
def get_different_object_loss(pixelwise_contrastive_loss, image_a_pred, image_b_pred,
                              blind_non_matches_a, blind_non_matches_b):
    """
    Simple wrapper for pixelwise_contrastive_loss functions. Args and return args
    documented above in get_loss()
    """
    scale_by_hard_negatives = pixelwise_contrastive_loss.config["scale_by_hard_negatives_DIFFERENT_OBJECT"]
    blind_non_match_loss = zero_loss()
    if not (SpartanDataset.is_empty(blind_non_matches_a.data)):
        M_descriptor = pixelwise_contrastive_loss.config["M_background"]
        blind_non_match_loss, num_hard_negatives = \
            pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred,
                                                                      blind_non_matches_a, blind_non_matches_b,
                                                                      M_descriptor=M_descriptor)
        if scale_by_hard_negatives:
            scale_factor = max(num_hard_negatives, 1)
        else:
            scale_factor = max(len(blind_non_matches_a), 1)

        blind_non_match_loss = 1.0 / scale_factor * blind_non_match_loss

    loss = blind_non_match_loss
    return loss, zero_loss(), zero_loss(), zero_loss(), blind_non_match_loss
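# To make the scale_factor choice above concrete, a small self-contained numeric
# sketch (the tensor values here are illustrative, not from the codebase): the
# summed non-match loss is normalized either by the number of hard negatives
# (non-matches that actually violate the margin) or by the total number sampled.
import torch

# summed hinge loss over 8 sampled non-matches, of which only 2 are "hard"
per_pair_losses = torch.tensor([0.4, 0.0, 0.0, 0.7, 0.0, 0.0, 0.0, 0.0])
summed_loss = per_pair_losses.sum()                    # 1.1
num_hard_negatives = int((per_pair_losses > 0).sum())  # 2

loss_scaled_by_hard = summed_loss / max(num_hard_negatives, 1)     # 0.55
loss_scaled_by_total = summed_loss / max(len(per_pair_losses), 1)  # 0.1375

# Scaling by hard negatives keeps the gradient magnitude roughly constant as
# easy negatives saturate; scaling by the total count lets the loss shrink as
# fewer non-matches violate the margin.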
def load_training_dataset(self):
    """
    Loads the dataset that this network was trained on
    :return: a dataset object, loaded with the config as set in the dataset.yaml
    :rtype: SpartanDataset
    """
    network_params_folder = self.path_to_network_params_folder
    network_params_folder = utils.convert_to_absolute_path(network_params_folder)
    dataset_config_file = os.path.join(network_params_folder, 'dataset.yaml')
    config = utils.getDictFromYamlFilename(dataset_config_file)
    return SpartanDataset(config_expanded=config)
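# Hedged usage sketch: assuming `dcn` is a network object exposing the method
# above (loaded from a trained-model folder), this recovers the dataset it was
# trained on, configured exactly as recorded in that folder's dataset.yaml.
dataset = dcn.load_training_dataset()
print(dataset.get_random_scene_name())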
def evaluate_model(model_lst, output_dir=None, num_image_pairs=100, gt_dataset_config=None):
    if gt_dataset_config is not None:
        gt_dataset = SpartanDataset(config_expanded=gt_dataset_config)
    else:
        gt_dataset = None
    DCE = DenseCorrespondenceEvaluation
    for subdir in model_lst:
        print("evaluate model {}".format(subdir))
        start_time = time.time()
        output_subdir = os.path.join(utils.get_data_dir(), output_dir, subdir.split('/')[-1])
        DCE.run_evaluation_on_network(model_folder=subdir,
                                      compute_descriptor_statistics=True,
                                      cross_scene=False,
                                      output_dir=output_subdir,
                                      num_image_pairs=num_image_pairs,
                                      dataset=gt_dataset)
        end_time = time.time()
        print("evaluation takes %.2f seconds" % (end_time - start_time))
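# An illustrative invocation of evaluate_model; the model folder names and
# output directory are assumptions, not paths from the codebase.
model_lst = ["trained_models/tutorials/caterpillar_3",
             "trained_models/tutorials/toy_hacker_3"]
evaluate_model(model_lst, output_dir="evaluation_results", num_image_pairs=100)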
def get_same_object_across_scene_loss(pixelwise_contrastive_loss, image_a_pred, image_b_pred,
                                      blind_non_matches_a, blind_non_matches_b):
    """
    Simple wrapper for pixelwise_contrastive_loss functions. Args and return args
    documented above in get_loss()
    """
    # if there are no blind non-matches, return zero losses
    blind_non_match_loss = zero_loss()
    loss = zero_loss()
    if not (SpartanDataset.is_empty(blind_non_matches_a.data)):
        blind_non_match_loss, num_hard_negatives = \
            pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred,
                                                                      blind_non_matches_a, blind_non_matches_b,
                                                                      M_descriptor=pixelwise_contrastive_loss.config["M_masked"],
                                                                      invert=True)

        if pixelwise_contrastive_loss.config["scale_by_hard_negatives"]:
            scale_factor = max(num_hard_negatives, 1)
        else:
            scale_factor = max(len(blind_non_matches_a), 1)

        loss = 1.0 / scale_factor * blind_non_match_loss

    return loss, zero_loss(), zero_loss(), zero_loss(), blind_non_match_loss
def load_configuration(self):
    # config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
    config_filename = os.path.join(DIR_PROJ, 'config', 'dense_correspondence',
                                   'dataset', 'composite', 'caterpillar_only_9.yaml')
    config = utils.getDictFromYamlFilename(config_filename)

    # train_config_file = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
    train_config_file = os.path.join(DIR_PROJ, 'config', 'dense_correspondence',
                                     'training', 'training.yaml')
    self.train_config = utils.getDictFromYamlFilename(train_config_file)
    self.dataset = SpartanDataset(config=config)

    logging_dir = "code/data_volume/pdc/trained_models/tutorials"
    num_iterations = 3500
    descr_dim = 3  # the descriptor dimension
    self.train_config["training"]["logging_dir_name"] = "caterpillar_%d" % (descr_dim)
    self.train_config["training"]["logging_dir"] = logging_dir
    self.train_config["dense_correspondence_network"]["descriptor_dimension"] = descr_dim
    self.train_config["training"]["num_iterations"] = num_iterations
import os
import sys

import dense_correspondence_manipulation.utils.utils as utils

dc_source_dir = utils.getDenseCorrespondenceSourceDir()
sys.path.append(dc_source_dir)
sys.path.append(os.path.join(dc_source_dir, "dense_correspondence", "correspondence_tools"))
from dense_correspondence.dataset.spartan_dataset_masked import SpartanDataset, ImageType
from dense_correspondence_manipulation.simple_pixel_correspondence_labeler.annotate_correspondences import \
    label_colors, draw_reticle, pil_image_to_cv2, drawing_scale_config

config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
                               'dataset', 'composite', 'caterpillar_baymax_starbot_onlymulti_front.yaml')
config = utils.getDictFromYamlFilename(config_filename)

sd = SpartanDataset(config=config)
sd.set_train_mode()

annotated_data_yaml_filename = os.path.join(os.getcwd(), "new_annotated_pairs.yaml")
annotated_data = utils.getDictFromYamlFilename(annotated_data_yaml_filename)

index_of_pair_to_display = 0


def draw_points(img, img_points_picked):
    for index, img_point in enumerate(img_points_picked):
        color = label_colors[index % len(label_colors)]
        draw_reticle(img, int(img_point["u"]), int(img_point["v"]), color)
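# A minimal sketch of how draw_points might be used with the annotated pairs
# loaded above. The "image_a" / "scene_name" / "image_idx" / "pixels" keys are
# assumptions about the layout of new_annotated_pairs.yaml.
import cv2

pair = annotated_data[index_of_pair_to_display]
img_a = pil_image_to_cv2(sd.get_rgb_image_from_scene_name_and_idx(
    pair["image_a"]["scene_name"], pair["image_a"]["image_idx"]))
draw_points(img_a, pair["image_a"]["pixels"])
cv2.imshow("annotated pair (image a)", img_a)
cv2.waitKey(0)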
class HeatmapVisualization(object):

    def __init__(self, config):
        self._config = config
        self._dce = DenseCorrespondenceEvaluation(EVAL_CONFIG)
        self._load_networks()
        self._reticle_color = COLOR_GREEN
        # self.load_specific_dataset() # uncomment if you want to load a specific dataset

    def _load_networks(self):
        # we will use the dataset for the first network in the series
        self._dcn_dict = dict()
        self._dataset = None
        self._network_reticle_color = dict()

        for idx, network_name in enumerate(self._config["networks"]):
            dcn = self._dce.load_network_from_config(network_name)
            dcn.eval()
            self._dcn_dict[network_name] = dcn

            if len(self._config["networks"]) == 1:
                self._network_reticle_color[network_name] = COLOR_RED
            else:
                self._network_reticle_color[network_name] = label_colors[idx]

            if self._dataset is None:
                self._dataset = dcn.load_training_dataset()

    def load_specific_dataset(self):
        dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                               'dense_correspondence', 'dataset', 'composite',
                                               'hats_3_demo_composite.yaml')
        dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                               'dense_correspondence', 'dataset', 'composite',
                                               '4_shoes_all.yaml')
        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)

    def get_random_image_pair(self):
        object_id = self._dataset.get_random_object_id()
        scene_name_a = self._dataset.get_random_single_object_scene_name(object_id)
        scene_name_b = self._dataset.get_different_scene_for_object(object_id, scene_name_a)

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def get_random_image_pair_across_object(self):
        """
        Gets cross-object image pairs
        :return:
        :rtype:
        """
        object_id_a, object_id_b = self._dataset.get_two_different_object_ids()
        scene_name_a = self._dataset.get_random_single_object_scene_name(object_id_a)
        scene_name_b = self._dataset.get_random_single_object_scene_name(object_id_b)

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def get_random_image_pair_multi_object_scenes(self):
        """
        Gets image pairs from multi-object scenes
        :return:
        :rtype:
        """
        scene_name_a = self._dataset.get_random_multi_object_scene_name()
        scene_name_b = self._dataset.get_random_multi_object_scene_name()

        if self._config["randomize_images"]:
            image_a_idx = self._dataset.get_random_image_index(scene_name_a)
            image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        else:
            image_a_idx = 0
            image_b_idx = 0

        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def _get_new_images(self):
        """
        Gets a new pair of images
        :return:
        :rtype:
        """
        if random.random() < 0.5:
            self._dataset.set_train_mode()
        else:
            self._dataset.set_test_mode()

        if self._config["same_object"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair()
        elif self._config["different_objects"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_across_object()
        elif self._config["multiple_object"]:
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_multi_object_scenes()
        else:
            raise ValueError("At least one of the image types must be set to True")

        self.img1_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(scene_name_1, image_1_idx)
        self.img2_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(scene_name_2, image_2_idx)
        self._compute_descriptors()

    def _compute_descriptors(self):
        """
        Computes the descriptors for image 1 and image 2 for each network
        :return:
        :rtype:
        """
        self.img1 = pil_image_to_cv2(self.img1_pil)
        self.img2 = pil_image_to_cv2(self.img2_pil)
        self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(self.img1_pil)
        self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(self.img2_pil)
        self.img1_gray = cv2.cvtColor(self.img1, cv2.COLOR_RGB2GRAY) / 255.0
        self.img2_gray = cv2.cvtColor(self.img2, cv2.COLOR_RGB2GRAY) / 255.0

        cv2.imshow('source', self.img1)
        cv2.imshow('target', self.img2)

        self._res_a = dict()
        self._res_b = dict()
        for network_name, dcn in self._dcn_dict.iteritems():
            self._res_a[network_name] = dcn.forward_single_image_tensor(self.rgb_1_tensor).data.cpu().numpy()
            self._res_b[network_name] = dcn.forward_single_image_tensor(self.rgb_2_tensor).data.cpu().numpy()

        self.find_best_match(None, 0, 0, None, None)

    def scale_norm_diffs_to_make_heatmap(self, norm_diffs, threshold):
        """
        Scales the norm diffs to make a heatmap. Values are scaled between 0 and 1;
        0 corresponds to a match, 1 to a non-match.
        :param norm_diffs: The norm diffs
        :type norm_diffs: numpy.array [H,W]
        :return:
        :rtype:
        """
        heatmap = np.copy(norm_diffs)
        greater_than_threshold = np.where(norm_diffs > threshold)
        # linearly scale [0, threshold] to [0, heatmap_vis_upper_bound]
        heatmap = heatmap / threshold * self._config["heatmap_vis_upper_bound"]
        heatmap[greater_than_threshold] = 1  # anything above the threshold is set to 1
        heatmap = heatmap.astype(self.img1_gray.dtype)
        return heatmap

    def find_best_match(self, event, u, v, flags, param):
        """
        For each network, find the best match in the target image to the point
        highlighted with the reticle in the source image. Displays the result.
        :return:
        :rtype:
        """
        img_1_with_reticle = np.copy(self.img1)
        draw_reticle(img_1_with_reticle, u, v, self._reticle_color)
        cv2.imshow("source", img_1_with_reticle)

        alpha = self._config["blend_weight_original_image"]
        beta = 1 - alpha

        img_2_with_reticle = np.copy(self.img2)

        print "\n\n"
        self._res_uv = dict()
        for network_name in self._dcn_dict:
            res_a = self._res_a[network_name]
            res_b = self._res_b[network_name]
            best_match_uv, best_match_diff, norm_diffs = \
                DenseCorrespondenceNetwork.find_best_match((u, v), res_a, res_b)

            print "network_name:", network_name
            self._res_uv[network_name] = dict()
            self._res_uv[network_name]['source'] = res_a[v, u, :].tolist()
            self._res_uv[network_name]['target'] = res_b[v, u, :].tolist()

            print "%s best match diff: %.3f" % (network_name, best_match_diff)
            threshold = self._config["norm_diff_threshold"]
            if network_name in self._config["norm_diff_threshold_dict"]:
                threshold = self._config["norm_diff_threshold_dict"][network_name]

            heatmap = self.scale_norm_diffs_to_make_heatmap(norm_diffs, threshold)

            reticle_color = self._network_reticle_color[network_name]
            draw_reticle(heatmap, best_match_uv[0], best_match_uv[1], reticle_color)
            draw_reticle(img_2_with_reticle, best_match_uv[0], best_match_uv[1], reticle_color)
            blended = cv2.addWeighted(self.img2_gray, alpha, heatmap, beta, 0)
            cv2.imshow(network_name, blended)

        cv2.imshow("target", img_2_with_reticle)

        if event == cv2.EVENT_LBUTTONDOWN:
            utils.saveToYaml(self._res_uv, 'clicked_point.yaml')

    def run(self):
        self._get_new_images()
        cv2.namedWindow('target')
        cv2.setMouseCallback('source', self.find_best_match)
        self._get_new_images()

        while True:
            k = cv2.waitKey(20) & 0xFF
            if k == 27:  # ESC quits
                break
            elif k == ord('n'):  # show a new image pair
                self._get_new_images()
            elif k == ord('s'):  # swap source and target images
                img1_pil = self.img1_pil
                img2_pil = self.img2_pil
                self.img1_pil = img2_pil
                self.img2_pil = img1_pil
                self._compute_descriptors()
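# To make the scaling in scale_norm_diffs_to_make_heatmap concrete, a small
# self-contained numpy sketch; the threshold and upper-bound values here are
# illustrative, not taken from any config file.
import numpy as np

norm_diffs = np.array([[0.00, 0.05, 0.10],
                       [0.20, 0.40, 1.50]])
threshold = 0.4                 # illustrative value
heatmap_vis_upper_bound = 0.5   # illustrative config value

heatmap = np.copy(norm_diffs)
above = np.where(norm_diffs > threshold)
heatmap = heatmap / threshold * heatmap_vis_upper_bound  # [0, threshold] -> [0, 0.5]
heatmap[above] = 1  # everything past the threshold saturates to 1

print(heatmap)
# [[0.     0.0625 0.125 ]
#  [0.25   0.5    1.    ]]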
class HeatmapVisualization(object):
    """
    Launches a live interactive heatmap visualization.

    Edit config/dense_correspondence/heatmap_vis/heatmap.yaml to specify which
    networks to visualize. Specifically, add the network you want to visualize
    to the "networks" list, and make sure that network appears in the file
    pointed to by EVAL_CONFIG.

    Usage: launch this file with python after sourcing the environment with
    `use_pytorch_dense_correspondence`, then run
    `python live_heatmap_visualization.py`.
    """

    def __init__(self, config):
        self._config = config
        self._dce = DenseCorrespondenceEvaluation(EVAL_CONFIG)
        self._load_networks()
        self._reticle_color = COLOR_GREEN
        self.load_specific_dataset()  # comment out if you do not want a specific dataset

    def _load_networks(self):
        # we will use the dataset for the first network in the series
        self._dcn_dict = dict()
        self._dataset = None
        self._network_reticle_color = dict()

        for idx, network_name in enumerate(self._config["networks"]):
            dcn = self._dce.load_network_from_config(network_name)
            dcn.eval()
            self._dcn_dict[network_name] = dcn

            if len(self._config["networks"]) == 1:
                self._network_reticle_color[network_name] = COLOR_RED
            else:
                self._network_reticle_color[network_name] = label_colors[idx]

            if self._dataset is None:
                self._dataset = dcn.load_training_dataset()

    def load_specific_dataset(self):
        dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                               'dense_correspondence', 'dataset', 'composite',
                                               'rope_nonrigid_412vert_only.yaml')
        dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
        self._dataset = SpartanDataset(config=dataset_config)
        # self._dataset.get_knots_info('rope_nonrigid_412vert')

    def get_random_image_pair(self):
        object_id = self._dataset.get_random_object_id()
        scene_name_a = self._dataset.get_random_single_object_scene_name(object_id)
        scene_name_b = self._dataset.get_random_single_object_scene_name(object_id)
        image_a_idx = self._dataset.get_random_image_index(scene_name_a)
        image_b_idx = self._dataset.get_random_image_index(scene_name_b)
        return scene_name_a, scene_name_b, image_a_idx, image_b_idx

    def _get_new_images(self):
        """
        Gets a new pair of images
        :return:
        :rtype:
        """
        if random.random() <= 1.0:  # always use train mode here
            self._dataset.set_train_mode()
        else:
            self._dataset.set_test_mode()

        if self._config["same_object"]:
            print "getting random image pair"
            scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair()
        else:
            raise ValueError("At least one of the image types must be set to True")

        print "got pair"
        self.img1_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(scene_name_1, image_1_idx)
        print "got img1"
        self.img1_knots = self._dataset._knots_info[scene_name_1][image_1_idx]
        print "got img1 knots"
        self.img2_pil = self._dataset.get_rgb_image_from_scene_name_and_idx(scene_name_2, image_2_idx)
        self.img2_knots = self._dataset._knots_info[scene_name_2][image_2_idx]

    def _get_task_images(self):
        self.img1_pil = self._dataset.get_rgb_image('./images/000025_rgb.png')
        img1_mask = self._dataset.get_mask_image('./image_masks/000025_mask.png')
        self.img2_pil = self._dataset.get_rgb_image('./images/000018_rgb.png')
        pixs = correspondence_finder.random_sample_from_masked_image_torch(np.asarray(img1_mask), 25)
        self.img1_knots = list(zip(pixs[0], pixs[1]))
        # self.img1_knots = utils.getDictFromJSONFilename('./images_start/knots_info.json')["0"][0]
        # self.img2_knots = utils.getDictFromJSONFilename('./images_goal/knots_info.json')["0"][0]

    def _compute_descriptors(self, knot_idx):
        """
        Computes the descriptors for image 1 and image 2 for each network
        :return:
        :rtype:
        """
        self.img1 = pil_image_to_cv2(self.img1_pil)
        self.img2 = pil_image_to_cv2(self.img2_pil)
        self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(self.img1_pil)
        self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(self.img2_pil)
        self.img1_gray = cv2.cvtColor(self.img1, cv2.COLOR_RGB2GRAY) / 255.0
        self.img2_gray = cv2.cvtColor(self.img2, cv2.COLOR_RGB2GRAY) / 255.0

        self._res_a = dict()
        self._res_b = dict()
        for network_name, dcn in self._dcn_dict.iteritems():
            self._res_a[network_name] = dcn.forward_single_image_tensor(self.rgb_1_tensor).data.cpu().numpy()
            self._res_b[network_name] = dcn.forward_single_image_tensor(self.rgb_2_tensor).data.cpu().numpy()

        print(self.img1_knots)
        u, v = self.img1_knots[knot_idx]
        source, blended, target, p = self.find_best_match(None, u, v, None, None)
        return (source, blended, target, p)

    def scale_norm_diffs_to_make_heatmap(self, norm_diffs, threshold):
        """
        Scales the norm diffs to make a heatmap. Values are scaled between 0 and 1;
        0 corresponds to a match, 1 to a non-match.
        :param norm_diffs: The norm diffs
        :type norm_diffs: numpy.array [H,W]
        :return:
        :rtype:
        """
        heatmap = np.copy(norm_diffs)
        greater_than_threshold = np.where(norm_diffs > threshold)
        # linearly scale [0, threshold] to [0, heatmap_vis_upper_bound]
        heatmap = heatmap / threshold * self._config["heatmap_vis_upper_bound"]
        heatmap[greater_than_threshold] = 1  # anything above the threshold is set to 1
        heatmap = heatmap.astype(self.img1_gray.dtype)
        return heatmap

    def find_best_match(self, event, u, v, flags, param):
        """
        For each network, find the best match in the target image to the point
        highlighted with the reticle in the source image. Returns the result.
        :return:
        :rtype:
        """
        img_1_with_reticle = np.copy(self.img1)
        draw_reticle(img_1_with_reticle, u, v, self._reticle_color)
        source = img_1_with_reticle

        alpha = self._config["blend_weight_original_image"]
        beta = 1 - alpha

        img_2_with_reticle = np.copy(self.img2)

        self._res_uv = dict()
        for network_name in self._dcn_dict:
            res_a = self._res_a[network_name]
            res_b = self._res_b[network_name]
            best_match_uv, best_match_diff, norm_diffs = \
                DenseCorrespondenceNetwork.find_best_match((u, v), res_a, res_b)

            print "network_name:", network_name
            self._res_uv[network_name] = dict()
            self._res_uv[network_name]['source'] = res_a[v, u, :].tolist()
            self._res_uv[network_name]['target'] = res_b[v, u, :].tolist()

            print "%s best match diff: %.3f" % (network_name, best_match_diff)
            threshold = self._config["norm_diff_threshold"]
            if network_name in self._config["norm_diff_threshold_dict"]:
                threshold = self._config["norm_diff_threshold_dict"][network_name]

            heatmap = self.scale_norm_diffs_to_make_heatmap(norm_diffs, threshold)

            reticle_color = self._network_reticle_color[network_name]
            draw_reticle(heatmap, best_match_uv[0], best_match_uv[1], reticle_color)
            draw_reticle(img_2_with_reticle, best_match_uv[0], best_match_uv[1], reticle_color)
            blended = cv2.addWeighted(self.img2_gray, alpha, heatmap, beta, 0)

        target = img_2_with_reticle
        return (source, blended, target, [best_match_uv[0], best_match_uv[1]])

    def run(self):
        self._get_task_images()
        pixels = []
        # for i in range(len(utils.getDictFromJSONFilename('./images_start/knots_info.json')["0"][0])):
        for i in range(25):
            print "computing descriptors"
            source, blended, target, p = self._compute_descriptors(i)
            pixels.append(p)
            print "computed descriptors"
            vis = np.concatenate((source, target), axis=1)
            print "concatenated, writing image"
            cv2.imwrite("/home/priya/code/data_volume/annotated/%06d.png" % i, vis)

        np.savetxt('pixels_pred.txt', pixels)
from dense_correspondence.training.training import DenseCorrespondenceTraining
from dense_correspondence.dataset.spartan_dataset_masked import SpartanDataset
logging.basicConfig(level=logging.INFO)
from dense_correspondence.evaluation.evaluation import DenseCorrespondenceEvaluation

config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
                               'dataset', 'composite', 'toy.yaml')
config = utils.getDictFromYamlFilename(config_filename)

train_config_file = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
                                 'training', 'toy_training.yaml')
train_config = utils.getDictFromYamlFilename(train_config_file)

dataset = SpartanDataset(config=config)

logging_dir = "/home/zhouxian/git/pytorch-dense-correspondence/pdc/trained_models/tutorials"
d = 3  # the descriptor dimension
name = "toy_hacker_%d" % (d)
train_config["training"]["logging_dir_name"] = name
train_config["training"]["logging_dir"] = logging_dir
train_config["dense_correspondence_network"]["descriptor_dimension"] = d

TRAIN = True
EVALUATE = True

if TRAIN:
    print "training descriptor of dimension %d" % (d)
    train = DenseCorrespondenceTraining(dataset=dataset, config=train_config)
    train.run()
import sys
import os
import cv2
import numpy as np
import copy

import dense_correspondence_manipulation.utils.utils as utils
dc_source_dir = utils.getDenseCorrespondenceSourceDir()
sys.path.append(dc_source_dir)
sys.path.append(os.path.join(dc_source_dir, "dense_correspondence", "correspondence_tools"))
from dense_correspondence.dataset.spartan_dataset_masked import SpartanDataset, ImageType

config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence',
                               'dataset', 'composite', 'star_bot_front_only.yaml')
config = utils.getDictFromYamlFilename(config_filename)
sd = SpartanDataset(config=config)
sd.set_train_mode()

USE_FIRST_IMAGE = False        # force using first image in each log
RANDOMIZE_TEST_TRAIN = False   # randomize selecting train or test mode


def numpy_to_cv2(numpy_img):
    return numpy_img[:, :, ::-1].copy()  # convert between BGR and RGB


def pil_image_to_cv2(pil_image):
    return np.array(pil_image)[:, :, ::-1].copy()  # convert between BGR and RGB


def get_cv2_img_pair_from_spartan():
    scene_name_a = sd.get_random_scene_name()
    num_attempts = 50
    for i in range(num_attempts):
class DenseCorrespondenceTraining(object):

    def __init__(self, config=None, dataset=None, dataset_test=None):
        if config is None:
            config = DenseCorrespondenceTraining.load_default_config()

        self._config = config
        self._dataset = dataset
        self._dataset_test = dataset_test

        self._dcn = None
        self._optimizer = None

    def setup(self):
        """
        Initializes the object
        :return:
        :rtype:
        """
        self.load_dataset()
        self.setup_logging_dir()
        self.setup_visdom()
        self.setup_tensorboard()

    @property
    def dataset(self):
        return self._dataset

    @dataset.setter
    def dataset(self, value):
        self._dataset = value

    def load_dataset(self):
        """
        Loads a dataset and constructs a DataLoader for training.
        Additionally creates a dataset and DataLoader for the test data
        :return:
        :rtype:
        """
        batch_size = self._config['training']['batch_size']
        num_workers = self._config['training']['num_workers']

        if self._dataset is None:
            self._dataset = SpartanDataset.make_default_10_scenes_drill()

        self._dataset.load_all_pose_data()
        self._dataset.set_parameters_from_training_config(self._config)

        self._data_loader = torch.utils.data.DataLoader(self._dataset, batch_size=batch_size,
                                                        shuffle=True, num_workers=num_workers,
                                                        drop_last=True)

        # create a test dataset
        if self._config["training"]["compute_test_loss"]:
            if self._dataset_test is None:
                self._dataset_test = SpartanDataset(mode="test", config=self._dataset.config)

            self._dataset_test.load_all_pose_data()
            self._dataset_test.set_parameters_from_training_config(self._config)

            self._data_loader_test = torch.utils.data.DataLoader(self._dataset_test, batch_size=batch_size,
                                                                 shuffle=True, num_workers=2,
                                                                 drop_last=True)

    def load_dataset_from_config(self, config):
        """
        Loads train and test datasets from the given config
        :param config: dict loaded from a YAML file
        :type config:
        :return: None
        :rtype:
        """
        self._dataset = SpartanDataset(mode="train", config=config)
        self._dataset_test = SpartanDataset(mode="test", config=config)
        self.load_dataset()

    def build_network(self):
        """
        Builds the DenseCorrespondenceNetwork
        :return:
        :rtype: DenseCorrespondenceNetwork
        """
        return DenseCorrespondenceNetwork.from_config(self._config['dense_correspondence_network'],
                                                      load_stored_params=False)

    def _construct_optimizer(self, parameters):
        """
        Constructs the optimizer
        :param parameters: parameters to adjust in the optimizer
        :type parameters:
        :return: Adam optimizer with params from the config
        :rtype: torch.optim
        """
        learning_rate = float(self._config['training']['learning_rate'])
        weight_decay = float(self._config['training']['weight_decay'])
        optimizer = optim.Adam(parameters, lr=learning_rate, weight_decay=weight_decay)
        return optimizer

    def _get_current_loss(self, logging_dict):
        """
        Gets the current loss for both test and train
        :return:
        :rtype: dict
        """
        d = dict()
        d['train'] = dict()
        d['test'] = dict()

        for key, val in d.iteritems():
            for field in logging_dict[key].keys():
                vec = logging_dict[key][field]
                if len(vec) > 0:
                    val[field] = vec[-1]
                else:
                    val[field] = -1  # placeholder

        return d

    def load_pretrained(self, model_folder, iteration=None):
        """
        Loads network and optimizer parameters from a previous training run.

        Note: it is up to the user to ensure that the model parameters match,
        e.g. width, height, descriptor dimension etc.

        :param model_folder: location of the folder containing the param files, e.g.
            001000.pth. Can be an absolute or relative path; if relative then it is
            relative to pdc/trained_models/
        :type model_folder:
        :param iteration: which index to use, e.g. 3500; if None it loads the latest one
        :type iteration:
        :return: iteration
        :rtype:
        """
        if not os.path.isdir(model_folder):
            pdc_path = utils.getPdcPath()
            model_folder = os.path.join(pdc_path, "trained_models", model_folder)

        # find idx.pth and idx.pth.opt files
        if iteration is None:
            files = os.listdir(model_folder)
            model_param_file = sorted(fnmatch.filter(files, '*.pth'))[-1]
            iteration = int(model_param_file.split(".")[0])
            optim_param_file = sorted(fnmatch.filter(files, '*.pth.opt'))[-1]
        else:
            prefix = utils.getPaddedString(iteration, width=6)
            model_param_file = prefix + ".pth"
            optim_param_file = prefix + ".pth.opt"

        print "model_param_file", model_param_file
        model_param_file = os.path.join(model_folder, model_param_file)
        optim_param_file = os.path.join(model_folder, optim_param_file)

        self._dcn = self.build_network()
        self._dcn.load_state_dict(torch.load(model_param_file))
        self._dcn.cuda()
        self._dcn.train()

        self._optimizer = self._construct_optimizer(self._dcn.parameters())
        self._optimizer.load_state_dict(torch.load(optim_param_file))

        return iteration

    def run_from_pretrained(self, model_folder, iteration=None, learning_rate=None):
        """
        Wrapper for load_pretrained(), then run()
        """
        iteration = self.load_pretrained(model_folder, iteration)
        if iteration is None:
            iteration = 0

        if learning_rate is not None:
            self._config["training"]["learning_rate_starting_from_pretrained"] = learning_rate
            self.set_learning_rate(self._optimizer, learning_rate)

        self.run(loss_current_iteration=iteration, use_pretrained=True)

    def run(self, loss_current_iteration=0, use_pretrained=False):
        """
        Runs the training
        :return:
        :rtype:
        """
        start_iteration = copy.copy(loss_current_iteration)
        DCE = DenseCorrespondenceEvaluation

        self.setup()
        self.save_configs()

        if not use_pretrained:
            # create a new network and optimizer
            self._dcn = self.build_network()
            self._optimizer = self._construct_optimizer(self._dcn.parameters())
        else:
            logging.info("using pretrained model")
            if self._dcn is None:
                raise ValueError("you must set self._dcn if use_pretrained=True")
            if self._optimizer is None:
                raise ValueError("you must set self._optimizer if use_pretrained=True")

        # make sure the network is using cuda and is in train mode
        dcn = self._dcn
        dcn.cuda()
        dcn.train()

        optimizer = self._optimizer
        batch_size = self._data_loader.batch_size

        pixelwise_contrastive_loss = PixelwiseContrastiveLoss(image_shape=dcn.image_shape,
                                                              config=self._config['loss_function'])
        pixelwise_contrastive_loss.debug = True

        loss = match_loss = non_match_loss = 0

        max_num_iterations = self._config['training']['num_iterations'] + start_iteration
        logging_rate = self._config['training']['logging_rate']
        save_rate = self._config['training']['save_rate']
        compute_test_loss_rate = self._config['training']['compute_test_loss_rate']

        # logging
        self._logging_dict = dict()
        self._logging_dict['train'] = {"iteration": [], "loss": [], "match_loss": [],
                                       "masked_non_match_loss": [],
                                       "background_non_match_loss": [],
                                       "blind_non_match_loss": [],
                                       "learning_rate": [],
                                       "different_object_non_match_loss": []}

        self._logging_dict['test'] = {"iteration": [], "loss": [], "match_loss": [],
                                      "non_match_loss": []}

        # save the network before starting
        if not use_pretrained:
            self.save_network(dcn, optimizer, 0)

        for epoch in range(50):  # loop over the dataset multiple times
            for i, data in enumerate(self._data_loader, 0):
                loss_current_iteration += 1
                start_iter = time.time()

                match_type, \
                img_a, img_b, \
                matches_a, matches_b, \
                masked_non_matches_a, masked_non_matches_b, \
                background_non_matches_a, background_non_matches_b, \
                blind_non_matches_a, blind_non_matches_b, \
                metadata = data

                if (match_type == -1).all():
                    print "\n empty data, continuing \n"
                    continue

                data_type = metadata["type"][0]

                img_a = Variable(img_a.cuda(), requires_grad=False)
                img_b = Variable(img_b.cuda(), requires_grad=False)

                matches_a = Variable(matches_a.cuda().squeeze(0), requires_grad=False)
                matches_b = Variable(matches_b.cuda().squeeze(0), requires_grad=False)
                masked_non_matches_a = Variable(masked_non_matches_a.cuda().squeeze(0), requires_grad=False)
                masked_non_matches_b = Variable(masked_non_matches_b.cuda().squeeze(0), requires_grad=False)

                background_non_matches_a = Variable(background_non_matches_a.cuda().squeeze(0), requires_grad=False)
                background_non_matches_b = Variable(background_non_matches_b.cuda().squeeze(0), requires_grad=False)

                blind_non_matches_a = Variable(blind_non_matches_a.cuda().squeeze(0), requires_grad=False)
                blind_non_matches_b = Variable(blind_non_matches_b.cuda().squeeze(0), requires_grad=False)

                optimizer.zero_grad()
                self.adjust_learning_rate(optimizer, loss_current_iteration)

                # run both images through the network
                image_a_pred = dcn.forward(img_a)
                image_a_pred = dcn.process_network_output(image_a_pred, batch_size)

                image_b_pred = dcn.forward(img_b)
                image_b_pred = dcn.process_network_output(image_b_pred, batch_size)

                # get the loss
                loss, match_loss, masked_non_match_loss, \
                background_non_match_loss, blind_non_match_loss = \
                    loss_composer.get_loss(pixelwise_contrastive_loss, match_type,
                                           image_a_pred, image_b_pred,
                                           matches_a, matches_b,
                                           masked_non_matches_a, masked_non_matches_b,
                                           background_non_matches_a, background_non_matches_b,
                                           blind_non_matches_a, blind_non_matches_b)

                loss.backward()
                optimizer.step()

                elapsed = time.time() - start_iter
                print "single iteration took %.3f seconds" % (elapsed)

                def update_visdom_plots(loss, match_loss, masked_non_match_loss,
                                        background_non_match_loss, blind_non_match_loss):
                    """
                    Updates the visdom plots with current loss function information
                    :return:
                    :rtype:
                    """
                    learning_rate = DenseCorrespondenceTraining.get_learning_rate(optimizer)
                    self._logging_dict['train']['learning_rate'].append(learning_rate)
                    self._visdom_plots['learning_rate'].log(loss_current_iteration, learning_rate)
                    self._tensorboard_logger.log_value("learning rate", learning_rate, loss_current_iteration)

                    # don't update a plot if the corresponding term is a zero loss
                    if not loss_composer.is_zero_loss(match_loss):
                        self._logging_dict['train']['match_loss'].append(match_loss.data[0])
                        self._visdom_plots['train']['match_loss'].log(loss_current_iteration, match_loss.data[0])
                        self._tensorboard_logger.log_value("train match loss", match_loss.data[0],
                                                           loss_current_iteration)

                    if not loss_composer.is_zero_loss(masked_non_match_loss):
                        self._logging_dict['train']['masked_non_match_loss'].append(masked_non_match_loss.data[0])
                        self._visdom_plots['train']['masked_non_match_loss'].log(loss_current_iteration,
                                                                                 masked_non_match_loss.data[0])
                        self._tensorboard_logger.log_value("train masked non match loss",
                                                           masked_non_match_loss.data[0], loss_current_iteration)

                    if not loss_composer.is_zero_loss(background_non_match_loss):
                        self._logging_dict['train']['background_non_match_loss'].append(
                            background_non_match_loss.data[0])
                        self._visdom_plots['train']['background_non_match_loss'].log(loss_current_iteration,
                                                                                     background_non_match_loss.data[0])
                        self._tensorboard_logger.log_value("train background non match loss",
                                                           background_non_match_loss.data[0], loss_current_iteration)

                    if not loss_composer.is_zero_loss(blind_non_match_loss):
                        if data_type == SpartanDatasetDataType.SINGLE_OBJECT_WITHIN_SCENE:
                            self._tensorboard_logger.log_value("train blind SINGLE_OBJECT_WITHIN_SCENE",
                                                               blind_non_match_loss.data[0], loss_current_iteration)
                        if data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                            self._tensorboard_logger.log_value("train blind DIFFERENT_OBJECT",
                                                               blind_non_match_loss.data[0], loss_current_iteration)

                    # loss is never zero
                    if data_type == SpartanDatasetDataType.SINGLE_OBJECT_WITHIN_SCENE:
                        self._tensorboard_logger.log_value("train loss SINGLE_OBJECT_WITHIN_SCENE",
                                                           loss.data[0], loss_current_iteration)
                    elif data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                        self._tensorboard_logger.log_value("train loss DIFFERENT_OBJECT",
                                                           loss.data[0], loss_current_iteration)
                    elif data_type == SpartanDatasetDataType.SINGLE_OBJECT_ACROSS_SCENE:
                        self._tensorboard_logger.log_value("train loss SINGLE_OBJECT_ACROSS_SCENE",
                                                           loss.data[0], loss_current_iteration)
                    elif data_type == SpartanDatasetDataType.MULTI_OBJECT:
                        self._tensorboard_logger.log_value("train loss MULTI_OBJECT",
                                                           loss.data[0], loss_current_iteration)
                    elif data_type == SpartanDatasetDataType.SYNTHETIC_MULTI_OBJECT:
                        self._tensorboard_logger.log_value("train loss SYNTHETIC_MULTI_OBJECT",
                                                           loss.data[0], loss_current_iteration)
                    else:
                        raise ValueError("unknown data type")

                    if data_type == SpartanDatasetDataType.DIFFERENT_OBJECT:
                        self._tensorboard_logger.log_value("train different object",
                                                           loss.data[0], loss_current_iteration)

                def update_visdom_test_loss_plots(test_loss, test_match_loss, test_non_match_loss):
                    """
                    Logs data about the test loss and updates the visdom plots
                    :return:
                    :rtype:
                    """
                    self._logging_dict['test']['loss'].append(test_loss)
                    self._logging_dict['test']['match_loss'].append(test_match_loss)
                    self._logging_dict['test']['non_match_loss'].append(test_non_match_loss)
                    self._logging_dict['test']['iteration'].append(loss_current_iteration)

                    self._visdom_plots['test']['loss'].log(loss_current_iteration, test_loss)
                    self._visdom_plots['test']['match_loss'].log(loss_current_iteration, test_match_loss)
                    self._visdom_plots['test']['non_match_loss'].log(loss_current_iteration, test_non_match_loss)

                    self._tensorboard_logger.log_value('test loss', test_loss, loss_current_iteration)
                    self._tensorboard_logger.log_value('test match loss', test_match_loss, loss_current_iteration)
                    self._tensorboard_logger.log_value('test non-match loss', test_non_match_loss,
                                                       loss_current_iteration)

                update_visdom_plots(loss, match_loss, masked_non_match_loss,
                                    background_non_match_loss, blind_non_match_loss)

                if loss_current_iteration % save_rate == 0:
                    self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict)

                if loss_current_iteration % logging_rate == 0:
                    logging.info("Training on iteration %d of %d" % (loss_current_iteration, max_num_iterations))
                    logging.info("single iteration took %.3f seconds" % (elapsed))
                    percent_complete = loss_current_iteration * 100.0 / (max_num_iterations - start_iteration)
                    logging.info("Training is %d percent complete\n" % (percent_complete))

                # don't compute the test loss on the first few times through the loop
                if self._config["training"]["compute_test_loss"] and \
                        (loss_current_iteration % compute_test_loss_rate == 0) and loss_current_iteration > 5:
                    logging.info("Computing test loss")

                    # delete the loss, match_loss, non_match_loss variables so that
                    # pytorch can free that GPU memory
                    del loss, match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss
                    gc.collect()

                    dcn.eval()
                    test_loss, test_match_loss, test_non_match_loss = \
                        DCE.compute_loss_on_dataset(dcn, self._data_loader_test,
                                                    self._config['loss_function'],
                                                    num_iterations=self._config['training']['test_loss_num_iterations'])

                    update_visdom_test_loss_plots(test_loss, test_match_loss, test_non_match_loss)

                    # delete these variables so we can free GPU memory
                    del test_loss, test_match_loss, test_non_match_loss

                    # make sure to set the network back to train mode
                    dcn.train()

                if loss_current_iteration % self._config['training']['garbage_collect_rate'] == 0:
                    logging.debug("running garbage collection")
                    gc_start = time.time()
                    gc.collect()
                    gc_elapsed = time.time() - gc_start
                    logging.debug("garbage collection took %.2f seconds" % (gc_elapsed))

                if loss_current_iteration > max_num_iterations:
                    logging.info("Finished training after %d iterations" % (max_num_iterations))
                    self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict)
                    return

    def setup_logging_dir(self):
        """
        Sets up the directory where logs will be stored and config files written
        :return: full path of logging dir
        :rtype: str
        """
        if 'logging_dir_name' in self._config['training']:
            dir_name = self._config['training']['logging_dir_name']
        else:
            dir_name = utils.get_current_time_unique_name() + "_" + \
                str(self._config['dense_correspondence_network']['descriptor_dimension']) + "d"

        self._logging_dir_name = dir_name
        self._logging_dir = os.path.join(utils.convert_to_absolute_path(self._config['training']['logging_dir']),
                                         dir_name)

        if os.path.isdir(self._logging_dir):
            shutil.rmtree(self._logging_dir)

        if not os.path.isdir(self._logging_dir):
            os.makedirs(self._logging_dir)

        # make the tensorboard log directory
        self._tensorboard_log_dir = os.path.join(self._logging_dir, "tensorboard")
        if not os.path.isdir(self._tensorboard_log_dir):
            os.makedirs(self._tensorboard_log_dir)

        return self._logging_dir

    def save_network(self, dcn, optimizer, iteration, logging_dict=None):
        """
        Saves network parameters to the logging directory
        :return:
        :rtype: None
        """
        network_param_file = os.path.join(self._logging_dir, utils.getPaddedString(iteration, width=6) + ".pth")
        optimizer_param_file = network_param_file + ".opt"
        torch.save(dcn.state_dict(), network_param_file)
        torch.save(optimizer.state_dict(), optimizer_param_file)

        # also save the loss history
        if logging_dict is not None:
            log_history_file = os.path.join(self._logging_dir,
                                            utils.getPaddedString(iteration, width=6) + "_log_history.yaml")
            utils.saveToYaml(logging_dict, log_history_file)

            current_loss_file = os.path.join(self._logging_dir, 'loss.yaml')
            current_loss_data = self._get_current_loss(logging_dict)
            utils.saveToYaml(current_loss_data, current_loss_file)

    def save_configs(self):
        """
        Saves config files to the logging directory
        :return:
        :rtype: None
        """
        training_params_file = os.path.join(self._logging_dir, 'training.yaml')
        utils.saveToYaml(self._config, training_params_file)

        dataset_params_file = os.path.join(self._logging_dir, 'dataset.yaml')
        utils.saveToYaml(self._dataset.config, dataset_params_file)

    def adjust_learning_rate(self, optimizer, iteration):
        """
        Adjusts the learning rate according to the schedule
        :param optimizer:
        :type optimizer:
        :param iteration:
        :type iteration:
        :return:
        :rtype:
        """
        steps_between_learning_rate_decay = self._config['training']['steps_between_learning_rate_decay']
        if iteration % steps_between_learning_rate_decay == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] * self._config["training"]["learning_rate_decay"]

    @staticmethod
    def set_learning_rate(optimizer, learning_rate):
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    @staticmethod
    def get_learning_rate(optimizer):
        for param_group in optimizer.param_groups:
            lr = param_group['lr']
            break
        return lr

    def setup_visdom(self):
        """
        Sets up the visdom visualizer
        :return:
        :rtype:
        """
        self.start_visdom()
        self._visdom_env = self._logging_dir_name
        self._vis = visdom.Visdom(env=self._visdom_env)
        self._port = 8097

        self._visdom_plots = dict()
        self._visdom_plots["train"] = dict()

        self._visdom_plots['train']['loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Train Loss'}, env=self._visdom_env)

        self._visdom_plots['learning_rate'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Learning Rate'}, env=self._visdom_env)

        self._visdom_plots['train']['match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Train Match Loss'}, env=self._visdom_env)

        self._visdom_plots['train']['masked_non_match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Train Masked Non Match Loss'}, env=self._visdom_env)

        self._visdom_plots['train']['background_non_match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Train Background Non Match Loss'}, env=self._visdom_env)

        self._visdom_plots['train']['blind_non_match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Train Blind Non Match Loss'}, env=self._visdom_env)

        self._visdom_plots["test"] = dict()
        self._visdom_plots['test']['loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Test Loss'}, env=self._visdom_env)

        self._visdom_plots['test']['match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Test Match Loss'}, env=self._visdom_env)

        self._visdom_plots['test']['non_match_loss'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Test Non Match Loss'}, env=self._visdom_env)

        self._visdom_plots['masked_hard_negative_rate'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Masked Matches Hard Negative Rate'}, env=self._visdom_env)

        self._visdom_plots['non_masked_hard_negative_rate'] = VisdomPlotLogger(
            'line', port=self._port, opts={'title': 'Non-Masked Hard Negative Rate'}, env=self._visdom_env)

    def setup_tensorboard(self):
        """
        Sets up the tensorboard logger for the plots
        :return:
        :rtype:
        """
        logging.info("setting up tensorboard_logger")
        self._tensorboard_logger = tensorboard_logger.Logger(self._tensorboard_log_dir)
        logging.info("tensorboard logger started")

    @staticmethod
    def load_default_config():
        dc_source_dir = utils.getDenseCorrespondenceSourceDir()
        config_file = os.path.join(dc_source_dir, 'config', 'dense_correspondence',
                                   'training', 'training.yaml')
        config = utils.getDictFromYamlFilename(config_file)
        return config

    @staticmethod
    def make_default():
        dataset = SpartanDataset.make_default_caterpillar()
        return DenseCorrespondenceTraining(dataset=dataset)

    @staticmethod
    def start_visdom():
        """
        Starts visdom if it's not already running
        :return:
        :rtype:
        """
        vis = visdom.Visdom()
        if vis.check_connection():
            logging.info("Visdom already running, returning")
            return

        logging.info("Starting visdom")
        cmd = "python -m visdom.server"
        subprocess.Popen([cmd], shell=True)
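# A minimal sketch of resuming training from a saved checkpoint via the class
# above. The model folder name is illustrative (relative paths resolve against
# pdc/trained_models/, per load_pretrained()); iteration=None loads the latest
# .pth file in that folder.
train = DenseCorrespondenceTraining.make_default()
train.run_from_pretrained("tutorials/caterpillar_3",
                          iteration=None,
                          learning_rate=1.0e-5)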
class DonDataLoader(object):
    """
    Data loader class that draws from the pytorch-dense-correspondence dataset.
    """

    def __init__(self, config_filename='shoes_all.yaml'):
        with HiddenPrints():
            self.config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                                'dense_correspondence', 'dataset', 'composite',
                                                config_filename)
            self.train_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                                      'dense_correspondence', 'training', 'training.yaml')
            self.config = utils.getDictFromYamlFilename(self.config_filename)
            self.train_config = utils.getDictFromYamlFilename(self.train_config_filename)
            self.dataset = SpartanDataset(config=self.config)
            self.dataset.set_parameters_from_training_config(self.train_config)

        # holds the centroid and radius for each scene; these are computed from the
        # min and max z values currently (maybe include x, y, and z in the future).
        # access as self.centroid_and_radius[scene_name]["centroid"] or
        # self.centroid_and_radius[scene_name]["radius"]
        self.centroid_and_radius = {}

    def get_random_scene_from_object_id(self, object_id=None):
        # default to the first object_id if not specified
        if object_id is None:
            object_id = list(self.dataset._single_object_scene_dict.keys())[0]

        # list of scenes from the training set
        scenes = self.dataset._single_object_scene_dict[object_id]["train"]
        scene = scenes[random.randint(0, len(scenes) - 1)]
        print("scene: {}".format(scene))
        return scene

    def get_frame_idx_pair_from_scene_name(self, scene_name):
        """Returns the indices of two random frames in the scene."""
        frames = list(self.dataset.get_pose_data(scene_name).keys())
        frame_idx_a = frames[random.randint(0, len(frames) - 1)]
        frame_idx_b = frames[random.randint(0, len(frames) - 1)]
        print("frame_idx_a: {}, frame_idx_b: {}".format(frame_idx_a, frame_idx_b))
        return (frame_idx_a, frame_idx_b)

    def get_camera_intrinsics_matrix(self, scene_name):
        intrinsics = self.dataset.get_camera_intrinsics(scene_name)
        K = intrinsics.get_camera_matrix()
        return K

    def mask_is_contained(self, mask):
        """
        Returns True if the mask is fully contained in the image, False otherwise.
        inputs: mask as numpy array
        """
        y_max, x_max = mask.shape  # (height, width)

        # check top and bottom rows
        for i in range(x_max):
            if mask[0, i] != 0.0 or mask[y_max - 1, i] != 0.0:
                return False

        # check left and right cols
        for i in range(y_max):
            if mask[i, 0] != 0.0 or mask[i, x_max - 1] != 0.0:
                return False

        return True

    def set_centroid_and_radius_for_scene(self, scene_name):
        """
        Sets the centroid and radius for the scene using the min and max
        z values seen in the scene.
        """
        all_frames = list(self.dataset.get_pose_data(scene_name).keys())

        global_min_depth = float("inf")
        global_max_depth = 0.0
        for frame in all_frames:
            # ethan: this might be fragile, so come back to it
            try:
                rgb_a, depth_a, mask_a, pose_a = self.dataset.get_rgbd_mask_pose(scene_name, frame)
                masked_depth = np.array(mask_a) * np.array(depth_a)
                min_depth = masked_depth[masked_depth > 0].min() / 1000.0
                max_depth = masked_depth[masked_depth > 0].max() / 1000.0
                global_min_depth = min(global_min_depth, min_depth)
                global_max_depth = max(global_max_depth, max_depth)
            except:
                pass

        z_min = global_min_depth
        z_max = global_max_depth

        radius = (z_max - z_min) / 2.0
        centroid = radius + z_min

        self.centroid_and_radius[scene_name] = {}
        self.centroid_and_radius[scene_name]["centroid"] = centroid
        self.centroid_and_radius[scene_name]["radius"] = radius

    def get_random_data_pair(self):
        # returns a random data pair in which both masks are fully contained in the image
        found = False
        while not found:
            # choose data from one scene
            scene_name = self.get_random_scene_from_object_id()

            # cache the centroid and radius if this scene hasn't been seen before
            if scene_name not in self.centroid_and_radius:
                self.set_centroid_and_radius_for_scene(scene_name)

            frame_idx_a, frame_idx_b = self.get_frame_idx_pair_from_scene_name(scene_name)
            K = self.get_camera_intrinsics_matrix(scene_name)
            rgb_a, depth_a, mask_a, pose_a = self.dataset.get_rgbd_mask_pose(scene_name, frame_idx_a)
            rgb_b, depth_b, mask_b, pose_b = self.dataset.get_rgbd_mask_pose(scene_name, frame_idx_b)

            # check that both masks are fully visible
            found = self.mask_is_contained(np.array(mask_a)) and self.mask_is_contained(np.array(mask_b))

        a_image_data = [rgb_a, depth_a, mask_a, pose_a]
        b_image_data = [rgb_b, depth_b, mask_b, pose_b]
        return K, a_image_data, b_image_data, scene_name
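# A minimal usage sketch for DonDataLoader; the config filename matches the
# constructor default, everything else follows the methods defined above.
loader = DonDataLoader(config_filename='shoes_all.yaml')
K, a_image_data, b_image_data, scene_name = loader.get_random_data_pair()
rgb_a, depth_a, mask_a, pose_a = a_image_data
print("scene {}: camera intrinsics\n{}".format(scene_name, K))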
class HeatmapVisualization(object): def __init__(self, config): self._config = config self._dce = DenseCorrespondenceEvaluation(EVAL_CONFIG) self._load_networks() self._reticle_color = COLOR_GREEN # self.load_specific_dataset() # uncomment if you want to load a specific dataset def _load_networks(self): # we will use the dataset for the first network in the series self._dcn_dict = dict() self._dataset = None self._network_reticle_color = dict() for idx, network_name in enumerate(self._config["networks"]): dcn = self._dce.load_network_from_config(network_name) dcn.eval() self._dcn_dict[network_name] = dcn # self._network_reticle_color[network_name] = label_colors[idx] if len(self._config["networks"]) == 1: self._network_reticle_color[network_name] = COLOR_RED else: self._network_reticle_color[network_name] = label_colors[idx] if self._dataset is None: self._dataset = dcn.load_training_dataset() def load_specific_dataset(self): dataset_config_filename = os.path.join( utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence', 'dataset', 'composite', 'hats_3_demo_composite.yaml') dataset_config_filename = os.path.join( utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence', 'dataset', 'composite', '4_shoes_all.yaml') dataset_config = utils.getDictFromYamlFilename(dataset_config_filename) self._dataset = SpartanDataset(config=dataset_config) def get_random_image_pair(self): object_id = self._dataset.get_random_object_id() scene_name_a = self._dataset.get_random_single_object_scene_name( object_id) scene_name_b = self._dataset.get_different_scene_for_object( object_id, scene_name_a) if self._config["randomize_images"]: image_a_idx = self._dataset.get_random_image_index(scene_name_a) image_b_idx = self._dataset.get_random_image_index(scene_name_b) else: image_a_idx = 0 image_b_idx = 0 # image_b_idx = self._dataset.get_random_image_index(scene_name_b) return scene_name_a, scene_name_b, image_a_idx, image_b_idx def get_random_image_pair_across_object(self): """ Gets cross object image pairs :param randomize: :type randomize: :return: :rtype: """ object_id_a, object_id_b = self._dataset.get_two_different_object_ids() # object_id_a = "shoe_red_nike.yaml" # object_id_b = "shoe_gray_nike" # object_id_b = "shoe_green_nike" scene_name_a = self._dataset.get_random_single_object_scene_name( object_id_a) scene_name_b = self._dataset.get_random_single_object_scene_name( object_id_b) if self._config["randomize_images"]: image_a_idx = self._dataset.get_random_image_index(scene_name_a) image_b_idx = self._dataset.get_random_image_index(scene_name_b) else: image_a_idx = 0 image_b_idx = 0 return scene_name_a, scene_name_b, image_a_idx, image_b_idx def get_random_image_pair_multi_object_scenes(self): """ Gets cross object image pairs :param randomize: :type randomize: :return: :rtype: """ scene_name_a = self._dataset.get_random_multi_object_scene_name() scene_name_b = self._dataset.get_random_multi_object_scene_name() if self._config["randomize_images"]: image_a_idx = self._dataset.get_random_image_index(scene_name_a) image_b_idx = self._dataset.get_random_image_index(scene_name_b) else: image_a_idx = 0 image_b_idx = 0 return scene_name_a, scene_name_b, image_a_idx, image_b_idx def _get_new_images(self): """ Gets a new pair of images :return: :rtype: """ if random.random() < 0.5: self._dataset.set_train_mode() else: self._dataset.set_test_mode() if self._config["same_object"]: scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair( ) elif 
self._config["different_objects"]: scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_across_object( ) elif self._config["multiple_object"]: scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_multi_object_scenes( ) else: raise ValueError( "At least one of the image types must be set tot True") self.img1_pil = self._dataset.get_rgb_image_from_scene_name_and_idx( scene_name_1, image_1_idx) self.img2_pil = self._dataset.get_rgb_image_from_scene_name_and_idx( scene_name_2, image_2_idx) self._compute_descriptors() # self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(img1_pil) # self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(img2_pil) def _compute_descriptors(self): """ Computes the descriptors for image 1 and image 2 for each network :return: :rtype: """ self.img1 = pil_image_to_cv2(self.img1_pil) self.img2 = pil_image_to_cv2(self.img2_pil) self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(self.img1_pil) self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(self.img2_pil) self.img1_gray = cv2.cvtColor(self.img1, cv2.COLOR_RGB2GRAY) / 255.0 self.img2_gray = cv2.cvtColor(self.img2, cv2.COLOR_RGB2GRAY) / 255.0 cv2.imshow('source', self.img1) cv2.imshow('target', self.img2) self._res_a = dict() self._res_b = dict() for network_name, dcn in self._dcn_dict.iteritems(): self._res_a[network_name] = dcn.forward_single_image_tensor( self.rgb_1_tensor).data.cpu().numpy() self._res_b[network_name] = dcn.forward_single_image_tensor( self.rgb_2_tensor).data.cpu().numpy() self.find_best_match(None, 0, 0, None, None) def scale_norm_diffs_to_make_heatmap(self, norm_diffs, threshold): """ Scales the norm diffs to make a heatmap. This will be scaled between 0 and 1. 0 corresponds to a match, 1 to non-match :param norm_diffs: The norm diffs :type norm_diffs: numpy.array [H,W] :return: :rtype: """ heatmap = np.copy(norm_diffs) greater_than_threshold = np.where(norm_diffs > threshold) heatmap = heatmap / threshold * self._config[ "heatmap_vis_upper_bound"] # linearly scale [0, threshold] to [0, 0.5] heatmap[ greater_than_threshold] = 1 # greater than threshold is set to 1 heatmap = heatmap.astype(self.img1_gray.dtype) return heatmap def find_best_match(self, event, u, v, flags, param): """ For each network, find the best match in the target image to point highlighted with reticle in the source image. 
Displays the result :return: :rtype: """ img_1_with_reticle = np.copy(self.img1) draw_reticle(img_1_with_reticle, u, v, self._reticle_color) cv2.imshow("source", img_1_with_reticle) alpha = self._config["blend_weight_original_image"] beta = 1 - alpha img_2_with_reticle = np.copy(self.img2) print "\n\n" self._res_uv = dict() # self._res_a_uv = dict() # self._res_b_uv = dict() for network_name in self._dcn_dict: res_a = self._res_a[network_name] res_b = self._res_b[network_name] best_match_uv, best_match_diff, norm_diffs = \ DenseCorrespondenceNetwork.find_best_match((u, v), res_a, res_b) print "\n\n" print "network_name:", network_name self._res_uv[network_name] = dict() self._res_uv[network_name]['source'] = res_a[v, u, :].tolist() self._res_uv[network_name]['target'] = res_b[v, u, :].tolist() # print "res_a[v, u, :]:", res_a[v, u, :] # print "res_b[v, u, :]:", res_b[v, u, :] print "%s best match diff: %.3f" % (network_name, best_match_diff) threshold = self._config["norm_diff_threshold"] if network_name in self._config["norm_diff_threshold_dict"]: threshold = self._config["norm_diff_threshold_dict"][ network_name] heatmap = self.scale_norm_diffs_to_make_heatmap( norm_diffs, threshold) reticle_color = self._network_reticle_color[network_name] draw_reticle(heatmap, best_match_uv[0], best_match_uv[1], reticle_color) draw_reticle(img_2_with_reticle, best_match_uv[0], best_match_uv[1], reticle_color) blended = cv2.addWeighted(self.img2_gray, alpha, heatmap, beta, 0) cv2.imshow(network_name, blended) cv2.imshow("target", img_2_with_reticle) if event == cv2.EVENT_LBUTTONDOWN: utils.saveToYaml(self._res_uv, 'clicked_point.yaml') def run(self): self._get_new_images() cv2.namedWindow('target') cv2.setMouseCallback('source', self.find_best_match) self._get_new_images() while True: k = cv2.waitKey(20) & 0xFF if k == 27: break elif k == ord('n'): print "HEY" self._get_new_images() elif k == ord('s'): print "HEY" img1_pil = self.img1_pil img2_pil = self.img2_pil self.img1_pil = img2_pil self.img2_pil = img1_pil self._compute_descriptors()
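# Standalone illustration of the linear scaling performed by
# scale_norm_diffs_to_make_heatmap() above: distances in [0, threshold] map
# linearly to [0, upper_bound] and anything beyond the threshold saturates at 1.
# The threshold and upper_bound values below are made up for the example, not
# taken from any real heatmap config.
import numpy as np

def linear_heatmap(norm_diffs, threshold, upper_bound=0.5):
    heatmap = np.copy(norm_diffs)
    heatmap = heatmap / threshold * upper_bound   # linear part
    heatmap[norm_diffs > threshold] = 1.0         # saturate clear non-matches
    return heatmap

norm_diffs = np.array([[0.00, 0.05, 0.20],
                       [0.40, 0.10, 0.30]])
print(linear_heatmap(norm_diffs, threshold=0.25))
# [[0.  0.1 0.4]
#  [1.  0.2 1. ]]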
print("descriptor_filename", descriptor_filename) print("processing image %d of %d" % (counter, num_images)) counter += 1 if __name__ == "__main__": dc_source_dir = utils.getDenseCorrespondenceSourceDir() config_filename = os.path.join(dc_source_dir, 'config', 'dense_correspondence', 'evaluation', 'lucas_evaluation.yaml') eval_config = utils.getDictFromYamlFilename(config_filename) default_config = utils.get_defaults_config() utils.set_cuda_visible_devices(default_config['cuda_visible_devices']) dce = DenseCorrespondenceEvaluation(eval_config) network_name = "caterpillar_M_background_0.500_3" dcn = dce.load_network_from_config(network_name) dataset_config_file = os.path.join(dc_source_dir, 'config', 'dense_correspondence', 'dataset', 'composite', 'caterpillar_only_9.yaml') dataset_config = utils.getDictFromYamlFilename(dataset_config_file) dataset = SpartanDataset(config=dataset_config) scene_name = SCENE_NAME save_dir = SAVE_DIR compute_descriptor_images_for_single_scene(dataset, scene_name, dcn, save_dir) print("finished cleanly")
class HeatmapVisualization(object): """ Launches a live interactive heatmap visualization. Edit config/dense_correspondence/heatmap_vis/heatmap.yaml to specify which networks to visualize. Specifically add the network you want to visualize to the "networks" list. Make sure that this network appears in the file pointed to by EVAL_CONFIG Usage: Launch this file with python after sourcing the environment with `use_pytorch_dense_correspondence` Then `python live_heatmap_visualization.py`. Keypresses: n: new set of images s: swap images p: pause/un-pause """ def __init__(self, config, eval_config): self._config = config self._dce = DenseCorrespondenceEvaluation(eval_config) self._load_networks() self._reticle_color = COLOR_GREEN self._paused = False if LOAD_SPECIFIC_DATASET: self.load_specific_dataset( ) # uncomment if you want to load a specific dataset def _load_networks(self): # we will use the dataset for the first network in the series self._dcn_dict = dict() self._dataset = None self._network_reticle_color = dict() for idx, network_name in enumerate(self._config["networks"]): dcn = self._dce.load_network_from_config(network_name) dcn.eval() self._dcn_dict[network_name] = dcn # self._network_reticle_color[network_name] = label_colors[idx] if len(self._config["networks"]) == 1: self._network_reticle_color[network_name] = COLOR_RED else: self._network_reticle_color[network_name] = label_colors[idx] if self._dataset is None: self._dataset = dcn.load_training_dataset() def load_specific_dataset(self): dataset_config_filename = os.path.join( utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence', 'dataset', 'composite', 'hats_3_demo_composite.yaml') # dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', # 'dense_correspondence', # 'dataset', 'composite', '4_shoes_all.yaml') dataset_config = utils.getDictFromYamlFilename(dataset_config_filename) self._dataset = SpartanDataset(config=dataset_config) def get_random_image_pair(self): """ Gets a pair of random images for different scenes of the same object """ object_id = self._dataset.get_random_object_id() # scene_name_a = "2018-04-10-16-02-59" # scene_name_b = scene_name_a scene_name_a = self._dataset.get_random_single_object_scene_name( object_id) scene_name_b = self._dataset.get_different_scene_for_object( object_id, scene_name_a) if self._config["randomize_images"]: image_a_idx = self._dataset.get_random_image_index(scene_name_a) image_b_idx = self._dataset.get_random_image_index(scene_name_b) else: image_a_idx = 0 image_b_idx = 0 return scene_name_a, scene_name_b, image_a_idx, image_b_idx def get_random_image_pair_across_object(self): """ Gets cross object image pairs :param randomize: :type randomize: :return: :rtype: """ object_id_a, object_id_b = self._dataset.get_two_different_object_ids() # object_id_a = "shoe_red_nike.yaml" # object_id_b = "shoe_gray_nike" # object_id_b = "shoe_green_nike" scene_name_a = self._dataset.get_random_single_object_scene_name( object_id_a) scene_name_b = self._dataset.get_random_single_object_scene_name( object_id_b) if self._config["randomize_images"]: image_a_idx = self._dataset.get_random_image_index(scene_name_a) image_b_idx = self._dataset.get_random_image_index(scene_name_b) else: image_a_idx = 0 image_b_idx = 0 return scene_name_a, scene_name_b, image_a_idx, image_b_idx def get_random_image_pair_multi_object_scenes(self): """ Gets cross object image pairs :param randomize: :type randomize: :return: :rtype: """ scene_name_a = 
self._dataset.get_random_multi_object_scene_name() scene_name_b = self._dataset.get_random_multi_object_scene_name() if self._config["randomize_images"]: image_a_idx = self._dataset.get_random_image_index(scene_name_a) image_b_idx = self._dataset.get_random_image_index(scene_name_b) else: image_a_idx = 0 image_b_idx = 0 return scene_name_a, scene_name_b, image_a_idx, image_b_idx def _get_new_images(self): """ Gets a new pair of images :return: :rtype: """ if random.random() < 0.5: self._dataset.set_train_mode() else: self._dataset.set_test_mode() if self._config["same_object"]: scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair( ) elif self._config["different_objects"]: scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_across_object( ) elif self._config["multiple_object"]: scene_name_1, scene_name_2, image_1_idx, image_2_idx = self.get_random_image_pair_multi_object_scenes( ) else: raise ValueError( "At least one of the image types must be set tot True") # caterpillar # scene_name_1 = "2018-04-16-14-42-26" # scene_name_2 = "2018-04-16-14-25-19" # hats # scene_name_1 = "2018-05-15-22-01-44" # scene_name_2 = "2018-05-15-22-04-17" self.img1_pil = self._dataset.get_rgb_image_from_scene_name_and_idx( scene_name_1, image_1_idx) self.img2_pil = self._dataset.get_rgb_image_from_scene_name_and_idx( scene_name_2, image_2_idx) self._scene_name_1 = scene_name_1 self._scene_name_2 = scene_name_2 self._image_1_idx = image_1_idx self._image_2_idx = image_2_idx self._compute_descriptors() # self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(img1_pil) # self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(img2_pil) def _compute_descriptors(self): """ Computes the descriptors for image 1 and image 2 for each network :return: :rtype: """ self.img1 = pil_image_to_cv2(self.img1_pil) self.img2 = pil_image_to_cv2(self.img2_pil) self.rgb_1_tensor = self._dataset.rgb_image_to_tensor(self.img1_pil) self.rgb_2_tensor = self._dataset.rgb_image_to_tensor(self.img2_pil) self.img1_gray = cv2.cvtColor(self.img1, cv2.COLOR_RGB2GRAY) / 255.0 self.img2_gray = cv2.cvtColor(self.img2, cv2.COLOR_RGB2GRAY) / 255.0 cv2.imshow('source', self.img1) cv2.imshow('target', self.img2) self._res_a = dict() self._res_b = dict() for network_name, dcn in self._dcn_dict.items(): self._res_a[network_name] = dcn.forward_single_image_tensor( self.rgb_1_tensor).data.cpu().numpy() self._res_b[network_name] = dcn.forward_single_image_tensor( self.rgb_2_tensor).data.cpu().numpy() self.find_best_match(None, 0, 0, None, None) def scale_norm_diffs_to_make_heatmap(self, norm_diffs, threshold): """ TODO (@manuelli) scale with Gaussian kernel instead of linear Scales the norm diffs to make a heatmap. This will be scaled between 0 and 1. 0 corresponds to a match, 1 to non-match :param norm_diffs: The norm diffs :type norm_diffs: numpy.array [H,W] :return: :rtype: """ heatmap = np.copy(norm_diffs) greater_than_threshold = np.where(norm_diffs > threshold) heatmap = heatmap / threshold * self._config[ "heatmap_vis_upper_bound"] # linearly scale [0, threshold] to [0, 0.5] heatmap[ greater_than_threshold] = 1 # greater than threshold is set to 1 heatmap = heatmap.astype(self.img1_gray.dtype) return heatmap def find_best_match(self, event, u, v, flags, param): """ For each network, find the best match in the target image to point highlighted with reticle in the source image. 
Displays the result :return: :rtype: """ if self._paused: return img_1_with_reticle = np.copy(self.img1) draw_reticle(img_1_with_reticle, u, v, self._reticle_color) cv2.imshow("source", img_1_with_reticle) alpha = self._config["blend_weight_original_image"] beta = 1 - alpha img_2_with_reticle = np.copy(self.img2) print("\n\n") self._res_uv = dict() # self._res_a_uv = dict() # self._res_b_uv = dict() for network_name in self._dcn_dict: res_a = self._res_a[network_name] res_b = self._res_b[network_name] best_match_uv, best_match_diff, norm_diffs = \ DenseCorrespondenceNetwork.find_best_match((u, v), res_a, res_b) print("\n\n") print("network_name:", network_name) print("scene_name_1", self._scene_name_1) print("image_1_idx", self._image_1_idx) print("scene_name_2", self._scene_name_2) print("image_2_idx", self._image_2_idx) d = dict() d['scene_name'] = self._scene_name_1 d['image_idx'] = self._image_1_idx d['descriptor'] = res_a[v, u, :].tolist() d['u'] = u d['v'] = v print("\n-------keypoint info\n", d) print("\n--------\n") self._res_uv[network_name] = dict() self._res_uv[network_name]['source'] = res_a[v, u, :].tolist() self._res_uv[network_name]['target'] = res_b[v, u, :].tolist() print("res_a[v, u, :]:", res_a[v, u, :]) print("res_b[v, u, :]:", res_b[best_match_uv[1], best_match_uv[0], :]) print("%s best match diff: %.3f" % (network_name, best_match_diff)) print("res_a", self._res_uv[network_name]['source']) print("res_b", self._res_uv[network_name]['target']) threshold = self._config["norm_diff_threshold"] if network_name in self._config["norm_diff_threshold_dict"]: threshold = self._config["norm_diff_threshold_dict"][ network_name] heatmap_color = vis_utils.compute_gaussian_kernel_heatmap_from_norm_diffs( norm_diffs, self._config['kernel_variance']) reticle_color = self._network_reticle_color[network_name] draw_reticle(heatmap_color, best_match_uv[0], best_match_uv[1], reticle_color) draw_reticle(img_2_with_reticle, best_match_uv[0], best_match_uv[1], reticle_color) blended = cv2.addWeighted(self.img2, alpha, heatmap_color, beta, 0) cv2.imshow(network_name, blended) cv2.imshow("target", img_2_with_reticle) if event == cv2.EVENT_LBUTTONDOWN: utils.saveToYaml(self._res_uv, 'clicked_point.yaml') def run(self): self._get_new_images() cv2.namedWindow('target') cv2.setMouseCallback('source', self.find_best_match) self._get_new_images() while True: k = cv2.waitKey(20) & 0xFF if k == 27: break elif k == ord('n'): self._get_new_images() elif k == ord('s'): img1_pil = self.img1_pil img2_pil = self.img2_pil self.img1_pil = img2_pil self.img2_pil = img1_pil self._compute_descriptors() elif k == ord('p'): if self._paused: print("un pausing") self._paused = False else: print("pausing") self._paused = True
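# The colored heatmap above comes from
# vis_utils.compute_gaussian_kernel_heatmap_from_norm_diffs(norm_diffs, kernel_variance),
# whose internals are not shown here. A rough standalone equivalent (an
# assumption, not the repo's implementation) turns descriptor distances into a
# similarity in [0, 1] with a Gaussian kernel and then applies a color map:
import cv2
import numpy as np

def gaussian_kernel_heatmap(norm_diffs, variance):
    # exp(-d^2 / (2 * variance)): 1 at a perfect match, decaying with distance
    similarity = np.exp(-np.square(norm_diffs) / (2.0 * variance))
    heatmap_gray = (similarity * 255).astype(np.uint8)
    # COLORMAP_JET gives the familiar blue-to-red heatmap shown with cv2.imshow
    return cv2.applyColorMap(heatmap_gray, cv2.COLORMAP_JET)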
def run(self, loss_current_iteration=0, use_pretrained=False): """ Runs the training :return: :rtype: """ start_iteration = copy.copy(loss_current_iteration) DCE = DenseCorrespondenceEvaluation self.setup() self.save_configs() if not use_pretrained: # create new network and optimizer self._dcn = self.build_network() self._optimizer = self._construct_optimizer(self._dcn.parameters()) else: logging.info("using pretrained model") if (self._dcn is None): raise ValueError("you must set self._dcn if use_pretrained=True") if (self._optimizer is None): raise ValueError("you must set self._optimizer if use_pretrained=True") # make sure network is using cuda and is in train mode dcn = self._dcn dcn.cuda() dcn.train() optimizer = self._optimizer batch_size = self._data_loader.batch_size pixelwise_contrastive_loss = PixelwiseContrastiveLoss(image_shape=dcn.image_shape, config=self._config['loss_function']) pixelwise_contrastive_loss.debug = True # Repeat M for background and masked pixelwise_contrastive_loss._config['M_background'] = pixelwise_contrastive_loss._config['M_descriptor'] pixelwise_contrastive_loss._config['M_masked'] = pixelwise_contrastive_loss._config['M_descriptor'] loss = match_loss = non_match_loss = 0 num_epochs = self._config['training']['num_epochs'] logging_rate = self._config['training']['logging_rate'] save_rate = self._config['training']['save_rate'] compute_test_loss_rate = self._config['training']['compute_test_loss_rate'] # logging self._logging_dict = dict() self._logging_dict['train'] = {"iteration": [], "loss": [], "match_loss": [], "masked_non_match_loss": [], "background_non_match_loss": [], "blind_non_match_loss": [], "learning_rate": [], "different_object_non_match_loss": []} self._logging_dict['test'] = {"iteration": [], "loss": [], "match_loss": [], "non_match_loss": []} # save network before starting if not use_pretrained: self.save_network(dcn, optimizer, 0) t_start = time.time() loss_vec = [] match_loss_vec = [] non_match_loss_vec = [] for epoch in range(num_epochs): # loop over the dataset multiple times for i, data in enumerate(self._data_loader, 0): loss_current_iteration += 1 start_iter = time.time() match_type, img_a, img_b, matches_a, matches_b, non_matches_a, non_matches_b = data img_a = Variable(img_a.cuda(), requires_grad=False) img_b = Variable(img_b.cuda(), requires_grad=False) # Note: repeat non_matches for both masked and background, and fake blind nonmatches using empty tensor, for compatibility in loss computation matches_a = Variable(matches_a.cuda().squeeze(0), requires_grad=False) matches_b = Variable(matches_b.cuda().squeeze(0), requires_grad=False) non_matches_a = Variable(non_matches_a.cuda().squeeze(0), requires_grad=False) non_matches_b = Variable(non_matches_b.cuda().squeeze(0), requires_grad=False) blind_non_matches_a = Variable(SpartanDataset.empty_tensor().cuda().squeeze(0), requires_grad=False) blind_non_matches_b = Variable(SpartanDataset.empty_tensor().cuda().squeeze(0), requires_grad=False) optimizer.zero_grad() self.adjust_learning_rate(optimizer, loss_current_iteration) # run both images through the network image_a_pred = dcn.forward(img_a) image_a_pred = dcn.process_network_output(image_a_pred, batch_size) image_b_pred = dcn.forward(img_b) image_b_pred = dcn.process_network_output(image_b_pred, batch_size) # get loss. 
loss, match_loss, non_match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss \ = loss_composer.get_loss(pixelwise_contrastive_loss, match_type, image_a_pred, image_b_pred, matches_a, matches_b, non_matches_a, non_matches_b, non_matches_a, non_matches_b, blind_non_matches_a, blind_non_matches_b) loss.backward() optimizer.step() elapsed = time.time() - start_iter # print "single iteration took %.3f seconds" %(elapsed) if loss_current_iteration % save_rate == 0: self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict) sys.stdout.write('Epoch %d/%d, Image %d/%d, total_itr: %d, loss: %.4f, match_loss: %.4f, non_match_loss: %.4f, total_time: %s \r' % \ (epoch+1, num_epochs, i+1, len(self._dataset), loss_current_iteration, loss.data[0], match_loss.data[0], non_match_loss.data[0], str(timedelta(seconds=time.time()-t_start))[:-4])); sys.stdout.flush() loss_vec.append(loss.data[0]) match_loss_vec.append(match_loss.data[0]) non_match_loss_vec.append(non_match_loss.data[0]) if self._config["training"]["compute_test_loss"] and (loss_current_iteration % compute_test_loss_rate == 0): print # logging.info("Computing test loss") # delete the loss, match_loss, non_match_loss variables so that # pytorch can use that GPU memory del loss, match_loss, non_match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss gc.collect() print '\tTraining average:loss: %.4f, match_loss: %.4f, non_match_loss: %.4f' % \ (np.mean(loss_vec), np.mean(match_loss_vec), np.mean(non_match_loss_vec)) loss_vec = [] match_loss_vec = [] non_match_loss_vec = [] dcn.eval() test_loss, test_match_loss, test_non_match_loss = DCE.compute_loss_on_salad_dataset(dcn, self._data_loader_test, self._config['loss_function'], num_iterations=self._config['training']['test_loss_num_iterations']) print '\tTesting results: loss: %.4f, match_loss: %.4f, non_match_loss: %.4f' % \ (test_loss, test_match_loss, test_non_match_loss) # delete these variables so we can free GPU memory del test_loss, test_match_loss, test_non_match_loss # make sure to set the network back to train mode dcn.train() if loss_current_iteration % self._config['training']['garbage_collect_rate'] == 0: logging.debug("running garbage collection") gc_start = time.time() gc.collect() gc_elapsed = time.time() - gc_start logging.debug("garbage collection took %.2d seconds" %(gc_elapsed)) logging.info("Finished training.") self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict) return
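# A much-simplified sketch of the pixelwise contrastive objective being
# optimized above (not the actual PixelwiseContrastiveLoss implementation):
# matched pixel descriptors are pulled together with a squared L2 term, and
# non-matches are pushed apart with a hinge at margin M. Tensor shapes and the
# default margin value are illustrative assumptions.
import torch

def contrastive_loss_sketch(des_a, des_b, matches, non_matches, M=0.5):
    """des_a, des_b: [N, D] descriptors sampled at pixel locations."""
    # matches / non_matches: [K, 2] index pairs into des_a and des_b
    match_dists = (des_a[matches[:, 0]] - des_b[matches[:, 1]]).pow(2).sum(dim=1)
    match_loss = match_dists.mean()

    nm_dists = (des_a[non_matches[:, 0]] - des_b[non_matches[:, 1]]).norm(dim=1)
    non_match_loss = torch.clamp(M - nm_dists, min=0).pow(2).mean()

    return match_loss + non_match_loss, match_loss, non_match_loss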
class DenseCorrespondenceTraining(object): def __init__(self, config=None, dataset=None, dataset_test=None): if config is None: config = DenseCorrespondenceTraining.load_default_config() self._config = config self._dataset = dataset self._dataset_test = dataset_test self._dcn = None self._optimizer = None def setup(self): """ Initializes the object :return: :rtype: """ self.load_dataset() self.setup_logging_dir() self.setup_tensorboard() @property def dataset(self): return self._dataset @dataset.setter def dataset(self, value): self._dataset = value def load_dataset(self): """ Loads a dataset, construct a trainloader. Additionally creates a dataset and DataLoader for the test data :return: :rtype: """ batch_size = self._config['training']['batch_size'] num_workers = self._config['training']['num_workers'] if self._dataset is None: self._dataset = SpartanDataset.make_default_10_scenes_drill() self._dataset.load_all_pose_data() self._dataset.set_parameters_from_training_config(self._config) self._data_loader = torch.utils.data.DataLoader( self._dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True) # create a test dataset if self._config["training"]["compute_test_loss"]: if self._dataset_test is None: self._dataset_test = SpartanDataset( mode="test", config=self._dataset.config) self._dataset_test.load_all_pose_data() self._dataset_test.set_parameters_from_training_config( self._config) self._data_loader_test = torch.utils.data.DataLoader( self._dataset_test, batch_size=batch_size, shuffle=True, num_workers=2, drop_last=True) def load_dataset_from_config(self, config): """ Loads train and test datasets from the given config :param config: Dict gotten from a YAML file :type config: :return: None :rtype: """ self._dataset = SpartanDataset(mode="train", config=config) self._dataset_test = SpartanDataset(mode="test", config=config) self.load_dataset() def build_network(self): """ Builds the DenseCorrespondenceNetwork :return: :rtype: DenseCorrespondenceNetwork """ return DenseCorrespondenceNetwork.from_config( self._config['dense_correspondence_network'], load_stored_params=False) def _construct_optimizer(self, parameters): """ Constructs the optimizer :param parameters: Parameters to adjust in the optimizer :type parameters: :return: Adam Optimizer with params from the config :rtype: torch.optim """ learning_rate = float(self._config['training']['learning_rate']) weight_decay = float(self._config['training']['weight_decay']) optimizer = optim.Adam(parameters, lr=learning_rate, weight_decay=weight_decay) return optimizer def _get_current_loss(self, logging_dict): """ Gets the current loss for both test and train :return: :rtype: dict """ d = dict() d['train'] = dict() d['test'] = dict() for key, val in d.items(): for field in list(logging_dict[key].keys()): vec = logging_dict[key][field] if len(vec) > 0: val[field] = vec[-1] else: val[field] = -1 # placeholder return d def load_pretrained(self, model_folder, iteration=None): """ Loads network and optimizer parameters from a previous training run. Note: It is up to the user to ensure that the model parameters match. e.g. width, height, descriptor dimension etc. :param model_folder: location of the folder containing the param files 001000.pth. Can be absolute or relative path. If relative then it is relative to pdc/trained_models/ :type model_folder: :param iteration: which index to use, e.g. 
3500, if None it loads the latest one :type iteration: :return: iteration :rtype: """ if not os.path.isdir(model_folder): pdc_path = utils.getPdcPath() model_folder = os.path.join(pdc_path, "trained_models", model_folder) # find idx.pth and idx.pth.opt files if iteration is None: files = os.listdir(model_folder) model_param_file = sorted(fnmatch.filter(files, '*.pth'))[-1] iteration = int(model_param_file.split(".")[0]) optim_param_file = sorted(fnmatch.filter(files, '*.pth.opt'))[-1] else: prefix = utils.getPaddedString(iteration, width=6) model_param_file = prefix + ".pth" optim_param_file = prefix + ".pth.opt" print("model_param_file", model_param_file) model_param_file = os.path.join(model_folder, model_param_file) optim_param_file = os.path.join(model_folder, optim_param_file) self._dcn = self.build_network() self._dcn.load_state_dict(torch.load(model_param_file)) self._dcn.cuda() self._dcn.train() self._optimizer = self._construct_optimizer(self._dcn.parameters()) self._optimizer.load_state_dict(torch.load(optim_param_file)) return iteration def run_from_pretrained(self, model_folder, iteration=None, learning_rate=None): """ Wrapper for load_pretrained(), then run() """ iteration = self.load_pretrained(model_folder, iteration) if iteration is None: iteration = 0 if learning_rate is not None: self._config["training"][ "learning_rate_starting_from_pretrained"] = learning_rate self.set_learning_rate(self._optimizer, learning_rate) self.run(loss_current_iteration=iteration, use_pretrained=True) def run(self, loss_current_iteration=0, use_pretrained=False): """ Runs the training :return: :rtype: """ start_iteration = copy.copy(loss_current_iteration) DCE = DenseCorrespondenceEvaluation self.setup() self.save_configs() if not use_pretrained: # create new network and optimizer self._dcn = self.build_network() self._optimizer = self._construct_optimizer(self._dcn.parameters()) else: logging.info("using pretrained model") if (self._dcn is None): raise ValueError( "you must set self._dcn if use_pretrained=True") if (self._optimizer is None): raise ValueError( "you must set self._optimizer if use_pretrained=True") # make sure network is using cuda and is in train mode dcn = self._dcn dcn.cuda() dcn.train() optimizer = self._optimizer batch_size = self._data_loader.batch_size pixelwise_contrastive_loss = PixelwiseContrastiveLoss( image_shape=dcn.image_shape, config=self._config['loss_function']) pixelwise_contrastive_loss.debug = True loss = match_loss = non_match_loss = 0 max_num_iterations = self._config['training'][ 'num_iterations'] + start_iteration logging_rate = self._config['training']['logging_rate'] save_rate = self._config['training']['save_rate'] compute_test_loss_rate = self._config['training'][ 'compute_test_loss_rate'] # logging self._logging_dict = dict() self._logging_dict['train'] = { "iteration": [], "loss": [], "match_loss": [], "masked_non_match_loss": [], "background_non_match_loss": [], "blind_non_match_loss": [], "learning_rate": [], "different_object_non_match_loss": [] } self._logging_dict['test'] = { "iteration": [], "loss": [], "match_loss": [], "non_match_loss": [] } # save network before starting if not use_pretrained: self.save_network(dcn, optimizer, 0) # from training_progress_visualizer import TrainingProgressVisualizer # TPV = TrainingProgressVisualizer() for epoch in range(50): # loop over the dataset multiple times for i, data in enumerate(self._data_loader, 0): loss_current_iteration += 1 start_iter = time.time() match_type, \ img_a, img_b, \ matches_a, 
matches_b, \ masked_non_matches_a, masked_non_matches_b, \ background_non_matches_a, background_non_matches_b, \ blind_non_matches_a, blind_non_matches_b, \ metadata = data if (match_type == -1).all(): print("\n empty data, continuing \n") continue data_type = metadata["type"][0] img_a = Variable(img_a.cuda(), requires_grad=False) img_b = Variable(img_b.cuda(), requires_grad=False) matches_a = Variable(matches_a.cuda().squeeze(0), requires_grad=False) matches_b = Variable(matches_b.cuda().squeeze(0), requires_grad=False) masked_non_matches_a = Variable( masked_non_matches_a.cuda().squeeze(0), requires_grad=False) masked_non_matches_b = Variable( masked_non_matches_b.cuda().squeeze(0), requires_grad=False) background_non_matches_a = Variable( background_non_matches_a.cuda().squeeze(0), requires_grad=False) background_non_matches_b = Variable( background_non_matches_b.cuda().squeeze(0), requires_grad=False) blind_non_matches_a = Variable( blind_non_matches_a.cuda().squeeze(0), requires_grad=False) blind_non_matches_b = Variable( blind_non_matches_b.cuda().squeeze(0), requires_grad=False) optimizer.zero_grad() self.adjust_learning_rate(optimizer, loss_current_iteration) # run both images through the network image_a_pred = dcn.forward(img_a) image_a_pred = dcn.process_network_output( image_a_pred, batch_size) image_b_pred = dcn.forward(img_b) image_b_pred = dcn.process_network_output( image_b_pred, batch_size) # get loss loss, match_loss, masked_non_match_loss, \ background_non_match_loss, blind_non_match_loss = loss_composer.get_loss(pixelwise_contrastive_loss, match_type, image_a_pred, image_b_pred, matches_a, matches_b, masked_non_matches_a, masked_non_matches_b, background_non_matches_a, background_non_matches_b, blind_non_matches_a, blind_non_matches_b) loss.backward() optimizer.step() #if i % 10 == 0: # TPV.update(self._dataset, dcn, loss_current_iteration, now_training_object_id=metadata["object_id"]) elapsed = time.time() - start_iter def update_plots(loss, match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss): """ Updates the tensorboard plots with current loss function information :return: :rtype: """ learning_rate = DenseCorrespondenceTraining.get_learning_rate( optimizer) self._logging_dict['train']['learning_rate'].append( learning_rate) self._tensorboard_logger.log_value("learning rate", learning_rate, loss_current_iteration) # Don't update any plots if the entry corresponding to that term # is a zero loss if not loss_composer.is_zero_loss(match_loss): self._logging_dict['train']['match_loss'].append( match_loss.item()) self._tensorboard_logger.log_value( "train match loss", match_loss.item(), loss_current_iteration) if not loss_composer.is_zero_loss(masked_non_match_loss): self._logging_dict['train'][ 'masked_non_match_loss'].append( masked_non_match_loss.item()) self._tensorboard_logger.log_value( "train masked non match loss", masked_non_match_loss.item(), loss_current_iteration) if not loss_composer.is_zero_loss( background_non_match_loss): self._logging_dict['train'][ 'background_non_match_loss'].append( background_non_match_loss.item()) self._tensorboard_logger.log_value( "train background non match loss", background_non_match_loss.item(), loss_current_iteration) if not loss_composer.is_zero_loss(blind_non_match_loss): if data_type == SpartanDatasetDataType.SINGLE_OBJECT_WITHIN_SCENE: self._tensorboard_logger.log_value( "train blind SINGLE_OBJECT_WITHIN_SCENE", blind_non_match_loss.item(), loss_current_iteration) if data_type == 
SpartanDatasetDataType.DIFFERENT_OBJECT: self._tensorboard_logger.log_value( "train blind DIFFERENT_OBJECT", blind_non_match_loss.item(), loss_current_iteration) # loss is never zero if data_type == SpartanDatasetDataType.SINGLE_OBJECT_WITHIN_SCENE: self._tensorboard_logger.log_value( "train loss SINGLE_OBJECT_WITHIN_SCENE", loss.item(), loss_current_iteration) elif data_type == SpartanDatasetDataType.DIFFERENT_OBJECT: self._tensorboard_logger.log_value( "train loss DIFFERENT_OBJECT", loss.item(), loss_current_iteration) elif data_type == SpartanDatasetDataType.SINGLE_OBJECT_ACROSS_SCENE: self._tensorboard_logger.log_value( "train loss SINGLE_OBJECT_ACROSS_SCENE", loss.item(), loss_current_iteration) elif data_type == SpartanDatasetDataType.MULTI_OBJECT: self._tensorboard_logger.log_value( "train loss MULTI_OBJECT", loss.item(), loss_current_iteration) elif data_type == SpartanDatasetDataType.SYNTHETIC_MULTI_OBJECT: self._tensorboard_logger.log_value( "train loss SYNTHETIC_MULTI_OBJECT", loss.item(), loss_current_iteration) else: raise ValueError("unknown data type") if data_type == SpartanDatasetDataType.DIFFERENT_OBJECT: self._tensorboard_logger.log_value( "train different object", loss.item(), loss_current_iteration) update_plots(loss, match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss) if loss_current_iteration % save_rate == 0: self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict) if loss_current_iteration % logging_rate == 0: logging.info("Training on iteration %d of %d" % (loss_current_iteration, max_num_iterations)) logging.info("single iteration took %.3f seconds" % (elapsed)) percent_complete = loss_current_iteration * 100.0 / ( max_num_iterations - start_iteration) logging.info("Training is %d percent complete\n" % (percent_complete)) # don't compute the test loss on the first few times through the loop if self._config["training"]["compute_test_loss"] and ( loss_current_iteration % compute_test_loss_rate == 0) and loss_current_iteration > 5: logging.info("Computing test loss") # delete the loss, match_loss, non_match_loss variables so that # pytorch can use that GPU memory del loss, match_loss, masked_non_match_loss, background_non_match_loss, blind_non_match_loss gc.collect() dcn.eval() test_loss, test_match_loss, test_non_match_loss = DCE.compute_loss_on_dataset( dcn, self._data_loader_test, self._config['loss_function'], num_iterations=self._config['training'] ['test_loss_num_iterations']) # delete these variables so we can free GPU memory del test_loss, test_match_loss, test_non_match_loss # make sure to set the network back to train mode dcn.train() if loss_current_iteration % self._config['training'][ 'garbage_collect_rate'] == 0: logging.debug("running garbage collection") gc_start = time.time() gc.collect() gc_elapsed = time.time() - gc_start logging.debug("garbage collection took %.2d seconds" % (gc_elapsed)) if loss_current_iteration > max_num_iterations: logging.info("Finished testing after %d iterations" % (max_num_iterations)) self.save_network(dcn, optimizer, loss_current_iteration, logging_dict=self._logging_dict) return def setup_logging_dir(self): """ Sets up the directory where logs will be stored and config files written :return: full path of logging dir :rtype: str """ if 'logging_dir_name' in self._config['training']: dir_name = self._config['training']['logging_dir_name'] else: dir_name = utils.get_current_time_unique_name() + "_" + str( self._config['dense_correspondence_network'] 
['descriptor_dimension']) + "d" self._logging_dir_name = dir_name self._logging_dir = os.path.join( utils.convert_data_relative_path_to_absolute_path( self._config['training']['logging_dir']), dir_name) print("logging_dir:", self._logging_dir) if os.path.isdir(self._logging_dir): shutil.rmtree(self._logging_dir) if not os.path.isdir(self._logging_dir): os.makedirs(self._logging_dir) # make the tensorboard log directory self._tensorboard_log_dir = os.path.join(self._logging_dir, "tensorboard") if not os.path.isdir(self._tensorboard_log_dir): os.makedirs(self._tensorboard_log_dir) return self._logging_dir @property def logging_dir(self): """ Sets up the directory where logs will be stored and config files written :return: full path of logging dir :rtype: str """ return self._logging_dir def save_network(self, dcn, optimizer, iteration, logging_dict=None): """ Saves network parameters to logging directory :return: :rtype: None """ network_param_file = os.path.join( self._logging_dir, utils.getPaddedString(iteration, width=6) + ".pth") optimizer_param_file = network_param_file + ".opt" torch.save(dcn.state_dict(), network_param_file) torch.save(optimizer.state_dict(), optimizer_param_file) # also save loss history stuff if logging_dict is not None: log_history_file = os.path.join( self._logging_dir, utils.getPaddedString(iteration, width=6) + "_log_history.yaml") utils.saveToYaml(logging_dict, log_history_file) current_loss_file = os.path.join(self._logging_dir, 'loss.yaml') current_loss_data = self._get_current_loss(logging_dict) utils.saveToYaml(current_loss_data, current_loss_file) def save_configs(self): """ Saves config files to the logging directory :return: :rtype: None """ training_params_file = os.path.join(self._logging_dir, 'training.yaml') utils.saveToYaml(self._config, training_params_file) dataset_params_file = os.path.join(self._logging_dir, 'dataset.yaml') utils.saveToYaml(self._dataset.config, dataset_params_file) # make unique identifier identifier_file = os.path.join(self._logging_dir, 'identifier.yaml') identifier_dict = dict() identifier_dict['id'] = utils.get_unique_string() utils.saveToYaml(identifier_dict, identifier_file) def adjust_learning_rate(self, optimizer, iteration): """ Adjusts the learning rate according to the schedule :param optimizer: :type optimizer: :param iteration: :type iteration: :return: :rtype: """ steps_between_learning_rate_decay = self._config['training'][ 'steps_between_learning_rate_decay'] if iteration % steps_between_learning_rate_decay == 0: for param_group in optimizer.param_groups: param_group['lr'] = param_group['lr'] * self._config[ "training"]["learning_rate_decay"] @staticmethod def set_learning_rate(optimizer, learning_rate): for param_group in optimizer.param_groups: param_group['lr'] = learning_rate @staticmethod def get_learning_rate(optimizer): for param_group in optimizer.param_groups: lr = param_group['lr'] break return lr def setup_tensorboard(self): """ Starts the tensorboard server and sets up the plotting :return: :rtype: """ # start tensorboard # cmd = "python -m tensorboard.main" logging.info("setting up tensorboard_logger") cmd = "tensorboard --logdir=%s" % (self._tensorboard_log_dir) self._tensorboard_logger = tensorboard_logger.Logger( self._tensorboard_log_dir) logging.info("tensorboard logger started") @staticmethod def load_default_config(): dc_source_dir = utils.getDenseCorrespondenceSourceDir() config_file = os.path.join(dc_source_dir, 'config', 'dense_correspondence', 'training', 'training.yaml') config = 
utils.getDictFromYamlFilename(config_file) return config @staticmethod def make_default(): dataset = SpartanDataset.make_default_caterpillar() return DenseCorrespondenceTraining(dataset=dataset)
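# Minimal usage sketch for DenseCorrespondenceTraining. The dataset YAML name
# and the utils import path are assumptions; substitute whatever composite
# dataset config is available locally.
import os
import dense_correspondence_manipulation.utils.utils as utils  # import path is an assumption

dataset_config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config',
                                       'dense_correspondence', 'dataset', 'composite',
                                       'caterpillar_only_9.yaml')
dataset_config = utils.getDictFromYamlFilename(dataset_config_filename)
train_config = DenseCorrespondenceTraining.load_default_config()

dataset = SpartanDataset(config=dataset_config)
train = DenseCorrespondenceTraining(dataset=dataset, config=train_config)
train.run()                                   # train from scratch
# train.run_from_pretrained("caterpillar_3")  # or resume a saved run (folder name is a placeholder)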
def get_within_scene_loss(pixelwise_contrastive_loss, image_a_pred, image_b_pred, matches_a, matches_b, masked_non_matches_a, masked_non_matches_b, background_non_matches_a, background_non_matches_b, blind_non_matches_a, blind_non_matches_b): """ Simple wrapper for pixelwise_contrastive_loss functions. Args and return args documented above in get_loss() """ pcl = pixelwise_contrastive_loss match_loss, masked_non_match_loss, num_masked_hard_negatives =\ pixelwise_contrastive_loss.get_loss_matched_and_non_matched_with_l2(image_a_pred, image_b_pred, matches_a, matches_b, masked_non_matches_a, masked_non_matches_b, M_descriptor=pcl._config["M_masked"]) if pcl._config["use_l2_pixel_loss_on_background_non_matches"]: background_non_match_loss, num_background_hard_negatives =\ pixelwise_contrastive_loss.non_match_loss_with_l2_pixel_norm(image_a_pred, image_b_pred, matches_b, background_non_matches_a, background_non_matches_b, M_descriptor=pcl._config["M_background"]) else: background_non_match_loss, num_background_hard_negatives =\ pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred, background_non_matches_a, background_non_matches_b, M_descriptor=pcl._config["M_background"]) blind_non_match_loss = zero_loss() num_blind_hard_negatives = 1 if not (SpartanDataset.is_empty(blind_non_matches_a.data)): blind_non_match_loss, num_blind_hard_negatives =\ pixelwise_contrastive_loss.non_match_loss_descriptor_only(image_a_pred, image_b_pred, blind_non_matches_a, blind_non_matches_b, M_descriptor=pcl._config["M_masked"]) total_num_hard_negatives = num_masked_hard_negatives + num_background_hard_negatives total_num_hard_negatives = max(total_num_hard_negatives, 1) if pcl._config["scale_by_hard_negatives"]: scale_factor = total_num_hard_negatives masked_non_match_loss_scaled = masked_non_match_loss * 1.0 / max( num_masked_hard_negatives, 1) background_non_match_loss_scaled = background_non_match_loss * 1.0 / max( num_background_hard_negatives, 1) blind_non_match_loss_scaled = blind_non_match_loss * 1.0 / max( num_blind_hard_negatives, 1) else: # we are not currently using blind non-matches num_masked_non_matches = max(len(masked_non_matches_a), 1) num_background_non_matches = max(len(background_non_matches_a), 1) num_blind_non_matches = max(len(blind_non_matches_a), 1) scale_factor = num_masked_non_matches + num_background_non_matches masked_non_match_loss_scaled = masked_non_match_loss * 1.0 / num_masked_non_matches background_non_match_loss_scaled = background_non_match_loss * 1.0 / num_background_non_matches blind_non_match_loss_scaled = blind_non_match_loss * 1.0 / num_blind_non_matches non_match_loss = 1.0 / scale_factor * (masked_non_match_loss + background_non_match_loss) loss = pcl._config["match_loss_weight"] * match_loss + \ pcl._config["non_match_loss_weight"] * non_match_loss return loss, match_loss, masked_non_match_loss_scaled, background_non_match_loss_scaled, blind_non_match_loss_scaled
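# Tiny worked example of the scaling branch above when scale_by_hard_negatives
# is enabled (all numbers are made up): the combined non-match term is divided
# by the total hard-negative count before the weighted sum with the match loss.
num_masked_hard_negatives = 40
num_background_hard_negatives = 10
masked_non_match_loss = 8.0       # un-normalized sums, for illustration only
background_non_match_loss = 2.0
match_loss = 1.5
match_loss_weight = 1.0           # pcl._config["match_loss_weight"]
non_match_loss_weight = 1.0       # pcl._config["non_match_loss_weight"]

scale_factor = num_masked_hard_negatives + num_background_hard_negatives             # 50
non_match_loss = (masked_non_match_loss + background_non_match_loss) / scale_factor  # 0.2
loss = match_loss_weight * match_loss + non_match_loss_weight * non_match_loss       # 1.7
print(loss)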
parser.add_argument("--data_name", type=str, default="caterpillar_upright.yaml") parser.add_argument("--run_prefix", type=str, default="caterpillar") parser.add_argument("--training_yaml", type=str, default="training.yaml") args = parser.parse_args() config_filename = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence', 'dataset', 'composite', args.data_name) config = utils.getDictFromYamlFilename(config_filename) train_config_file = os.path.join(utils.getDenseCorrespondenceSourceDir(), 'config', 'dense_correspondence', 'training', args.training_yaml) train_config = utils.getDictFromYamlFilename(train_config_file) dataset = SpartanDataset(config=config) dataset_test = None if train_config["training"]["compute_test_loss"]: dataset_test=SpartanDataset(mode="test", config=config) logging_dir = "trained_models/tutorials" #num_iterations = 3500 d = 3 # the descriptor dimension name = f"{args.run_prefix}_%d" %(d) train_config["training"]["logging_dir_name"] = name train_config["training"]["logging_dir"] = logging_dir train_config["dense_correspondence_network"]["descriptor_dimension"] = d #train_config["training"]["num_iterations"] = num_iterations