def optimize_img(init_img, solver_type, solver_param, max_iter, display, root_dir, net,
                 all_target_blob_names, targets, target_data_list):
    ensuredir(root_dir)
    solver_param.update({
        'maxiter': max_iter,
        'disp': True,
    })

    # Set initial value and reshape net
    set_data(net, init_img)
    x0 = np.ravel(init_img).astype(np.float64)
    mins = np.full_like(x0, -128)
    maxs = np.full_like(x0, 128)
    bounds = zip(mins, maxs)
    display_func = DisplayFunctor(net, root_dir, display)

    opt_res = optimize.minimize(
        objective_func,
        x0,
        args=(net, all_target_blob_names, targets, target_data_list),
        bounds=bounds,
        method=solver_type,
        jac=True,
        callback=display_func,
        options=solver_param,
    )
    print opt_res
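# Note on the scipy contract used above: because optimize.minimize is called with
# jac=True, objective_func (not shown in this snippet) must return a (loss, gradient)
# pair for the raveled image vector. A minimal placeholder illustrating only that
# return shape (the real objective backpropagates through the Caffe net):
import numpy as np

def objective_func_shape_sketch(x, *args):
    loss = float(np.sum(x ** 2))  # placeholder scalar loss
    grad = 2.0 * x                # gradient with the same shape as x
    return loss, grad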
def gen_target_data(root_dir, caffe, net, targets):
    ensuredir(root_dir)
    target_data_list = []
    for target_i, (target_img_path, target_blob_names, is_gram, _) in enumerate(targets):
        # Load and rescale to [0, 1]
        target_img = caffe.io.load_image(target_img_path)
        caffe_in = net.preprocess_inputs([target_img], auto_reshape=True)
        # Copy image into input blob
        get_data_blob(net).data[...] = caffe_in
        net.forward()

        target_datas = {}
        for target_blob_name in target_blob_names:
            target_data = net.blobs[target_blob_name].data.copy()
            # Apply ReLU
            pos_mask = target_data > 0
            target_data[~pos_mask] = 0
            if is_gram:
                target_datas[target_blob_name] = comp_gram(target_data)
            else:
                target_datas[target_blob_name] = target_data

        target_data_list.append(target_datas)

        save_image_blob(
            os.path.join(root_dir, 'target-{}.jpg'.format(target_i)),
            net,
            get_data_blob(net).data[0],
        )

    return target_data_list
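# comp_gram is referenced above but not shown. A minimal sketch of a Gram-matrix
# target, assuming the blob has shape (1, C, H, W); the project's actual helper
# may normalise differently:
import numpy as np

def comp_gram_sketch(blob_data):
    _, c, h, w = blob_data.shape
    feats = blob_data.reshape(c, h * w)   # flatten each channel's spatial activations
    return feats.dot(feats.T) / (h * w)   # (C, C) matrix of channel correlations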
def __init__(self, name, priors_dir, verbose=False, categorization_type='final'):
    self._name = name
    self._priors_dir = priors_dir
    utils.ensuredir(self._priors_dir)
    self._verbose = verbose
    self._objects = ObjectCollection(categorization_type=categorization_type)
    self._semantic_frames = {}      # house_id -> {obj_id: SemanticFrame}
    self._observations = {}         # house_id -> {(obj_id, ref_obj_id): RelativeObservation}
    self._grouped_observations = {} # {ObservationCategory: [RelativeObservation]}
def optimize_img(init_img, solver_type, solver_param, max_iter, display, root_dir, net,
                 all_target_blob_names, targets, target_data_list):
    ensuredir(root_dir)

    # Add arguments to solver_param
    solver_param.update({
        'maxiter': max_iter,
        'disp': True,
    })

    # Set up initial conditions from the initial image.
    # We do NOT want to pre-process here, since pre-processing re-normalises the data
    # to match the training model. Instead we take init_img, re-arrange it into Caffe's
    # channel layout, re-normalise from (0, 1) to (-128, 128), and ravel it.
    x0 = np.array(init_img)                       # Load caffe-opened image into a numpy array
    x0 = [x0[:, :, 2], x0[:, :, 1], x0[:, :, 0]]  # Rearrange into the shape of a pre-processed image
    x0 = (np.array([x0]) * 255.0) - 128           # Re-normalise from (0, 1) to (-128, 128) to match the bounds
    x0 = np.ravel(x0)

    # Set up the optimizer
    mins = np.full_like(x0, -128)
    maxs = np.full_like(x0, 128)
    bounds = zip(mins, maxs)                      # Bounds matching the x0 normalisation
    display_func = DisplayFunctor(net, root_dir, display)

    # Run the optimization
    opt_res = optimize.minimize(
        objective_func,
        x0,
        args=(net, all_target_blob_names, targets, target_data_list),
        bounds=bounds,
        method=solver_type,
        jac=True,
        callback=display_func,
        options=solver_param,
    )
    print opt_res
def gen_target_data(root_dir, caffe, net, targets):
    if root_dir is not None:
        ensuredir(root_dir)
    target_data_list = []
    for target_i, (target_img_path, target_blob_names, is_gram, _) in enumerate(targets):
        # Load and rescale to [0, 1]; a pre-loaded float32 image may also be passed directly
        if isinstance(target_img_path, str):
            target_img = caffe.io.load_image(target_img_path)
        else:
            target_img = target_img_path
            assert target_img.dtype == np.float32
            assert target_img.ndim == 3
            assert target_img.min() >= 0
            assert target_img.max() <= 1
        caffe_in = net.preprocess_inputs([target_img], auto_reshape=True)
        # Copy image into input blob
        get_data_blob(net).data[...] = caffe_in
        net.forward()

        target_datas = {}
        for target_blob_name in target_blob_names:
            target_data = net.blobs[target_blob_name].data.copy()
            # Apply ReLU
            pos_mask = target_data > 0
            target_data[~pos_mask] = 0
            if is_gram:
                target_datas[target_blob_name] = comp_gram(target_data)
            else:
                target_datas[target_blob_name] = target_data

        target_data_list.append(target_datas)

        if root_dir is not None:
            save_image_blob(
                os.path.join(root_dir, 'target-{}.jpg'.format(target_i)),
                net,
                get_data_blob(net).data[0],
            )

    return target_data_list
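# Hypothetical usage of the variant above: targets may mix file paths and
# pre-loaded float32 RGB arrays in [0, 1], and root_dir=None skips saving JPEGs.
# The blob names and weights below are placeholders, not the project's settings.
targets = [
    ('style.jpg', ['conv3_1', 'conv4_1'], True, 1.0),   # path, loaded via caffe.io.load_image
    (preloaded_img, ['conv4_2'], False, 1.0),           # pre-loaded float32 array in [0, 1]
]
target_data_list = gen_target_data(None, caffe, net, targets)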
def optimize_img(init_img, solver_type, solver_param, max_iter, display, root_dir, net,
                 all_target_blob_names, targets, target_data_list, tv_lambda=0.001):
    ensuredir(root_dir)
    solver_param.update({
        'maxiter': max_iter,
        'disp': True,
    })

    # Set initial value and reshape net
    set_data(net, init_img)
    x0 = np.ravel(init_img).astype(np.float64)
    mins = np.full_like(x0, -128)
    maxs = np.full_like(x0, 162)
    bounds = zip(mins, maxs)
    display_func = DisplayFunctor(net, root_dir, display)

    tv_beta = 2
    opt_res = optimize.minimize(
        objective_func,
        x0,
        args=(net, all_target_blob_names, targets, target_data_list, tv_lambda, tv_beta),
        bounds=bounds,
        method=solver_type,
        jac=True,
        callback=display_func,
        options=solver_param,
    )
    print opt_res

    # Deprocess the optimized data blob back into an image in [0, 1]
    data = np.reshape(opt_res.x, get_data_blob(net).data.shape)[0]
    deproc_img = net.transformer.deprocess(net.inputs[0], data)
    deproc_img = np.clip(deproc_img, 0, 1)
    return deproc_img
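# The tv_lambda/tv_beta arguments above imply a total-variation penalty inside
# objective_func (not shown). A minimal sketch of such a term, assuming an image
# array of shape (C, H, W); the real objective would also return its gradient:
import numpy as np

def tv_penalty_sketch(img, tv_lambda, tv_beta=2):
    dh = img[:, 1:, :] - img[:, :-1, :]   # vertical finite differences
    dw = img[:, :, 1:] - img[:, :, :-1]   # horizontal finite differences
    return tv_lambda * (np.sum(np.abs(dh) ** tv_beta) + np.sum(np.abs(dw) ** tv_beta))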
parser = argparse.ArgumentParser(description='Rotation Training with Scores')
parser.add_argument('--data-dir', type=str, default="bedroom", metavar='S')
parser.add_argument('--num-workers', type=int, default=6, metavar='N')
parser.add_argument('--last-epoch', type=int, default=-1, metavar='N')
parser.add_argument('--train-size', type=int, default=6400, metavar='N')
parser.add_argument('--save-dir', type=str, default="train/bedroom", metavar='S')
parser.add_argument('--ablation', type=str, default=None, metavar='S')
parser.add_argument('--lr', type=float, default=0.001, metavar='N')
parser.add_argument('--eps', type=float, default=1e-6, metavar='N')
args = parser.parse_args()

save_dir = args.save_dir
utils.ensuredir(save_dir)
learning_rate = args.lr
batch_size = 16

# Number of categories: line count of the frequency file minus its two non-category lines
with open(f"data/{args.data_dir}/final_categories_frequency", "r") as f:
    lines = f.readlines()
num_categories = len(lines) - 2

# Number of input channels depends on the ablation setting
if args.ablation is None:
    num_input_channels = num_categories + 9
elif args.ablation == "basic":
    num_input_channels = 7
elif args.ablation == "depth":
    num_input_channels = 2
else:
    raise NotImplementedError
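# Sketch of how the parsed values above would typically be wired into a PyTorch
# training setup. The model and dataset here are dummy placeholders, not the
# project's actual classes; only the use of lr, eps, batch_size and num_workers
# is the point.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

model = nn.Conv2d(num_input_channels, 1, kernel_size=1)   # placeholder network
dummy_data = torch.zeros(8, num_input_channels, 64, 64)   # tiny dummy dataset
loader = DataLoader(TensorDataset(dummy_data), batch_size=batch_size,
                    shuffle=True, num_workers=args.num_workers)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, eps=args.eps)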
def generate_pr(saw_image_dir, pixel_labels_dir, splits_dir, out_dir, dataset_split,
                class_weights, bl_filter_size, algo_configs, thres_count=200):
    """
    Generate precision-recall curves for each specified algorithm.

    :param saw_image_dir: Directory which contains the SAW images (input to the CNN).

    :param pixel_labels_dir: Directory which contains the SAW pixel labels for each photo.

    :param splits_dir: Directory which contains the list of photo IDs for each
        dataset split (training, validation, test).

    :param out_dir: Directory where we will save the generated PR curves.

    :param dataset_split: Dataset split we want to evaluate on. Can be "R" (training),
        "V" (validation) or "E" (test).

    :param class_weights: List of weights for the 3 classes (NS-ND, NS-SB, S).
        We used [1, 1, 2] in the paper.

    :param bl_filter_size: The size of the maximum filter used on the shading gradient
        magnitude image. We used 10 in the paper. If 0, we do not filter.

    :param algo_configs: List of baselines as (algorithm slug, predicted (decomposed)
        shading directory) pairs, or ("saw_pixelnet", "path_to_trained_net_dir") for
        our trained CNN.

    :param thres_count: Number of thresholds we want to evaluate on. Check
        ``gen_pr_thres_list`` to see how we sample thresholds between 0 and 1.
    """
    bl_names_dic = {
        'baseline_reflectance': 'Constant R',
        'lettry2018_siamese': '[Ours]',
        'zhou2015_reflprior': '[Zhou et al. 2015]',
        'bell2014_densecrf': '[Bell et al. 2014]',
        'grosse2009_color_retinex': 'Color Retinex',
        'grosse2009_grayscale_retinex': 'Grayscale Retinex',
        'zhao2012_nonlocal': '[Zhao et al. 2012]',
        'garces2012_clustering': '[Garces et al. 2012]',
        'shen2011_optimization': '[Shen et al. 2011]',
        'saw_pixelnet': '[Kovacs et al. 2017]',
    }

    rootdir = os.path.join(out_dir, gen_class_weights_str(class_weights))
    ensuredir(rootdir)

    thres_list = gen_pr_thres_list(thres_count)
    photo_ids = load_photo_ids_for_split(
        splits_dir=splits_dir, dataset_split=dataset_split
    )

    plot_arrs = []
    line_names = []
    fn = 'pr-%s' % {'R': 'train', 'V': 'val', 'E': 'test'}[dataset_split]
    title = '%s Precision-Recall' % (
        {'R': 'Training', 'V': 'Validation', 'E': 'Test'}[dataset_split],
    )

    def snap_plot():
        plot_and_save_2D_arrays(
            filename=os.path.join(rootdir, fn) + '.pdf',
            arrs=plot_arrs,
            title=title,
            xlabel='Recall',
            xinterval=(0, 1),
            ylabel='Precision',
            yinterval=(0, 1),
            line_names=line_names,
        )
        save_plot_arr_to_csv(
            file_path=os.path.join(rootdir, fn) + '.csv',
            thres_list=thres_list,
            arrs=plot_arrs,
            line_names=line_names,
        )

    for algo_slug, algo_dir in algo_configs:
        print 'Working on %s (path: %s)...' % (algo_slug, algo_dir)

        if algo_slug == 'saw_pixelnet':
            eval_kwargs = dict(
                saw_image_dir=saw_image_dir,
                net=load_net(net_dir=algo_dir),
            )
            rdic_list = get_precision_recall_list(
                pixel_labels_dir=pixel_labels_dir,
                thres_list=thres_list,
                photo_ids=photo_ids,
                class_weights=class_weights,
                eval_func=eval_net_on_photo,
                eval_kwargs=eval_kwargs,
            )
        else:
            eval_kwargs = dict(
                pred_shading_dir=algo_dir,
                bl_filter_size=bl_filter_size,
            )
            rdic_list = get_precision_recall_list(
                pixel_labels_dir=pixel_labels_dir,
                thres_list=thres_list,
                photo_ids=photo_ids,
                class_weights=class_weights,
                eval_func=eval_baseline_on_photo,
                eval_kwargs=eval_kwargs,
            )

        plot_arrs.append(gen_plot_arr(rdic_list))
        if algo_slug in bl_names_dic:
            line_names.append(bl_names_dic[algo_slug])
        else:
            line_names.append('%s, bfs (%s)' % (algo_slug, bl_filter_size))

        snap_plot()
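# Hypothetical invocation of generate_pr, evaluating the trained CNN and one
# baseline on the test split. All paths are placeholders; class_weights=[1, 1, 2]
# and bl_filter_size=10 are the values quoted in the docstring.
generate_pr(
    saw_image_dir='data/saw_images_512',
    pixel_labels_dir='data/saw_pixel_labels',
    splits_dir='data/saw_splits',
    out_dir='out/pr',
    dataset_split='E',
    class_weights=[1, 1, 2],
    bl_filter_size=10,
    algo_configs=[
        ('saw_pixelnet', 'nets/saw_pixelnet_trained'),
        ('bell2014_densecrf', 'decompositions/bell2014_densecrf'),
    ],
)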