def __init__(self, sess, config, dataset):
    """Store the collaborators and build the whole graph.

    Parameters
    ----------
    sess
        The tensorflow session; kept on ``self.sess``.
    config
        Configuration object; kept on ``self.config``.
    dataset
        The data module; kept on ``self.dataset``.

    After construction ``self.mean`` / ``self.std`` hold one input
    normalizer per module ("kp", "ori", "desc"), ``self.scale_aug`` holds
    the augmented/non-augmented patch size ratio and ``self.summary`` holds
    the merged summary op.
    """
    self.sess = sess
    self.config = config
    self.dataset = dataset

    module_names = ["kp", "ori", "desc"]

    # Input normalizers. The inputs are raw images; the default values map
    # them to roughly the range [-1, 1].
    self.mean = dict.fromkeys(module_names, 128.0)
    self.std = dict.fromkeys(module_names, 128.0)
    if self.config.use_old_mean_std:
        # Legacy statistics; kp and ori share the same values.
        self.mean.update(
            kp=116.4368117568544249706974369473755359649658203125,
            ori=116.4368117568544249706974369473755359649658203125,
            desc=110.75389862060546875,
        )
        self.std.update(
            kp=88.083076379771597430590190924704074859619140625,
            ori=88.083076379771597430590190924704074859619140625,
            desc=61.53688812255859375,
        )

    # Keypoint scale change introduced by the rotation augmentation.
    augmented_size = float(get_patch_size(self.config))
    plain_size = float(get_patch_size_no_aug(self.config))
    self.scale_aug = augmented_size / plain_size

    # Graph construction: each stage lives in its own variable scope and
    # the stages are built strictly in this order.
    with tf.variable_scope("placeholders"):
        self._build_placeholders()
    with tf.variable_scope("network"):
        self._build_network()
    with tf.variable_scope("loss"):
        self._build_loss()
    with tf.variable_scope("optimization"):
        self._build_optim()

    # Legacy component, only used for accessing old framework weights.
    build_legacy(self)

    # Print every variable of the network.
    show_all_variables()

    # One histogram summary per parameter of each module.
    for module_name in module_names:
        for parameter in self.params[module_name]:
            tf.summary.histogram(parameter.name, parameter)

    # Merge everything registered so far into a single summary op.
    self.summary = tf.summary.merge_all()
def config_to_param(config):
    """Translate ``config`` into a ``paramStruct`` for the data pipeline.

    Fills the "dataset", "patch" and "model" parameter groups and copies
    the ``data_dir``, ``temp_dir`` and ``scratch_dir`` folders over from
    ``config``. Returns the populated ``paramStruct``.
    """
    param = paramStruct()
    use_old_data = config.old_data_compat

    # ---- Param group "dataset"
    dataset = param.dataset
    dataset.nTestPercent = 20
    dataset.dataType = "ECCV"
    dataset.nValidPercent = 20
    dataset.fMinKpSize = 2.0
    dataset.nPosPerImg = -1
    # A list on purpose: the data module supports concatenating datasets.
    dataset.trainSetList = ["ECCV/" + config.data_name]
    dataset.nNegPerImg = 1000
    dataset.nTrainPercent = 60

    # ---- Param group "patch"
    patch = param.patch
    # Old data was stored at the augmented patch size.
    size_fn = get_patch_size if use_old_data else get_patch_size_no_aug
    patch.nPatchSize = int(size_fn(config))
    patch.nPatchSizeAug = int(get_patch_size(config))
    patch.noscale = False
    patch.fNegOverlapTh = 0.1
    patch.sNegMineMethod = "use_all_SIFT_points"
    patch.fRatioScale = float(get_ratio_scale(config))
    patch.fPerturbInfo = np.array([0.2, 0.2, 0.0], dtype=float)
    patch.nMaxRandomNegMineIter = 500 if use_old_data else 100
    patch.fMaxScale = 1.0
    patch.bPerturb = 1.0

    # ---- Param group "model"
    param.model.nDescInputSize = int(config.desc_input_size)

    # ---- Folders are taken from the config
    param.data_dir = config.data_dir
    param.temp_dir = config.temp_dir
    param.scratch_dir = config.scratch_dir

    return param
def _build_network(self):
    """Wire the keypoint, orientation and descriptor modules together.

    The stages are built in a fixed order inside the "lift" variable scope:
    resize transformer -> keypoint detector -> crop transformer ->
    orientation estimator -> rot transformer -> descriptor. Results are
    stored by stage name in ``self.outputs``; ``self.params`` and
    ``self.allparams`` get one (initially empty) list per module plus a
    "joint" list.
    """
    cfg = self.config

    # Import each module implementation selected in the configuration.
    self.modules = {
        key: importlib.import_module(
            "modules.{}".format(getattr(cfg, "module_" + key)))
        for key in ["kp", "ori", "desc"]
    }

    # Per-module containers for outputs and parameters.
    # NOTE: the params entries are lists, not dicts!
    self.outputs = {key: {} for key in self.modules}
    self.params = {key: [] for key in self.modules}
    self.allparams = {key: [] for key in self.modules}
    self.params["joint"] = []
    self.allparams["joint"] = []

    # Output slots for the three spatial transformers.
    for stage in ["resize", "crop", "rot"]:
        self.outputs[stage] = {}

    with tf.variable_scope("lift"):
        # Which modules get bypassed depends on the subtask.
        subtask = cfg.subtask

        # ------------------------------------------------------------
        # Initial resize for the keypoint module. It also applies the
        # rotation when the augmented set is in use.
        resize_cs = self.inputs["aug_rot"] if cfg.use_augmented_set else None
        resize_ratio = (float(get_patch_size_no_aug(cfg)) /
                        float(get_patch_size(cfg)))
        self._build_st(
            module="resize",
            xyz=None,
            cs=resize_cs,
            names=["P1", "P2", "P3", "P4"],
            out_size=cfg.kp_input_size,
            reduce_ratio=resize_ratio,
        )

        # ------------------------------------------------------------
        # Keypoint detector. For every patch it outputs "score" (a
        # soft-maximum of the score map, not a softmax) and the keypoint
        # position inside the patch, in a form the spatial transformer
        # can consume. Outputs are always dictionaries.
        #
        # The ground-truth coordinates are rotated when augmenting
        # rotations; they serve as the bypass value.
        aug_rot = self.inputs["aug_rot"] if cfg.augment_rotations else None
        xyz_gt_scaled = self.transform_xyz(
            self.inputs["xyz"],
            aug_rot,
            cfg.batch_size,
            self.scale_aug,
            transpose=True,
            names=["P1", "P2", "P3", "P4"],
        )
        self._build_module(
            module="kp",
            inputs=self.outputs["resize"],
            bypass=xyz_gt_scaled,
            names=["P1", "P2", "P3", "P4"],
            skip=subtask in ("ori", "desc"),
        )

        # ------------------------------------------------------------
        # Crop spatial transformer. It uses the same support region as
        # the descriptor.
        xyz_kp_scaled = self.transform_kp(
            self.outputs["kp"],
            aug_rot,
            cfg.batch_size,
            1 / self.scale_aug,
            transpose=False,
            names=["P1", "P2", "P3"],
        )
        crop_ratio = float(cfg.desc_input_size) / float(get_patch_size(cfg))
        self._build_st(
            module="crop",
            xyz=xyz_kp_scaled,
            cs=aug_rot,
            names=["P1", "P2", "P3"],
            out_size=cfg.ori_input_size,
            reduce_ratio=crop_ratio,
        )

        # ------------------------------------------------------------
        # Orientation estimator. It consumes the crops and returns "cs",
        # the *unnormalized* cos and sin, which are normalized and used
        # directly in the affine transform. Outputs are always
        # dictionaries.
        #
        # Bypass value: the GT angle, with the augmentation angle removed
        # when rotations are augmented.
        if cfg.augment_rotations:
            gt_angle = {
                branch: (self.inputs["angle"][branch] -
                         self.inputs["aug_rot"][branch]["angle"])
                for branch in ["P1", "P2", "P3"]
            }
        else:
            gt_angle = self.inputs["angle"]
        self._build_module(
            module="ori",
            inputs=self.outputs["crop"],
            bypass=gt_angle,
            names=["P1", "P2", "P3"],
            skip=subtask in ("kp", "desc"),
        )

        # ------------------------------------------------------------
        # Rot spatial transformer.
        # - augment_rotations: chain the augmentation rotation with the
        #   output of the orientation module.
        # - augmented set only: use the orientation module output.
        # - otherwise: no rotation is passed.
        # Rotation augmentations are not considered for the descriptor.
        if cfg.augment_rotations:
            desc_cs = self.chain_cs(
                self.inputs["aug_rot"],
                self.outputs["ori"],
                names=["P1", "P2", "P3"],
            )
        elif cfg.use_augmented_set:
            desc_cs = self.outputs["ori"]
        else:
            desc_cs = None
        rot_ratio = float(cfg.desc_input_size) / float(get_patch_size(cfg))
        self._build_st(
            module="rot",
            xyz=xyz_kp_scaled,
            cs=desc_cs,
            names=["P1", "P2", "P3"],
            out_size=cfg.desc_input_size,
            reduce_ratio=rot_ratio,
        )

        # ------------------------------------------------------------
        # Feature descriptor: computes the descriptors for the rotated
        # patches. It is never skipped.
        self._build_module(
            module="desc",
            inputs=self.outputs["rot"],
            bypass=self.outputs["rot"],
            names=["P1", "P2", "P3"],
            skip=False,
        )
def _build_placeholders(self):
    """Build the Tensorflow placeholders.

    Populates ``self.inputs`` with one dictionary per input type ("patch",
    "xyz", "angle" and, when ``config.use_augmented_set`` is set,
    "aug_rot"), each keyed by the siamese branch name ("P1".."P4"), plus an
    "img" entry for image based testing. Also sets ``self.r_base`` and the
    boolean ``self.is_training`` placeholder.

    Raises
    ------
    ValueError
        If the configured patch size is not a whole number, since it is
        used as a static tensor dimension.
    """
    # The inputs placeholder dictionary, one sub-dictionary per type.
    # LATER: label might not be necessary
    self.inputs = {}
    types = ["patch", "xyz", "angle"]
    if self.config.use_augmented_set:
        types += ["aug_rot"]
    for _type in types:
        self.inputs[_type] = {}

    # The spatial size is specified explicitly because the spatial
    # transformer implementation requires it; only the batch dimension is
    # left variable. Note that this has to be dealt with in the validate
    # loop.
    batch_size = None

    # nchannel is read from the configuration. Make sure that the data
    # module is behaving accordingly.
    nchannel = self.config.nchannel

    # Input patch size from the config. It is used as a static shape
    # dimension, so it must be an integer: float dimensions are only
    # tolerated by old Tensorflow versions.
    raw_patch_size = get_patch_size(self.config)
    patch_size = int(raw_patch_size)
    if patch_size != raw_patch_size:
        raise ValueError(
            "Patch size must be a whole number, got {}".format(
                raw_patch_size))

    # r_base: the overlap radius used when computing keypoint overlaps.
    self.r_base = (float(self.config.desc_input_size) /
                   float(get_patch_size_no_aug(self.config)))

    # P1, P2, P3, P4 in the paper. P1, P2, P3 are keypoints; P1 and P2
    # correspond, P1 and P3 don't correspond, P4 is a non-keypoint patch.
    # NOTE: the creation order below is kept as-is, because all the
    # placeholders of a branch share the same requested name.
    for _name in ["P1", "P2", "P3", "P4"]:
        self.inputs["patch"][_name] = tf.placeholder(
            tf.float32,
            shape=[batch_size, patch_size, patch_size, nchannel],
            name=_name,
        )
        self.inputs["xyz"][_name] = tf.placeholder(
            tf.float32,
            shape=[batch_size, 3],
            name=_name,
        )
        self.inputs["angle"][_name] = tf.placeholder(
            tf.float32,
            shape=[batch_size, 1],
            name=_name,
        )
        if self.config.use_augmented_set:
            self.inputs["aug_rot"][_name] = {
                "cs": tf.placeholder(
                    tf.float32,
                    shape=[batch_size, 2],
                    name=_name,
                ),
                "angle": tf.placeholder(
                    tf.float32,
                    shape=[batch_size, 1],
                    name=_name,
                ),
            }

        # Add the patches to the summary to view them.
        image_summary_nhwc(
            "input/" + _name,
            self.inputs["patch"][_name],
        )

    # For image based test: arbitrary batch and spatial size.
    self.inputs["img"] = {
        "img": tf.placeholder(
            tf.float32,
            shape=[None, None, None, nchannel],
            name="img",
        )
    }

    # Run mode switch for dropout and batch_norm.
    self.is_training = tf.placeholder(
        tf.bool,
        shape=(),
        name="is_training",
    )
def process(inputs, bypass, name, skip, config, is_training):
    """Build the keypoint detector for one siamese branch.

    Parameters
    ----------
    inputs
        Input tensor to the network (indexed as NHWC below).
    bypass
        Ground truth to be used when bypassing this module.
    name
        Name of the siamese branch. "img" means the whole image is being
        processed, in which case only the uncut score map is built.
    skip
        Whether to apply the bypass information instead of the network.
    config
        Configuration; ``kp_filter_size``, ``kp_input_size``,
        ``desc_input_size``, ``kp_com_strength`` and
        ``kp_scoremap_softmax_strength`` are read here.
    is_training
        Currently unused by this function.

    Returns
    -------
    dict
        ``bypass_kp(bypass)`` when ``skip`` is set. Otherwise a dictionary
        with "scoremap-uncut" and, unless ``name == "img"``, also
        "scoremap", "xyz" and "score".

    Raises
    ------
    ValueError
        If the score map has already lost more border than the required
        boundary, i.e. the additional crop would be negative.
    """
    # Show the inputs fed into this layer, except for whole images.
    if name != "img":
        image_summary_nhwc(name + "-input", inputs)

    if skip:
        return bypass_kp(bypass)

    # We always return a dictionary to be more explicit.
    res = {}
    cur_in = inputs

    # NOTE: the input range is not normalized and no batch normalization is
    # applied in this module.
    with tf.variable_scope("conv-ghh-1"):
        nu = 1
        ns = 4
        nm = 4
        cur_in = conv_2d(
            cur_in, config.kp_filter_size, nu * ns * nm, 1, "VALID")
        cur_in = ghh(cur_in, ns, nm)

    res["scoremap-uncut"] = cur_in

    # Stop here when building for the full image: none of the patch
    # geometry below is needed for it.
    if name == "img":
        return res

    # ---------------------------------------------------------------------
    # Check how much we need to cut
    kp_input_size = config.kp_input_size
    patch_size = get_patch_size_no_aug(config)
    desc_input_size = config.desc_input_size
    rf = float(kp_input_size) / float(patch_size)

    input_shape = get_tensor_shape(inputs)
    uncut_shape = get_tensor_shape(cur_in)
    # Border required around the score map (sqrt(2) * half the descriptor
    # input size, rescaled by rf) versus the border already removed by the
    # "VALID" convolution.
    req_boundary = np.ceil(
        rf * np.sqrt(2) * desc_input_size / 2.0).astype(int)
    cur_boundary = (input_shape[2] - uncut_shape[2]) // 2
    crop_size = req_boundary - cur_boundary

    # Crop center. A zero crop must leave the map untouched: the slice
    # `x[0:-0]` is `x[0:0]`, i.e. empty.
    if crop_size < 0:
        raise ValueError(
            "kp score map boundary ({}) exceeds the required boundary "
            "({})".format(cur_boundary, req_boundary))
    if crop_size > 0:
        cur_in = cur_in[:, crop_size:-crop_size, crop_size:-crop_size, :]
    res["scoremap"] = cur_in

    # ---------------------------------------------------------------------
    # Mapping layer to x,y,z
    com_strength = config.kp_com_strength
    scoremap_shape = get_tensor_shape(cur_in)
    od = len(scoremap_shape)
    reduce_axes = list(range(1, od))

    # CoM (softmax-weighted center of mass) to get the coordinates
    pos_array_x = tf.range(scoremap_shape[2], dtype=tf.float32)
    pos_array_y = tf.range(scoremap_shape[1], dtype=tf.float32)

    out = cur_in
    # Subtract the per-sample maximum before exponentiating.
    max_out = tf.reduce_max(out, axis=reduce_axes, keep_dims=True)
    o = tf.exp(com_strength * (out - max_out))
    sum_o = tf.reduce_sum(o, axis=reduce_axes, keep_dims=True)
    x = tf.reduce_sum(
        o * tf.reshape(pos_array_x, [1, 1, -1, 1]),
        axis=reduce_axes, keep_dims=True) / sum_o
    y = tf.reduce_sum(
        o * tf.reshape(pos_array_y, [1, -1, 1, 1]),
        axis=reduce_axes, keep_dims=True) / sum_o

    # Remove the unnecessary dimensions (i.e. flatten them)
    x = tf.reshape(x, (-1, ))
    y = tf.reshape(y, (-1, ))

    # --------------
    # Turn x and y into the range -1 to 1, where the patch size (score map
    # plus the required boundary on both sides) is mapped to -1 and 1.
    boundary = np.float32(req_boundary)
    orig_patch_width = scoremap_shape[2] + np.float32(req_boundary * 2.0)
    orig_patch_height = scoremap_shape[1] + np.float32(req_boundary * 2.0)
    half_width = np.float32((orig_patch_width - 1.0) * 0.5)
    half_height = np.float32((orig_patch_height - 1.0) * 0.5)

    x = (x + boundary) / half_width - np.float32(1.0)
    y = (y + boundary) / half_height - np.float32(1.0)

    # --------------
    # No movement in z direction
    z = tf.zeros_like(x)

    res["xyz"] = tf.stack([x, y, z], axis=1)

    # ---------------------------------------------------------------------
    # Score of the patch, computed from the cropped score map
    res["score"] = softmax(
        res["scoremap"],
        axis=reduce_axes,
        softmax_strength=config.kp_scoremap_softmax_strength)

    return res
def __init__(self, sess, config, dataset, force_mean_std=None):
    """Store the collaborators, pick the input normalizers, build the graph.

    Parameters
    ----------
    sess
        The tensorflow session; kept on ``self.sess``.
    config
        Configuration object; kept on ``self.config``.
    dataset
        The data module; kept on ``self.dataset``.
    force_mean_std
        Optional dictionary with "mean" and "std" entries. When given it
        takes precedence over ``config.mean_std_type``.

    ``config.mean_std_type`` selects the normalization otherwise:
    "hardcoded" (128 / 128), "old" (legacy values), "dataset" (computed
    from the training patches), "batch" and "sample" (``self.mean`` and
    ``self.std`` are left empty here).

    Raises
    ------
    RuntimeError
        For the unimplemented "sequence" strategy and for any unknown
        ``mean_std_type``.
    """
    self.sess = sess
    self.config = config
    self.dataset = dataset

    module_names = ["kp", "ori", "desc"]

    # Normalizer for the input data (they are raw images).
    self.mean = {}
    self.std = {}

    mean_std_type = self.config.mean_std_type
    if force_mean_std is not None:
        # Load values if they already exist.
        self.mean = force_mean_std["mean"]
        self.std = force_mean_std["std"]
    elif mean_std_type == "hardcoded":
        # Maps raw images to roughly the range [-1, 1].
        print("-- Using default values for mean/std")
        for _module in module_names:
            self.mean[_module] = 128.0
            self.std[_module] = 128.0
    elif mean_std_type == "old":
        print("-- Using old (piccadilly) values for mean/std")
        self.mean["kp"] = 116.4368117568544249706974369473755359649658203125
        self.std["kp"] = 88.083076379771597430590190924704074859619140625
        self.mean["ori"] = 116.4368117568544249706974369473755359649658203125
        self.std["ori"] = 88.083076379771597430590190924704074859619140625
        self.mean["desc"] = 110.75389862060546875
        self.std["desc"] = 61.53688812255859375
    elif mean_std_type == "dataset":
        t = time()
        print("-- Recomputing dataset mean/std...")

        # Account for augmented sets: drop the border added by the
        # augmentation (cropping the last two axes of the patches).
        if self.config.use_augmented_set:
            b = int(
                (get_patch_size(config) - get_patch_size_no_aug(config)) /
                2)
        else:
            b = 0
        if b > 0:
            _d = self.dataset.data["train"]["patch"][:, :, b:-b, b:-b]
        else:
            _d = self.dataset.data["train"]["patch"][:, :, :, :]

        # Do this incrementally to avoid memory problems: every sample is
        # assigned the statistic of its chunk, then the per-sample values
        # are averaged.
        # NOTE(review): averaging per-chunk stds is not the global std;
        # kept as-is so the resulting values do not change.
        jump = 1000
        data_mean = np.zeros(_d.shape[0])
        data_std = np.zeros(_d.shape[0])
        for i in tqdm(range(0, _d.shape[0], jump)):
            chunk = _d[i:i + jump]
            data_mean[i:i + jump] = chunk.mean()
            data_std[i:i + jump] = chunk.std()
        data_mean = data_mean.mean()
        data_std = data_std.mean()
        print('-- Dataset mean: {0:.03f}, std = {1:.03f}'.format(
            data_mean, data_std))

        for _module in module_names:
            self.mean[_module] = data_mean
            self.std[_module] = data_std

        print("-- Done in {0:.02f} sec".format(time() - t))
    elif mean_std_type == "batch":
        print("-- Will recompute mean/std per batch...")
    elif mean_std_type == "sample":
        print("-- Will recompute mean/std per sample...")
    elif mean_std_type == "sequence":
        print("-- Will recompute mean/std per sequence...")
        raise RuntimeError("TODO")
    else:
        raise RuntimeError("Unknown mean-std strategy")

    # Account for the keypoint scale change while augmenting rotations.
    self.scale_aug = (float(get_patch_size(self.config)) /
                      float(get_patch_size_no_aug(self.config)))

    # Graph construction: each stage lives in its own variable scope and
    # the stages are built strictly in this order.
    with tf.variable_scope("placeholders"):
        self._build_placeholders()
    with tf.variable_scope("network"):
        self._build_network()
    with tf.variable_scope("loss"):
        self._build_loss()
    with tf.variable_scope("optimization"):
        self._build_optim()

    # Show all variables in the network
    show_all_variables()

    # Add all variables into histogram summary
    for _module in module_names:
        for _param in self.params[_module]:
            tf.summary.histogram(_param.name, _param)

    # Collect all summaries registered so far into a single op.
    self.summary = tf.summary.merge_all()