def load_data(self):
    """Return the test patches extracted around the configured keypoints.

    Reads the image at ``self.config.test_img_file`` and the keypoints at
    ``self.config.test_kp_file``, extracts patches through the legacy
    ``load_patches`` utility and repackages its output.

    LATER: Cleanup. We currently re-use the utils we had from data
    extraction.

    Returns:
        dict: ``"patch"`` (element 0 of the ``load_patches`` result with its
        axes permuted by ``(0, 2, 3, 1)``, i.e. NHWC), ``"kps"`` (element
        5), ``"xyz"`` (element 3) and ``"angle"`` (element 4 reshaped to an
        Nx1 array).

    Raises:
        IOError: if the test image cannot be read.
    """

    # Load image. cv2.imread reports failure by returning None instead of
    # raising, so check explicitly to fail with a readable message.
    img = cv2.imread(self.config.test_img_file)
    if img is None:
        raise IOError("Could not read image file: {}".format(
            self.config.test_img_file))
    # If color image, turn it to gray
    if len(img.shape) == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    in_dim = 1

    # Load keypoints
    kp = np.asarray(loadKpListFromTxt(self.config.test_kp_file))

    # Use load patches function
    # Assign dummy values to y, ID
    num_kp = len(kp)
    y = np.zeros((num_kp, ))
    ID = np.zeros((num_kp, ), dtype='int64')
    # store angle in radians
    angle = np.pi / 180.0 * kp[:, IDX_ANGLE]

    # load patches with id (drop out of boundary)
    bPerturb = False
    fPerturbInfo = np.zeros((3, ))
    dataset = load_patches(img,
                           kp,
                           y,
                           ID,
                           angle,
                           get_ratio_scale(self.config),
                           1.0,
                           int(get_patch_size(self.config)),
                           int(self.config.desc_input_size),
                           in_dim,
                           bPerturb,
                           fPerturbInfo,
                           bReturnCoords=True,
                           is_test=True)

    # Change old dataset return structure to necessary data
    # (elements 1 and 2 are the dummy y and ID, which are not needed)
    x = dataset[0]
    pos = dataset[3]
    angle = dataset[4]
    coords = dataset[5]

    # Return the dictionary structure
    cur_data = {}
    cur_data["patch"] = np.transpose(x, (0, 2, 3, 1))  # In NHWC
    cur_data["kps"] = coords
    cur_data["xyz"] = pos
    # Make sure that angle is a Nx1 vector
    cur_data["angle"] = np.reshape(angle, (-1, 1))

    return cur_data
def __init__(self, config, rng):
    """Load the train, valid and test splits through the old data module.

    For each split a legacy data object is created from ``config``, checked
    against the configured patch size and channel count, and its arrays are
    stored under ``self.data[split]``.

    Args:
        config: configuration object; must provide ``nchannel`` and whatever
            ``config_to_param`` / ``get_patch_size`` read.
        rng: not used by this constructor.
    """

    # Data dictionary, one entry per split
    self.data = {}

    for split in ("train", "valid", "test"):
        legacy = old_impl.data_obj(config_to_param(config), split)

        # Sanity checks: the legacy module must hand back square patches of
        # the configured size with the configured number of channels.
        assert legacy.patch_height == legacy.patch_width
        assert legacy.patch_height == get_patch_size(config)
        assert legacy.num_channel == config.nchannel
        assert legacy.out_dim == config.nchannel

        split_data = {}
        split_data["patch"] = legacy.x
        split_data["xyz"] = legacy.pos
        # angle is stored as an Nx1 array
        split_data["angle"] = legacy.angle.reshape(-1, 1)
        split_data["ID"] = legacy.ID
        self.data[split] = split_data

    # data ordering of this class
    self.data_order = "NCHW"

    # Save the hash, for the pairs folder (taken from the last split loaded)
    self.hash = legacy.hash
def __init__(self, sess, config, dataset):
    """Store the session/config/dataset and build the whole graph.

    Sets the per-module input normalizers (``self.mean`` / ``self.std``),
    then builds placeholders, network, loss and optimizer, each under its
    own variable scope, followed by the legacy component and the summaries.

    Args:
        sess: tensorflow session to keep a pointer to.
        config: configuration object.
        dataset: data module to keep a pointer to.
    """

    # Keep pointers to the session, the config and the data module
    self.sess = sess
    self.config = config
    self.dataset = dataset

    module_names = ["kp", "ori", "desc"]

    # Normalizer for the input data (they are raw images)
    # Currently normalized to be between -1 and 1
    self.mean = {}
    self.std = {}
    for name in module_names:
        self.mean[name] = 128.0
        self.std[name] = 128.0

    # Optionally override with the old mean/std values
    if self.config.use_old_mean_std:
        old_kp_ori_mean = 116.4368117568544249706974369473755359649658203125
        old_kp_ori_std = 88.083076379771597430590190924704074859619140625
        self.mean["kp"] = old_kp_ori_mean
        self.std["kp"] = old_kp_ori_std
        self.mean["ori"] = old_kp_ori_mean
        self.std["ori"] = old_kp_ori_std
        self.mean["desc"] = 110.75389862060546875
        self.std["desc"] = 61.53688812255859375

    # Account for the keypoint scale change while augmenting rotations
    aug_size = float(get_patch_size(self.config))
    no_aug_size = float(get_patch_size_no_aug(self.config))
    self.scale_aug = aug_size / no_aug_size

    # Graph construction. The order matters: placeholders, network, loss,
    # optimization.
    with tf.variable_scope("placeholders"):
        self._build_placeholders()
    with tf.variable_scope("network"):
        self._build_network()
    with tf.variable_scope("loss"):
        self._build_loss()
    with tf.variable_scope("optimization"):
        self._build_optim()

    # Build the legacy component. This is only used for accessing old
    # framework weights. You can safely ignore this part
    build_legacy(self)

    # Show all variables in the network
    show_all_variables()

    # Add all variables into histogram summary
    for name in module_names:
        for variable in self.params[name]:
            tf.summary.histogram(variable.name, variable)

    # Collect all summary (Lazy...)
    self.summary = tf.summary.merge_all()
def config_to_param(config):
    """Translate the new-framework ``config`` into an old ``paramStruct``.

    Args:
        config: configuration object; ``data_name``, ``old_data_compat``,
            ``desc_input_size``, ``data_dir``, ``temp_dir`` and
            ``scratch_dir`` are read directly, the patch geometry through
            ``get_patch_size``, ``get_patch_size_no_aug`` and
            ``get_ratio_scale``.

    Returns:
        The populated ``paramStruct`` instance.
    """

    param = paramStruct()
    compat = config.old_data_compat

    # Param Group "dataset"
    dataset = param.dataset
    dataset.nTestPercent = 20
    dataset.dataType = "ECCV"
    dataset.nValidPercent = 20
    dataset.fMinKpSize = 2.0
    dataset.nPosPerImg = -1
    # Note that we are passing a list. This module actually supports
    # concatenating datasets.
    dataset.trainSetList = ["ECCV/" + config.data_name]
    dataset.nNegPerImg = 1000
    dataset.nTrainPercent = 60

    # Param Group "patch"
    patch = param.patch
    if compat:
        patch.nPatchSize = int(get_patch_size(config))
    else:
        # Without old-data compatibility the base size excludes the
        # augmentation margin and the augmented size is stored separately.
        patch.nPatchSize = int(get_patch_size_no_aug(config))
        patch.nPatchSizeAug = int(get_patch_size(config))
    patch.noscale = False
    patch.fNegOverlapTh = 0.1
    patch.sNegMineMethod = "use_all_SIFT_points"
    patch.fRatioScale = float(get_ratio_scale(config))
    patch.fPerturbInfo = np.array([0.2, 0.2, 0.0], dtype=float)
    patch.nMaxRandomNegMineIter = 500 if compat else 100
    patch.fMaxScale = 1.0
    patch.bPerturb = 1.0

    # Param Group "model"
    param.model.nDescInputSize = int(config.desc_input_size)

    # override folders from config
    param.data_dir = config.data_dir
    param.temp_dir = config.temp_dir
    param.scratch_dir = config.scratch_dir

    return param
def _compute_kp(self):
    """Detect keypoints on the dataset image and save them to a text file.

    The grayscale image from ``self.dataset.load_image()`` is resized once
    per tested scale, each resized image is run through the network to get
    a score map, and the padded score maps go to ``get_XYZS_from_res_list``
    for non-maximum suppression. At most ``config.test_num_keypoint``
    results are kept, passed to ``draw_XYZS_to_img`` together with
    ``config.test_out_file + '.jpg'``, converted with ``XYZS2kpList`` and
    written with ``saveKpListToTxt`` to ``config.test_out_file``.

    LATER: Clean up code

    Raises:
        NotImplementedError: if ``config.test_kp_use_tensorflow`` is false.
    """

    # time.clock() was removed in Python 3.8. Prefer perf_counter() and only
    # fall back to clock() on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock

    total_time = 0.0

    # Read image (the third returned value, a load/prep time, is not used)
    image_color, image_gray, _ = self.dataset.load_image()

    # check size
    image_height = image_gray.shape[0]
    image_width = image_gray.shape[1]

    # Multiscale Testing
    scl_intv = self.config.test_scl_intv
    min_scale_log2 = self.config.test_min_scale_log2
    max_scale_log2 = self.config.test_max_scale_log2
    # Test starting with double scale if small image
    min_hw = np.min(image_gray.shape[:2])
    # for the case of testing on same scale, do not double scale
    if min_hw <= 1600 and min_scale_log2 != max_scale_log2:
        print("INFO: Testing double scale")
        min_scale_log2 -= 1
    # range of scales to check
    num_division = (max_scale_log2 - min_scale_log2) * (scl_intv + 1) + 1
    scales_to_test = 2**np.linspace(min_scale_log2, max_scale_log2,
                                    num_division)

    # convert scale to image resizes
    resize_to_test = ((float(self.config.kp_input_size - 1) / 2.0) /
                      (get_ratio_scale(self.config) * scales_to_test))

    # check if resize is valid
    min_hw_after_resize = resize_to_test * min_hw
    is_resize_valid = min_hw_after_resize > self.config.kp_filter_size + 1

    # if there are invalid scales and resizes, drop everything from the
    # first invalid one onwards
    if not np.all(is_resize_valid):
        first_invalid = np.where(~is_resize_valid)[0][0]
        scales_to_test = scales_to_test[:first_invalid]
        resize_to_test = resize_to_test[:first_invalid]

    print('resize to test is {}'.format(resize_to_test))
    print('scales to test is {}'.format(scales_to_test))

    # The network returns the valid region only; this is the border that has
    # to be padded back on each side.
    pad_width = int((self.config.kp_filter_size - 1) / 2)

    # Run for each scale
    test_res_list = []
    for resize in resize_to_test:

        # resize according to how we extracted patches when training.
        # int(np.round(...)) replaces np.cast['int'], which NumPy 2.0
        # removed.
        new_height = int(np.round(image_height * resize))
        new_width = int(np.round(image_width * resize))
        start_time = timer()
        image = cv2.resize(image_gray, (new_width, new_height))
        end_time = timer()
        resize_time = (end_time - start_time) * 1000.0
        print("Time taken to resize image is {}ms".format(resize_time))
        total_time += resize_time

        # run test
        # LATER: Compatibility with the previous implementations
        start_time = timer()

        # Run the network to get the scoremap (the valid region only)
        scoremap = None
        if self.config.test_kp_use_tensorflow:
            scoremap = self.network.test(
                self.config.subtask,
                image.reshape(1, new_height, new_width, 1)).squeeze()
        else:
            # OpenCV Version
            raise NotImplementedError("TODO: Implement OpenCV Version")

        end_time = timer()
        compute_time = (end_time - start_time) * 1000.0
        print("Time taken for image size {}"
              " is {} milliseconds".format(image.shape, compute_time))

        total_time += compute_time

        # pad invalid regions and add to list
        start_time = timer()
        test_res_list.append(
            np.pad(scoremap,
                   pad_width,
                   mode='constant',
                   constant_values=-np.inf))
        end_time = timer()
        pad_time = (end_time - start_time) * 1000.0
        print("Time taken for padding and stacking is {} ms".format(
            pad_time))
        total_time += pad_time

    # ------------------------------------------------------------------------
    # Non-max suppresion and draw.

    # The nonmax suppression implemented here is very very slow. Consider
    # this as just a proof of concept implementation as of now.

    # Standard nearby : nonmax will check approximately the same area as
    # descriptor support region.
    nearby = int(
        np.round((0.5 * (self.config.kp_input_size - 1.0) *
                  float(self.config.desc_input_size) /
                  float(get_patch_size(self.config)))))
    fNearbyRatio = self.config.test_nearby_ratio
    # Multiply by quarter to compensate
    fNearbyRatio *= 0.25
    nearby = int(np.round(nearby * fNearbyRatio))
    nearby = max(nearby, 1)

    nms_intv = self.config.test_nms_intv
    edge_th = self.config.test_edge_th

    print("Performing NMS")
    start_time = timer()
    XYZS = get_XYZS_from_res_list(
        test_res_list,
        resize_to_test,
        scales_to_test,
        nearby,
        edge_th,
        scl_intv,
        nms_intv,
        do_interpolation=True,
    )
    end_time = timer()
    XYZS = XYZS[:self.config.test_num_keypoint]

    # For debugging
    # TODO: Remove below
    draw_XYZS_to_img(XYZS, image_color, self.config.test_out_file + '.jpg')

    nms_time = (end_time - start_time) * 1000.0
    print("NMS time is {} ms".format(nms_time))
    total_time += nms_time
    print("Total time for detection is {} ms".format(total_time))

    # ------------------------------------------------------------------------
    # Save as keypoint file to be used by the oxford thing
    print("Turning into kp_list")
    kp_list = XYZS2kpList(XYZS)  # note that this is already sorted

    # ------------------------------------------------------------------------
    # LATER: take care of the orientations somehow...
    # The keypoint component alone has no orientations.

    print("Saving to txt")
    saveKpListToTxt(kp_list, None, self.config.test_out_file)
def compute_kp(self, image_gray):
    """Detect keypoints on ``image_gray`` over multiple scales.

    The image is resized once per tested scale, each resized image is run
    through ``self.graph_kp`` to get a score map, and the padded score maps
    go to ``get_XYZS_from_res_list`` for non-maximum suppression. At most
    ``config.test_num_keypoint`` results are kept.

    LATER: Clean up code

    Args:
        image_gray: image array; its first two axes are used as height and
            width.

    Returns:
        The keypoint list produced by ``XYZS2kpList``.

    Raises:
        NotImplementedError: if ``config.test_kp_use_tensorflow`` is false.
    """

    # time.clock() was removed in Python 3.8. Prefer perf_counter() and only
    # fall back to clock() on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock

    total_time = 0.0

    # check size
    image_height = image_gray.shape[0]
    image_width = image_gray.shape[1]

    # Multiscale Testing
    scl_intv = self.config.test_scl_intv
    min_scale_log2 = self.config.test_min_scale_log2
    max_scale_log2 = self.config.test_max_scale_log2
    # Test starting with double scale if small image
    min_hw = np.min(image_gray.shape[:2])
    # for the case of testing on same scale, do not double scale
    if min_hw <= 1600 and min_scale_log2 != max_scale_log2:
        print("INFO: Testing double scale")
        min_scale_log2 -= 1
    # range of scales to check
    num_division = (max_scale_log2 - min_scale_log2) * (scl_intv + 1) + 1
    scales_to_test = 2**np.linspace(min_scale_log2, max_scale_log2,
                                    num_division)

    # convert scale to image resizes
    resize_to_test = ((float(self.config.kp_input_size - 1) / 2.0) /
                      (get_ratio_scale(self.config) * scales_to_test))

    # check if resize is valid
    min_hw_after_resize = resize_to_test * min_hw
    is_resize_valid = min_hw_after_resize > self.config.kp_filter_size + 1

    # if there are invalid scales and resizes, drop everything from the
    # first invalid one onwards
    if not np.all(is_resize_valid):
        first_invalid = np.where(~is_resize_valid)[0][0]
        scales_to_test = scales_to_test[:first_invalid]
        resize_to_test = resize_to_test[:first_invalid]

    print('resize to test is {}'.format(resize_to_test))
    print('scales to test is {}'.format(scales_to_test))

    # The network returns the valid region only; this is the border that has
    # to be padded back on each side.
    pad_width = int((self.config.kp_filter_size - 1) / 2)

    # Run for each scale
    test_res_list = []
    for resize in resize_to_test:

        # resize according to how we extracted patches when training.
        # int(np.round(...)) replaces np.cast['int'], which NumPy 2.0
        # removed.
        new_height = int(np.round(image_height * resize))
        new_width = int(np.round(image_width * resize))
        start_time = timer()
        image = cv2.resize(image_gray, (new_width, new_height))
        end_time = timer()
        resize_time = (end_time - start_time) * 1000.0
        print("Time taken to resize image is {}ms".format(resize_time))
        total_time += resize_time

        # run test
        # LATER: Compatibility with the previous implementations
        start_time = timer()

        # Run the network to get the scoremap (the valid region only)
        scoremap = None
        if self.config.test_kp_use_tensorflow:
            scoremap = self.graph_kp.test_squeeze(
                image.reshape(1, new_height, new_width, 1))
        else:
            # OpenCV Version
            raise NotImplementedError("TODO: Implement OpenCV Version")

        end_time = timer()
        compute_time = (end_time - start_time) * 1000.0
        print("Time taken for image size {}"
              " is {} milliseconds".format(image.shape, compute_time))

        total_time += compute_time

        # pad invalid regions and add to list
        start_time = timer()
        test_res_list.append(
            np.pad(scoremap,
                   pad_width,
                   mode='constant',
                   constant_values=-np.inf))
        end_time = timer()
        pad_time = (end_time - start_time) * 1000.0
        print("Time taken for padding and stacking is {} ms".format(
            pad_time))
        total_time += pad_time

    # ------------------------------------------------------------------------
    # Non-max suppresion and draw.

    # The nonmax suppression implemented here is very very slow. Consider
    # this as just a proof of concept implementation as of now.

    # Standard nearby : nonmax will check approximately the same area as
    # descriptor support region.
    nearby = int(
        np.round((0.5 * (self.config.kp_input_size - 1.0) *
                  float(self.config.desc_input_size) /
                  float(get_patch_size(self.config)))))
    fNearbyRatio = self.config.test_nearby_ratio
    # Multiply by quarter to compensate
    fNearbyRatio *= 0.25
    nearby = int(np.round(nearby * fNearbyRatio))
    nearby = max(nearby, 1)

    nms_intv = self.config.test_nms_intv
    edge_th = self.config.test_edge_th

    print("Performing NMS")
    start_time = timer()
    XYZS = get_XYZS_from_res_list(
        test_res_list,
        resize_to_test,
        scales_to_test,
        nearby,
        edge_th,
        scl_intv,
        nms_intv,
        do_interpolation=True,
    )
    end_time = timer()
    XYZS = XYZS[:self.config.test_num_keypoint]

    nms_time = (end_time - start_time) * 1000.0
    print("NMS time is {} ms".format(nms_time))
    total_time += nms_time
    print("Total time for detection is {} ms".format(total_time))

    # ------------------------------------------------------------------------
    # Save as keypoint file to be used by the oxford thing
    print("Turning into kp_list")
    kp_list = XYZS2kpList(XYZS)  # note that this is already sorted

    return kp_list
def _build_network(self):
    """Define all the architecture here. Use the modules if necessary.

    Imports the "kp", "ori" and "desc" modules named in the configuration,
    initializes ``self.outputs``, ``self.params`` and ``self.allparams``,
    and then, inside the "lift" variable scope, chains the stages in this
    order: resize spatial transformer, keypoint module, crop spatial
    transformer, orientation module, rot spatial transformer, descriptor
    module. ``config.subtask`` decides which modules are built with
    ``skip=True``.
    """

    # Import modules according to the configurations
    # (module name comes from config.module_kp / module_ori / module_desc)
    self.modules = {}
    for _key in ["kp", "ori", "desc"]:
        self.modules[_key] = importlib.import_module("modules.{}".format(
            getattr(self.config, "module_" + _key)))

    # prepare dictionary for the output and parameters of each module
    self.outputs = {}
    self.params = {}
    self.allparams = {}
    for _key in self.modules:
        self.outputs[_key] = {}
        self.params[_key] = []
        self.allparams[_key] = []
    # create a joint params list
    # NOTE: params is a list, not a dict!
    self.params["joint"] = []
    self.allparams["joint"] = []
    # create outputs placeholder for resize, crop and rot
    self.outputs["resize"] = {}
    self.outputs["crop"] = {}
    self.outputs["rot"] = {}

    # Actual Network definition
    with tf.variable_scope("lift"):
        # Graph construction depends on the subtask
        subtask = self.config.subtask

        # ----------------------------------------
        # Initial resize for the keypoint module
        # Includes rotation when augmentations are used
        #
        if self.config.use_augmented_set:
            rot = self.inputs["aug_rot"]
        else:
            rot = None
        self._build_st(
            module="resize",
            xyz=None,
            cs=rot,
            names=["P1", "P2", "P3", "P4"],
            out_size=self.config.kp_input_size,
            reduce_ratio=float(get_patch_size_no_aug(self.config)) /
            float(get_patch_size(self.config)),
        )

        # ----------------------------------------
        # Keypoint Detector
        #
        # The keypoint detector takes each patch input and outputs (1)
        # "score": the score of the patch, (2) "xy": keypoint position
        # inside the patch. The score output is the soft-maximum (not a
        # softmax) of the scores. The position output from the network
        # should be in the form friendly to the spatial
        # transformer. Outputs are always dictionaries.
        # Rotate ground truth coordinates when augmenting rotations.
        # NOTE(review): "aug_rot" is read here under augment_rotations,
        # while the resize step above keys on use_augmented_set -- confirm
        # the two flags are always set consistently.
        aug_rot = self.inputs["aug_rot"] \
            if self.config.augment_rotations else None
        xyz_gt_scaled = self.transform_xyz(self.inputs["xyz"],
                                           aug_rot,
                                           self.config.batch_size,
                                           self.scale_aug,
                                           transpose=True,
                                           names=["P1", "P2", "P3", "P4"])
        # The keypoint module is skipped for the "ori" and "desc" subtasks;
        # the scaled ground truth coordinates are passed as the bypass.
        self._build_module(
            module="kp",
            inputs=self.outputs["resize"],
            bypass=xyz_gt_scaled,
            names=["P1", "P2", "P3", "P4"],
            skip=subtask == "ori" or subtask == "desc",
        )

        # For image based test
        # self._build_module(
        #     module="kp",
        #     inputs=self.inputs["img"],
        #     bypass=self.inputs["img"],  # This is a dummy
        #     names=["img"],
        #     skip=subtask != "kp",
        #     reuse=True,
        #     test_only=True,
        # )

        # ----------------------------------------
        # The Crop Spatial Transformer
        # Output: use the same support region as for the descriptor
        #
        # The keypoint outputs are transformed with the inverse of the
        # augmentation scale (1 / self.scale_aug) before cropping.
        xyz_kp_scaled = self.transform_kp(self.outputs["kp"],
                                          aug_rot,
                                          self.config.batch_size,
                                          1 / self.scale_aug,
                                          transpose=False,
                                          names=["P1", "P2", "P3"])
        self._build_st(
            module="crop",
            xyz=xyz_kp_scaled,
            cs=aug_rot,
            names=["P1", "P2", "P3"],
            out_size=self.config.ori_input_size,
            reduce_ratio=float(self.config.desc_input_size) /
            float(get_patch_size(self.config)),
        )

        # ----------------------------------------
        # Orientation Estimator
        #
        # The orientation estimator takes the crop outputs as input and
        # outputs orientations for the spatial transformer to
        # use. Actually, since we output cos and sin, we can simply use the
        # *UNNORMALIZED* version of the two, normalize them, and directly
        # use it for our affine transform. In short it returns "cs": the
        # cos and the sin, but unnormalized. Outputs are always
        # dictionaries.
        # Bypass: just the GT angle (minus the augmentation angle when
        # rotations are augmented)
        if self.config.augment_rotations:
            rot = {}
            for name in ["P1", "P2", "P3"]:
                rot[name] = self.inputs["angle"][name] - \
                    self.inputs["aug_rot"][name]["angle"]
        else:
            rot = self.inputs["angle"]
        # The orientation module is skipped for the "kp" and "desc" subtasks
        self._build_module(
            module="ori",
            inputs=self.outputs["crop"],
            bypass=rot,
            names=["P1", "P2", "P3"],
            skip=subtask == "kp" or subtask == "desc",
        )

        # ----------------------------------------
        # The Rot Spatial Transformer.
        # - No rotation augmentation:
        # Operates over the original patch with the ground truth angle when
        # bypassing. Otherwise, we combine the augmented angle and the
        # output of the orientation module.
        # We do not consider rotation augmentations for the descriptor.
        if self.config.augment_rotations:
            rot = self.chain_cs(self.inputs["aug_rot"],
                                self.outputs["ori"],
                                names=["P1", "P2", "P3"])
            # rot = self.outputs["ori"]
            # xyz_desc_scaled = self.transform_kp(
            #     self.outputs["kp"],
            #     rot,
            #     self.config.batch_size,
            #     1 / self.scale_aug,
            #     transpose=False,
            #     names=["P1", "P2", "P3"])
        elif self.config.use_augmented_set:
            rot = self.outputs["ori"]
            # xyz_desc_scaled = self.transform_kp(
            #     self.outputs["kp"],
            #     rot,
            #     self.config.batch_size,
            #     1 / self.scale_aug,
            #     transpose=False,
            #     names=["P1", "P2", "P3"])
        else:
            rot = None
            # xyz_desc_scaled = self.inputs["xyz"]
        # Uses the same scaled keypoint coordinates as the crop transformer
        self._build_st(
            module="rot",
            xyz=xyz_kp_scaled,
            cs=rot,
            names=["P1", "P2", "P3"],
            out_size=self.config.desc_input_size,
            reduce_ratio=float(self.config.desc_input_size) /
            float(get_patch_size(self.config)),
        )

        # ----------------------------------------
        # Feature Descriptor
        #
        # The descriptor simply computes the descriptors, given the patch.
        # It is never skipped; the bypass argument is the same "rot" output
        # that is used as input.
        self._build_module(
            module="desc",
            inputs=self.outputs["rot"],
            bypass=self.outputs["rot"],
            names=["P1", "P2", "P3"],
            skip=False,
        )
def _build_placeholders(self):
    """Create the TensorFlow placeholders and store them on ``self``.

    Fills ``self.inputs`` with per-patch placeholders ("patch", "xyz",
    "angle" and, when ``config.use_augmented_set`` is set, "aug_rot") for
    P1..P4, adds the whole-image placeholder under ``self.inputs["img"]``,
    sets ``self.r_base`` and creates the ``self.is_training`` flag.
    """

    def float_input(shape, name):
        # Every data placeholder in this method is float32
        return tf.placeholder(tf.float32, shape=shape, name=name)

    # The inputs placeholder dictionary, one sub-dictionary per input type
    # LATER: label might not be necessary
    input_types = ["patch", "xyz", "angle"]
    if self.config.use_augmented_set:
        input_types.append("aug_rot")
    self.inputs = {}
    for input_type in input_types:
        self.inputs[input_type] = {}

    # We *ARE* going to specify the input size, since the spatial
    # transformer implementation *REQUIRES* us to do so. Note that this
    # has to be dealt with in the validate loop.
    # The batch dimension, however, is left variable.
    batch_dim = None

    # We also read nchannel from the configuration. Make sure that the data
    # module is behaving accordingly
    channels = self.config.nchannel

    # Get the input patch size from config
    side = float(get_patch_size(self.config))

    # Compute the r_base (i.e. overlap radius when computing the keypoint
    # overlaps.
    desc_size = float(self.config.desc_input_size)
    self.r_base = desc_size / float(get_patch_size_no_aug(self.config))

    # P1, P2, P3, P4 in the paper. P1, P2, P3 are keypoints, P1, P2
    # correspond, P1, and P3 don't correspond, P4 is a non-keypoint patch.
    # The creation order below is kept fixed because all placeholders of a
    # patch share the same requested name.
    for patch_name in ("P1", "P2", "P3", "P4"):
        self.inputs["patch"][patch_name] = float_input(
            [batch_dim, side, side, channels], patch_name)
        self.inputs["xyz"][patch_name] = float_input(
            [batch_dim, 3], patch_name)
        self.inputs["angle"][patch_name] = float_input(
            [batch_dim, 1], patch_name)

        if self.config.use_augmented_set:
            cs_input = float_input([batch_dim, 2], patch_name)
            angle_input = float_input([batch_dim, 1], patch_name)
            self.inputs["aug_rot"][patch_name] = {
                "cs": cs_input,
                "angle": angle_input,
            }

        # Add to summary to view them
        image_summary_nhwc("input/" + patch_name,
                           self.inputs["patch"][patch_name])

    # For Image based test
    self.inputs["img"] = {
        "img": float_input([None, None, None, channels], "img"),
    }

    # For runmode in dropout and batch_norm
    self.is_training = tf.placeholder(tf.bool, shape=(), name="is_training")
def __init__(self, sess, config, dataset, force_mean_std=None):
    """Store the session/config/dataset and build the whole graph.

    Picks the input normalizers (``self.mean`` / ``self.std``) and then
    builds placeholders, network, loss and optimizer, each under its own
    variable scope, followed by the summaries.

    Args:
        sess: tensorflow session to keep a pointer to.
        config: configuration object.
        dataset: data module to keep a pointer to; its
            ``data["train"]["patch"]`` is read when
            ``config.mean_std_type == "dataset"``.
        force_mean_std: optional dict with "mean" and "std" entries. When
            given, it is used as-is and ``config.mean_std_type`` is not
            consulted.

    Raises:
        RuntimeError: for the "sequence" strategy (not implemented) and for
            an unknown ``config.mean_std_type``.
    """

    # Keep pointers to the session, the config and the data module
    self.sess = sess
    self.config = config
    self.dataset = dataset

    module_names = ["kp", "ori", "desc"]

    # Normalizer for the input data (they are raw images)
    self.mean = {}
    self.std = {}

    if force_mean_std is not None:
        # Load values if they already exist
        self.mean = force_mean_std["mean"]
        self.std = force_mean_std["std"]
    else:
        strategy = self.config.mean_std_type
        if strategy == "hardcoded":
            print("-- Using default values for mean/std")
            for name in module_names:
                self.mean[name] = 128.0
                self.std[name] = 128.0
        elif strategy == "old":
            print("-- Using old (piccadilly) values for mean/std")
            old_kp_ori_mean = \
                116.4368117568544249706974369473755359649658203125
            old_kp_ori_std = \
                88.083076379771597430590190924704074859619140625
            self.mean["kp"] = old_kp_ori_mean
            self.std["kp"] = old_kp_ori_std
            self.mean["ori"] = old_kp_ori_mean
            self.std["ori"] = old_kp_ori_std
            self.mean["desc"] = 110.75389862060546875
            self.std["desc"] = 61.53688812255859375
        elif strategy == "dataset":
            t = time()
            print("-- Recomputing dataset mean/std...")

            # Account for augmented sets: only the central, non-augmented
            # region of each patch enters the statistics
            border = 0
            if self.config.use_augmented_set:
                border = int(
                    (get_patch_size(config) - get_patch_size_no_aug(config))
                    / 2)
            if border > 0:
                crop = slice(border, -border)
            else:
                crop = slice(None)
            _d = self.dataset.data["train"]["patch"][:, :, crop, crop]

            # Do this incrementally to avoid memory problems
            # NOTE(review): the final std is the average of the per-chunk
            # stds, not the std over the whole set -- confirm this
            # approximation is intended.
            jump = 1000
            num_samples = _d.shape[0]
            chunk_mean = np.zeros(num_samples)
            chunk_std = np.zeros(num_samples)
            for start in tqdm(range(0, num_samples, jump)):
                stop = start + jump
                chunk_mean[start:stop] = _d[start:stop].mean()
                chunk_std[start:stop] = _d[start:stop].std()
            data_mean = chunk_mean.mean()
            data_std = chunk_std.mean()
            print('-- Dataset mean: {0:.03f}, std = {1:.03f}'.format(
                data_mean, data_std))

            for name in module_names:
                self.mean[name] = data_mean
                self.std[name] = data_std
            print("-- Done in {0:.02f} sec".format(time() - t))
        elif strategy == "batch":
            print("-- Will recompute mean/std per batch...")
        elif strategy == "sample":
            print("-- Will recompute mean/std per sample...")
        elif strategy == "sequence":
            print("-- Will recompute mean/std per sequence...")
            raise RuntimeError("TODO")
        else:
            raise RuntimeError("Unknown mean-std strategy")

    # Account for the keypoint scale change while augmenting rotations
    aug_size = float(get_patch_size(self.config))
    no_aug_size = float(get_patch_size_no_aug(self.config))
    self.scale_aug = aug_size / no_aug_size

    # Graph construction. The order matters: placeholders, network, loss,
    # optimization.
    with tf.variable_scope("placeholders"):
        self._build_placeholders()
    with tf.variable_scope("network"):
        self._build_network()
    with tf.variable_scope("loss"):
        self._build_loss()
    with tf.variable_scope("optimization"):
        self._build_optim()

    # The legacy component (only used for accessing old framework weights)
    # is intentionally not built here.
    # build_legacy(self)

    # Show all variables in the network
    show_all_variables()

    # Add all variables into histogram summary
    for name in module_names:
        for variable in self.params[name]:
            tf.summary.histogram(variable.name, variable)

    # Collect all summary (Lazy...)
    self.summary = tf.summary.merge_all()
return True # Save successful except: return False # Save failed ####################### configuration ############################# print 'Reading configuration...' config = read_config(args.config) cfg_name = args.model out_name = args.output CNN_INPUT_DIR = config['training_on_patches']['input_dir'] # input image dimensions PATCH_SIZE_W, PATCH_SIZE_D = get_patch_size(CNN_INPUT_DIR) img_rows, img_cols = PATCH_SIZE_W, PATCH_SIZE_D batch_size = config['training_on_patches']['batch_size'] nb_epoch = config['training_on_patches']['nb_epoch'] nb_classes = config['training_on_patches']['nb_classes'] ###################### CNN commpilation ########################### print 'Compiling CNN model...' with tf.device('/gpu:' + args.gpu): model = load_model(cfg_name) sgd = SGD(lr=0.002, decay=1e-5, momentum=0.9, nesterov=True) model.compile(optimizer=sgd, loss={ 'em_trk_none_netout': 'categorical_crossentropy',