Example #1
    def load_data(self):
        """Returns the patch, given the keypoint structure

        LATER: Cleanup. We currently re-use the utils we had from data
               extraction.

        """

        # Load image
        img = cv2.imread(self.config.test_img_file)
        # If it is a color image, convert it to grayscale
        if len(img.shape) == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        in_dim = 1

        # Load keypoints
        kp = np.asarray(loadKpListFromTxt(self.config.test_kp_file))

        # Use load patches function
        # Assign dummy values to y, ID, angle
        y = np.zeros((len(kp), ))
        ID = np.zeros((len(kp), ), dtype='int64')
        # angle = np.zeros((len(kp),))
        angle = np.pi / 180.0 * kp[:, IDX_ANGLE]  # store angle in radians

        # Load patches with IDs (drop keypoints that fall out of bounds)
        bPerturb = False
        fPerturbInfo = np.zeros((3, ))
        dataset = load_patches(img,
                               kp,
                               y,
                               ID,
                               angle,
                               get_ratio_scale(self.config),
                               1.0,
                               int(get_patch_size(self.config)),
                               int(self.config.desc_input_size),
                               in_dim,
                               bPerturb,
                               fPerturbInfo,
                               bReturnCoords=True,
                               is_test=True)

        # Convert the old dataset return structure into the data we need
        x = dataset[0]
        # y = dataset[1]
        # ID = dataset[2]
        pos = dataset[3]
        angle = dataset[4]
        coords = dataset[5]

        # Return the dictionary structure
        cur_data = {}
        cur_data["patch"] = np.transpose(x, (0, 2, 3, 1))  # In NHWC
        cur_data["kps"] = coords
        cur_data["xyz"] = pos
        # Make sure that angle is a Nx1 vector
        cur_data["angle"] = np.reshape(angle, (-1, 1))

        return cur_data
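
A minimal usage sketch of the loader above (hypothetical loader instance; assumes config.test_img_file and config.test_kp_file are set as in the method):

cur_data = loader.load_data()
print(cur_data["patch"].shape)  # NHWC patch stack
print(cur_data["kps"].shape)    # keypoint coordinates
print(cur_data["angle"].shape)  # (N, 1), angles in radians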
Example #2
    def __init__(self, config, rng):

        # Placeholder for the data dictionary
        self.data = {}

        # Use the old data module to load data. Process data loading for
        # different tasks.
        for task in ["train", "valid", "test"]:
            param = config_to_param(config)
            old_data = old_impl.data_obj(param, task)
            # Some sanity check to make sure that the data module is behaving
            # as intended.
            assert old_data.patch_height == old_data.patch_width
            assert old_data.patch_height == get_patch_size(config)
            assert old_data.num_channel == config.nchannel
            assert old_data.out_dim == config.nchannel
            self.data[task] = {
                "patch": old_data.x,
                "xyz": old_data.pos,
                "angle": old_data.angle.reshape(-1, 1),
                "ID": old_data.ID,
            }

        # data ordering of this class
        self.data_order = "NCHW"

        # Save the hash, for the pairs folder
        self.hash = old_data.hash
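
Since this class stores patches in NCHW order while Example #1 returns NHWC, here is a minimal conversion sketch (hypothetical shapes):

import numpy as np

patch_nchw = np.zeros((32, 1, 64, 64), dtype=np.float32)  # N, C, H, W
patch_nhwc = np.transpose(patch_nchw, (0, 2, 3, 1))       # N, H, W, C
assert patch_nhwc.shape == (32, 64, 64, 1)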
Example #3
    def __init__(self, sess, config, dataset):
        # Save pointer to the tensorflow session
        self.sess = sess
        # Save pointer to config
        self.config = config
        # Save pointer to the data module
        self.dataset = dataset
        # # Summaries to compute for this network
        # self.summary = []

        # Normalizer for the input data (they are raw images)
        # Currently normalized to be between -1 and 1
        self.mean = {}
        self.std = {}
        for _module in ["kp", "ori", "desc"]:
            self.mean[_module] = 128.0
            self.std[_module] = 128.0

        if self.config.use_old_mean_std:
            self.mean[
                "kp"] = 116.4368117568544249706974369473755359649658203125
            self.std["kp"] = 88.083076379771597430590190924704074859619140625
            self.mean[
                "ori"] = 116.4368117568544249706974369473755359649658203125
            self.std["ori"] = 88.083076379771597430590190924704074859619140625
            self.mean["desc"] = 110.75389862060546875
            self.std["desc"] = 61.53688812255859375

        # Account for the keypoint scale change while augmenting rotations
        self.scale_aug = float(get_patch_size(self.config)) / \
            float(get_patch_size_no_aug(self.config))

        # Allocate placeholders
        with tf.variable_scope("placeholders"):
            self._build_placeholders()
        # Build the network
        with tf.variable_scope("network"):
            self._build_network()
        # Build loss
        with tf.variable_scope("loss"):
            self._build_loss()
        # Build the optimization op
        with tf.variable_scope("optimization"):
            self._build_optim()

        # Build the legacy component. This is only used for accessing old
        # framework weights. You can safely ignore this part
        build_legacy(self)

        # Show all variables in the network
        show_all_variables()

        # Add all variables into histogram summary
        for _module in ["kp", "ori", "desc"]:
            for _param in self.params[_module]:
                tf.summary.histogram(_param.name, _param)

        # Collect all summary (Lazy...)
        self.summary = tf.summary.merge_all()
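
A minimal sketch of how the default mean/std of 128.0 maps raw 8-bit pixels to roughly [-1, 1] (hypothetical pixel values):

import numpy as np

raw = np.array([0.0, 128.0, 255.0])
normalized = (raw - 128.0) / 128.0
print(normalized)  # -> [-1.0, 0.0, ~0.992]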
Example #4
def config_to_param(config):
    """The function that takes care of the transfer to the new framework"""

    param = paramStruct()

    # Param Group "dataset"
    param.dataset.nTestPercent = int(20)
    param.dataset.dataType = "ECCV"
    param.dataset.nValidPercent = int(20)
    param.dataset.fMinKpSize = float(2.0)
    param.dataset.nPosPerImg = int(-1)
    # Note that we are passing a list. This module actually supports
    # concatenating datasets.
    param.dataset.trainSetList = ["ECCV/" + config.data_name]
    param.dataset.nNegPerImg = int(1000)
    param.dataset.nTrainPercent = int(60)

    # Param Group "patch"
    if config.old_data_compat:
        param.patch.nPatchSize = int(get_patch_size(config))
    else:
        param.patch.nPatchSize = int(get_patch_size_no_aug(config))
        param.patch.nPatchSizeAug = int(get_patch_size(config))
    param.patch.noscale = False
    param.patch.fNegOverlapTh = float(0.1)
    param.patch.sNegMineMethod = "use_all_SIFT_points"
    param.patch.fRatioScale = float(get_ratio_scale(config))
    param.patch.fPerturbInfo = np.array([0.2, 0.2, 0.0]).astype(float)
    if config.old_data_compat:
        param.patch.nMaxRandomNegMineIter = int(500)
    else:
        param.patch.nMaxRandomNegMineIter = int(100)
    param.patch.fMaxScale = 1.0
    param.patch.bPerturb = 1.0

    # Param Group "model"
    param.model.nDescInputSize = int(config.desc_input_size)

    # override folders from config
    setattr(param, "data_dir", config.data_dir)
    setattr(param, "temp_dir", config.temp_dir)
    setattr(param, "scratch_dir", config.scratch_dir)

    return param
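
A minimal usage sketch (hypothetical config object; assumes it carries the fields read above, such as data_name and desc_input_size):

param = config_to_param(config)
print(param.patch.nPatchSize, param.patch.fRatioScale)
old_data = old_impl.data_obj(param, "train")  # consumed as in Example #2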
Example #5
    def _compute_kp(self):
        """Compute Keypoints.

        LATER: Clean up code

        """

        total_time = 0.0

        # Read image
        image_color, image_gray, load_prep_time = self.dataset.load_image()

        # check size
        image_height = image_gray.shape[0]
        image_width = image_gray.shape[1]

        # Multiscale Testing
        scl_intv = self.config.test_scl_intv
        # min_scale_log2 = 1  # min scale = 2
        # max_scale_log2 = 4  # max scale = 16
        min_scale_log2 = self.config.test_min_scale_log2
        max_scale_log2 = self.config.test_max_scale_log2
        # Test starting with double scale if small image
        min_hw = np.min(image_gray.shape[:2])
        # for the case of testing on same scale, do not double scale
        if min_hw <= 1600 and min_scale_log2 != max_scale_log2:
            print("INFO: Testing double scale")
            min_scale_log2 -= 1
        # range of scales to check
        num_division = (max_scale_log2 - min_scale_log2) * (scl_intv + 1) + 1
        scales_to_test = 2**np.linspace(min_scale_log2, max_scale_log2,
                                        num_division)

        # convert scale to image resizes
        resize_to_test = ((float(self.config.kp_input_size - 1) / 2.0) /
                          (get_ratio_scale(self.config) * scales_to_test))
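        # A worked check with hypothetical values: kp_input_size = 48,
        # get_ratio_scale(...) = 6.0 and scale = 2.0 give a resize factor of
        # ((48 - 1) / 2) / (6.0 * 2.0) = 23.5 / 12.0 ~= 1.96, i.e. the image
        # is resized so a keypoint at that scale matches the network input.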

        # check if resize is valid
        min_hw_after_resize = resize_to_test * np.min(image_gray.shape[:2])
        is_resize_valid = min_hw_after_resize > self.config.kp_filter_size + 1

        # if there are invalid scales and resizes
        if not np.prod(is_resize_valid):
            # find first invalid
            # first_invalid = np.where(True - is_resize_valid)[0][0]
            first_invalid = np.where(~is_resize_valid)[0][0]

            # remove scales from testing
            scales_to_test = scales_to_test[:first_invalid]
            resize_to_test = resize_to_test[:first_invalid]

        print('resize to test is {}'.format(resize_to_test))
        print('scales to test is {}'.format(scales_to_test))

        # Run for each scale
        test_res_list = []
        for resize in resize_to_test:

            # resize according to how we extracted patches when training
            new_height = np.cast['int'](np.round(image_height * resize))
            new_width = np.cast['int'](np.round(image_width * resize))
            start_time = time.clock()
            image = cv2.resize(image_gray, (new_width, new_height))
            end_time = time.clock()
            resize_time = (end_time - start_time) * 1000.0
            print("Time taken to resize image is {}ms".format(resize_time))
            total_time += resize_time

            # run test
            # LATER: Compatibility with the previous implementations
            start_time = time.clock()

            # Run the network to get the scoremap (the valid region only)
            scoremap = None
            if self.config.test_kp_use_tensorflow:
                scoremap = self.network.test(
                    self.config.subtask,
                    image.reshape(1, new_height, new_width, 1)).squeeze()
            else:
                # OpenCV Version
                raise NotImplementedError("TODO: Implement OpenCV Version")

            end_time = time.clock()
            compute_time = (end_time - start_time) * 1000.0
            print("Time taken for image size {}"
                  " is {} milliseconds".format(image.shape, compute_time))

            total_time += compute_time

            # pad invalid regions and add to list
            start_time = time.clock()
            test_res_list.append(
                np.pad(scoremap,
                       int((self.config.kp_filter_size - 1) / 2),
                       mode='constant',
                       constant_values=-np.inf))
            end_time = time.clock()
            pad_time = (end_time - start_time) * 1000.0
            print("Time taken for padding and stacking is {} ms".format(
                pad_time))
            total_time += pad_time

        # ------------------------------------------------------------------------
        # Non-max suppression and draw.

        # The nonmax suppression implemented here is very slow. Consider
        # this just a proof-of-concept implementation for now.

        # Standard nearby: nonmax will check approximately the same area as
        # the descriptor support region.
        nearby = int(
            np.round((0.5 * (self.config.kp_input_size - 1.0) *
                      float(self.config.desc_input_size) /
                      float(get_patch_size(self.config)))))
        fNearbyRatio = self.config.test_nearby_ratio
        # Multiply by quarter to compensate
        fNearbyRatio *= 0.25
        nearby = int(np.round(nearby * fNearbyRatio))
        nearby = max(nearby, 1)
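        # Worked example with hypothetical values: kp_input_size = 48,
        # desc_input_size = 64 and a patch size of 128 give an initial
        # nearby = round(0.5 * 47 * 64 / 128) = 12, which the ratio above
        # then shrinks; max() keeps the radius at least one pixel.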

        nms_intv = self.config.test_nms_intv
        edge_th = self.config.test_edge_th

        print("Performing NMS")
        start_time = time.clock()
        res_list = test_res_list
        # check whether the returned score result is right
        #        print(res_list[0][400:500,300:400])
        XYZS = get_XYZS_from_res_list(
            res_list,
            resize_to_test,
            scales_to_test,
            nearby,
            edge_th,
            scl_intv,
            nms_intv,
            do_interpolation=True,
        )
        end_time = time.clock()
        XYZS = XYZS[:self.config.test_num_keypoint]

        # For debugging
        # TODO: Remove below
        draw_XYZS_to_img(XYZS, image_color, self.config.test_out_file + '.jpg')

        nms_time = (end_time - start_time) * 1000.0
        print("NMS time is {} ms".format(nms_time))
        total_time += nms_time
        print("Total time for detection is {} ms".format(total_time))
        # if bPrintTime:
        #     # Also print to a file by appending
        #     with open("../timing-code/timing.txt", "a") as timing_file:
        #         print("------ Keypoint Timing ------\n"
        #               "NMS time is {} ms\n"
        #               "Total time is {} ms\n".format(
        #                   nms_time, total_time
        #               ),
        #               file=timing_file)

        # # resize score to original image size
        # res_list = [cv2.resize(score,
        #                        (image_width, image_height),
        #                        interpolation=cv2.INTER_NEAREST)
        #             for score in test_res_list]
        # # make as np array
        # res_scores = np.asarray(res_list)
        # with h5py.File('test/scores.h5', 'w') as score_file:
        #     score_file['score'] = res_scores

        # ------------------------------------------------------------------------
        # Save as keypoint file to be used by the oxford thing
        print("Turning into kp_list")
        kp_list = XYZS2kpList(XYZS)  # note that this is already sorted

        # ------------------------------------------------------------------------
        # LATER: take care of the orientations somehow...
        # # Also compute angles with the SIFT method, since the keypoint
        # # component alone has no orientations.
        # print("Recomputing Orientations")
        # new_kp_list, _ = recomputeOrientation(image_gray, kp_list,
        #                                       bSingleOrientation=True)

        print("Saving to txt")
        saveKpListToTxt(kp_list, None, self.config.test_out_file)
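
A minimal sketch of why the scoremaps are padded with -inf before NMS (hypothetical sizes; mirrors the np.pad call above):

import numpy as np

pad = 2  # e.g. (kp_filter_size - 1) / 2 with a hypothetical kp_filter_size = 5
scoremap = np.random.rand(5, 5).astype(np.float32)
padded = np.pad(scoremap, pad, mode='constant', constant_values=-np.inf)
# The -inf border can never win a local-maximum comparison, so keypoints
# are only reported inside the valid region of the scoremap.
assert padded.shape == (9, 9)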
Example #6
    def compute_kp(self, image_gray):
        """Compute Keypoints.

        LATER: Clean up code

        """
        total_time = 0.0

        # check size
        image_height = image_gray.shape[0]
        image_width = image_gray.shape[1]

        # Multiscale Testing
        scl_intv = self.config.test_scl_intv
        # min_scale_log2 = 1  # min scale = 2
        # max_scale_log2 = 4  # max scale = 16
        min_scale_log2 = self.config.test_min_scale_log2
        max_scale_log2 = self.config.test_max_scale_log2
        # Test starting with double scale if small image
        min_hw = np.min(image_gray.shape[:2])
        # for the case of testing on same scale, do not double scale
        if min_hw <= 1600 and min_scale_log2 != max_scale_log2:
            print("INFO: Testing double scale")
            min_scale_log2 -= 1
        # range of scales to check
        num_division = (max_scale_log2 - min_scale_log2) * (scl_intv + 1) + 1
        scales_to_test = 2**np.linspace(min_scale_log2, max_scale_log2,
                                        num_division)

        # convert scale to image resizes
        resize_to_test = ((float(self.config.kp_input_size - 1) / 2.0) /
                          (get_ratio_scale(self.config) * scales_to_test))

        # check if resize is valid
        min_hw_after_resize = resize_to_test * np.min(image_gray.shape[:2])
        is_resize_valid = min_hw_after_resize > self.config.kp_filter_size + 1

        # if there are invalid scales and resizes
        if not np.prod(is_resize_valid):
            # find first invalid
            first_invalid = np.where(~is_resize_valid)[0][0]

            # remove scales from testing
            scales_to_test = scales_to_test[:first_invalid]
            resize_to_test = resize_to_test[:first_invalid]

        print('resize to test is {}'.format(resize_to_test))
        print('scales to test is {}'.format(scales_to_test))

        # Run for each scale
        test_res_list = []
        for resize in resize_to_test:

            # resize according to how we extracted patches when training
            new_height = np.cast['int'](np.round(image_height * resize))
            new_width = np.cast['int'](np.round(image_width * resize))
            start_time = time.clock()
            image = cv2.resize(image_gray, (new_width, new_height))
            end_time = time.clock()
            resize_time = (end_time - start_time) * 1000.0
            print("Time taken to resize image is {}ms".format(
                resize_time
            ))
            total_time += resize_time

            # run test
            # LATER: Compatibility with the previous implementations
            start_time = time.clock()

            # Run the network to get the scoremap (the valid region only)
            scoremap = None
            if self.config.test_kp_use_tensorflow:
                scoremap = self.graph_kp.test_squeeze(
                    image.reshape(1, new_height, new_width, 1))
            else:
                # OpenCV Version
                raise NotImplementedError(
                    "TODO: Implement OpenCV Version")

            end_time = time.clock()
            compute_time = (end_time - start_time) * 1000.0
            print("Time taken for image size {}"
                  " is {} milliseconds".format(
                      image.shape, compute_time))

            total_time += compute_time

            # pad invalid regions and add to list
            start_time = time.clock()
            test_res_list.append(
                np.pad(scoremap, int((self.config.kp_filter_size - 1) / 2),
                       mode='constant',
                       constant_values=-np.inf)
            )
            end_time = time.clock()
            pad_time = (end_time - start_time) * 1000.0
            print("Time taken for padding and stacking is {} ms".format(
                pad_time
            ))
            total_time += pad_time

        # ------------------------------------------------------------------------
        # Non-max suppression and draw.

        # The nonmax suppression implemented here is very slow. Consider
        # this just a proof-of-concept implementation for now.

        # Standard nearby: nonmax will check approximately the same area as
        # the descriptor support region.
        nearby = int(np.round(
            (0.5 * (self.config.kp_input_size - 1.0) *
             float(self.config.desc_input_size) /
             float(get_patch_size(self.config)))
        ))
        fNearbyRatio = self.config.test_nearby_ratio
        # Multiply by quarter to compensate
        fNearbyRatio *= 0.25
        nearby = int(np.round(nearby * fNearbyRatio))
        nearby = max(nearby, 1)

        nms_intv = self.config.test_nms_intv
        edge_th = self.config.test_edge_th

        print("Performing NMS")
        start_time = time.clock()
        res_list = test_res_list
        # print(res_list[0][400:500, 300:400])
        # check whether the returned score result is right
        XYZS = get_XYZS_from_res_list(
            res_list, resize_to_test, scales_to_test, nearby, edge_th,
            scl_intv, nms_intv, do_interpolation=True,
        )
        end_time = time.clock()
        XYZS = XYZS[:self.config.test_num_keypoint]

        nms_time = (end_time - start_time) * 1000.0
        print("NMS time is {} ms".format(nms_time))
        total_time += nms_time
        print("Total time for detection is {} ms".format(total_time))
        # ------------------------------------------------------------------------
        # Save as keypoint file to be used by the oxford thing
        print("Turning into kp_list")
        kp_list = XYZS2kpList(XYZS)  # note that this is already sorted
        return kp_list
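
A minimal sketch of the multiscale schedule computed above (hypothetical config values):

import numpy as np

min_scale_log2, max_scale_log2, scl_intv = 1, 4, 2
num_division = (max_scale_log2 - min_scale_log2) * (scl_intv + 1) + 1
scales = 2 ** np.linspace(min_scale_log2, max_scale_log2, num_division)
print(scales)  # 10 geometrically spaced scales from 2.0 to 16.0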
Example #7
    def _build_network(self):
        """Define all the architecture here. Use the modules if necessary."""

        # Import modules according to the configurations
        self.modules = {}
        for _key in ["kp", "ori", "desc"]:
            self.modules[_key] = importlib.import_module("modules.{}".format(
                getattr(self.config, "module_" + _key)))

        # prepare dictionary for the output and parameters of each module
        self.outputs = {}
        self.params = {}
        self.allparams = {}
        for _key in self.modules:
            self.outputs[_key] = {}
            self.params[_key] = []
            self.allparams[_key] = []
        # create a joint params list
        # NOTE: params is a list, not a dict!
        self.params["joint"] = []
        self.allparams["joint"] = []
        # create output placeholders for resize, crop and rot
        self.outputs["resize"] = {}
        self.outputs["crop"] = {}
        self.outputs["rot"] = {}

        # Actual Network definition
        with tf.variable_scope("lift"):
            # Graph construction depends on the subtask
            subtask = self.config.subtask

            # ----------------------------------------
            # Initial resize for the keypoint module
            # Includes rotation when augmentations are used
            #
            if self.config.use_augmented_set:
                rot = self.inputs["aug_rot"]
            else:
                rot = None
            self._build_st(
                module="resize",
                xyz=None,
                cs=rot,
                names=["P1", "P2", "P3", "P4"],
                out_size=self.config.kp_input_size,
                reduce_ratio=float(get_patch_size_no_aug(self.config)) /
                float(get_patch_size(self.config)),
            )

            # ----------------------------------------
            # Keypoint Detector
            #
            # The keypoint detector takes each patch input and outputs (1)
            # "score": the score of the patch, (2) "xy": keypoint position in
            # side the patch. The score output is the soft-maximum (not a
            # softmax) of the scores. The position output from the network
            # should be in the form friendly to the spatial
            # transformer. Outputs are always dictionaries.
            # Rotate ground truth coordinates when augmenting rotations.
            aug_rot = self.inputs["aug_rot"] \
                if self.config.augment_rotations else None
            xyz_gt_scaled = self.transform_xyz(self.inputs["xyz"],
                                               aug_rot,
                                               self.config.batch_size,
                                               self.scale_aug,
                                               transpose=True,
                                               names=["P1", "P2", "P3", "P4"])
            self._build_module(
                module="kp",
                inputs=self.outputs["resize"],
                bypass=xyz_gt_scaled,
                names=["P1", "P2", "P3", "P4"],
                skip=subtask == "ori" or subtask == "desc",
            )

            # For image based test
            # self._build_module(
            #     module="kp",
            #     inputs=self.inputs["img"],
            #     bypass=self.inputs["img"],  # This is a dummy
            #     names=["img"],
            #     skip=subtask != "kp",
            #     reuse=True,
            #     test_only=True,
            # )

            # ----------------------------------------
            # The Crop Spatial Transformer
            # Output: use the same support region as for the descriptor
            #
            xyz_kp_scaled = self.transform_kp(self.outputs["kp"],
                                              aug_rot,
                                              self.config.batch_size,
                                              1 / self.scale_aug,
                                              transpose=False,
                                              names=["P1", "P2", "P3"])
            self._build_st(
                module="crop",
                xyz=xyz_kp_scaled,
                cs=aug_rot,
                names=["P1", "P2", "P3"],
                out_size=self.config.ori_input_size,
                reduce_ratio=float(self.config.desc_input_size) /
                float(get_patch_size(self.config)),
            )

            # ----------------------------------------
            # Orientation Estimator
            #
            # The orientation estimator takes the crop outputs as input and
            # outputs orientations for the spatial transformer to
            # use. Actually, since we output cos and sin, we can simply take
            # the *UNNORMALIZED* version of the two, normalize them, and use
            # them directly for our affine transform. In short, it returns "cs": the
            # cos and the sin, but unnormalized. Outputs are always
            # dictionaries.
            # Bypass: just the GT angle
            if self.config.augment_rotations:
                rot = {}
                for name in ["P1", "P2", "P3"]:
                    rot[name] = self.inputs["angle"][name] - \
                        self.inputs["aug_rot"][name]["angle"]
            else:
                rot = self.inputs["angle"]
            self._build_module(
                module="ori",
                inputs=self.outputs["crop"],
                bypass=rot,
                names=["P1", "P2", "P3"],
                skip=subtask == "kp" or subtask == "desc",
            )

            # ----------------------------------------
            # The Rot Spatial Transformer.
            # Without rotation augmentation, it operates over the original
            # patch with the ground truth angle when bypassing. Otherwise, we
            # combine the augmented angle and the output of the orientation
            # module. We do not consider rotation augmentations for the
            # descriptor.
            if self.config.augment_rotations:
                rot = self.chain_cs(self.inputs["aug_rot"],
                                    self.outputs["ori"],
                                    names=["P1", "P2", "P3"])
                # rot = self.outputs["ori"]
                # xyz_desc_scaled = self.transform_kp(
                #     self.outputs["kp"],
                #     rot,
                #     self.config.batch_size,
                #     1 / self.scale_aug,
                #     transpose=False,
                #     names=["P1", "P2", "P3"])
            elif self.config.use_augmented_set:
                rot = self.outputs["ori"]
                # xyz_desc_scaled = self.transform_kp(
                #     self.outputs["kp"],
                #     rot,
                #     self.config.batch_size,
                #     1 / self.scale_aug,
                #     transpose=False,
                #     names=["P1", "P2", "P3"])
            else:
                rot = None
                # xyz_desc_scaled = self.inputs["xyz"]
            self._build_st(
                module="rot",
                xyz=xyz_kp_scaled,
                cs=rot,
                names=["P1", "P2", "P3"],
                out_size=self.config.desc_input_size,
                reduce_ratio=float(self.config.desc_input_size) /
                float(get_patch_size(self.config)),
            )

            # ----------------------------------------
            # Feature Descriptor
            #
            # The descriptor simply computes the descriptors, given the patch.
            self._build_module(
                module="desc",
                inputs=self.outputs["rot"],
                bypass=self.outputs["rot"],
                names=["P1", "P2", "P3"],
                skip=False,
            )
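
A minimal sketch of the unnormalized cos/sin convention used by the orientation module above (hypothetical values):

import numpy as np

cs = np.array([[2.0, 2.0]])  # raw network output: unnormalized (cos, sin)
cs = cs / np.linalg.norm(cs, axis=1, keepdims=True)
print(cs)  # -> [[0.7071, 0.7071]], a 45-degree rotation, directly usable
           # in the spatial transformer's affine transform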
Example #8
    def _build_placeholders(self):
        """Builds Tensorflow Placeholders"""

        # The inputs placeholder dictionary
        self.inputs = {}
        # multiple types
        # LATER: label might not be necessary
        types = ["patch", "xyz", "angle"]
        if self.config.use_augmented_set:
            types += ["aug_rot"]
        for _type in types:
            self.inputs[_type] = {}

        # We *ARE* going to specify the input size, since the spatial
        # transformer implementation *REQUIRES* us to do so. Note that this
        # has to be dealt with in the validate loop.

        # batch_size = self.config.batch_size

        # Use variable batch size
        batch_size = None

        # We also read nchannel from the configuration. Make sure that the data
        # module is behaving accordingly
        nchannel = self.config.nchannel

        # Get the input patch size from config
        patch_size = float(get_patch_size(self.config))

        # Compute the r_base (i.e., the overlap radius used when computing
        # keypoint overlaps).
        self.r_base = (float(self.config.desc_input_size) /
                       float(get_patch_size_no_aug(self.config)))

        # P1, P2, P3, P4 in the paper. P1, P2, P3 are keypoints; P1 and P2
        # correspond, P1 and P3 don't correspond, and P4 is a non-keypoint patch.
        for _name in ["P1", "P2", "P3", "P4"]:
            self.inputs["patch"][_name] = tf.placeholder(
                tf.float32,
                shape=[batch_size, patch_size, patch_size, nchannel],
                name=_name,
            )
            self.inputs["xyz"][_name] = tf.placeholder(
                tf.float32,
                shape=[
                    batch_size,
                    3,
                ],
                name=_name,
            )
            self.inputs["angle"][_name] = tf.placeholder(
                tf.float32,
                shape=[
                    batch_size,
                    1,
                ],
                name=_name,
            )
            if self.config.use_augmented_set:
                self.inputs["aug_rot"][_name] = {
                    "cs":
                    tf.placeholder(
                        tf.float32,
                        shape=[
                            batch_size,
                            2,
                        ],
                        name=_name,
                    ),
                    "angle":
                    tf.placeholder(
                        tf.float32,
                        shape=[
                            batch_size,
                            1,
                        ],
                        name=_name,
                    )
                }
            # Add to summary to view them
            image_summary_nhwc(
                "input/" + _name,
                self.inputs["patch"][_name],
            )

        # For Image based test
        self.inputs["img"] = {
            "img":
            tf.placeholder(
                tf.float32,
                shape=[None, None, None, nchannel],
                name="img",
            )
        }

        # Run-mode flag for dropout and batch_norm
        self.is_training = tf.placeholder(
            tf.bool,
            shape=(),
            name="is_training",
        )
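
A minimal feed sketch for the placeholders above (hypothetical values: batch 32, patch size 64, one channel; net stands for an instance of this class):

import numpy as np

feed_dict = {}
for _name in ["P1", "P2", "P3", "P4"]:
    feed_dict[net.inputs["patch"][_name]] = np.zeros((32, 64, 64, 1), np.float32)
    feed_dict[net.inputs["xyz"][_name]] = np.zeros((32, 3), np.float32)
    feed_dict[net.inputs["angle"][_name]] = np.zeros((32, 1), np.float32)
feed_dict[net.is_training] = True
# The batch dimension is None in the placeholders, so any batch size works.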
Example #9
    def __init__(self, sess, config, dataset, force_mean_std=None):
        # Save pointer to the tensorflow session
        self.sess = sess
        # Save pointer to config
        self.config = config
        # Save pointer to the data module
        self.dataset = dataset
        # # Summaries to compute for this network
        # self.summary = []

        # Normalizer for the input data (they are raw images)
        # Currently normalized to be between -1 and 1
        self.mean = {}
        self.std = {}
        # Load values if they already exist
        if force_mean_std is not None:
            self.mean = force_mean_std["mean"]
            self.std = force_mean_std["std"]
        elif self.config.mean_std_type == "hardcoded":
            print("-- Using default values for mean/std")
            for _module in ["kp", "ori", "desc"]:
                self.mean[_module] = 128.0
                self.std[_module] = 128.0
        elif self.config.mean_std_type == "old":
            print("-- Using old (piccadilly) values for mean/std")
            self.mean[
                "kp"] = 116.4368117568544249706974369473755359649658203125
            self.std["kp"] = 88.083076379771597430590190924704074859619140625
            self.mean[
                "ori"] = 116.4368117568544249706974369473755359649658203125
            self.std["ori"] = 88.083076379771597430590190924704074859619140625
            self.mean["desc"] = 110.75389862060546875
            self.std["desc"] = 61.53688812255859375
        elif self.config.mean_std_type == "dataset":
            t = time()
            print("-- Recomputing dataset mean/std...")
            # Account for augmented sets
            if self.config.use_augmented_set:
                b = int(
                    (get_patch_size(config) - get_patch_size_no_aug(config)) /
                    2)
            else:
                b = 0

            if b > 0:
                _d = self.dataset.data["train"]["patch"][:, :, b:-b, b:-b]
            else:
                _d = self.dataset.data["train"]["patch"][:, :, :, :]

            # Do this incrementally to avoid memory problems
            jump = 1000
            data_mean = np.zeros(_d.shape[0])
            data_std = np.zeros(_d.shape[0])
            for i in tqdm(range(0, _d.shape[0], jump)):
                data_mean[i:i + jump] = _d[i:i + jump].mean()
                data_std[i:i + jump] = _d[i:i + jump].std()
            data_mean = data_mean.mean()
            data_std = data_std.mean()
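            # NOTE: averaging per-chunk means and stds only approximates the
            # global statistics (exact for the mean only when every chunk has
            # the same size); the chunking keeps memory usage bounded.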
            print('-- Dataset mean: {0:.03f}, std = {1:.03f}'.format(
                data_mean, data_std))

            for _module in ["kp", "ori", "desc"]:
                self.mean[_module] = data_mean
                self.std[_module] = data_std
            print("-- Done in {0:.02f} sec".format(time() - t))
        elif self.config.mean_std_type == "batch":
            t = time()
            print("-- Will recompute mean/std per batch...")
        elif self.config.mean_std_type == "sample":
            t = time()
            print("-- Will recompute mean/std per sample...")
        elif self.config.mean_std_type == "sequence":
            t = time()
            print("-- Will recompute mean/std per sequence...")
            raise RuntimeError("TODO")
        else:
            raise RuntimeError("Unknown mean-std strategy")

        # Account for the keypoint scale change while augmenting rotations
        self.scale_aug = float(get_patch_size(self.config)) / \
            float(get_patch_size_no_aug(self.config))

        # Allocate placeholders
        with tf.variable_scope("placeholders"):
            self._build_placeholders()
        # Build the network
        with tf.variable_scope("network"):
            self._build_network()
        # Build loss
        with tf.variable_scope("loss"):
            self._build_loss()
        # Build the optimization op
        with tf.variable_scope("optimization"):
            self._build_optim()

        # Build the legacy component. This is only used for accessing old
        # framework weights. You can safely ignore this part
        # build_legacy(self)

        # Show all variables in the network
        show_all_variables()

        # Add all variables into histogram summary
        for _module in ["kp", "ori", "desc"]:
            for _param in self.params[_module]:
                tf.summary.histogram(_param.name, _param)

        # Collect all summary (Lazy...)
        self.summary = tf.summary.merge_all()
Example #10
#######################  configuration  #############################
print('Reading configuration...')

config = read_config(args.config)

cfg_name = args.model
out_name = args.output

CNN_INPUT_DIR = config['training_on_patches']['input_dir']
# input image dimensions
PATCH_SIZE_W, PATCH_SIZE_D = get_patch_size(CNN_INPUT_DIR)
img_rows, img_cols = PATCH_SIZE_W, PATCH_SIZE_D

batch_size = config['training_on_patches']['batch_size']
nb_epoch = config['training_on_patches']['nb_epoch']
nb_classes = config['training_on_patches']['nb_classes']

######################  CNN compilation  ###########################
print('Compiling CNN model...')
with tf.device('/gpu:' + args.gpu):
    model = load_model(cfg_name)

    sgd = SGD(lr=0.002, decay=1e-5, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd,
                  loss={
                      'em_trk_none_netout': 'categorical_crossentropy',