예제 #1
0
    def __init__(self, sess, config, dataset):
        """Keep handles to the session, config and dataset, then build the graph.

        The constructor fills the per-module input normalizers
        (``self.mean`` / ``self.std``), computes ``self.scale_aug``, builds
        the placeholders, network, loss and optimization ops (each under its
        own variable scope), builds the legacy component, registers a
        histogram summary per module parameter and finally merges all
        summaries into ``self.summary``.
        """
        # Handles to the tensorflow session, the configuration and the data
        # module
        self.sess = sess
        self.config = config
        self.dataset = dataset

        # Normalizers for the input data (they are raw images). The default
        # of 128 / 128 normalizes the inputs to be between -1 and 1.
        module_names = ("kp", "ori", "desc")
        self.mean = {name: 128.0 for name in module_names}
        self.std = {name: 128.0 for name in module_names}

        if self.config.use_old_mean_std:
            # The keypoint and orientation modules share the same old values
            old_kp_ori_mean = \
                116.4368117568544249706974369473755359649658203125
            old_kp_ori_std = 88.083076379771597430590190924704074859619140625
            self.mean.update(
                kp=old_kp_ori_mean,
                ori=old_kp_ori_mean,
                desc=110.75389862060546875,
            )
            self.std.update(
                kp=old_kp_ori_std,
                ori=old_kp_ori_std,
                desc=61.53688812255859375,
            )

        # Ratio between the patch size and the non-augmented patch size; it
        # accounts for the keypoint scale change while augmenting rotations
        full_patch_size = float(get_patch_size(self.config))
        plain_patch_size = float(get_patch_size_no_aug(self.config))
        self.scale_aug = full_patch_size / plain_patch_size

        # Build the graph stage by stage, each stage in its own variable
        # scope: placeholders, network, loss and the optimization op
        build_stages = (
            ("placeholders", "_build_placeholders"),
            ("network", "_build_network"),
            ("loss", "_build_loss"),
            ("optimization", "_build_optim"),
        )
        for scope_name, builder_name in build_stages:
            with tf.variable_scope(scope_name):
                getattr(self, builder_name)()

        # Build the legacy component. This is only used for accessing old
        # framework weights. You can safely ignore this part
        build_legacy(self)

        # Show all variables in the network
        show_all_variables()

        # One histogram summary per parameter of every module
        for module_name in module_names:
            for variable in self.params[module_name]:
                tf.summary.histogram(variable.name, variable)

        # Collect every summary registered so far into a single op
        self.summary = tf.summary.merge_all()
예제 #2
0
def config_to_param(config):
    """Build a ``paramStruct`` from ``config``.

    This takes care of the transfer between the ``config`` object and the
    ``param``-based framework: the "dataset", "patch" and "model" groups are
    filled in (mostly with fixed values, partly from ``config``) and the
    data/temp/scratch folders are copied over from ``config``.

    Returns the populated ``paramStruct`` instance.
    """

    param = paramStruct()

    # ---- Param Group "dataset"
    dataset = param.dataset
    dataset.nTestPercent = 20
    dataset.dataType = "ECCV"
    dataset.nValidPercent = 20
    dataset.fMinKpSize = 2.0
    dataset.nPosPerImg = -1
    # A list is passed on purpose: this module actually supports
    # concatenating datasets.
    dataset.trainSetList = ["ECCV/" + config.data_name]
    dataset.nNegPerImg = 1000
    dataset.nTrainPercent = 60

    # ---- Param Group "patch"
    patch = param.patch
    if not config.old_data_compat:
        # Both the non-augmented and the augmented patch sizes are stored
        patch.nPatchSize = int(get_patch_size_no_aug(config))
        patch.nPatchSizeAug = int(get_patch_size(config))
    else:
        patch.nPatchSize = int(get_patch_size(config))
    patch.noscale = False
    patch.fNegOverlapTh = 0.1
    patch.sNegMineMethod = "use_all_SIFT_points"
    patch.fRatioScale = float(get_ratio_scale(config))
    patch.fPerturbInfo = np.array([0.2, 0.2, 0.0], dtype=float)
    patch.nMaxRandomNegMineIter = 500 if config.old_data_compat else 100
    patch.fMaxScale = 1.0
    patch.bPerturb = 1.0

    # ---- Param Group "model"
    param.model.nDescInputSize = int(config.desc_input_size)

    # ---- Folders are overridden by the ones given in config
    for folder_key in ("data_dir", "temp_dir", "scratch_dir"):
        setattr(param, folder_key, getattr(config, folder_key))

    return param
예제 #3
0
    def _build_network(self):
        """Define all the architecture here. Use the modules if necessary.

        The graph is assembled, inside the "lift" variable scope, as the
        following chain:

        1. "resize" spatial transformer -> input of the keypoint module
        2. "kp" module (keypoint detector)
        3. "crop" spatial transformer -> input of the orientation module
        4. "ori" module (orientation estimator)
        5. "rot" spatial transformer -> input of the descriptor module
        6. "desc" module (feature descriptor)

        The python modules implementing "kp", "ori" and "desc" are imported
        from ``modules.<name>``, where ``<name>`` is read from
        ``config.module_kp`` / ``config.module_ori`` / ``config.module_desc``.

        ``config.subtask`` decides which modules are skipped (see the
        ``skip=`` arguments below): "kp" is skipped for the "ori" and "desc"
        subtasks, "ori" is skipped for the "kp" and "desc" subtasks, and
        "desc" is never skipped.

        Side effects: (re)initializes ``self.modules``, ``self.outputs``,
        ``self.params`` and ``self.allparams``. Reads ``self.inputs`` and
        ``self.scale_aug``, so both must be set before calling this method.
        """

        # Import modules according to the configurations
        self.modules = {}
        for _key in ["kp", "ori", "desc"]:
            self.modules[_key] = importlib.import_module("modules.{}".format(
                getattr(self.config, "module_" + _key)))

        # prepare dictionary for the output and parameters of each module
        self.outputs = {}
        self.params = {}
        self.allparams = {}
        for _key in self.modules:
            self.outputs[_key] = {}
            self.params[_key] = []
            self.allparams[_key] = []
        # create a joint params list
        # NOTE: params is a list, not a dict!
        self.params["joint"] = []
        self.allparams["joint"] = []
        # create output slots for the three spatial transformers
        # (resize, crop and rot)
        self.outputs["resize"] = {}
        self.outputs["crop"] = {}
        self.outputs["rot"] = {}

        # Actual Network definition
        with tf.variable_scope("lift"):
            # Graph construction depends on the subtask
            subtask = self.config.subtask

            # ----------------------------------------
            # Initial resize for the keypoint module
            # Includes rotation when augmentations are used
            #
            # NOTE(review): this branch is keyed on `use_augmented_set`,
            # whereas `aug_rot` below is keyed on `augment_rotations` --
            # confirm that the two flags are always set consistently.
            if self.config.use_augmented_set:
                rot = self.inputs["aug_rot"]
            else:
                rot = None
            self._build_st(
                module="resize",
                xyz=None,
                cs=rot,
                names=["P1", "P2", "P3", "P4"],
                out_size=self.config.kp_input_size,
                reduce_ratio=float(get_patch_size_no_aug(self.config)) /
                float(get_patch_size(self.config)),
            )

            # ----------------------------------------
            # Keypoint Detector
            #
            # The keypoint detector takes each patch input and outputs (1)
            # "score": the score of the patch, (2) "xy": keypoint position
            # inside the patch. The score output is the soft-maximum (not a
            # softmax) of the scores. The position output from the network
            # should be in the form friendly to the spatial
            # transformer. Outputs are always dictionaries.
            # Rotate ground truth coordinates when augmenting rotations.
            aug_rot = self.inputs["aug_rot"] \
                if self.config.augment_rotations else None
            xyz_gt_scaled = self.transform_xyz(self.inputs["xyz"],
                                               aug_rot,
                                               self.config.batch_size,
                                               self.scale_aug,
                                               transpose=True,
                                               names=["P1", "P2", "P3", "P4"])
            # Bypass: the (transformed) ground truth coordinates
            self._build_module(
                module="kp",
                inputs=self.outputs["resize"],
                bypass=xyz_gt_scaled,
                names=["P1", "P2", "P3", "P4"],
                skip=subtask == "ori" or subtask == "desc",
            )

            # For image based test
            # self._build_module(
            #     module="kp",
            #     inputs=self.inputs["img"],
            #     bypass=self.inputs["img"],  # This is a dummy
            #     names=["img"],
            #     skip=subtask != "kp",
            #     reuse=True,
            #     test_only=True,
            # )

            # ----------------------------------------
            # The Crop Spatial Transformer
            # Output: use the same support region as for the descriptor
            #
            # The keypoint outputs are transformed with the inverse of the
            # scale used for the ground truth above (1 / scale_aug). P4 is
            # not part of the chain from here on.
            xyz_kp_scaled = self.transform_kp(self.outputs["kp"],
                                              aug_rot,
                                              self.config.batch_size,
                                              1 / self.scale_aug,
                                              transpose=False,
                                              names=["P1", "P2", "P3"])
            self._build_st(
                module="crop",
                xyz=xyz_kp_scaled,
                cs=aug_rot,
                names=["P1", "P2", "P3"],
                out_size=self.config.ori_input_size,
                reduce_ratio=float(self.config.desc_input_size) /
                float(get_patch_size(self.config)),
            )

            # ----------------------------------------
            # Orientation Estimator
            #
            # The orientation estimator takes the crop outputs as input and
            # outputs orientations for the spatial transformer to
            # use. Actually, since we output cos and sin, we can simply use the
            # *UNNORMALIZED* version of the two, normalize them, and directly
            # use it for our affine transform. In short it returns "cs": the
            # cos and the sin, but unnormalized. Outputs are always
            # dictionaries.
            # Bypass: just the GT angle (minus the augmentation angle when
            # rotations are augmented)
            if self.config.augment_rotations:
                rot = {}
                for name in ["P1", "P2", "P3"]:
                    rot[name] = self.inputs["angle"][name] - \
                        self.inputs["aug_rot"][name]["angle"]
            else:
                rot = self.inputs["angle"]
            self._build_module(
                module="ori",
                inputs=self.outputs["crop"],
                bypass=rot,
                names=["P1", "P2", "P3"],
                skip=subtask == "kp" or subtask == "desc",
            )

            # ----------------------------------------
            # The Rot Spatial Transformer.
            # - With rotation augmentation: the augmented rotation and the
            #   output of the orientation module are chained (chain_cs).
            # - With the augmented set but no rotation augmentation: the
            #   output of the orientation module is used directly.
            # - Otherwise: no rotation is passed to the transformer
            #   (cs=None).
            if self.config.augment_rotations:
                rot = self.chain_cs(self.inputs["aug_rot"],
                                    self.outputs["ori"],
                                    names=["P1", "P2", "P3"])
                # rot = self.outputs["ori"]
                # xyz_desc_scaled = self.transform_kp(
                #     self.outputs["kp"],
                #     rot,
                #     self.config.batch_size,
                #     1 / self.scale_aug,
                #     transpose=False,
                #     names=["P1", "P2", "P3"])
            elif self.config.use_augmented_set:
                rot = self.outputs["ori"]
                # xyz_desc_scaled = self.transform_kp(
                #     self.outputs["kp"],
                #     rot,
                #     self.config.batch_size,
                #     1 / self.scale_aug,
                #     transpose=False,
                #     names=["P1", "P2", "P3"])
            else:
                rot = None
                # xyz_desc_scaled = self.inputs["xyz"]
            self._build_st(
                module="rot",
                xyz=xyz_kp_scaled,
                cs=rot,
                names=["P1", "P2", "P3"],
                out_size=self.config.desc_input_size,
                reduce_ratio=float(self.config.desc_input_size) /
                float(get_patch_size(self.config)),
            )

            # ----------------------------------------
            # Feature Descriptor
            #
            # The descriptor simply computes the descriptors, given the patch.
            # The bypass is a dummy here, since the module is never skipped.
            self._build_module(
                module="desc",
                inputs=self.outputs["rot"],
                bypass=self.outputs["rot"],
                names=["P1", "P2", "P3"],
                skip=False,
            )
예제 #4
0
    def _build_placeholders(self):
        """Builds Tensorflow Placeholders.

        Populates ``self.inputs`` with, for each of "P1".."P4":

        - ``inputs["patch"][name]``: float32, [batch, patch, patch, nchannel]
        - ``inputs["xyz"][name]``: float32, [batch, 3]
        - ``inputs["angle"][name]``: float32, [batch, 1]
        - ``inputs["aug_rot"][name]``: dict with "cs" ([batch, 2]) and
          "angle" ([batch, 1]); only when ``config.use_augmented_set``

        plus ``inputs["img"]["img"]`` ([None, None, None, nchannel]) for the
        image based test. Also sets ``self.r_base`` and the scalar boolean
        placeholder ``self.is_training``.

        Raises:
            ValueError: if the patch size derived from the configuration is
                not an integral number of pixels.
        """

        # The inputs placeholder dictionary
        self.inputs = {}
        # multiple types
        # LATER: label might not be necessary
        types = ["patch", "xyz", "angle"]
        if self.config.use_augmented_set:
            types += ["aug_rot"]
        for _type in types:
            self.inputs[_type] = {}

        # We *ARE* going to specify the input size, since the spatial
        # transformer implementation *REQUIRES* us to do so. Note that this
        # has to be dealt with in the validate loop.

        # Use variable batch size
        batch_size = None

        # We also read nchannel from the configuration. Make sure that the data
        # module is behaving accordingly
        nchannel = self.config.nchannel

        # Get the input patch size from config. Static shape dimensions have
        # to be integers (newer TensorFlow releases reject float dimension
        # values), so convert explicitly and refuse non-integral sizes
        # instead of silently truncating them.
        raw_patch_size = float(get_patch_size(self.config))
        patch_size = int(raw_patch_size)
        if patch_size != raw_patch_size:
            raise ValueError(
                "Patch size must be an integral number of pixels, "
                "got {}".format(raw_patch_size))

        # Compute the r_base (i.e. overlap radius when computing the keypoint
        # overlaps).
        self.r_base = (float(self.config.desc_input_size) /
                       float(get_patch_size_no_aug(self.config)))

        # P1, P2, P3, P4 in the paper. P1, P2, P3 are keypoints, P1, P2
        # correspond, P1, and P3 don't correspond, P4 is a non-keypoint patch.
        #
        # NOTE: all placeholders of a branch are created with the same
        # ``name``; the creation order below is kept as-is so that the
        # resulting graph names do not change.
        for _name in ["P1", "P2", "P3", "P4"]:
            self.inputs["patch"][_name] = tf.placeholder(
                tf.float32,
                shape=[batch_size, patch_size, patch_size, nchannel],
                name=_name,
            )
            self.inputs["xyz"][_name] = tf.placeholder(
                tf.float32,
                shape=[batch_size, 3],
                name=_name,
            )
            self.inputs["angle"][_name] = tf.placeholder(
                tf.float32,
                shape=[batch_size, 1],
                name=_name,
            )
            if self.config.use_augmented_set:
                self.inputs["aug_rot"][_name] = {
                    "cs":
                    tf.placeholder(
                        tf.float32,
                        shape=[batch_size, 2],
                        name=_name,
                    ),
                    "angle":
                    tf.placeholder(
                        tf.float32,
                        shape=[batch_size, 1],
                        name=_name,
                    )
                }
            # Add to summary to view them
            image_summary_nhwc(
                "input/" + _name,
                self.inputs["patch"][_name],
            )

        # For Image based test
        self.inputs["img"] = {
            "img":
            tf.placeholder(
                tf.float32,
                shape=[None, None, None, nchannel],
                name="img",
            )
        }

        # For runmode in dropout and batch_norm
        self.is_training = tf.placeholder(
            tf.bool,
            shape=(),
            name="is_training",
        )
예제 #5
0
파일: lift_kp.py 프로젝트: rory0114/tf-lift
def process(inputs, bypass, name, skip, config, is_training):
    """Build the keypoint detector for one siamese branch.

    The detector is a single convolution followed by a GHH activation that
    produces a score map. The score map is then center-cropped, and the
    keypoint position and score are derived from it.

    inputs: input to the network (indexed as a 4-D tensor below)
    bypass: gt to be used when trying to bypass
    name: name of the siamese branch; "img" selects the whole-image mode
    skip: whether to apply the bypass information
    config: configuration; reads kp_filter_size, kp_input_size,
        desc_input_size, kp_com_strength and kp_scoremap_softmax_strength
    is_training: currently unused (only referenced by the disabled
        batch normalization code)

    Returns a dictionary:
      - when ``skip`` is set: whatever ``bypass_kp(bypass)`` returns
      - when ``name == "img"``: only "scoremap-uncut"
      - otherwise: "scoremap-uncut", "scoremap" (center-cropped score map),
        "xyz" (stacked x, y, z with z fixed to zero) and "score"

    Raises ValueError when the score map already has a larger boundary than
    the required one, i.e. when the required crop would be negative.
    """

    def _as_float32(value):
        # Literal replacement for the removed ``np.cast["float32"]``
        return np.asarray(value, dtype=np.float32)

    # let's look at the inputs that get fed into this layer except when we are
    # looking at the whole image
    if name != "img":
        image_summary_nhwc(name + "-input", inputs)

    if skip:
        return bypass_kp(bypass)

    # we always expect a dictionary as return value to be more explicit
    res = {}

    # now abuse cur_in so that we can simply copy paste
    cur_in = inputs

    # NOTE: batch normalization of the input and of the convolution output
    # is intentionally disabled here.

    with tf.variable_scope("conv-ghh-1"):
        nu = 1
        ns = 4
        nm = 4
        cur_in = conv_2d(cur_in, config.kp_filter_size, nu * ns * nm, 1,
                         "VALID")
        cur_in = ghh(cur_in, ns, nm)

    res["scoremap-uncut"] = cur_in

    # ---------------------------------------------------------------------
    # Check how much we need to cut
    kp_input_size = config.kp_input_size
    patch_size = get_patch_size_no_aug(config)
    desc_input_size = config.desc_input_size
    rf = float(kp_input_size) / float(patch_size)

    input_shape = get_tensor_shape(inputs)
    uncut_shape = get_tensor_shape(cur_in)
    # Boundary required on each side: half the diagonal of the descriptor
    # support region, expressed at the keypoint input resolution
    req_boundary = np.ceil(rf * np.sqrt(2) * desc_input_size / 2.0).astype(int)
    # Boundary already removed on each side by the "VALID" convolution
    cur_boundary = (input_shape[2] - uncut_shape[2]) // 2
    crop_size = req_boundary - cur_boundary

    # Stop building the network outputs if we are building for the full image
    if name == "img":
        return res

    # Crop center. A zero crop has to be a no-op: ``x[0:-0]`` is ``x[0:0]``,
    # which would give an empty score map. A negative crop cannot be
    # expressed as a center crop at all, so fail loudly instead of slicing
    # garbage.
    if crop_size < 0:
        raise ValueError(
            "Keypoint score map boundary ({}) exceeds the required boundary "
            "({}); cannot center-crop".format(cur_boundary, req_boundary))
    if crop_size > 0:
        cur_in = cur_in[:, crop_size:-crop_size, crop_size:-crop_size, :]
    res["scoremap"] = cur_in

    # ---------------------------------------------------------------------
    # Mapping layer to x,y,z
    com_strength = config.kp_com_strength
    scoremap_shape = get_tensor_shape(cur_in)

    od = len(scoremap_shape)
    # All axes except the first (batch) one
    reduce_axes = list(range(1, od))

    # CoM to get the coordinates
    pos_array_x = tf.range(scoremap_shape[2], dtype=tf.float32)
    pos_array_y = tf.range(scoremap_shape[1], dtype=tf.float32)

    out = cur_in
    # Subtract the per-sample maximum before exponentiating
    max_out = tf.reduce_max(out, axis=reduce_axes, keep_dims=True)
    o = tf.exp(com_strength * (out - max_out))
    sum_o = tf.reduce_sum(o, axis=reduce_axes, keep_dims=True)
    x = tf.reduce_sum(o * tf.reshape(pos_array_x, [1, 1, -1, 1]),
                      axis=reduce_axes,
                      keep_dims=True) / sum_o
    y = tf.reduce_sum(o * tf.reshape(pos_array_y, [1, -1, 1, 1]),
                      axis=reduce_axes,
                      keep_dims=True) / sum_o

    # Remove the unnecessary dimensions (i.e. flatten them)
    x = tf.reshape(x, (-1, ))
    y = tf.reshape(y, (-1, ))

    # --------------
    # Turn x, and y into range -1 to 1, where the patch size is
    # mapped to -1 and 1
    boundary = _as_float32(req_boundary)
    orig_patch_width = scoremap_shape[2] + _as_float32(req_boundary * 2.0)
    orig_patch_height = scoremap_shape[1] + _as_float32(req_boundary * 2.0)

    x = ((x + boundary) / _as_float32((orig_patch_width - 1.0) * 0.5) -
         _as_float32(1.0))
    y = ((y + boundary) / _as_float32((orig_patch_height - 1.0) * 0.5) -
         _as_float32(1.0))

    # --------------
    # No movement in z direction
    z = tf.zeros_like(x)

    res["xyz"] = tf.stack([x, y, z], axis=1)

    # ---------------------------------------------------------------------
    # Score of the patch, computed from the cropped score map
    res["score"] = softmax(
        res["scoremap"],
        axis=reduce_axes,
        softmax_strength=config.kp_scoremap_softmax_strength)

    return res
예제 #6
0
    def __init__(self, sess, config, dataset, force_mean_std=None):
        """Keep handles to the session, config and dataset, then build the graph.

        The input normalizers ``self.mean`` / ``self.std`` are taken from
        ``force_mean_std`` (a mapping with "mean" and "std" entries) when it
        is given. Otherwise they follow ``config.mean_std_type``:

        - "hardcoded": 128 / 128 for every module
        - "old": the old (piccadilly) values
        - "dataset": recomputed from the training patches
        - "batch" / "sample": left empty (recomputed later)
        - "sequence": not implemented, raises RuntimeError
        - anything else raises RuntimeError

        Afterwards ``self.scale_aug`` is computed, the placeholders, network,
        loss and optimization ops are built (each under its own variable
        scope), a histogram summary is registered per module parameter and
        all summaries are merged into ``self.summary``.
        """
        # Handles to the tensorflow session, the configuration and the data
        # module
        self.sess = sess
        self.config = config
        self.dataset = dataset

        # Normalizer for the input data (they are raw images)
        module_names = ("kp", "ori", "desc")
        self.mean = {}
        self.std = {}

        if force_mean_std is not None:
            # Load values if they already exist
            self.mean = force_mean_std["mean"]
            self.std = force_mean_std["std"]
        elif self.config.mean_std_type == "hardcoded":
            # 128 / 128 normalizes the inputs to be between -1 and 1
            print("-- Using default values for mean/std")
            for module_name in module_names:
                self.mean[module_name] = 128.0
                self.std[module_name] = 128.0
        elif self.config.mean_std_type == "old":
            print("-- Using old (piccadilly) values for mean/std")
            # The keypoint and orientation modules share the same values
            old_kp_ori_mean = \
                116.4368117568544249706974369473755359649658203125
            old_kp_ori_std = 88.083076379771597430590190924704074859619140625
            for module_name in ("kp", "ori"):
                self.mean[module_name] = old_kp_ori_mean
                self.std[module_name] = old_kp_ori_std
            self.mean["desc"] = 110.75389862060546875
            self.std["desc"] = 61.53688812255859375
        elif self.config.mean_std_type == "dataset":
            started_at = time()
            print("-- Recomputing dataset mean/std...")

            # Account for augmented sets: drop the border that separates the
            # patch size from the non-augmented patch size
            border = 0
            if self.config.use_augmented_set:
                size_gap = (get_patch_size(config) -
                            get_patch_size_no_aug(config))
                border = int(size_gap / 2)

            train_patches = self.dataset.data["train"]["patch"]
            if border > 0:
                patches = train_patches[:, :, border:-border, border:-border]
            else:
                patches = train_patches[:, :, :, :]

            # Do this incrementally to avoid memory problems: every sample
            # of a chunk receives the statistics of its chunk, and the final
            # values are the averages over all samples
            chunk = 1000
            num_samples = patches.shape[0]
            per_sample_mean = np.zeros(num_samples)
            per_sample_std = np.zeros(num_samples)
            for begin in tqdm(range(0, num_samples, chunk)):
                end = begin + chunk
                per_sample_mean[begin:end] = patches[begin:end].mean()
                per_sample_std[begin:end] = patches[begin:end].std()
            dataset_mean = per_sample_mean.mean()
            dataset_std = per_sample_std.mean()
            print('-- Dataset mean: {0:.03f}, std = {1:.03f}'.format(
                dataset_mean, dataset_std))

            for module_name in module_names:
                self.mean[module_name] = dataset_mean
                self.std[module_name] = dataset_std
            print("-- Done in {0:.02f} sec".format(time() - started_at))
        elif self.config.mean_std_type == "batch":
            started_at = time()
            print("-- Will recompute mean/std per batch...")
        elif self.config.mean_std_type == "sample":
            started_at = time()
            print("-- Will recompute mean/std per sample...")
        elif self.config.mean_std_type == "sequence":
            started_at = time()
            print("-- Will recompute mean/std per sequence...")
            raise RuntimeError("TODO")
        else:
            raise RuntimeError("Unknown mean-std strategy")

        # Ratio between the patch size and the non-augmented patch size; it
        # accounts for the keypoint scale change while augmenting rotations
        full_patch_size = float(get_patch_size(self.config))
        plain_patch_size = float(get_patch_size_no_aug(self.config))
        self.scale_aug = full_patch_size / plain_patch_size

        # Build the graph stage by stage, each stage in its own variable
        # scope: placeholders, network, loss and the optimization op
        build_stages = (
            ("placeholders", "_build_placeholders"),
            ("network", "_build_network"),
            ("loss", "_build_loss"),
            ("optimization", "_build_optim"),
        )
        for scope_name, builder_name in build_stages:
            with tf.variable_scope(scope_name):
                getattr(self, builder_name)()

        # Build the legacy component. This is only used for accessing old
        # framework weights. You can safely ignore this part
        # build_legacy(self)

        # Show all variables in the network
        show_all_variables()

        # One histogram summary per parameter of every module
        for module_name in module_names:
            for variable in self.params[module_name]:
                tf.summary.histogram(variable.name, variable)

        # Collect every summary registered so far into a single op
        self.summary = tf.summary.merge_all()