Пример #1
0
    def __init__(self,
                 filter_count_values=[16, 32, 48, 64],
                 initial_conv=[96, 7, 2],
                 num_classes=40,
                 depth_multiplier=1):
        """Build the stem convolution, four Slim stages and the classifier.

        Args:
            filter_count_values: Four values, one per Slim stage; each is
                passed as the second argument of the matching ``Slim``.
            initial_conv: Three values forwarded to the stem ``conv_2d`` as
                its second argument, third argument and ``stride``
                (presumably output channels and kernel size -- confirm
                against ``conv_2d``).
            num_classes: Output size of the final linear layer.
            depth_multiplier: Only recorded in ``hyper_params``; it is not
                used anywhere else in this constructor.

        NOTE: the list defaults are shared between calls. This constructor
        only reads them, but ``zip_params`` receives the same objects, so
        they must not be mutated downstream.
        """
        super().__init__()

        # Store architecture hyper-parameters for model persistence / loading.
        self.hyper_params = zip_params(filter_count_values, initial_conv,
                                       num_classes, depth_multiplier)

        # Stem: convolution on a 3-channel input followed by max pooling.
        self.conv1 = conv_2d(3,
                             initial_conv[0],
                             initial_conv[1],
                             stride=initial_conv[2])
        self.max_pool1 = nn.MaxPool2d(3, 2)

        # Each later stage is fed three times the previous stage's filter
        # count (presumably Slim emits 3x its filter argument -- TODO confirm).
        self.slim1 = Slim(initial_conv[0], filter_count_values[0])
        self.max_pool2 = nn.MaxPool2d(3, 2)

        self.slim2 = Slim(filter_count_values[0] * 3, filter_count_values[1])
        self.max_pool3 = nn.MaxPool2d(3, 2)

        self.slim3 = Slim(filter_count_values[1] * 3, filter_count_values[2])
        self.max_pool4 = nn.MaxPool2d(3, 2)

        self.slim4 = Slim(filter_count_values[2] * 3, filter_count_values[3])
        self.max_pool5 = nn.MaxPool2d(3, 2)

        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(filter_count_values[3] * 3, num_classes)

        # Only the stem convolution and the classifier are initialised here;
        # the Slim stages are not passed to init_weights in this constructor.
        for module in (self.conv1, self.fc):
            module.apply(init_weights)
Пример #2
0
def conv_block(x,
               num_filters=32,
               filter_dims=[5, 5],
               fc_size=1024,
               scope='conv_block',
               batch_size=4):
    """Apply seven ReLU convolutions and one fully connected layer to ``x``.

    Args:
        x: Input passed to the first convolution.
        num_filters: Filter count of conv1/conv2; doubled for conv3/conv4 and
            doubled again for conv5/conv6. conv7 always uses 32 filters.
        filter_dims: Kernel dimensions for conv2..conv7 (conv1 is fixed at
            7x7). The list default is only read, never mutated.
        fc_size: Number of output units of the fully connected layer.
        scope: Variable scope that wraps every layer created here.
        batch_size: Leading dimension used when flattening the feature map.

    Returns:
        The output of the fully connected layer.
    """
    with tf.variable_scope(scope):
        # downsample image with stride [3, 3]
        a = conv_2d(x,
                    dims=[7, 7],
                    filters=num_filters,
                    strides=[3, 3],
                    std='xavier',
                    padding='VALID',
                    activation=tf.nn.relu,
                    scope='conv1')

        # no downsampling with stride [1, 1]
        a = conv_2d(a,
                    filter_dims,
                    filters=num_filters,
                    strides=[1, 1],
                    std='xavier',
                    padding='SAME',
                    activation=tf.nn.relu,
                    scope='conv2')

        num_filters = 2 * num_filters
        # downsample image with stride [2, 2]
        a = conv_2d(a,
                    filter_dims,
                    filters=num_filters,
                    strides=[2, 2],
                    std='xavier',
                    padding='VALID',
                    activation=tf.nn.relu,
                    scope='conv3')

        # no downsampling with stride [1, 1]
        a = conv_2d(a,
                    filter_dims,
                    filters=num_filters,
                    strides=[1, 1],
                    std='xavier',
                    padding='SAME',
                    activation=tf.nn.relu,
                    scope='conv4')

        num_filters = 2 * num_filters
        # downsample image with stride [2, 2]
        a = conv_2d(a,
                    filter_dims,
                    filters=num_filters,
                    strides=[2, 2],
                    std='xavier',
                    padding='VALID',
                    activation=tf.nn.relu,
                    scope='conv5')

        # no downsampling with stride [1, 1]
        a = conv_2d(a,
                    filter_dims,
                    filters=num_filters,
                    strides=[1, 1],
                    std='xavier',
                    padding='SAME',
                    activation=tf.nn.relu,
                    scope='conv6')

        # downsample image with stride [2, 2]
        # NOTE(review): the filter count is reset to a fixed 32 here and
        # ignores the num_filters argument -- confirm this is intended.
        num_filters = 32
        a = conv_2d(a,
                    filter_dims,
                    filters=num_filters,
                    strides=[2, 2],
                    std='xavier',
                    padding='VALID',
                    activation=tf.nn.relu,
                    scope='conv7')

        # Flatten to one vector per sample before the fully connected layer.
        a = tf.reshape(a, shape=[batch_size, -1])

        a = fully_connected(a,
                            output_units=fc_size,
                            activation=tf.nn.relu,
                            std='xavier',
                            scope='fc')

        # Single-argument call form: prints the same text on Python 2 and 3.
        print("output vector of conv_block is: {}".format(a))
        return a
Пример #3
0
def process(inputs, bypass, name, skip, config, is_training):
    """Build the keypoint branch: a score map and a soft keypoint location.

    Args:
        inputs: input to the network.
        bypass: ground truth to be used when trying to bypass.
        name: name of the siamese branch. The special value "img" means the
            whole image: no input summary is written and only the uncut
            score map is returned.
        skip: whether to apply the bypass information; when truthy the
            result of ``bypass_kp(bypass)`` is returned directly.
        config: configuration object; this function reads
            ``kp_filter_size``, ``kp_input_size``, ``desc_input_size``,
            ``kp_com_strength`` and ``kp_scoremap_softmax_strength``, and
            passes the object to ``get_patch_size_no_aug``.
        is_training: unused here (batch normalization is disabled for this
            branch); kept for a uniform signature.

    Returns:
        A dictionary with "scoremap-uncut" and, unless ``name == "img"``,
        also "scoremap" (centre crop), "xyz" (x and y mapped to the range
        described below, z fixed to zero) and "score".
    """

    # let's look at the inputs that get fed into this layer except when we are
    # looking at the whole image
    if name != "img":
        image_summary_nhwc(name + "-input", inputs)

    if skip:
        return bypass_kp(bypass)

    # we always expect a dictionary as return value to be more explicit
    res = {}

    cur_in = inputs

    # NOTE: batch normalization (on the input and after the convolution) is
    # intentionally not applied in this branch.
    with tf.variable_scope("conv-ghh-1"):
        nu = 1
        ns = 4
        nm = 4
        cur_in = conv_2d(cur_in, config.kp_filter_size, nu * ns * nm, 1,
                         "VALID")
        cur_in = ghh(cur_in, ns, nm)

    res["scoremap-uncut"] = cur_in

    # ---------------------------------------------------------------------
    # Check how much we need to cut
    kp_input_size = config.kp_input_size
    patch_size = get_patch_size_no_aug(config)
    desc_input_size = config.desc_input_size
    rf = float(kp_input_size) / float(patch_size)

    input_shape = get_tensor_shape(inputs)
    uncut_shape = get_tensor_shape(cur_in)
    req_boundary = np.ceil(rf * np.sqrt(2) * desc_input_size / 2.0).astype(int)
    # Border already removed on each side by the "VALID" convolution.
    cur_boundary = (input_shape[2] - uncut_shape[2]) // 2
    crop_size = req_boundary - cur_boundary

    # Stop building the network outputs if we are building for the full image
    if name == "img":
        return res

    # Crop center. A zero crop must keep the whole map: the slice `0:-0`
    # would be empty, so the stop is left open in that case.
    center = slice(crop_size, -crop_size or None)
    cur_in = cur_in[:, center, center, :]
    res["scoremap"] = cur_in

    # ---------------------------------------------------------------------
    # Mapping layer to x,y,z
    com_strength = config.kp_com_strength
    scoremap_shape = get_tensor_shape(cur_in)

    od = len(scoremap_shape)
    # CoM to get the coordinates: x runs along axis 2, y along axis 1
    pos_array_x = tf.range(scoremap_shape[2], dtype=tf.float32)
    pos_array_y = tf.range(scoremap_shape[1], dtype=tf.float32)

    out = cur_in
    # Subtract the per-sample maximum before exponentiating so that the
    # largest exponent is zero.
    max_out = tf.reduce_max(out, axis=list(range(1, od)), keep_dims=True)
    o = tf.exp(com_strength * (out - max_out))
    sum_o = tf.reduce_sum(o, axis=list(range(1, od)), keep_dims=True)
    x = tf.reduce_sum(o * tf.reshape(pos_array_x, [1, 1, -1, 1]),
                      axis=list(range(1, od)),
                      keep_dims=True) / sum_o
    y = tf.reduce_sum(o * tf.reshape(pos_array_y, [1, -1, 1, 1]),
                      axis=list(range(1, od)),
                      keep_dims=True) / sum_o

    # Remove the unnecessary dimensions (i.e. flatten them)
    x = tf.reshape(x, (-1, ))
    y = tf.reshape(y, (-1, ))

    # --------------
    # Turn x, and y into range -1 to 1, where the patch size is
    # mapped to -1 and 1
    orig_patch_width = (scoremap_shape[2] +
                        np.cast["float32"](req_boundary * 2.0))
    orig_patch_height = (scoremap_shape[1] +
                         np.cast["float32"](req_boundary * 2.0))

    x = ((x + np.cast["float32"](req_boundary)) / np.cast["float32"](
        (orig_patch_width - 1.0) * 0.5) - np.cast["float32"](1.0))
    y = ((y + np.cast["float32"](req_boundary)) / np.cast["float32"](
        (orig_patch_height - 1.0) * 0.5) - np.cast["float32"](1.0))

    # --------------
    # No movement in z direction
    z = tf.zeros_like(x)

    res["xyz"] = tf.stack([x, y, z], axis=1)

    # ---------------------------------------------------------------------
    # Score: softmax of the cropped score map over all non-batch axes
    res["score"] = softmax(
        res["scoremap"],
        axis=list(range(1, od)),
        softmax_strength=config.kp_scoremap_softmax_strength)

    return res
Пример #4
0
def process(inputs, bypass, name, skip, config, is_training):
    """Build the orientation branch and return a unit-norm 2-vector.

    Args:
        inputs: input to the network.
        bypass: ground truth to be used when trying to bypass.
        name: name of the siamese branch (used to label the input summary).
        skip: whether to apply the bypass information; when truthy the
            result of ``bypass_ori(bypass)`` is returned directly.
        config: configuration object; this function reads
            ``use_input_batch_norm``, ``use_batch_norm``, ``ori_activation``
            and ``use_dropout_ori``.
        is_training: forwarded to ``batch_norm`` as ``training``.

    Returns:
        A dictionary with the single key "cs": the network output reshaped
        to ``(-1, 2)`` after normalization to unit L2 norm.

    Raises:
        RuntimeError: if ``config.ori_activation`` is neither 'ghh' nor
            'tanh', or if ``config.use_dropout_ori`` is set (dropout is
            explicitly disabled in this branch).
    """

    # let's look at the inputs that get fed into this layer
    image_summary_nhwc(name + "-input", inputs)

    if skip:
        return bypass_ori(bypass)

    def _rectify(tensor, ns, nm):
        """Apply the rectifier selected by config.ori_activation."""
        if config.ori_activation == 'ghh':
            return ghh(tensor, ns, nm)
        if config.ori_activation == 'tanh':
            return tf.nn.tanh(tensor)
        raise RuntimeError("Bad orientation rectifier")

    # we always expect a dictionary as return value to be more explicit
    res = {}

    cur_in = inputs

    # lets apply batch normalization on the input - we did not normalize the
    # input range!
    with tf.variable_scope("input-bn"):
        if config.use_input_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)

    # Three conv -> (batch norm) -> relu -> max-pool stages. The two numbers
    # are the second and third conv_2d arguments (presumably kernel size and
    # output channels -- confirm against conv_2d).
    conv_stages = (
        ("conv-act-pool-1", 5, 10),
        ("conv-act-pool-2", 5, 20),
        ("conv-act-pool-3", 3, 50),
    )
    for stage_scope, conv_arg_a, conv_arg_b in conv_stages:
        with tf.variable_scope(stage_scope):
            cur_in = conv_2d(cur_in, conv_arg_a, conv_arg_b, 1, "VALID")
            if config.use_batch_norm:
                cur_in = batch_norm(cur_in, training=is_training)
            cur_in = tf.nn.relu(cur_in)
            cur_in = pool_max(cur_in, 2, 2, "VALID")

    # NOTE(review): with 'tanh' the layer width stays nu * ns * nm because
    # tanh is element-wise, so the final reshape to (-1, 2) gives one row per
    # sample only if ghh reduces the width to nu -- confirm the 'tanh' path.
    with tf.variable_scope("fc-ghh-drop-4"):
        nu = 100
        ns = 4
        nm = 4
        cur_in = fc(cur_in, nu * ns * nm)
        if config.use_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)
        cur_in = _rectify(cur_in, ns, nm)
        if config.use_dropout_ori:
            # Dropout is deliberately rejected here; the unreachable
            # tf.nn.dropout call that used to follow this raise was removed.
            raise RuntimeError('Dropout not working properly!')

    with tf.variable_scope("fc-ghh-5"):
        nu = 2
        ns = 4
        nm = 4
        cur_in = fc(cur_in, nu * ns * nm)
        if config.use_batch_norm:
            cur_in = batch_norm(cur_in, training=is_training)
        cur_in = _rectify(cur_in, ns, nm)

    with tf.variable_scope("cs-norm"):
        div_eps = 1e-10
        # First, normalize according to the maximum of the two
        cur_in_abs_max = tf.reduce_max(tf.abs(cur_in), axis=1, keep_dims=True)
        cur_in = cur_in / tf.maximum(div_eps, cur_in_abs_max)
        # Push every component away from zero by a small signed offset to
        # avoid the singularity
        offset_eps = 1e-3
        cur_in += (tf.to_float(cur_in >= 0) * offset_eps -
                   tf.to_float(cur_in < 0) * offset_eps)
        # Now make norm one without worrying about div by zero
        cur_in_norm = tf.sqrt(tf.reduce_sum(tf.square(
            cur_in), axis=1, keep_dims=True))
        cur_in /= cur_in_norm

    res["cs"] = tf.reshape(cur_in, (-1, 2))

    return res
Пример #5
0
def process(inputs, bypass, name, skip, config, is_training):
    """Build the descriptor branch and return the descriptor tensor.

    inputs: input to the network
    bypass: ground truth for bypassing (not used; the descriptor is never
        skipped)
    name: name of the siamese branch (labels the input summary)
    skip: must be exactly ``False``
    config: configuration object; ``desc_activ``, ``desc_pool``,
        ``use_input_batch_norm`` and ``use_batch_norm`` are read
    is_training: forwarded to ``batch_norm`` as ``training``

    Returns a dictionary with the single key "desc": the output of the last
    stage reshaped to ``(-1, 128)``.

    Note
    ----

    The variable-scope reuse flag is handled by the caller and simply
    inherited here.

    """

    # We never skip descriptor
    assert skip is False

    # let's look at the inputs that get fed into this layer
    image_summary_nhwc(name + "-input", inputs)

    # Resolve the activation function up front.
    activ_name = config.desc_activ
    if activ_name not in ("tanh", "relu"):
        raise RuntimeError('Unknown activation type')
    activ = tf.nn.tanh if activ_name == "tanh" else tf.nn.relu

    def pool(tensor, desc_pool, ksize):
        """Pool `tensor` with a ksize x ksize window and equal stride."""
        if desc_pool == "l2_pool":
            return pool_l2(tensor, ksize, ksize, "VALID")
        window = (1, ksize, ksize, 1)
        if desc_pool == "max_pool":
            return tf.nn.max_pool(tensor, window, window, "VALID")
        if desc_pool == "avg_pool":
            return tf.nn.avg_pool(tensor, window, window, "VALID")
        raise RuntimeError('Unknown pooling type')

    net = inputs

    # The input range was not normalized beforehand, so optionally batch
    # normalize the raw input.
    with tf.variable_scope("input-bn"):
        if config.use_input_batch_norm:
            net = batch_norm(net, training=is_training)

    # (scope name, 2nd conv_2d argument, 3rd conv_2d argument, pooling size)
    stages = (
        ("conv-act-pool-norm-1", 7, 32, 2),
        ("conv-act-pool-norm-2", 6, 64, 3),
        ("conv-act-pool-3", 5, 128, 4),
    )
    for scope_name, conv_arg_a, conv_arg_b, pool_size in stages:
        with tf.variable_scope(scope_name):
            net = conv_2d(net, conv_arg_a, conv_arg_b, 1, "VALID")
            if config.use_batch_norm:
                net = batch_norm(net, training=is_training)
            net = pool(activ(net), config.desc_pool, pool_size)

    # we always return a dictionary to be more explicit
    return {"desc": tf.reshape(net, (-1, 128))}