Example #1
    def resize_shortest_edge(image, image_shape, size):
        shape = tf.cast(image_shape, tf.float32)
        w_greater = tf.greater(image_shape[0], image_shape[1])  # despite the name: true when height > width (shape is h, w, c)
        shape = tf.cond(w_greater,
                        lambda: tf.cast([shape[0] / shape[1] * size, size], tf.int32),
                        lambda: tf.cast([size, shape[1] / shape[0] * size], tf.int32))

        return uint8_resize_bicubic(image, shape)
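
To make the shape arithmetic concrete, here is a minimal sketch of what resize_shortest_edge computes (plain Python, values illustrative and not from the example):

    # Sketch: the shorter edge is mapped to `size`, keeping the aspect ratio.
    h, w, size = 480, 640, 224
    if h > w:
        new_shape = (int(h / w * size), size)   # width is the shorter edge
    else:
        new_shape = (size, int(w / h * size))   # height is the shorter edge
    assert new_shape == (224, 298)              # 640 / 480 * 224 = 298.67 -> 298
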
    def build_graph(self, image, label):
        xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                        for x in range(WARP_TARGET_SIZE)], dtype='float32')
        xys = tf.constant(xys, dtype=tf.float32, name='xys')    # p x 3

        image = image / 255.0 - 0.5  # b x h x w x 2

        def get_stn(image):
            stn = (LinearWrap(image)
                   .AvgPooling('downsample', 2)
                   .Conv2D('conv0', 20, 5, padding='VALID')
                   .MaxPooling('pool0', 2)
                   .Conv2D('conv1', 20, 5, padding='VALID')
                   .FullyConnected('fc1', 32)
                   .FullyConnected('fct', 6, activation=tf.identity,
                                   kernel_initializer=tf.constant_initializer(),
                                   bias_initializer=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
            # output 6 parameters for affine transformation
            stn = tf.reshape(stn, [-1, 2, 3], name='affine')  # bx2x3
            stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1])  # 3 x (bx2)
            coor = tf.reshape(tf.matmul(xys, stn),
                              [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
            sampled = GridSample('warp', [image, coor], borderMode='constant')
            return sampled

        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
            with tf.variable_scope('STN1'):
                sampled1 = get_stn(image)
            with tf.variable_scope('STN2'):
                sampled2 = get_stn(image)

        # For visualization in tensorboard
        with tf.name_scope('visualization'):
            padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            img_orig = tf.concat([image[:, :, :, 0], image[:, :, :, 1]], 1)  # b x 2h x w
            transform1 = tf.concat([padded1[:, :, :, 0], padded1[:, :, :, 1]], 1)
            transform2 = tf.concat([padded2[:, :, :, 0], padded2[:, :, :, 1]], 1)
            stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
            tf.summary.image('visualize',
                             tf.expand_dims(stacked, -1), max_outputs=30)

        sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
        logits = (LinearWrap(sampled)
                  .FullyConnected('fc1', 256, activation=tf.nn.relu)
                  .FullyConnected('fc2', 128, activation=tf.nn.relu)
                  .FullyConnected('fct', 19, activation=tf.identity)())
        tf.nn.softmax(logits, name='prob')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), tf.float32, name='incorrect_vector')
        summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        summary.add_moving_summary(cost, wd_cost)
        return tf.add_n([wd_cost, cost], name='cost')
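
The reshapes and transposes in get_stn are easy to misread, so here is a minimal NumPy sketch of the same shape algebra (names and sizes are illustrative, not part of the model): each homogeneous target pixel (y, x, 1) is multiplied against the batch of 2x3 affine matrices, yielding one (y, x) source coordinate per pixel per image.

    import numpy as np

    B, S = 4, 28                                        # batch size, warp target size
    theta = np.random.randn(B, 2, 3).astype('float32')  # 6 affine params per image
    xys = np.array([(y, x, 1) for y in range(S) for x in range(S)],
                   dtype='float32')                     # p x 3, with p = S * S

    stn = theta.transpose(2, 0, 1).reshape(3, -1)       # 3 x (B*2), as in the model
    coor = (xys @ stn).reshape(S, S, B, 2)              # h x w x b x 2
    coor = coor.transpose(2, 0, 1, 3)                   # b x h x w x 2 sampling grid
    assert coor.shape == (B, S, S, 2)
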
@layer_register(log_shape=True)  # tensorpack decorator; enables the GridSample('name', [image, coor]) call style used above
def GridSample(inputs, borderMode='repeat'):
    """
    Sample the images using the given coordinates, by bilinear interpolation.
    This was described in the paper:
    `Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.

    This is equivalent to `torch.nn.functional.grid_sample`,
    up to some non-trivial coordinate transformation.

    This implementation returns the pixel value at pixel (1, 1) for a floating-point
    coordinate of (1.0, 1.0). Note that this may not be what you need.

    Args:
        inputs (list): [images, coords]. images has shape NHWC.
            coords has shape (N, H', W', 2), where each pair in the last dimension is a
            real-valued (y, x) coordinate.
        borderMode: either "repeat" or "constant" (zero-filled)

    Returns:
        tf.Tensor: a tensor named ``output`` of shape (N, H', W', C).
    """
    image, mapping = inputs
    assert image.get_shape().ndims == 4 and mapping.get_shape().ndims == 4
    input_shape = image.get_shape().as_list()[1:]
    assert None not in input_shape, \
        "Images in GridSample layer must have fully-defined shape"
    assert borderMode in ['repeat', 'constant']

    orig_mapping = mapping
    mapping = tf.maximum(mapping, 0.0)
    lcoor = tf.floor(mapping)
    ucoor = lcoor + 1

    diff = mapping - lcoor
    neg_diff = 1.0 - diff  # bxh2xw2x2

    lcoory, lcoorx = tf.split(lcoor, 2, 3)
    ucoory, ucoorx = tf.split(ucoor, 2, 3)

    lyux = tf.concat([lcoory, ucoorx], 3)
    uylx = tf.concat([ucoory, lcoorx], 3)

    diffy, diffx = tf.split(diff, 2, 3)
    neg_diffy, neg_diffx = tf.split(neg_diff, 2, 3)

    ret = tf.add_n([sample(image, lcoor) * neg_diffx * neg_diffy,
                    sample(image, ucoor) * diffx * diffy,
                    sample(image, lyux) * neg_diffy * diffx,
                    sample(image, uylx) * diffy * neg_diffx], name='sampled')
    if borderMode == 'constant':
        max_coor = tf.constant([input_shape[0] - 1, input_shape[1] - 1], dtype=tf.float32)
        mask = tf.greater_equal(orig_mapping, 0.0)
        mask2 = tf.less_equal(orig_mapping, max_coor)
        mask = tf.logical_and(mask, mask2)  # bxh2xw2x2
        mask = tf.reduce_all(mask, [3])  # bxh2xw2 boolean
        mask = tf.expand_dims(mask, 3)
        ret = ret * tf.cast(mask, tf.float32)
    return tf.identity(ret, name='output')
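
As a quick sanity check (a hedged sketch, not part of the original example): feeding GridSample an identity grid of integer pixel coordinates should reproduce the input image.

    # Sketch: an identity sampling grid returns the image unchanged.
    img = tf.random_uniform([1, 4, 4, 1])                    # NHWC, static shape
    ys, xs = tf.meshgrid(tf.range(4), tf.range(4), indexing='ij')
    grid = tf.cast(tf.stack([ys, xs], axis=-1), tf.float32)  # 4 x 4 x 2, (y, x)
    grid = tf.expand_dims(grid, 0)                           # 1 x 4 x 4 x 2
    out = GridSample('identity_warp', [img, grid])           # equals img
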
Example #4
    def build_graph(self, image, label):
        """This function should build the model which takes the input variables (defined above)
        and return the cost at the end."""

        # In TensorFlow, inputs to the convolution functions are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1   # center the pixel values at zero
        # The context manager `argscope` sets the default option for all the layers under
        # this context. Here we use 32 channel convolution with shape 3x3
        # See tutorial at https://tensorpack.readthedocs.io/tutorial/symbolic.html
        with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu, filters=32):
            # LinearWrap is just syntactic sugar.
            # See tutorial at https://tensorpack.readthedocs.io/tutorial/symbolic.html
            logits = (LinearWrap(image)
                      .Conv2D('conv0')
                      .MaxPooling('pool0', 2)
                      .Conv2D('conv1')
                      .Conv2D('conv2')
                      .MaxPooling('pool1', 2)
                      .Conv2D('conv3')
                      .FullyConnected('fc0', 512, activation=tf.nn.relu)
                      .Dropout('dropout', rate=0.5)
                      .FullyConnected('fc1', 10, activation=tf.identity)())

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        correct = tf.cast(tf.nn.in_top_k(predictions=logits, targets=label, k=1), tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
        # 1. written to tensorboard
        # 2. written to stat.json
        # 3. printed after each epoch
        # You can also just call `tf.summary.scalar`. But moving summary has some other benefits.
        # See tutorial at https://tensorpack.readthedocs.io/tutorial/summary.html
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        # If you don't like regex, you can certainly define the cost in any other methods.
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # monitor the histogram of all weights (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost
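
For readers new to tensorpack: argscope only supplies default keyword arguments, so the first layer of the chain above is equivalent to the explicit call below (a sketch of the equivalence, nothing new):

    # Without argscope, 'conv0' in the chain above would read:
    l = Conv2D('conv0', image, filters=32, kernel_size=3, activation=tf.nn.relu)
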
Example #5
    def build_graph(self, input_img, target_img):

        target_img = tf.cast(target_img, tf.float32)
        target_img = tf.image.rgb_to_grayscale(target_img)

        self.prediction_img = tf.identity(self.make_prediction(input_img),
                                          name='prediction_img')

        cost = tf.losses.mean_squared_error(target_img,
                                            self.prediction_img,
                                            reduction=tf.losses.Reduction.MEAN)
        return tf.identity(cost, name='total_costs')
Example #6
 def image_preprocess(self, image):
     with tf.name_scope('image_preprocess'):
         if image.dtype.base_dtype != tf.float32:
             image = tf.cast(image, tf.float32)
         mean = [0.485, 0.456, 0.406]    # rgb
         std = [0.229, 0.224, 0.225]
         if self.image_bgr:
             mean = mean[::-1]
             std = std[::-1]
         image_mean = tf.constant(mean, dtype=tf.float32) * 255.
         image_std = tf.constant(std, dtype=tf.float32) * 255.
         image = (image - image_mean) / image_std
         return image
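
As a concrete check (an illustrative sketch, not part of the example), this is what the normalization above does to a single mid-gray RGB pixel:

    # Sketch: normalizing one RGB pixel with the constants above.
    import numpy as np
    mean = np.array([0.485, 0.456, 0.406]) * 255.   # per-channel mean on [0, 255]
    std = np.array([0.229, 0.224, 0.225]) * 255.
    pixel = np.array([128., 128., 128.])
    normalized = (pixel - mean) / std               # ~= [0.074, 0.205, 0.427]
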
    def build_graph(self, image, label):
        """This function should build the model which takes the input variables
        and return the cost at the end"""

        # In TensorFlow, inputs to the convolution functions are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1   # center the pixel values at zero

        # The context manager `argscope` sets the default option for all the layers under
        # this context. Here we use 32 channel convolution with shape 3x3
        with argscope([tf.layers.conv2d], padding='same', activation=tf.nn.relu):
            l = tf.layers.conv2d(image, 32, 3, name='conv0')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv1')
            l = tf.layers.conv2d(l, 32, 3, name='conv2')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv3')
            l = tf.layers.flatten(l)
            l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0')
            l = tf.layers.dropout(l, rate=0.5, training=self.training)
        logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1')

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
        # 1. written to tensorboard
        # 2. written to stat.json
        # 3. printed after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        # If you don't like regex, you can certainly define the cost in any other methods.
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # monitor the histogram of all weights (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost
Example #8
    def make_prediction(self, img):

        img = tf.cast(img, tf.float32)
        img = tf.image.rgb_to_grayscale(img)

        k = tf.get_variable('filter',
                            dtype=tf.float32,
                            initializer=[[[[0.]], [[1.]], [[0.]]],
                                         [[[1.]], [[-4.]], [[1.]]],
                                         [[[0.]], [[1.]], [[0.]]]])
        prediction_img = tf.nn.conv2d(img,
                                      k,
                                      strides=[1, 1, 1, 1],
                                      padding='SAME')
        return prediction_img
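
The `filter` variable above is the discrete Laplacian kernel, so the model's "prediction" is an edge map of the grayscale input. A tiny NumPy sketch of one application of that kernel (values illustrative):

    # Sketch: the Laplacian responds strongly where intensity changes.
    import numpy as np
    k = np.array([[0, 1, 0],
                  [1, -4, 1],
                  [0, 1, 0]], dtype=np.float32)
    patch = np.array([[1, 2, 1],
                      [2, 5, 2],
                      [1, 2, 1]], dtype=np.float32)   # a small bright bump
    response = float((k * patch).sum())               # -12.0
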
Example #9
    def build_graph(self, image, label):
        drop_rate = tf.constant(0.5 if self.training else 0.0)

        if self.training:
            tf.summary.image("train_image", image, 10)
        if tf.test.is_gpu_available():
            image = tf.transpose(image, [0, 3, 1, 2])
            data_format = 'channels_first'
        else:
            data_format = 'channels_last'

        image = image / 4.0  # just to make range smaller
        with argscope(Conv2D, activation=BNReLU, use_bias=False, kernel_size=3), \
                argscope([Conv2D, MaxPooling, BatchNorm], data_format=data_format):
            logits = LinearWrap(image) \
                .Conv2D('conv1.1', filters=64) \
                .Conv2D('conv1.2', filters=64) \
                .MaxPooling('pool1', 3, stride=2, padding='SAME') \
                .Conv2D('conv2.1', filters=128) \
                .Conv2D('conv2.2', filters=128) \
                .MaxPooling('pool2', 3, stride=2, padding='SAME') \
                .Conv2D('conv3.1', filters=128, padding='VALID') \
                .Conv2D('conv3.2', filters=128, padding='VALID') \
                .FullyConnected('fc0', 1024 + 512, activation=tf.nn.relu) \
                .Dropout(rate=drop_rate) \
                .FullyConnected('fc1', 512, activation=tf.nn.relu) \
                .FullyConnected('linear', out_dim=self.cifar_classnum)()

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        correct = tf.cast(tf.nn.in_top_k(predictions=logits,
                                         targets=label,
                                         k=1),
                          tf.float32,
                          name='correct')
        # monitor training error
        add_moving_summary(tf.reduce_mean(correct, name='accuracy'))

        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W',
                                  l2_regularizer(4e-4),
                                  name='regularize_loss')
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram']))  # monitor W
        return tf.add_n([cost, wd_cost], name='cost')
Example #10
    def build_graph(self, *inputs):
        inputs = dict(zip(self.input_names, inputs))
        if "gt_masks_packed" in inputs:
            gt_masks = tf.cast(unpackbits_masks(inputs.pop("gt_masks_packed")),
                               tf.uint8,
                               name="gt_masks")
            inputs["gt_masks"] = gt_masks

        image = self.preprocess(inputs['image'])  # 1CHW

        features = self.backbone(image)
        anchor_inputs = {
            k: v
            for k, v in inputs.items() if k.startswith('anchor_')
        }
        proposals, rpn_losses = self.rpn(image, features,
                                         anchor_inputs)  # inputs?

        targets = [
            inputs[k] for k in ['gt_boxes', 'gt_labels', 'gt_masks']
            if k in inputs
        ]
        gt_boxes_area = tf.reduce_mean(tf_area(inputs["gt_boxes"]),
                                       name='mean_gt_box_area')
        add_moving_summary(gt_boxes_area)
        head_losses = self.roi_heads(image, features, proposals, targets)

        if self.training:
            wd_cost = regularize_cost('.*/W',
                                      l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                                      name='wd_cost')
            total_cost = tf.add_n(rpn_losses + head_losses + [wd_cost],
                                  'total_cost')
            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            # Check that the model defines the tensors it declares for inference
            # For existing models, they are defined in "fastrcnn_predictions(name_scope='output')"
            G = tf.get_default_graph()
            ns = G.get_name_scope()
            for name in self.get_inference_tensor_names()[1]:
                try:
                    name = '/'.join([ns, name]) if ns else name
                    G.get_tensor_by_name(name + ':0')
                except KeyError:
                    raise KeyError(
                        "Your model does not define the tensor '{}' in inference context."
                        .format(name))
Example #11
    def build_graph(self, input_img_bytes):
        # prepare input (png encoded strings to images)
        input_img = tf.map_fn(lambda x: tf.image.decode_png(x, channels=3),
                              input_img_bytes,
                              dtype=tf.uint8)

        # just copy the relevant parts to this graph.
        prediction_img = self.make_prediction(input_img)

        # outputs should be png-encoded strings again
        prediction_img = tf.clip_by_value(prediction_img, 0, 255)
        prediction_img = tf.cast(prediction_img, tf.uint8)
        prediction_img_bytes = tf.map_fn(tf.image.encode_png,
                                         prediction_img,
                                         dtype=tf.string)

        tf.identity(prediction_img_bytes, name='prediction_img_bytes')
Example #12
    def training_mapper(byte):
        jpeg_shape = tf.image.extract_jpeg_shape(byte)  # hwc
        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
            jpeg_shape,
            bounding_boxes=tf.zeros(shape=[0, 0, 4]),
            min_object_covered=0,
            aspect_ratio_range=[0.75, 1.33],
            area_range=[0.08, 1.0],
            max_attempts=10,
            use_image_if_no_bounding_boxes=True)

        is_bad = tf.reduce_sum(tf.cast(tf.equal(bbox_size, jpeg_shape), tf.int32)) >= 2

        def good():
            offset_y, offset_x, _ = tf.unstack(bbox_begin)
            target_height, target_width, _ = tf.unstack(bbox_size)
            crop_window = tf.stack([offset_y, offset_x, target_height, target_width])

            image = tf.image.decode_and_crop_jpeg(
                byte, crop_window, channels=3, **JPEG_OPT)
            image = uint8_resize_bicubic(image, [224, 224])
            return image

        def bad():
            image = tf.image.decode_jpeg(
                tf.reshape(byte, shape=[]), 3, **JPEG_OPT)
            image = resize_shortest_edge(image, jpeg_shape, 224)
            image = center_crop(image, 224)
            return image

        image = tf.cond(is_bad, bad, good)
        # TODO other imgproc
        image = lighting(image, 0.1,
                         eigval=np.array([0.2175, 0.0188, 0.0045], dtype='float32') * 255.0,
                         eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                                          [-0.5808, -0.0045, -0.8140],
                                          [-0.5836, -0.6948, 0.4203]], dtype='float32'))
        image = tf.image.random_flip_left_right(image)
        image = tf.reverse(image, axis=[2])  # to BGR
        return image
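
A hedged sketch of how a mapper like this is typically wired into a tf.data pipeline (the file pattern, parallelism, and batch size are illustrative assumptions, not from the example):

    # Sketch: apply training_mapper to a dataset of raw JPEG strings.
    files = tf.data.Dataset.list_files('train/*.jpg')
    dataset = (files
               .map(tf.read_file)                        # -> raw JPEG bytes
               .map(training_mapper, num_parallel_calls=4)
               .batch(32)
               .prefetch(1))
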
Example #13
    def build_graph(self, image, label):
        image = tf.expand_dims(image * 2 - 1, 3)

        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
            c0 = Conv2D('conv0', image)
            p0 = MaxPooling('pool0', c0, 2)
            c1 = Conv2D('conv1', p0)
            c2 = Conv2D('conv2', c1)
            p1 = MaxPooling('pool1', c2, 2)
            c3 = Conv2D('conv3', p1)
            fc1 = FullyConnected('fc0', c3, 512, nl=tf.nn.relu)
            fc1 = Dropout('dropout', fc1, 0.5)
            logits = FullyConnected('fc1', fc1, out_dim=10, nl=tf.identity)

        with tf.name_scope('visualizations'):
            visualize_conv_weights(c0.variables.W, 'conv0')
            visualize_conv_activations(c0, 'conv0')
            visualize_conv_weights(c1.variables.W, 'conv1')
            visualize_conv_activations(c1, 'conv1')
            visualize_conv_weights(c2.variables.W, 'conv2')
            visualize_conv_activations(c2, 'conv2')
            visualize_conv_weights(c3.variables.W, 'conv3')
            visualize_conv_activations(c3, 'conv3')

            tf.summary.image('input', (image + 1.0) * 128., 3)

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        tf.reduce_mean(tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32),
                       name='accuracy')

        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        return tf.add_n([wd_cost, cost], name='total_cost')
Example #14
    def build_graph(self, image, label):
        image = image / 128.0 - 1

        with argscope(Conv2D, activation=BNReLU, use_bias=False):
            logits = (LinearWrap(image)
                      .Conv2D('conv1', 24, 5, padding='VALID')
                      .MaxPooling('pool1', 2, padding='SAME')
                      .Conv2D('conv2', 32, 3, padding='VALID')
                      .Conv2D('conv3', 32, 3, padding='VALID')
                      .MaxPooling('pool2', 2, padding='SAME')
                      .Conv2D('conv4', 64, 3, padding='VALID')
                      .Dropout('drop', rate=0.5)
                      .FullyConnected('fc0', 512,
                                      bias_initializer=tf.constant_initializer(0.1),
                                      activation=tf.nn.relu)
                      .FullyConnected('linear', units=10)())
        tf.nn.softmax(logits, name='output')

        accuracy = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32)
        add_moving_summary(tf.reduce_mean(accuracy, name='accuracy'))

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wd_cost = regularize_cost('fc.*/W', l2_regularizer(0.00001))
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram', 'rms']))  # monitor W
        return tf.add_n([cost, wd_cost], name='cost')
Example #15
def sample(img, coords):
    """
    Args:
        img: bxhxwxc
        coords: bxh2xw2x2. Each coordinate is an integer (y, x) pair.
            Out-of-boundary coordinates will be clipped.
    Returns:
        bxh2xw2xc image
    """
    shape = img.get_shape().as_list()[1:]   # h, w, c
    batch = tf.shape(img)[0]
    shape2 = coords.get_shape().as_list()[1:3]  # h2, w2
    assert None not in shape2, coords.get_shape()
    max_coor = tf.constant([shape[0] - 1, shape[1] - 1], dtype=tf.float32)

    coords = tf.clip_by_value(coords, 0., max_coor)  # borderMode==repeat
    coords = tf.cast(coords, tf.int32)

    batch_index = tf.range(batch, dtype=tf.int32)
    batch_index = tf.reshape(batch_index, [-1, 1, 1, 1])
    batch_index = tf.tile(batch_index, [1, shape2[0], shape2[1], 1])    # bxh2xw2x1
    indices = tf.concat([batch_index, coords], axis=3)  # bxh2xw2x3
    sampled = tf.gather_nd(img, indices)
    return sampled
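
A small sketch of sample in action (values illustrative): integer (y, x) coordinates index directly into each image of the batch, and out-of-range coordinates are clamped to the border.

    # Sketch: sampling a 4x4 single-channel image at two coordinates.
    img = tf.reshape(tf.range(16, dtype=tf.float32), [1, 4, 4, 1])
    coords = tf.constant([[[[1., 2.], [99., 99.]]]])  # 1 x 1 x 2 x 2 (y, x) pairs
    out = sample(img, coords)  # [6., 15.]: (1, 2) -> 6; (99, 99) clips to (3, 3) -> 15
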
Example #16
 def uint8_resize_bicubic(image, shape):
     ret = tf.image.resize_bicubic([image], shape)
     return tf.cast(tf.clip_by_value(ret, 0, 255), tf.uint8)[0]
Example #17
 def prediction_incorrect(logits, label, topk, name):
     return tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, topk)),
                    tf.float32,
                    name=name)
Example #18
    def roi_heads(self, image, features, proposals, targets):
        image_shape2d = tf.shape(image)[2:]  # h,w
        featuremap = features[0]

        gt_boxes, gt_labels, *_ = targets

        if self.training:
            # sample proposal boxes in training
            proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes,
                                                 gt_labels)
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference

        boxes_on_featuremap = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        feature_fastrcnn = resnet_conv5(
            roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])  # nxcx7x7
        # Keep C5 feature to be shared with mask branch
        feature_gap = GlobalAvgPooling('gap',
                                       feature_fastrcnn,
                                       data_format='channels_first')
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn', feature_gap, cfg.DATA.NUM_CATEGORY)

        fastrcnn_head = FastRCNNHead(
            proposals, fastrcnn_box_logits, fastrcnn_label_logits, gt_boxes,
            tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

        if self.training:
            all_losses = fastrcnn_head.losses()

            if cfg.MODE_MASK:
                gt_masks = targets[2]
                # maskrcnn loss
                # In training, mask branch shares the same C5 feature.
                fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
                    num_convs=0)  # #fg x #cat x 14x14

                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
                    proposals.fg_boxes(),
                    proposals.fg_inds_wrt_gt,
                    14,
                    pad_border=False)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                                 'sampled_fg_mask_targets')
                all_losses.append(
                    maskrcnn_loss(mask_logits, proposals.fg_labels(),
                                  target_masks_for_fg))
            return all_losses
        else:
            decoded_boxes = fastrcnn_head.decoded_output_boxes()
            decoded_boxes = clip_boxes(decoded_boxes,
                                       image_shape2d,
                                       name='fastrcnn_all_boxes')
            label_scores = fastrcnn_head.output_scores(
                name='fastrcnn_all_scores')
            final_boxes, final_scores, final_labels = fastrcnn_predictions(
                decoded_boxes, label_scores, name_scope='output')

            if cfg.MODE_MASK:
                roi_resized = roi_align(
                    featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE),
                    14)
                feature_maskrcnn = resnet_conv5(
                    roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY,
                    0)  # #result x #cat x 14x14
                indices = tf.stack(
                    [tf.range(tf.size(final_labels)),
                     tf.cast(final_labels, tf.int32) - 1], axis=1)
                final_mask_logits = tf.gather_nd(mask_logits,
                                                 indices)  # #resultx14x14
                tf.sigmoid(final_mask_logits, name='output/masks')
            return []
Example #19
 def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
     with tf.name_scope('prediction_incorrect'):
         x = tf.logical_not(tf.nn.in_top_k(logits, label, topk))
     return tf.cast(x, tf.float32, name=name)
Example #20
    def roi_heads(self, image, features, proposals, targets):
        image_shape2d = tf.shape(image)[2:]  # h,w
        assert len(features) == 5, "Features have to be P23456!"
        gt_boxes, gt_labels, *_ = targets

        if self.training:
            proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes,
                                                 gt_labels)

        fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
        if not cfg.FPN.CASCADE:
            roi_feature_fastrcnn = multilevel_roi_align(
                features[:4], proposals.boxes, 7)

            head_feature = fastrcnn_head_func('fastrcnn', roi_feature_fastrcnn)
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
                'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CATEGORY)
            fastrcnn_head = FastRCNNHead(
                proposals, fastrcnn_box_logits, fastrcnn_label_logits,
                gt_boxes,
                tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))
        else:

            def roi_func(boxes):
                return multilevel_roi_align(features[:4], boxes, 7)

            fastrcnn_head = CascadeRCNNHead(proposals, roi_func,
                                            fastrcnn_head_func,
                                            (gt_boxes, gt_labels),
                                            image_shape2d,
                                            cfg.DATA.NUM_CATEGORY)

        if self.training:
            all_losses = fastrcnn_head.losses()

            if cfg.MODE_MASK:
                gt_masks = targets[2]
                # maskrcnn loss
                roi_feature_maskrcnn = multilevel_roi_align(
                    features[:4],
                    proposals.fg_boxes(),
                    14,
                    name_scope='multilevel_roi_align_mask')
                maskrcnn_head_func = getattr(model_mrcnn,
                                             cfg.FPN.MRCNN_HEAD_FUNC)
                mask_logits = maskrcnn_head_func(
                    'maskrcnn', roi_feature_maskrcnn,
                    cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
                    proposals.fg_boxes(),
                    proposals.fg_inds_wrt_gt,
                    28,
                    pad_border=False)  # fg x 1x28x28
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                                 'sampled_fg_mask_targets')
                all_losses.append(
                    maskrcnn_loss(mask_logits, proposals.fg_labels(),
                                  target_masks_for_fg))
            return all_losses
        else:
            decoded_boxes = fastrcnn_head.decoded_output_boxes()
            decoded_boxes = clip_boxes(decoded_boxes,
                                       image_shape2d,
                                       name='fastrcnn_all_boxes')
            label_scores = fastrcnn_head.output_scores(
                name='fastrcnn_all_scores')
            final_boxes, final_scores, final_labels = fastrcnn_predictions(
                decoded_boxes, label_scores, name_scope='output')
            if cfg.MODE_MASK:
                # Cascade inference needs roi transform with refined boxes.
                roi_feature_maskrcnn = multilevel_roi_align(
                    features[:4], final_boxes, 14)
                maskrcnn_head_func = getattr(model_mrcnn,
                                             cfg.FPN.MRCNN_HEAD_FUNC)
                mask_logits = maskrcnn_head_func(
                    'maskrcnn', roi_feature_maskrcnn,
                    cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
                indices = tf.stack(
                    [tf.range(tf.size(final_labels)),
                     tf.cast(final_labels, tf.int32) - 1], axis=1)
                final_mask_logits = tf.gather_nd(mask_logits,
                                                 indices)  # #resultx28x28
                tf.sigmoid(final_mask_logits, name='output/masks')
            return []
Example #21
 def lighting(image, std, eigval, eigvec):
     v = tf.random_normal(shape=[3], stddev=std) * eigval
     inc = tf.matmul(eigvec, tf.reshape(v, [3, 1]))
     image = tf.cast(tf.cast(image, tf.float32) + tf.reshape(inc, [3]), image.dtype)
     return image
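
A hedged usage sketch: lighting implements the AlexNet-style PCA lighting jitter. With the ImageNet eigenvalues and eigenvectors already used in the training_mapper example above, it would be applied like this (the input tensor is illustrative):

    # Sketch: apply lighting noise to one uint8 RGB image tensor.
    image = tf.cast(tf.random_uniform([224, 224, 3], 0, 255), tf.uint8)
    jittered = lighting(
        image, 0.1,
        eigval=np.array([0.2175, 0.0188, 0.0045], dtype='float32') * 255.0,
        eigvec=np.array([[-0.5675, 0.7192, 0.4009],
                         [-0.5808, -0.0045, -0.8140],
                         [-0.5836, -0.6948, 0.4203]], dtype='float32'))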