Example #1
def capsule(input_tensor,
            input_dim,
            output_dim,
            layer_name,
            input_atoms=8,
            output_atoms=8,
            **routing_args):
    """Builds a fully connected capsule layer.

  Given an input tensor of shape `[batch, input_dim, input_atoms]`, this op
  performs the following:

    1. For each input capsule, multiplies it by the weight variable to get
      votes of shape `[batch, input_dim, output_dim, output_atoms]`.
    2. Scales the votes for each output capsule by iterative routing.
    3. Squashes the output of each capsule to have norm less than one.

  Each capsule of this layer has one weight tensor for each capsule of the layer
  below. Therefore, this layer has the following number of trainable variables:
    w: [input_dim * input_atoms, output_dim * output_atoms]
    b: [output_dim * output_atoms]

  Args:
    input_tensor: tensor, activation output of the layer below.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    **routing_args: dictionary {leaky, num_routing}, args for routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms]`.
  """
    with tf.variable_scope(layer_name):
        # weights variable will hold the state of the weights for the layer
        weights = variables.weight_variable(
            [input_dim, input_atoms, output_dim * output_atoms])
        biases = variables.bias_variable([output_dim, output_atoms])
        with tf.name_scope('Wx_plus_b'):
            # Depthwise matmul: [b, d, c] x [d, c, o_c] = [b, d, o_c]
            # To do this: tile the input, do an element-wise multiplication and
            # reduce-sum over the input_atoms dimension.
            input_tiled = tf.tile(tf.expand_dims(input_tensor, -1),
                                  [1, 1, 1, output_dim * output_atoms])
            votes = tf.reduce_sum(input_tiled * weights, axis=2)
            votes_reshaped = tf.reshape(
                votes, [-1, input_dim, output_dim, output_atoms])
        with tf.name_scope('routing'):
            input_shape = tf.shape(input_tensor)
            logit_shape = tf.stack([input_shape[0], input_dim, output_dim])
            activations = _update_routing(votes=votes_reshaped,
                                          biases=biases,
                                          logit_shape=logit_shape,
                                          num_dims=4,
                                          input_dim=input_dim,
                                          output_dim=output_dim,
                                          **routing_args)
        return activations
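
A minimal usage sketch (not part of the listing above; all sizes are illustrative assumptions): applying capsule to a [batch, input_dim, input_atoms] primary-capsule tensor, with num_routing and leaky passed through **routing_args as the docstring describes.

# Sketch only: primary_caps is assumed to be a [batch, 1152, 8] tensor and the
# layer sizes below are illustrative, not prescribed by the code above.
digit_caps = capsule(input_tensor=primary_caps,
                     input_dim=1152,
                     output_dim=10,
                     layer_name='digit_capsules',
                     input_atoms=8,
                     output_atoms=16,
                     num_routing=3,   # forwarded to _update_routing
                     leaky=False)
# digit_caps: [batch, 10, 16]; the capsule norms can serve as class scores.
logits = tf.norm(digit_caps, axis=-1)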
Example #2
    def inference(self, features):
        """Adds the inference graph ops.

    Builds the architecture of the neural net to derive logits from features.
    The inference graph includes a series of convolution and fully connected
    layers and outputs a [batch, 10] tensor as the logits.

    Args:
      features: Dictionary of batched feature tensors like images and labels.
    Returns:
      A model.Inferred named tuple of expected outputs of the model like
      'logits' and 'remakes' for the reconstructions (to be added).
    """
        image = features['images']
        image_dim = features['height']
        image_depth = features['depth']
        image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])
        conv = self._add_convs(image_4d, [image_depth, 512, 256])
        hidden1 = tf.contrib.layers.flatten(conv)

        with tf.variable_scope('fc1') as scope:
            dim = hidden1.get_shape()[1].value
            weights = variables.weight_variable(shape=[dim, 1024],
                                                stddev=0.1,
                                                verbose=self._hparams.verbose)
            biases = variables.bias_variable(shape=[1024],
                                             verbose=self._hparams.verbose)
            pre_activation = tf.matmul(hidden1, weights) + biases
            hidden2 = tf.nn.relu(pre_activation, name=scope.name)

        with tf.variable_scope('softmax_layer') as scope:
            weights = variables.weight_variable(
                shape=[1024, features['num_classes']],
                stddev=0.1,
                verbose=self._hparams.verbose)
            biases = variables.bias_variable(shape=[features['num_classes']],
                                             verbose=self._hparams.verbose)
            logits = tf.matmul(hidden2, weights) + biases

        return model.Inferred(logits, None)
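
For reference, a sketch of the features dictionary this method reads; the keys come from the code above, while the placeholder shapes, the MNIST-like sizes, and the `model` instance are assumptions for illustration.

import tensorflow as tf

# Hypothetical input-pipeline output; only the keys used by inference() are shown.
features = {
    'images': tf.placeholder(tf.float32, [None, 28 * 28]),  # flattened grayscale images
    'height': 28,        # image_dim used in the reshape above
    'depth': 1,          # number of image channels
    'num_classes': 10,   # width of the softmax layer
}
inferred = model.inference(features)  # `model`: hypothetical instance of this class
# inferred.logits has shape [batch, 10], as described in the docstring.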
Example #3
    def testVariableDeclaration(self):
        """Checks the values, shapes, and names of the declared variables."""
        with tf.Graph().as_default():
            with self.test_session() as sess:
                weights = variables.weight_variable((1, 2), stddev=0.1)
                bias = variables.bias_variable((1,))
                sess.run(tf.global_variables_initializer())
                w_value, b_value = sess.run([weights, bias])
                self.assertNear(w_value[0][0], 0.0, 0.2)
                self.assertNear(w_value[0][1], 0.0, 0.2)
                self.assertEqual(b_value, 0.1)
                trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                self.assertEqual(len(trainable_vars), 2)
                self.assertStartsWith(trainable_vars[0].name, 'weights')
                self.assertStartsWith(trainable_vars[1].name, 'biases')
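
A minimal sketch (assuming truncated-normal weights and constant-0.1 biases; not the project's actual variables module) of helpers that would satisfy this test:

import tensorflow as tf

def weight_variable(shape, stddev=0.1, verbose=False):
    # Truncated-normal initialization keeps values within 2 * stddev of zero,
    # consistent with the assertNear(..., 0.0, 0.2) checks for stddev=0.1.
    # `verbose` presumably toggles summaries in the real helpers; ignored here.
    return tf.get_variable(
        'weights', shape,
        initializer=tf.truncated_normal_initializer(stddev=stddev))

def bias_variable(shape, verbose=False):
    # A constant 0.1 initializer matches assertEqual(b_value, 0.1).
    return tf.get_variable(
        'biases', shape, initializer=tf.constant_initializer(0.1))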
Example #4
    def inference(self, features):
        """Adds the inference graph ops.

    Builds the architecture of the neural net to derive logits from features.
    The inference graph includes a convolution layer, a primary capsule layer
    and a 10-capsule final layer. Optionally, it also adds the reconstruction
    network on top of the 10-capsule final layer.

    Args:
      features: Dictionary of batched feature tensors like images and labels.
    Returns:
      A model.Inferred named tuple of expected outputs of the model like
      'logits' and 'recons' for the reconstructions.
    """

        image_dim = features['height']
        image_depth = features['depth']
        image = features['images']
        image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])

        # ReLU Convolution
        with tf.variable_scope('conv1') as scope:
            kernel = variables.weight_variable(shape=[9, 9, image_depth, 256],
                                               stddev=5e-2,
                                               verbose=self._hparams.verbose)
            biases = variables.bias_variable([256],
                                             verbose=self._hparams.verbose)
            conv1 = tf.nn.conv2d(image_4d,
                                 kernel, [1, 1, 1, 1],
                                 padding=self._hparams.padding,
                                 data_format='NCHW')
            pre_activation = tf.nn.bias_add(conv1, biases, data_format='NCHW')
            relu1 = tf.nn.relu(pre_activation, name=scope.name)
            if self._hparams.verbose:
                tf.summary.histogram('activation', relu1)
        hidden1 = tf.expand_dims(relu1, 1)

        # Capsules
        capsule_output = self._build_capsule(hidden1, features['num_classes'])
        logits = tf.norm(capsule_output, axis=-1)

        # Reconstruction
        if self._hparams.remake:
            remake = self._remake(features, capsule_output)
        else:
            remake = None

        return model.Inferred(logits, remake)
Example #5
    def _add_convs(self, input_tensor, channels):
        """Adds the convolution layers.

    Adds a series of convolution layers with ReLU nonlinearity and pooling
    after each of them.

    Args:
      input_tensor: a 4D float tensor as the input to the first convolution.
      channels: A list of channel sizes for input_tensor and the following
        convolution layers. The number of channels in input_tensor should be
        equal to channels[0].
    Returns:
      A 4D tensor as the output of the last pooling layer.
    """
        for i in range(1, len(channels)):
            with tf.variable_scope('conv{}'.format(i)) as scope:
                kernel = variables.weight_variable(
                    shape=[5, 5, channels[i - 1], channels[i]],
                    stddev=5e-2,
                    verbose=self._hparams.verbose)
                conv = tf.nn.conv2d(input_tensor,
                                    kernel, [1, 1, 1, 1],
                                    padding=self._hparams.padding,
                                    data_format='NCHW')
                biases = variables.bias_variable([channels[i]],
                                                 verbose=self._hparams.verbose)
                pre_activation = tf.nn.bias_add(conv,
                                                biases,
                                                data_format='NCHW')
                relu = tf.nn.relu(pre_activation, name=scope.name)
                if self._hparams.verbose:
                    tf.summary.histogram('activation', relu)
                input_tensor = tf.contrib.layers.max_pool2d(relu,
                                                            kernel_size=2,
                                                            stride=2,
                                                            data_format='NCHW',
                                                            padding='SAME')

        return input_tensor
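
An illustrative shape check (assumptions: a hypothetical `model` instance, padding='SAME', and a GPU build, since the NCHW data format is generally not supported by CPU convolution kernels): each stride-1 convolution keeps the spatial grid and each 2x2, stride-2 max-pool halves it.

x = tf.zeros([4, 1, 28, 28])              # NCHW batch of single-channel 28x28 images
out = model._add_convs(x, [1, 512, 256])  # channels[0] must match the input channels
# Two conv+pool blocks: 28x28 -> 14x14 -> 7x7, and the last conv has 256 channels,
# so `out` has static shape [4, 256, 7, 7].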
Example #6
def conv_slim_capsule(input_tensor,
                      input_dim,
                      output_dim,
                      layer_name,
                      input_atoms=8,
                      output_atoms=8,
                      stride=2,
                      kernel_size=5,
                      padding='SAME',
                      **routing_args):
    """Builds a slim convolutional capsule layer.

  This layer performs a 2D convolution given a 5D input tensor of shape
  `[batch, input_dim, input_atoms, input_height, input_width]`. It then refines
  the votes with routing and applies the squash nonlinearity to each capsule.

  Each capsule in this layer is a convolutional unit and shares its kernel over
  the position grid and the different capsules of the layer below. Therefore, the
  number of trainable variables in this layer is:

    kernel: [kernel_size, kernel_size, input_atoms, output_dim * output_atoms]
    bias: [output_dim, output_atoms]

  The output of a conv2d layer can be viewed as a single capsule whose atoms are
  its channels. Therefore conv_slim_capsule is suitable to be added on top of a
  conv2d layer with num_routing=1, input_dim=1 and input_atoms=conv_channels.

  Args:
    input_tensor: tensor of rank 5. The last two dimensions represent the
      height and width position grid.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    stride: scalar, stride of the convolutional kernel.
    kernel_size: scalar, convolutional kernels are [kernel_size, kernel_size].
    padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels.
    **routing_args: dictionary {leaky, num_routing}, args to be passed to the
      update_routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms, out_height, out_width]`. With 'SAME'
      padding and stride 1, out_height = in_height and out_width = in_width;
      otherwise the spatial size follows the usual tf.nn.conv2d rules for the
      given padding and stride.
  """
    with tf.variable_scope(layer_name):
        kernel = variables.weight_variable(shape=[
            kernel_size, kernel_size, input_atoms, output_dim * output_atoms
        ])
        biases = variables.bias_variable([output_dim, output_atoms, 1, 1])
        votes, votes_shape, input_shape = _depthwise_conv3d(
            input_tensor, kernel, input_dim, output_dim, input_atoms,
            output_atoms, stride, padding)

        with tf.name_scope('routing'):
            logit_shape = tf.stack([
                input_shape[0], input_dim, output_dim, votes_shape[2],
                votes_shape[3]
            ])
            biases_replicated = tf.tile(biases,
                                        [1, 1, votes_shape[2], votes_shape[3]])
            activations = _update_routing(votes=votes,
                                          biases=biases_replicated,
                                          logit_shape=logit_shape,
                                          num_dims=6,
                                          input_dim=input_dim,
                                          output_dim=output_dim,
                                          **routing_args)
        return activations
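
Following the docstring's note about stacking on a conv2d layer, a usage sketch (the capsule sizes, kernel size and padding below are illustrative assumptions, roughly the primary-capsule setup of Example #4):

# relu1 is assumed to be the [batch, 256, height, width] output of a conv2d layer
# such as 'conv1' in Example #4; expand_dims turns it into one 256-atom capsule.
hidden1 = tf.expand_dims(relu1, 1)   # [batch, 1, 256, height, width]
primary_caps = conv_slim_capsule(hidden1,
                                 input_dim=1,
                                 output_dim=32,
                                 layer_name='primary_capsules',
                                 input_atoms=256,
                                 output_atoms=8,
                                 stride=2,
                                 kernel_size=9,
                                 padding='VALID',
                                 num_routing=1,   # routing_args
                                 leaky=False)
# primary_caps: [batch, 32, 8, out_height, out_width]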