Example #1
 def _add_convs(self, input_tensor, channels, tower_idx):
     """
     Adds the convolution layers.
     Adds a series of convolution layers with ReLU nonlinearity and pooling
     after each of them.
     :param input_tensor: a 4D tensor as the input to the first Conv layer
     :param channels: a list of channel sizes for input_tensor and following
                      conv layers. Number of channels in input tensor should 
                      be equal to channels[0]
     :param tower_idx: the index number for this tower. Each tower is named
                       as tower_{tower_idx} and resides on 'gpu:{tower_idx}'
     :return: a 4D tensor as the output of the last pooling layer
     """
     for i in range(1, len(channels)):
         with tf.variable_scope('conv{}'.format(i)) as scope:
             kernel = variables.weight_variable(
                 shape=[5, 5, channels[i - 1], channels[i]], stddev=5e-2,
                 verbose=self._hparams.verbose)
             conv = tf.nn.conv2d(
                 input_tensor,
                 kernel, [1, 1, 1, 1],
                 padding=self._hparams.padding,
                 data_format='NCHW')
             biases = variables.bias_variable([channels[i]],
                                              verbose=self._hparams.verbose)
             pre_activation = tf.nn.bias_add(
                 conv, biases, data_format='NCHW', name='logits')
             
             relu = tf.nn.relu(pre_activation, name=scope.name)
             if self._hparams.verbose:
                 tf.summary.histogram('activation', relu)
             input_tensor = tf.contrib.layers.max_pool2d(
                 relu, kernel_size=2, stride=2, data_format='NCHW', padding='SAME')
     
     return input_tensor
Example #2
  def _add_convs(self, input_tensor, channels):
    """Adds the convolution layers.

    Adds a series of convolution layers with ReLU nonlinearity and pooling
    after each of them.

    Args:
      input_tensor: a 4D float tensor as the input to the first convolution.
      channels: A list of channel sizes for input_tensor and following
        convolution layers. Number of channels in input tensor should be
        equal to channels[0].
    Returns:
      A 4D tensor as the output of the last pooling layer.
    """
    for i in xrange(1, len(channels)):
      with tf.variable_scope('conv{}'.format(i)) as scope:
        kernel = variables.weight_variable(
            shape=[5, 5, channels[i - 1], channels[i]], stddev=5e-2,
            verbose=self._hparams.verbose
        )
        conv = tf.nn.conv2d(
            input_tensor,
            kernel, [1, 1, 1, 1],
            padding=self._hparams.padding,
            data_format='NCHW')
        biases = variables.bias_variable([channels[i]],
                                         verbose=self._hparams.verbose)
        pre_activation = tf.nn.bias_add(conv, biases, data_format='NCHW')
        relu = tf.nn.relu(pre_activation, name=scope.name)
        if self._hparams.verbose:
          tf.summary.histogram('activation', relu)
        input_tensor = tf.contrib.layers.max_pool2d(
            relu, kernel_size=2, stride=2, data_format='NCHW', padding='SAME')

    return input_tensor
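Since the 2x2 pooling above uses stride 2, the spatial grid roughly halves after every conv/pool block, so the flattened feature size can be predicted ahead of time. A minimal sketch of that shape bookkeeping, assuming the convolution padding hparam is 'SAME' (names below are illustrative, not part of the project):

import math

def _stack_output_size(in_size, channels, pool_stride=2):
    """Spatial size after the conv + 2x2/stride-2 pool blocks above.

    Assumes the convolutions use 'SAME' padding, so only the pooling
    shrinks the grid; a back-of-the-envelope helper, not model code.
    """
    size = in_size
    for _ in range(1, len(channels)):        # one pool per conv block
        size = int(math.ceil(size / float(pool_stride)))
    return size

# e.g. a 28x28 input through channels [1, 512, 256] -> two blocks -> 7x7,
# so the flattened output above would carry 256 * 7 * 7 features.
assert _stack_output_size(28, [1, 512, 256]) == 7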
Example #3
def capsule(input_tensor,
            input_dim,
            output_dim,
            layer_name,
            input_atoms=8,
            output_atoms=8,
            **routing_args):
    """Builds a fully connected capsule layer.

  Given an input tensor of shape `[batch, input_dim, input_atoms]`, this op
  performs the following:

    1. For each input capsule, multiplies it by the weight variable to get
      votes of shape `[batch, input_dim, output_dim, output_atoms]`.
    2. Scales the votes for each output capsule by iterative routing.
    3. Squashes the output of each capsule to have norm less than one.

  Each capsule of this layer has one weight tensor for each capsule of the layer
  below. Therefore, this layer has the following number of trainable variables:
    w: [input_dim * num_in_atoms, output_dim * num_out_atoms]
    b: [output_dim * num_out_atoms]

  Args:
    input_tensor: tensor, activation output of the layer below.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    **routing_args: dictionary {leaky, num_routing}, args for routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms]`.
  """
    with tf.variable_scope(layer_name):
        # weights variable will hold the state of the weights for the layer
        weights = variables.weight_variable(
            [input_dim, input_atoms, output_dim * output_atoms])
        biases = variables.bias_variable([output_dim, output_atoms])
        with tf.name_scope('Wx_plus_b'):
            # Depthwise matmul: [b, d, c] @ [d, c, o_c] = [b, d, o_c]
            # To do this: tile the input, multiply element-wise and reduce-sum
            # over the input_atoms dimension.
            input_tiled = tf.tile(tf.expand_dims(input_tensor, -1),
                                  [1, 1, 1, output_dim * output_atoms])
            votes = tf.reduce_sum(input_tiled * weights, axis=2)
            votes_reshaped = tf.reshape(
                votes, [-1, input_dim, output_dim, output_atoms])
        with tf.name_scope('routing'):
            input_shape = tf.shape(input_tensor)
            logit_shape = tf.stack([input_shape[0], input_dim, output_dim])
            activations = _update_routing(votes=votes_reshaped,
                                          biases=biases,
                                          logit_shape=logit_shape,
                                          num_dims=4,
                                          input_dim=input_dim,
                                          output_dim=output_dim,
                                          **routing_args)
        return activations
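The 'Wx_plus_b' block computes a per-capsule matmul by tiling the input, multiplying element-wise, and reducing over the atom axis. A small NumPy sketch (shapes and names are illustrative, not taken from the project) showing that this equals the explicit contraction over input_atoms:

import numpy as np

batch, input_dim, input_atoms = 2, 3, 8
output_dim, output_atoms = 4, 16

x = np.random.randn(batch, input_dim, input_atoms)
w = np.random.randn(input_dim, input_atoms, output_dim * output_atoms)

# Tile, multiply element-wise, reduce over the atom axis, as in the layer above.
tiled = np.tile(x[..., None], [1, 1, 1, output_dim * output_atoms])  # [b, d, c, o]
votes_tiled = (tiled * w).sum(axis=2)                                # [b, d, o]

# The same depthwise matmul written as an explicit contraction.
votes_einsum = np.einsum('bdc,dco->bdo', x, w)

assert np.allclose(votes_tiled, votes_einsum)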
Example #4
def capsule(tower_idx, in_tensor, in_dim, in_atoms, out_dim, out_atoms,
            layer_name, reassemble, **routing_args):
    """Builds a fully connected capsule layer.

    Given an input tensor of shape (batch, in_dim, in_atoms), this op
    performs the following:

        1. For each input capsule, multiplies it with the weight variables
        to get votes of shape (batch, in_dim, out_dim, out_atoms);
        2. Scales the votes for each output capsule by routing;
        3. Squashes the output of each capsule to have norm less than one.

    Each capsule of this layer has one weight tensor for each capsule of
    layer below. Therefore, this layer has the following number of 
    trainable variables:
        kernel: (in_dim, in_atoms, out_dim * out_atoms)
        biases: (out_dim, out_atoms)
    
    Args:
        tower_idx: scalar, the index number for this tower. Each tower is
            named tower_{tower_idx} and resides on gpu:{tower_idx}.
        in_tensor: tensor, activation output of the layer below.
        in_dim: scalar, number of capsule types in the layer below.
        in_atoms: scalar, number of units of input capsule.
        out_dim: scalar, number of capsule types in the output layer.
        out_atoms: scalar, number of units of output capsule.
        layer_name: string, the name of this layer.
        reassemble: passed through to the routing procedure.
        **routing_args: dictionary {leaky, num_routing}, args for routing.
    Returns:
        Tensor of activations for this layer of shape (batch, out_dim, out_atoms).
    """
    with tf.variable_scope(layer_name):
        weights = variables.weight_variable(
            [in_dim, in_atoms, out_dim * out_atoms])
        biases = variables.bias_variable([out_dim, out_atoms])
        with tf.name_scope('Wx_plus_b'):
            # Depthwise matmul: [b, d, c] @ [d, c, o_c] = [b, d, o_c]
            # To do this: tile the input, multiply element-wise and reduce-sum
            # over the in_atoms dimension.
            in_tiled = tf.tile(tf.expand_dims(in_tensor, -1),
                               [1, 1, 1, out_dim * out_atoms])
            votes = tf.reduce_sum(in_tiled * weights, axis=2)
            votes_reshaped = tf.reshape(votes,
                                        [-1, in_dim, out_dim, out_atoms])

        with tf.name_scope('routing'):
            in_shape = tf.shape(in_tensor)
            logit_shape = tf.stack([in_shape[0], in_dim, out_dim])
            activations = _update_routing(tower_idx,
                                          votes=votes_reshaped,
                                          biases=biases,
                                          logit_shape=logit_shape,
                                          num_ranks=4,
                                          in_dim=in_dim,
                                          out_dim=out_dim,
                                          reassemble=reassemble,
                                          **routing_args)

        return activations
Example #5
    def inference(self, features):
        """Adds the inference graph ops.

        Builds the architecture of the neural net to drive logits from
        features.
        The inference graph includes a series of convolution and fully
        connected layers and outputs a [batch, 10] tensor as the logits.

        Args:
          features: Dictionary of batched feature tensors like images and
            labels.
        Returns:
          A model.Inferred named tuple of expected outputs of the model like
          'logits' and 'remakes' for the reconstructions (to be added).
        """
        image = features['images']
        image_dim = features['height']
        image_depth = features['depth']
        image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])
        conv = self._add_convs(image_4d, [image_depth, 512, 256])
        hidden1 = tf.contrib.layers.flatten(conv)

        with tf.variable_scope('fc1') as scope:
            dim = hidden1.get_shape()[1].value
            weights = variables.weight_variable(shape=[dim, 1024],
                                                stddev=0.1,
                                                verbose=self._hparams.verbose)
            biases = variables.bias_variable(shape=[1024],
                                             verbose=self._hparams.verbose)
            pre_activation = tf.matmul(hidden1, weights) + biases
            hidden2 = tf.nn.relu(pre_activation, name=scope.name)

        with tf.variable_scope('softmax_layer') as scope:
            weights = variables.weight_variable(
                shape=[1024, features['num_classes']],
                stddev=0.1,
                verbose=self._hparams.verbose)
            biases = variables.bias_variable(shape=[features['num_classes']],
                                             verbose=self._hparams.verbose)
            logits = tf.matmul(hidden2, weights) + biases

        return model.Inferred(logits, None)
Example #6
    def inference(self, features):
        """Adds the inference graph ops.

    Builds the architecture of the neural net to drive logits from features.
    The inference graph includes a convolution layer, a primary capsule layer
    and a 10-capsule final layer. Optionally, it also adds the reconstruction
    network on top of the 10-capsule final layer.

    Args:
      features: Dictionary of batched feature tensors like images and labels.
    Returns:
      A model.Inferred named tuple of expected outputs of the model like
      'logits' and 'recons' for the reconstructions.
    """

        image_dim = features['height']
        image_depth = features['depth']
        image = features['images']
        image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])

        # ReLU Convolution (conv1 layer start)
        with tf.variable_scope('conv1') as scope:
            kernel = variables.weight_variable(shape=[9, 9, image_depth, 256],
                                               stddev=5e-2,
                                               verbose=self._hparams.verbose)
            biases = variables.bias_variable([256],
                                             verbose=self._hparams.verbose)
            conv1 = tf.nn.conv2d(image_4d,
                                 kernel, [1, 1, 1, 1],
                                 padding=self._hparams.padding,
                                 data_format='NCHW')
            pre_activation = tf.nn.bias_add(conv1, biases, data_format='NCHW')
            relu1 = tf.nn.relu(pre_activation, name=scope.name)
            if self._hparams.verbose:
                tf.summary.histogram('activation', relu1)
        # conv1 layer end; returns [128, 256, 20, 20] in NCHW format,
        # then expand dims to [128, 1, 256, 20, 20].
        hidden1 = tf.expand_dims(relu1, 1)

        # Capsules, including primary capsules layer and digit capsules layer.
        # The final output here is [batch_size, 10, 16]
        capsule_output = self._build_capsule(hidden1, features['num_classes'])
        # Calculate the length of v with the vector 2-norm ||v||_2, which
        # equals sqrt(reduce_sum(square(v))); returns [batch_size, 10].
        logits = tf.norm(capsule_output, axis=-1)

        # Reconstruction
        if self._hparams.remake:
            remake = self._remake(features, capsule_output)
        else:
            remake = None

        return model.Inferred(logits, remake)
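As the comments above note, the logits are just the capsule lengths: tf.norm over the last axis is sqrt(reduce_sum(square(v))) per capsule. A quick NumPy check of that identity (illustrative shapes, not part of the model):

import numpy as np

v = np.random.randn(32, 10, 16)           # [batch_size, num_classes, atoms]
lengths = np.linalg.norm(v, axis=-1)      # what tf.norm(capsule_output, axis=-1) computes
assert np.allclose(lengths, np.sqrt(np.square(v).sum(axis=-1)))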
Example #7
  def inference(self, features):
    """Adds the inference graph ops.

    Builds the architecture of the neural net to drive logits from features.
    The inference graph includes a series of convolution and fully connected
    layers and outputs a [batch, 10] tensor as the logits.

    Args:
      features: Dictionary of batched feature tensors like images and labels.
    Returns:
      A model.Inferred named tuple of expected outputs of the model like
      'logits' and 'remakes' for the reconstructions (to be added).
    """
    image = features['images']
    image_dim = features['height']
    image_depth = features['depth']
    image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])
    conv = self._add_convs(image_4d, [image_depth, 512, 256])
    hidden1 = tf.contrib.layers.flatten(conv)

    with tf.variable_scope('fc1') as scope:
      dim = hidden1.get_shape()[1].value
      weights = variables.weight_variable(shape=[dim, 1024], stddev=0.1,
                                          verbose=self._hparams.verbose)
      biases = variables.bias_variable(shape=[1024],
                                       verbose=self._hparams.verbose)
      pre_activation = tf.matmul(hidden1, weights) + biases
      hidden2 = tf.nn.relu(pre_activation, name=scope.name)

    with tf.variable_scope('softmax_layer') as scope:
      weights = variables.weight_variable(
          shape=[1024, features['num_classes']], stddev=0.1,
          verbose=self._hparams.verbose
      )
      biases = variables.bias_variable(shape=[features['num_classes']],
                                       verbose=self._hparams.verbose)
      logits = tf.matmul(hidden2, weights) + biases

    return model.Inferred(logits, None)
Example #8
    def inference(self, features):
        """Adds the inference graph ops.

    Builds the architecture of the neural net to drive logits from features.
    The inference graph includes a convolution layer, a primary capsule layer
    and a 10-capsule final layer. Optionally, it also adds the reconstruction
    network on top of the 10-capsule final layer.

    Args:
      features: Dictionary of batched feature tensors like images and labels.
    Returns:
      A model.Inferred named tuple of expected outputs of the model like
      'logits' and 'recons' for the reconstructions.
    """

        image_height = features['height']
        image_width = features['width']
        image_depth = features['depth']
        image = features['images']
        image_4d = tf.reshape(image,
                              [-1, image_depth, image_height, image_width])

        # ReLU Convolution
        with tf.variable_scope('conv1') as scope:
            kernel = variables.weight_variable(shape=[9, 9, image_depth, 256],
                                               stddev=5e-2,
                                               verbose=self._hparams.verbose)
            biases = variables.bias_variable([256],
                                             verbose=self._hparams.verbose)
            conv1 = tf.nn.conv2d(image_4d,
                                 kernel, [1, 1, 1, 1],
                                 padding=self._hparams.padding,
                                 data_format='NCHW')
            pre_activation = tf.nn.bias_add(conv1, biases, data_format='NCHW')
            relu1 = tf.nn.relu(pre_activation, name=scope.name)
            if self._hparams.verbose:
                tf.summary.histogram('activation', relu1)
        hidden1 = tf.expand_dims(relu1, 1)

        # Capsules
        capsule_output = self._build_capsule(hidden1, features['num_classes'])
        logits = tf.norm(capsule_output, axis=-1)

        # Reconstruction
        if self._hparams.remake:
            remake = self._remake(features, capsule_output)
        else:
            remake = None

        return model.Inferred(logits, remake)
Example #9
 def testVariableDeclaration(self):
   """Checks the value and shape of the squidge output given a rank 2 input."""
   with tf.Graph().as_default():
     with self.test_session() as sess:
       weights = variables.weight_variable((1, 2), stddev=0.1)
       bias = variables.bias_variable((1))
       sess.run(tf.global_variables_initializer())
       w_value, b_value = sess.run([weights, bias])
       self.assertNear(w_value[0][0], 0.0, 0.2)
       self.assertNear(w_value[0][1], 0.0, 0.2)
       self.assertEqual(b_value, 0.1)
       trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
       self.assertEqual(len(trainable_vars), 2)
       self.assertStartsWith(trainable_vars[0].name, 'weights')
       self.assertStartsWith(trainable_vars[1].name, 'biases')
Example #10
  def inference(self, features):
    """Adds the inference graph ops.

    Builds the architecture of the neural net to drive logits from features.
    The inference graph includes a convolution layer, a primary capsule layer
    and a 10-capsule final layer. Optionally, it also adds the reconstruction
    network on top of the 10-capsule final layer.

    Args:
      features: Dictionary of batched feature tensors like images and labels.
    Returns:
      A model.Inferred named tuple of expected outputs of the model like
      'logits' and 'recons' for the reconstructions.
    """

    image_dim = features['height']
    image_depth = features['depth']
    image = features['images']
    image_4d = tf.reshape(image, [-1, image_depth, image_dim, image_dim])

    # ReLU Convolution
    with tf.variable_scope('conv1') as scope:
      kernel = variables.weight_variable(
          shape=[9, 9, image_depth, 256], stddev=5e-2,
          verbose=self._hparams.verbose)
      biases = variables.bias_variable([256], verbose=self._hparams.verbose)
      conv1 = tf.nn.conv2d(
          image_4d,
          kernel, [1, 1, 1, 1],
          padding=self._hparams.padding,
          data_format='NCHW')
      pre_activation = tf.nn.bias_add(conv1, biases, data_format='NCHW')
      relu1 = tf.nn.relu(pre_activation, name=scope.name)
      if self._hparams.verbose:
        tf.summary.histogram('activation', relu1)
    hidden1 = tf.expand_dims(relu1, 1)

    # Capsules
    capsule_output = self._build_capsule(hidden1, features['num_classes'])
    logits = tf.norm(capsule_output, axis=-1)

    # Reconstruction
    if self._hparams.remake:
      remake = self._remake(features, capsule_output)
    else:
      remake = None

    return model.Inferred(logits, remake)
Example #11
 def testVariableDeclaration(self):
     """Checks the value and shape of the squidge output given a rank 2 input."""
     with tf.Graph().as_default():
         with self.test_session() as sess:
             weights = variables.weight_variable((1, 2), stddev=0.1)
             bias = variables.bias_variable((1))
             sess.run(tf.global_variables_initializer())
             w_value, b_value = sess.run([weights, bias])
             self.assertNear(w_value[0][0], 0.0, 0.2)
             self.assertNear(w_value[0][1], 0.0, 0.2)
             self.assertEqual(b_value, 0.1)
             trainable_vars = tf.get_collection(
                 tf.GraphKeys.TRAINABLE_VARIABLES)
             self.assertEqual(len(trainable_vars), 2)
             self.assertStartsWith(trainable_vars[0].name, 'weights')
             self.assertStartsWith(trainable_vars[1].name, 'biases')
Example #12
def conv_slim_capsule(input_tensor,
                      input_dim,
                      output_dim,
                      layer_name,
                      input_atoms=8,
                      output_atoms=8,
                      stride=2,
                      kernel_size=5,
                      padding='SAME',
                      **routing_args):
  """Builds a slim convolutional capsule layer.

  This layer performs 2D convolution given 5D input tensor of shape
  `[batch, input_dim, input_atoms, input_height, input_width]`. Then refines
  the votes with routing and applies the squash nonlinearity to each capsule.

  Each capsule in this layer is a convolutional unit and shares its kernel over
  the position grid and different capsules of layer below. Therefore, number
  of trainable variables in this layer is:

    kernel: [kernel_size, kernel_size, input_atoms, output_dim * output_atoms]
    bias: [output_dim, output_atoms]

  Output of a conv2d layer is a single capsule with channel number of atoms.
  Therefore conv_slim_capsule is suitable to be added on top of a conv2d layer
  with num_routing=1, input_dim=1 and input_atoms=conv_channels.

  Args:
    input_tensor: tensor of rank 5. The last two dimensions represent the
      height and width position grid.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    stride: scalar, stride of the convolutional kernel.
    kernel_size: scalar, convolutional kernels are [kernel_size, kernel_size].
    padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels.
    **routing_args: dictionary {leaky, num_routing}, args to be passed to the
      update_routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms, out_height, out_width]`. If padding is
      'SAME', out_height = in_height and out_width = in_width. Otherwise, height
      and width is adjusted with same rules as 'VALID' in tf.nn.conv2d.
  """
  with tf.variable_scope(layer_name):
    # Convolution; returns votes of shape [batch_size, 1, 32, 8, 6, 6].
    kernel = variables.weight_variable(shape=[
        kernel_size, kernel_size, input_atoms, output_dim * output_atoms
    ])
    biases = variables.bias_variable([output_dim, output_atoms, 1, 1])
    votes, votes_shape, input_shape = _depthwise_conv3d(
        input_tensor, kernel, input_dim, output_dim, input_atoms, output_atoms,
        stride, padding)
    # convolution End

    with tf.name_scope('routing'):
      logit_shape = tf.stack([
          input_shape[0], input_dim, output_dim, votes_shape[2], votes_shape[3]
      ])
      biases_replicated = tf.tile(biases,
                                  [1, 1, votes_shape[2], votes_shape[3]])
      # The routing algorithm is applied inside the PrimaryCaps layer here.
      # Interestingly, the paper does not mention routing at this stage: its
      # statements that 'One can see PrimaryCapsules as a Convolution layer
      # with Eq. 1 as its block non-linearity' and that 'no routing is used
      # between Conv1 and PrimaryCapsules' make it easy to read PrimaryCaps as
      # an ordinary convolution followed by a squash. In short, this code is
      # inconsistent with the paper on this point.
      activations = _update_routing(
          votes=votes,
          biases=biases_replicated,
          logit_shape=logit_shape,
          num_dims=6,
          input_dim=input_dim,
          output_dim=output_dim,
          **routing_args)
    return activations
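As the docstring says, the output of a conv2d layer can be treated as one capsule whose atoms are its channels, so the primary capsule layer is usually built by calling this op on the expanded conv1 output with input_dim=1 and a single routing pass. A hedged usage sketch; the 9x9 kernel, VALID padding, 32 capsule types and 8 atoms follow the CapsNet paper, and the exact values in any given project may differ:

# hidden1: conv1 output expanded to [batch, 1, 256, height, width]
primary_caps = conv_slim_capsule(
    hidden1,
    input_dim=1,                 # the conv layer acts as a single capsule...
    input_atoms=256,             # ...whose atoms are its 256 channels
    output_dim=32,               # 32 primary capsule types
    output_atoms=8,              # 8D primary capsules
    layer_name='conv_capsule1',
    kernel_size=9,
    stride=2,
    padding='VALID',
    num_routing=1,               # a single pass: routing degenerates to squash
    leaky=False)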
Example #13
    def build_replica(self, tower_idx):
        """Adds a replica graph ops.

        Builds the architecture of the neural net to derive logits from 
        batched_dataset. The inference graph defined here should involve 
        trainable variables otherwise the optimizer will raise a ValueError.

        Args:
            tower_idx: the index number for this tower. Each tower is named
                as tower_{tower_idx} and resides on gpu:{tower_idx}.
        Returns:
            Inferred namedtuple containing (logits, None).
        """
        # Image specs
        image_size = self._specs['image_size']
        image_depth = self._specs['depth']
        num_classes = self._specs['num_classes']

        # Define input_tensor for input batched_images
        batched_images = tf.placeholder(tf.float32, 
            shape=[None, image_depth, image_size, image_size], 
            name='batched_images')
        """visual"""
        tf.add_to_collection('tower_%d_batched_images' % tower_idx, batched_images)
        
        # Add convolutional layers
        conv_out = self._add_convs(batched_images, [image_depth, 512, 256], tower_idx)
        hidden1 = tf.contrib.layers.flatten(conv_out) # flatten neurons, shape (?, rest)

        # Add fully connected layer 1, activation = relu
        with tf.variable_scope('fc1') as scope:
            dim = hidden1.get_shape()[1].value
            weights = variables.weight_variable(shape=[dim, 1024], stddev=0.1,
                                                verbose=self._hparams.verbose)
            biases = variables.bias_variable(shape=[1024],
                                             verbose=self._hparams.verbose)
            pre_activation = tf.add(tf.matmul(hidden1, weights), biases, name='logits')
            """visual"""
            tf.add_to_collection('tower_%d_visual' % tower_idx, pre_activation)

            hidden2 = tf.nn.relu(pre_activation, name=scope.name)
        
        # Add fully connected layer 2, activation = None
        with tf.variable_scope('softmax_layer') as scope:
            weights = variables.weight_variable(
                shape=[1024, num_classes], stddev=0.1,
                verbose=self._hparams.verbose)
            biases = variables.bias_variable(
                shape=[num_classes],
                verbose=self._hparams.verbose)
            logits = tf.add(tf.matmul(hidden2, weights), biases, name='logits')
            """visual"""
            tf.add_to_collection('tower_%d_visual' % tower_idx, logits)
        
        # Declare one-hot format placeholder for batched_labels
        batched_labels = tf.placeholder(tf.int32,
            shape=[None, num_classes], name='batched_labels') # 'tower_i/batched_labels:0'
        """visual"""
        tf.add_to_collection('tower_%d_batched_labels' % tower_idx, batched_labels)

        return model.Inferred(logits, None)
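Because the replica's inputs are plain placeholders registered in named collections, a driver can look them up by tower index and feed batches at run time. A minimal sketch of that pattern for tower 0; `inferred`, `image_batch` and `label_batch` are hypothetical names for the returned namedtuple and a NumPy batch:

# Hypothetical driver-side usage: recover tower 0's placeholders from the
# collections registered in build_replica and run the logits on a batch.
images_ph = tf.get_collection('tower_0_batched_images')[0]
labels_ph = tf.get_collection('tower_0_batched_labels')[0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    logits_value = sess.run(inferred.logits,
                            feed_dict={images_ph: image_batch,
                                       labels_ph: label_batch})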
Example #14
def conv_slim_capsule(tower_idx,
                      in_tensor,
                      in_dim,
                      in_atoms,
                      out_dim,
                      out_atoms,
                      layer_name,
                      kernel_size=5,
                      stride=2,
                      padding='SAME',
                      reassemble=False,
                      **routing_args):
    """
    Builds a slim convolutional capsule layer.
    This layer performs 2D convolution given a 5D input tensor of shape
    (batch, in_dim, in_atoms, in_h, in_w), then refines the votes with
    routing and applies the squash nonlinearity to each capsule.
    Each capsule in this layer is a convolutional unit and shares its kernel
    over its positional grid (e.g. 9x9) and the different capsules of the
    layer below. Therefore, the number of trainable variables in this layer is:

        kernel: (kernel_size, kernel_size, in_atoms, out_dim * out_atoms)
        bias: (out_dim, out_atoms)
    
    Output of a conv2d layer is a single capsule with channel number of atoms.
    Therefore conv_slim_capsule is suitable to be added on top of a conv2d layer
    with num_routing=1, in_dim=1 and in_atoms = conv_channels.
    :param tower_idx: the index number for this tower. Each tower is named as 
                      tower_{tower_idx} and resides on gpu:{tower_idx}
    :param in_tensor: 5D tensor; the last two dimensions represent height and width
    :param in_dim: number of capsule types of input
    :param in_atoms: number of units of each input capsule
    :param out_dim: number of capsule types of output
    :param out_atoms: number of units of each output capsule 
    :param layer_name: name of this layer
    :param kernel_size: convolutional kernel size [kernel_size, kernel_size]
    :param stride: stride of the convolutional kernel
    :param padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels
    :param reassemble: passed through to the routing procedure
    :param **routing_args: dictionary {leaky, num_routing}, args to be passed to
                           the routing procedure
    :return: tensor of activations for this layer of shape [batch, out_dim, out_atoms,
             out_h, out_w]
    """
    with tf.variable_scope(layer_name):
        kernel = variables.weight_variable(
            shape=[kernel_size, kernel_size, in_atoms, out_dim * out_atoms])
        biases = variables.bias_variable(shape=[out_dim, out_atoms, 1, 1])
        votes, votes_shape, in_shape = _depthwise_conv3d(
            tower_idx, in_tensor, in_dim, in_atoms, out_dim, out_atoms, kernel,
            stride, padding)

        with tf.name_scope('routing'):
            logit_shape = tf.stack(
                [in_shape[0], in_dim, out_dim, votes_shape[2], votes_shape[3]])
            biases_replicated = tf.tile(biases,
                                        [1, 1, votes_shape[2], votes_shape[3]])

            activations = _update_routing(tower_idx,
                                          votes=votes,
                                          biases=biases_replicated,
                                          logit_shape=logit_shape,
                                          num_ranks=6,
                                          in_dim=in_dim,
                                          out_dim=out_dim,
                                          reassemble=reassemble,
                                          **routing_args)
        return activations
Example #15
File: layers.py  Project: DuanHQO/models
def capsule(input_tensor,
            input_dim,
            output_dim,
            layer_name,
            input_atoms=8,
            output_atoms=8,
            **routing_args):
  """Builds a fully connected capsule layer.

  Given an input tensor of shape `[batch, input_dim, input_atoms]`, this op
  performs the following:

    1. For each input capsule, multiplies it by the weight variable to get
      votes of shape `[batch, input_dim, output_dim, output_atoms]`.
    2. Scales the votes for each output capsule by iterative routing.
    3. Squashes the output of each capsule to have norm less than one.

  Each capsule of this layer has one weight tensor for each capsule of the layer
  below. Therefore, this layer has the following number of trainable variables:
    w: [input_dim * num_in_atoms, output_dim * num_out_atoms]
    b: [output_dim * num_out_atoms]

  Args:
    input_tensor: tensor, activation output of the layer below.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    **routing_args: dictionary {leaky, num_routing}, args for routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms]`.
  """
  with tf.variable_scope(layer_name):
    # weights variable will hold the state of the weights for the layer
    weights = variables.weight_variable(
        [input_dim, input_atoms, output_dim * output_atoms])
    biases = variables.bias_variable([output_dim, output_atoms])
    with tf.name_scope('Wx_plus_b'):
      # Depthwise matmul: [b, d, c] @ [d, c, o_c] = [b, d, o_c]
      # To do this: tile the input, multiply element-wise and reduce-sum
      # over the input_atoms dimension.
      input_tiled = tf.tile(
          tf.expand_dims(input_tensor, -1),
          [1, 1, 1, output_dim * output_atoms])
      votes = tf.reduce_sum(input_tiled * weights, axis=2)
      votes_reshaped = tf.reshape(votes,
                                  [-1, input_dim, output_dim, output_atoms])
    with tf.name_scope('routing'):
      input_shape = tf.shape(input_tensor)
      logit_shape = tf.stack([input_shape[0], input_dim, output_dim])
      activations = _update_routing(
          votes=votes_reshaped,
          biases=biases,
          logit_shape=logit_shape,
          num_dims=4,
          input_dim=input_dim,
          output_dim=output_dim,
          **routing_args)
    return activations
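A hedged usage sketch of this op as the final class-capsule layer, with the primary capsule output flattened to [batch, input_dim, input_atoms] first; the 16 output atoms and 3 routing iterations follow the CapsNet paper, and `capsule_input`, `primary_capsule_count` and `num_classes` are placeholder names, not identifiers from this project:

# capsule_input: primary capsule output reshaped to [batch, primary_capsule_count, 8]
digit_caps = capsule(
    capsule_input,
    input_dim=primary_capsule_count,   # e.g. 32 * 6 * 6 = 1152 for 28x28 MNIST
    output_dim=num_classes,            # one capsule per class
    layer_name='capsule',
    input_atoms=8,
    output_atoms=16,                   # 16D class capsules, as in the paper
    num_routing=3,                     # routing iterations
    leaky=False)
logits = tf.norm(digit_caps, axis=-1)  # capsule length as the class logit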
Example #16
def conv_slim_capsule(input_tensor,
                      input_dim,
                      output_dim,
                      layer_name,
                      input_atoms=8,
                      output_atoms=8,
                      stride=2,
                      kernel_size=5,
                      padding='SAME',
                      **routing_args):
    """Builds a slim convolutional capsule layer.

  This layer performs 2D convolution given 5D input tensor of shape
  `[batch, input_dim, input_atoms, input_height, input_width]`. Then refines
  the votes with routing and applies the squash nonlinearity to each capsule.

  Each capsule in this layer is a convolutional unit and shares its kernel over
  the position grid and different capsules of layer below. Therefore, number
  of trainable variables in this layer is:

    kernel: [kernel_size, kernel_size, input_atoms, output_dim * output_atoms]
    bias: [output_dim, output_atoms]

  Output of a conv2d layer is a single capsule with channel number of atoms.
  Therefore conv_slim_capsule is suitable to be added on top of a conv2d layer
  with num_routing=1, input_dim=1 and input_atoms=conv_channels.

  Args:
    input_tensor: tensor of rank 5. The last two dimensions represent the
      height and width position grid, e.g. (128, 1, 256, 20, 20) =
      (batch, input_dim, channels, img_height, img_width).
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    stride: scalar, stride of the convolutional kernel.
    kernel_size: scalar, convolutional kernels are [kernel_size, kernel_size].
    padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels.
    **routing_args: dictionary {leaky, num_routing}, args to be passed to the
      update_routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms, out_height, out_width]`. If padding is
      'SAME', out_height = in_height and out_width = in_width. Otherwise, height
      and width is adjusted with same rules as 'VALID' in tf.nn.conv2d.
  """
    with tf.variable_scope(layer_name):  # layer_name = 'conv_capsule1'
        kernel = variables.weight_variable(shape=[
            kernel_size, kernel_size, input_atoms, output_dim * output_atoms
        ])
        biases = variables.bias_variable([output_dim, output_atoms, 1, 1])
        votes, votes_shape, input_shape = _depthwise_conv3d(
            input_tensor, kernel, input_dim, output_dim, input_atoms,
            output_atoms, stride, padding)
        # votes: second convolution result $u_i$
        # (128,1,32,8,6,6) 32: output_dims 8: output_atoms
        # votes_shape (128,256,6,6)
        # input_shape (128,1,256,20,20) (batch, , channels, img_height, img_width)

        with tf.name_scope('routing'):
            logit_shape = tf.stack([
                input_shape[0], input_dim, output_dim, votes_shape[2],
                votes_shape[3]
            ],
                                   name="lc_stack")
            # (128,1,32,6,6)
            biases_replicated = tf.tile(biases,
                                        [1, 1, votes_shape[2], votes_shape[3]],
                                        name="lc_tile")
            # (32,8,6,6)
            activations = _update_routing(votes=votes,
                                          biases=biases_replicated,
                                          logit_shape=logit_shape,
                                          num_dims=6,
                                          input_dim=input_dim,
                                          output_dim=output_dim,
                                          **routing_args)
        return activations
Example #17
    def build_replica(self, tower_idx):
        """Adds a replica graph ops.

        Builds the architecture of the neural net to derive logits from 
        batched_dataset. The inference graph defined here should involve 
        trainable variables otherwise the optimizer will raise a ValueError.

        Args:
            tower_idx: the index number for this tower. Each tower is named
                as tower_{tower_idx} and resides on gpu:{tower_idx}.
        Returns:
            Inferred namedtuple containing (logits, recons).
        """
        # Image specs
        image_size = self._specs['image_size']
        image_depth = self._specs['depth']
        num_classes = self._specs['num_classes']

        # Define input_tensor for input batched_images
        batched_images = tf.placeholder(
            tf.float32,
            shape=[None, image_depth, image_size, image_size],
            name='batched_images')  # (?, 3, h, w)
        """visual"""
        tf.add_to_collection('tower_%d_batched_images' % tower_idx,
                             batched_images)

        # declare the threshold placeholder for ensemble evaluation
        threshold = tf.placeholder(tf.float32, name='threshold')
        tf.add_to_collection('tower_%d_batched_threshold' % tower_idx,
                             threshold)

        # ReLU Convolution
        with tf.variable_scope('conv1') as scope:
            kernel = variables.weight_variable(shape=[9, 9, image_depth, 256],
                                               stddev=5e-2,
                                               verbose=self._hparams.verbose)
            biases = variables.bias_variable([256],
                                             verbose=self._hparams.verbose)
            conv1 = tf.nn.conv2d(batched_images,
                                 kernel,
                                 strides=[1, 1, 1, 1],
                                 padding=self._hparams.padding,
                                 data_format='NCHW')
            pre_activation = tf.nn.bias_add(conv1,
                                            biases,
                                            data_format='NCHW',
                                            name='logits')
            """visual"""
            tf.add_to_collection('tower_%d_visual' % tower_idx, pre_activation)
            relu1 = tf.nn.relu(pre_activation, name=scope.name)
            if self._hparams.verbose:
                tf.summary.histogram(scope.name + '/activation', relu1)
        hidden1 = tf.expand_dims(
            relu1, 1)  # (?, 1, 256, h, w); h, w are the conv1 output height/width.

        # Capsules
        capsule_output = self._build_capsule(hidden1, num_classes, tower_idx)
        logits = tf.norm(capsule_output, axis=-1, name='logits')
        """visual"""
        tf.add_to_collection('tower_%d_visual' % tower_idx, logits)

        # Declare one-hot format placeholder for batched_labels
        batched_labels = tf.placeholder(tf.int32,
                                        shape=[None, num_classes],
                                        name='batched_labels')
        tf.add_to_collection('tower_%d_batched_labels' % tower_idx,
                             batched_labels)

        # Reconstruction
        if self._hparams.remake:
            remake = self._remake(capsule_output, batched_images,
                                  batched_labels)
            tf.add_to_collection('tower_%d_recons' % tower_idx, remake)
        else:
            remake = None

        return model.Inferred(logits, remake)
Example #18
File: layers.py  Project: DuanHQO/models
def conv_slim_capsule(input_tensor,
                      input_dim,
                      output_dim,
                      layer_name,
                      input_atoms=8,
                      output_atoms=8,
                      stride=2,
                      kernel_size=5,
                      padding='SAME',
                      **routing_args):
  """Builds a slim convolutional capsule layer.

  This layer performs 2D convolution given 5D input tensor of shape
  `[batch, input_dim, input_atoms, input_height, input_width]`. Then refines
  the votes with routing and applies the squash nonlinearity to each capsule.

  Each capsule in this layer is a convolutional unit and shares its kernel over
  the position grid and different capsules of layer below. Therefore, number
  of trainable variables in this layer is:

    kernel: [kernel_size, kernel_size, input_atoms, output_dim * output_atoms]
    bias: [output_dim, output_atoms]

  Output of a conv2d layer is a single capsule with channel number of atoms.
  Therefore conv_slim_capsule is suitable to be added on top of a conv2d layer
  with num_routing=1, input_dim=1 and input_atoms=conv_channels.

  Args:
    input_tensor: tensor of rank 5. The last two dimensions represent the
      height and width position grid.
    input_dim: scalar, number of capsules in the layer below.
    output_dim: scalar, number of capsules in this layer.
    layer_name: string, Name of this layer.
    input_atoms: scalar, number of units in each capsule of input layer.
    output_atoms: scalar, number of units in each capsule of output layer.
    stride: scalar, stride of the convolutional kernel.
    kernel_size: scalar, convolutional kernels are [kernel_size, kernel_size].
    padding: 'SAME' or 'VALID', padding mechanism for convolutional kernels.
    **routing_args: dictionary {leaky, num_routing}, args to be passed to the
      update_routing function.

  Returns:
    Tensor of activations for this layer of shape
      `[batch, output_dim, output_atoms, out_height, out_width]`. If padding is
      'SAME', out_height = in_height and out_width = in_width. Otherwise, height
      and width is adjusted with same rules as 'VALID' in tf.nn.conv2d.
  """
  with tf.variable_scope(layer_name):
    kernel = variables.weight_variable(shape=[
        kernel_size, kernel_size, input_atoms, output_dim * output_atoms
    ])
    biases = variables.bias_variable([output_dim, output_atoms, 1, 1])
    votes, votes_shape, input_shape = _depthwise_conv3d(
        input_tensor, kernel, input_dim, output_dim, input_atoms, output_atoms,
        stride, padding)

    with tf.name_scope('routing'):
      logit_shape = tf.stack([
          input_shape[0], input_dim, output_dim, votes_shape[2], votes_shape[3]
      ])
      biases_replicated = tf.tile(biases,
                                  [1, 1, votes_shape[2], votes_shape[3]])
      activations = _update_routing(
          votes=votes,
          biases=biases_replicated,
          logit_shape=logit_shape,
          num_dims=6,
          input_dim=input_dim,
          output_dim=output_dim,
          **routing_args)
    return activations
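For reference, the out_height/out_width described in the Returns section follow the standard tf.nn.conv2d size rules; a small standalone helper reproducing them (a sketch, not part of the library):

import math

def conv_output_size(in_size, kernel_size, stride, padding):
    """Spatial output size of a 2D convolution under TensorFlow's rules."""
    if padding == 'SAME':
        return int(math.ceil(in_size / float(stride)))
    if padding == 'VALID':
        return int(math.ceil((in_size - kernel_size + 1) / float(stride)))
    raise ValueError("padding must be 'SAME' or 'VALID'")

# e.g. conv1's 20x20 output with a 9x9 kernel, stride 2 and VALID padding
# gives the 6x6 grid noted in the comments of Example #16 above.
assert conv_output_size(20, 9, 2, 'VALID') == 6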