def _build_capsule(self, input_tensor, num_classes):
  """Adds the capsule layers.

  A slim convolutional capsule layer transforms the input tensor to capsule
  format. The nonlinearity for slim convolutional capsule is the squash
  function but there is no routing and each spatial instantiation of a capsule
  is derived as a traditional convolutional layer. In order to connect the
  convolutional capsule layer to the top fully connected capsule layer the
  grid position of a convolution capsule is merged with the different capsule
  types into one dimension, and capsule2 learns a different transformation for
  each of them.

  Args:
    input_tensor: 5D input tensor, shape: [batch, 1, 256, height, width].
    num_classes: Number of object categories. Used as the output dimension.

  Returns:
    A 3D tensor of the top capsule layer with num_classes capsule embeddings.
  """
  # Named once so the reshape below cannot drift out of sync with the layer
  # configuration. conv1_channels must match the channel dim of input_tensor.
  conv1_channels = 256   # Atoms per input capsule (Conv1 feature maps).
  prime_capsule_dim = 8  # Atoms per primary capsule.
  digit_capsule_dim = 16  # Atoms per digit (output) capsule.

  # PrimaryCaps layer: convolutional capsules with squash nonlinearity and no
  # routing (num_routing=1). Output shape (MNIST, VALID padding):
  # [batch_size, 32, 8, 6, 6].
  capsule1 = layers.conv_slim_capsule(
      input_tensor,
      input_dim=1,
      output_dim=self._hparams.num_prime_capsules,
      layer_name='conv_capsule1',
      num_routing=1,
      input_atoms=conv1_channels,
      output_atoms=prime_capsule_dim,
      stride=2,
      kernel_size=9,
      padding=self._hparams.padding,
      leaky=self._hparams.leaky,
  )
  # Move atoms to the last axis:
  # [batch_size, capsule_channels, height, width, capsule_dims].
  capsule1_atom_last = tf.transpose(capsule1, [0, 1, 3, 4, 2])
  # DigitCaps is fully connected, so flatten the capsule grid to
  # [batch_size, num_prime_capsules * height * width, prime_capsule_dim]
  # (e.g. [batch_size, 1152, 8] for MNIST).
  capsule1_3d = tf.reshape(
      capsule1_atom_last,
      [tf.shape(input_tensor)[0], -1, prime_capsule_dim])
  # Static spatial dims of the primary capsule grid (TF1 Dimension objects).
  _, _, _, height, width = capsule1.get_shape()
  # Total number of primary capsules, e.g. 32 * 6 * 6 = 1152.
  input_dim = self._hparams.num_prime_capsules * height.value * width.value
  # DigitCaps layer with dynamic routing, returns
  # [batch_size, num_classes, digit_capsule_dim].
  return layers.capsule(
      input_tensor=capsule1_3d,
      input_dim=input_dim,
      output_dim=num_classes,
      layer_name='capsule2',
      input_atoms=prime_capsule_dim,
      output_atoms=digit_capsule_dim,
      num_routing=self._hparams.routing,
      leaky=self._hparams.leaky,
  )
def testCapsule(self):
  """Checks layers.capsule output shape and trainable variable creation."""
  in_caps = tf.random_uniform((4, 3, 2))
  out = layers.capsule(
      input_tensor=in_caps,
      input_dim=3,
      output_dim=2,
      layer_name='capsule',
      input_atoms=2,
      output_atoms=5,
      num_routing=3,
      leaky=False)
  # Output is [batch, output_dim, output_atoms].
  self.assertListEqual([4, 2, 5], out.get_shape().as_list())
  # The layer should declare exactly its weight and bias variables, scoped
  # under the layer name.
  variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
  self.assertEqual(2, len(variables))
  self.assertStartsWith(variables[0].name, 'capsule')
def _build_capsule(self, input_tensor, num_classes):
  """Adds the capsule layers.

  A slim convolutional capsule layer first converts the input tensor into
  capsule format: its nonlinearity is the squash function, it performs no
  routing, and each spatial capsule instantiation is computed like a standard
  convolution. To feed the fully connected capsule layer on top, the spatial
  grid of convolutional capsules is merged with the capsule-type dimension,
  and capsule2 learns a separate transformation for each entry.

  Args:
    input_tensor: 5D input tensor, shape: [batch, 1, 256, height, width].
    num_classes: Number of object categories. Used as the output dimension.

  Returns:
    A 3D tensor of the top capsule layer with num_classes capsule embeddings.
  """
  prime_dim = self._hparams.prime_capsule_dim
  # PrimaryCaps: convolutional capsules, squash nonlinearity, no routing.
  primary_caps = layers.conv_slim_capsule(
      input_tensor,
      input_dim=1,
      output_dim=self._hparams.num_prime_capsules,
      layer_name='conv_capsule1',
      num_routing=1,
      input_atoms=self._hparams.conv1_channel,
      output_atoms=prime_dim,
      stride=2,
      kernel_size=9,
      padding=self._hparams.padding,
      leaky=self._hparams.leaky,
  )
  # Static grid size of the primary capsule layer (TF1 Dimension objects).
  _, _, _, grid_h, grid_w = primary_caps.get_shape()
  flat_count = self._hparams.num_prime_capsules * grid_h.value * grid_w.value
  batch = tf.shape(input_tensor)[0]
  # Put atoms last, then collapse (type, height, width) into one axis so the
  # fully connected DigitCaps layer sees [batch, flat_count, prime_dim].
  atoms_last = tf.transpose(primary_caps, [0, 1, 3, 4, 2])
  flat_capsules = tf.reshape(atoms_last, [batch, -1, prime_dim])
  # DigitCaps: fully connected capsules with dynamic routing.
  return layers.capsule(
      input_tensor=flat_capsules,
      input_dim=flat_count,
      output_dim=num_classes,
      layer_name='capsule2',
      input_atoms=prime_dim,
      output_atoms=self._hparams.digit_capsule_dim,
      num_routing=self._hparams.routing,
      leaky=self._hparams.leaky,
  )
def _build_capsule(self, input_tensor, num_classes):
  """Adds the capsule layers.

  A slim convolutional capsule layer transforms the input tensor to capsule
  format. The nonlinearity for slim convolutional capsule is the squash
  function but there is no routing and each spatial instantiation of a capsule
  is derived as a traditional convolutional layer. In order to connect the
  convolutional capsule layer to the top fully connected capsule layer the
  grid position of a convolution capsule is merged with the different capsule
  types into one dimension, and capsule2 learns a different transformation for
  each of them.

  Args:
    input_tensor: 5D input tensor, shape: [batch, 1, 256, height, width].
    num_classes: Number of object categories. Used as the output dimension.

  Returns:
    A 3D tensor of the top capsule layer with num_classes capsule embeddings.
  """
  # Hoisted magic numbers: the primary-capsule atom count is used both as
  # output_atoms and in the reshape below, so name it once.
  conv1_channels = 256   # Atoms per input capsule (Conv1 feature maps).
  prime_capsule_dim = 8  # Atoms per primary capsule.
  digit_capsule_dim = 16  # Atoms per digit (output) capsule.

  # PrimaryCaps: convolutional capsules, squash nonlinearity, num_routing=1
  # (i.e. no routing iterations).
  capsule1 = layers.conv_slim_capsule(
      input_tensor,
      input_dim=1,
      output_dim=self._hparams.num_prime_capsules,
      layer_name='conv_capsule1',
      num_routing=1,
      input_atoms=conv1_channels,
      output_atoms=prime_capsule_dim,
      stride=2,
      kernel_size=9,
      padding=self._hparams.padding,
      leaky=self._hparams.leaky,
  )
  # Atoms to the last axis, then flatten (type, height, width) so the fully
  # connected DigitCaps layer sees [batch, num_capsules, prime_capsule_dim].
  capsule1_atom_last = tf.transpose(capsule1, [0, 1, 3, 4, 2])
  capsule1_3d = tf.reshape(
      capsule1_atom_last,
      [tf.shape(input_tensor)[0], -1, prime_capsule_dim])
  # Static spatial dims of the primary capsule grid (TF1 Dimension objects).
  _, _, _, height, width = capsule1.get_shape()
  # Total number of primary capsules, e.g. 32 * 6 * 6 = 1152.
  input_dim = self._hparams.num_prime_capsules * height.value * width.value
  # DigitCaps layer with dynamic routing, returns
  # [batch_size, num_classes, digit_capsule_dim].
  return layers.capsule(
      input_tensor=capsule1_3d,
      input_dim=input_dim,
      output_dim=num_classes,
      layer_name='capsule2',
      input_atoms=prime_capsule_dim,
      output_atoms=digit_capsule_dim,
      num_routing=self._hparams.routing,
      leaky=self._hparams.leaky,
  )