def eval(self, inputs, labels=None):
    """Run a forward pass and report predictions, plus accuracy if labelled.

    Args:
        inputs: Batch handed unchanged to `self.forward`.
        labels: Optional ground-truth class ids. When omitted, no accuracy
            is computed.

    Returns:
        A tuple `(probs, logits_idx, accuracy)`:
            probs: Second value returned by `self.forward(inputs)`.
            logits_idx: int32 argmax over axis 1 of the softmaxed `probs`.
            accuracy: `self.accuracy(probs, labels)`, or None when `labels`
                is None.
    """
    forward_outputs = self.forward(inputs)
    _, probs = forward_outputs

    normalized = cl.softmax(probs, axis=1)
    logits_idx = tf.cast(tf.argmax(normalized, axis=1), tf.int32)

    score = None if labels is None else self.accuracy(probs, labels)
    return probs, logits_idx, score
def accuracy(self, probs, labels):
    """Compute the fraction of samples whose predicted class equals its label.

    The predicted class is the argmax over axis 1 of the softmaxed `probs`.
    The result is also written to a scalar summary named 'accuracy'.

    Args:
        probs: Per-class scores; axis 0 is used as the batch axis and axis 1
            as the class axis.
        labels: Ground-truth class ids, cast to int32 before comparison.

    Returns:
        A float32 scalar tensor: number of correct predictions divided by
        the size of axis 0 of `probs`.
    """
    predicted = tf.cast(tf.argmax(cl.softmax(probs, axis=1), axis=1), tf.int32)
    expected = tf.cast(labels, tf.int32)

    hits = tf.cast(tf.equal(expected, predicted), tf.float32)
    num_correct = tf.reduce_sum(hits)
    batch_size = tf.cast(tf.shape(probs)[0], tf.float32)

    result = tf.reduce_mean(num_correct / batch_size)
    cl.summary.scalar('accuracy', result, verbose=cfg.summary_verbose)
    return result
def dynamicRouting_v1(votes, num_routing=3, use_bias=True, leaky=True):
    """Dynamic routing algorithm.

    See [Sabour et al., 2017](https://arxiv.org/abs/1710.09829).

    Args:
        votes: A 5-D or 7-D tensor with shape
            [batch_size, ..., in_channels/num_inputs, num_outputs] + out_caps_dims.
        num_routing: Integer, number of routing iterations (must be >= 1).
        use_bias: Boolean, whether the layer uses a bias.
        leaky: Boolean, whether the algorithm uses leaky routing.

    Returns:
        poses: A 4-D or 6-D tensor.
        probs: A 2-D or 4-D tensor.

    Raises:
        ValueError: If `num_routing` is smaller than 1 (no pose would ever
            be computed).
    """
    if num_routing < 1:
        raise ValueError(
            "num_routing must be at least 1, got %r" % (num_routing,))

    vote_shape = cl.shape(votes)
    # One logit per (input capsule, output capsule) pair; the two trailing
    # singleton axes let it broadcast against the pose dimensions of `votes`.
    logit_shape = vote_shape[:-2] + [1, 1]
    logits = tf.fill(logit_shape, 0.0)
    squash_on = -2 if vote_shape[-1] == 1 else [-2, -1]

    biases = None
    if use_bias:
        bias_shape = [1] * (len(vote_shape) - 3) + vote_shape[-3:]
        biases = tf.get_variable("biases",
                                 bias_shape,
                                 initializer=tf.constant_initializer(0.1),
                                 dtype=tf.float32)

    # Only the final iteration back-propagates into the votes.
    vote_stopped = tf.stop_gradient(votes, name="stop_gradient")

    last_iteration = num_routing - 1
    for i in range(num_routing):
        with tf.variable_scope("iter_" + str(i)):
            if leaky:
                route = _leaky_routing(logits)
            else:
                route = cl.softmax(logits, axis=-3)

            is_last = i == last_iteration
            routed_votes = votes if is_last else vote_stopped

            # Sum over the input-capsule axis. Counting from the end (-4)
            # keeps this correct for both 5-D and 7-D votes; a hard-coded
            # axis=1 is only right in the 5-D case.
            preactivate = cl.reduce_sum(tf.multiply(route, routed_votes),
                                        axis=-4,
                                        keepdims=True)
            if use_bias:
                preactivate = preactivate + biases
            poses = cl.ops.squash(preactivate, axis=squash_on)

            if not is_last:
                # Agreement is the inner product between each vote and the
                # current output pose, i.e. a sum over the pose dimensions.
                # Reducing the last two axes keeps `logits` at `logit_shape`,
                # which `_leaky_routing` and the softmax above rely on.
                agreement = cl.reduce_sum(vote_stopped * poses,
                                          axis=-1,
                                          keepdims=True)
                agreement = cl.reduce_sum(agreement, axis=-2, keepdims=True)
                logits += agreement

    poses = tf.squeeze(poses, axis=-4)
    probs = tf.norm(poses, axis=(-2, -1))
    return poses, probs
def _leaky_routing(logits):
    """Softmax the routing logits with an extra zero-valued "leak" slot.

    A zero logit is prepended along axis -3, the softmax is taken over that
    axis, and the leak slot is then discarded, so the returned coefficients
    along axis -3 may sum to less than one.

    Args:
        logits: Routing logits; axis -3 is the axis the softmax runs over.

    Returns:
        The softmax output with the leak entry removed, i.e. with the same
        size along axis -3 as `logits`.
    """
    logit_dims = cl.shape(logits)
    num_outputs = logit_dims[-3]

    leak_logit = tf.zeros(logit_dims[:-3] + [1, 1, 1])
    padded_logits = tf.concat([leak_logit, logits], axis=-3)
    distribution = cl.softmax(padded_logits, axis=-3)

    # First piece is the leak slot, second piece is the real routing weights.
    _, routed = tf.split(distribution, [1, num_outputs], axis=-3)
    return routed
def create_network(self, inputs, labels):
    """Setup capsule network.

    Builds Conv1 -> PrimaryCaps -> DigitCaps, a three-layer fully connected
    decoder that reconstructs the input from the label-masked poses, and an
    accuracy op. Besides returning, it stores `raw_imgs`, `labels`, `poses`,
    `probs`, `labels_one_hoted`, `recon_imgs` and `accuracy` on `self`.

    Args:
        inputs: Tensor or array with shape [batch_size, height, width, channels]
            or [batch_size, height * width * channels].
        labels: Tensor or array with shape [batch_size].

    Returns:
        poses: [batch_size, num_label, 16, 1].
        probs: Tensor with shape [batch_size, num_label], the probability of
            entity presence.
    """
    self.raw_imgs = inputs
    self.labels = labels

    with tf.variable_scope('Conv1_layer'):
        # Conv1, return with shape [batch_size, 20, 20, 256]
        # (the 20x20 figure presumably assumes 28x28 inputs)
        inputs = tf.reshape(
            self.raw_imgs,
            shape=[-1, self.height, self.width, self.channels])
        conv1 = tf.layers.conv2d(inputs,
                                 filters=256,
                                 kernel_size=9,
                                 strides=1,
                                 padding='VALID',
                                 activation=tf.nn.relu)

    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps, activation = cl.layers.primaryCaps(
            conv1,
            filters=64,  # MNIST 32
            kernel_size=9,
            strides=2,
            out_caps_dims=[8, 1],
            method="norm")

    with tf.variable_scope('DigitCaps_layer'):
        routing_method = "EMRouting"
        # Flatten the capsule grid into a single list of input capsules.
        num_inputs = np.prod(cl.shape(primaryCaps)[1:4])
        primaryCaps = tf.reshape(primaryCaps, shape=[-1, num_inputs, 8, 1])
        activation = tf.reshape(activation, shape=[-1, num_inputs])
        self.poses, self.probs = cl.layers.dense(
            primaryCaps,
            activation,
            num_outputs=self.num_label,
            out_caps_dims=[16, 1],
            routing_method=routing_method)
        cl.summary.histogram('activation',
                             self.probs,
                             verbose=cfg.summary_verbose)

    # Decoder structure
    # Reconstruct the inputs with 3 FC layers
    with tf.variable_scope('Decoder'):
        labels = tf.one_hot(self.labels,
                            depth=self.num_label,
                            axis=-1,
                            dtype=tf.float32)
        self.labels_one_hoted = tf.reshape(labels,
                                           (-1, self.num_label, 1, 1))
        # Zero out every capsule except the one of the true class.
        masked_caps = tf.multiply(self.poses, self.labels_one_hoted)
        num_inputs = np.prod(masked_caps.get_shape().as_list()[1:])
        active_caps = tf.reshape(masked_caps, shape=(-1, num_inputs))
        fc1 = tf.layers.dense(active_caps, units=512, activation=tf.nn.relu)
        fc2 = tf.layers.dense(fc1, units=1024, activation=tf.nn.relu)
        num_outputs = self.height * self.width * self.channels
        self.recon_imgs = tf.layers.dense(fc2,
                                          units=num_outputs,
                                          activation=tf.sigmoid)
        recon_imgs = tf.reshape(
            self.recon_imgs,
            shape=[-1, self.height, self.width, self.channels])
        cl.summary.image('reconstruction_img',
                         recon_imgs,
                         verbose=cfg.summary_verbose)

    with tf.variable_scope('accuracy'):
        # tf.cast replaces the deprecated tf.to_int32 helper.
        logits_idx = tf.cast(
            tf.argmax(cl.softmax(self.probs, axis=1), axis=1), tf.int32)
        correct_prediction = tf.equal(tf.cast(self.labels, tf.int32),
                                      logits_idx)
        correct = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
        self.accuracy = tf.reduce_mean(
            correct / tf.cast(tf.shape(self.probs)[0], tf.float32))
        cl.summary.scalar('accuracy',
                          self.accuracy,
                          verbose=cfg.summary_verbose)

    return self.poses, self.probs
def create_network(self, inputs, labels):
    """Setup capsule network.

    Builds Conv1 -> PrimaryCaps -> ConvCaps1 -> ConvCaps2 -> ClassCaps, a
    three-layer fully connected decoder that reconstructs the input from the
    label-masked poses, and an accuracy op. Besides returning, it stores
    `raw_imgs`, `labels`, `poses`, `probs`, `labels_one_hoted`, `recon_imgs`
    and `accuracy` on `self`.

    Args:
        inputs: Tensor or array with shape [batch_size, height, width, channels]
            or [batch_size, height * width * channels].
        labels: Tensor or array with shape [batch_size].

    Returns:
        poses: Tensor, presumably with shape [batch_size, num_label, 4, 4]
            given `out_caps_dims=[4, 4]` on the class capsule layer.
        probs: Tensor with shape [batch_size, num_label], the probability of
            entity presence.
    """
    self.raw_imgs = inputs
    self.labels = labels

    # Mean activation of every capsule layer, averaged into one summary.
    probs = []

    inputs = tf.reshape(inputs,
                        shape=[-1, self.height, self.width, self.channels])
    conv1 = tf.layers.conv2d(inputs,
                             filters=32,
                             kernel_size=5,
                             strides=2,
                             padding='VALID',
                             activation=tf.nn.relu,
                             name="Conv1_layer")

    convCaps, activation = cl.layers.primaryCaps(conv1,
                                                 filters=32,
                                                 kernel_size=1,
                                                 strides=1,
                                                 out_caps_dims=[4, 4],
                                                 method="logistic",
                                                 name="PrimaryCaps_layer")
    probs.append(tf.reduce_mean(activation))

    routing_method = 'EMRouting'
    convCaps, activation = cl.layers.conv2d(convCaps,
                                            activation,
                                            filters=32,
                                            out_caps_dims=[4, 4],
                                            kernel_size=(3, 3),
                                            strides=(2, 2),
                                            routing_method=routing_method,
                                            name="ConvCaps1_layer")
    probs.append(tf.reduce_mean(activation))

    convCaps, activation = cl.layers.conv2d(convCaps,
                                            activation,
                                            filters=32,
                                            out_caps_dims=[4, 4],
                                            kernel_size=(3, 3),
                                            strides=(1, 1),
                                            routing_method=routing_method,
                                            name="ConvCaps2_layer")
    probs.append(tf.reduce_mean(activation))

    self.poses, self.probs = cl.layers.dense(convCaps,
                                             activation,
                                             num_outputs=self.num_label,
                                             out_caps_dims=[4, 4],
                                             routing_method=routing_method,
                                             coordinate_addition=True,
                                             name="ClassCaps_layer")
    probs.append(tf.reduce_mean(self.probs))
    tf.summary.scalar("probs", tf.reduce_mean(probs))

    # Decoder structure
    # Reconstruct the inputs with 3 FC layers
    # masked poses => [batch_size, prod(pose dims)] => [batch_size, 512]
    with tf.variable_scope('Decoder'):
        labels = tf.one_hot(self.labels,
                            depth=self.num_label,
                            axis=-1,
                            dtype=tf.float32)
        self.labels_one_hoted = tf.reshape(labels,
                                           (-1, self.num_label, 1, 1))
        # Zero out every capsule except the one of the true class.
        masked_caps = tf.multiply(self.poses, self.labels_one_hoted)
        num_inputs = np.prod(masked_caps.get_shape().as_list()[1:])
        active_caps = tf.reshape(masked_caps, shape=(-1, num_inputs))
        fc1 = tf.layers.dense(active_caps, units=512, activation=tf.nn.relu)
        fc2 = tf.layers.dense(fc1, units=1024, activation=tf.nn.relu)
        num_outputs = self.height * self.width * self.channels
        self.recon_imgs = tf.layers.dense(fc2,
                                          units=num_outputs,
                                          activation=tf.sigmoid)
        recon_imgs = tf.reshape(
            self.recon_imgs,
            shape=[-1, self.height, self.width, self.channels])
        cl.summary.image('reconstruction_img',
                         recon_imgs,
                         verbose=cfg.summary_verbose)

    with tf.variable_scope('accuracy'):
        cl.summary.histogram('activation',
                             tf.nn.softmax(self.probs, 1),
                             verbose=cfg.summary_verbose)
        # tf.cast replaces the deprecated tf.to_int32 helper.
        logits_idx = tf.cast(
            tf.argmax(cl.softmax(self.probs, axis=1), axis=1), tf.int32)
        correct_prediction = tf.equal(tf.cast(self.labels, tf.int32),
                                      logits_idx)
        correct = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
        self.accuracy = tf.reduce_mean(
            correct / tf.cast(tf.shape(self.probs)[0], tf.float32))
        cl.summary.scalar('accuracy',
                          self.accuracy,
                          verbose=cfg.summary_verbose)

    return self.poses, self.probs