Exemplo n.º 1
0
    def eval(self, inputs, labels=None):
        """Run the forward pass and derive class predictions.

        Args:
            inputs: Passed unchanged to `self.forward`.
            labels: Optional ground-truth labels. When given, they are handed
                to `self.accuracy` together with the forward-pass output.

        Returns:
            A 3-tuple `(probs, logits_idx, accuracy)` where `probs` is the
            second value returned by `self.forward`, `logits_idx` is the int32
            argmax along axis 1 of the softmaxed `probs`, and `accuracy` is
            `None` when `labels` is `None`.
        """
        _unused, probs = self.forward(inputs)

        # Index of the highest softmax score per row, cast to int32.
        normalized = cl.softmax(probs, axis=1)
        logits_idx = tf.cast(tf.argmax(normalized, axis=1), tf.int32)

        acc = None if labels is None else self.accuracy(probs, labels)
        return probs, logits_idx, acc
Exemplo n.º 2
0
    def accuracy(self, probs, labels):
        """Compute the fraction of rows whose argmax matches the label.

        Args:
            probs: Scores indexed as [row, class]; softmax and argmax are
                taken along axis 1.
            labels: Ground-truth class ids; cast to int32 before comparison.

        Returns:
            A float32 tensor: the number of matching predictions divided by
            the size of the first dimension of `probs`. The value is also
            written to the 'accuracy' scalar summary.
        """
        predicted = tf.cast(tf.argmax(cl.softmax(probs, axis=1), axis=1),
                            tf.int32)
        targets = tf.cast(labels, tf.int32)
        hits = tf.cast(tf.equal(targets, predicted), tf.float32)

        num_correct = tf.reduce_sum(hits)
        num_rows = tf.cast(tf.shape(probs)[0], tf.float32)
        acc = tf.reduce_mean(num_correct / num_rows)

        cl.summary.scalar('accuracy', acc, verbose=cfg.summary_verbose)

        return acc
Exemplo n.º 3
0
def dynamicRouting_v1(votes,
                      num_routing=3,
                      use_bias=True,
                      leaky=True):
    """ Dynamic routing algorithm.

    See [Sabour et al., 2017](https://arxiv.org/abs/1710.09829).

    Axis convention used throughout (counted from the end of `votes`):
    -4 is the input-capsule axis, -3 the output-capsule axis, and -2/-1 are
    the pose dimensions of each output capsule.

    Args:
        votes: A 5-D or 7-D tensor with shape [batch_size, ..., in_channels/num_inputs, num_outputs] + out_caps_dims.
        num_routing: Integer, number of routing iterations. Must be >= 1.
        use_bias: Boolean, whether the layer uses a bias.
        leaky: Boolean, whether the algorithm uses leaky routing.

    Returns:
        poses: A 4-D or 6-D tensor.
        probs: A 2-D or 4-D tensor.

    Raises:
        ValueError: If `num_routing` is smaller than 1 (no pose would be
            computed).
    """
    if num_routing < 1:
        raise ValueError("num_routing must be at least 1, got %r" % (num_routing,))

    vote_shape = cl.shape(votes)
    # One logit per (input capsule, output capsule) pair; the pose axes are
    # kept as size 1 so the logits broadcast against the votes.
    logit_shape = vote_shape[:-2] + [1, 1]
    logits = tf.fill(logit_shape, 0.0)
    squash_on = -2 if vote_shape[-1] == 1 else [-2, -1]

    biases = None
    if use_bias:
        bias_shape = [1] * (len(vote_shape) - 3) + vote_shape[-3:]
        biases = tf.get_variable("biases",
                                 bias_shape,
                                 initializer=tf.constant_initializer(0.1),
                                 dtype=tf.float32)

    # Gradients only flow through the votes in the last iteration; earlier
    # iterations use the gradient-stopped copy to refine the logits.
    vote_stopped = tf.stop_gradient(votes, name="stop_gradient")
    for i in range(num_routing):
        with tf.variable_scope("iter_" + str(i)):
            if leaky:
                route = _leaky_routing(logits)
            else:
                route = cl.softmax(logits, axis=-3)

            is_last = i == num_routing - 1
            routed_votes = votes if is_last else vote_stopped

            # Weighted sum over the input-capsule axis. Using -4 (rather than
            # a hard-coded 1) keeps this correct for 7-D votes as well.
            preactivate = cl.reduce_sum(tf.multiply(route, routed_votes),
                                        axis=-4,
                                        keepdims=True)
            if use_bias:
                preactivate = preactivate + biases
            poses = cl.ops.squash(preactivate, axis=squash_on)

            if not is_last:
                # Agreement is the dot product between each vote and the
                # output pose, i.e. a sum over the pose axes. The result has
                # the same shape as `logits`.
                agreement = cl.reduce_sum(vote_stopped * poses,
                                          axis=-1,
                                          keepdims=True)
                agreement = cl.reduce_sum(agreement, axis=-2, keepdims=True)
                logits += agreement

    poses = tf.squeeze(poses, axis=-4)
    probs = tf.norm(poses, axis=(-2, -1))
    return poses, probs
Exemplo n.º 4
0
def _leaky_routing(logits):
    """Softmax over axis -3 with one extra all-zero "leak" logit.

    A zero logit of shape `[..., 1, 1, 1]` is prepended along axis -3, the
    softmax is taken over that axis, and the leak slice is then discarded.
    The returned coefficients therefore have the same shape as `logits` but
    do not necessarily sum to one along axis -3.

    Args:
        logits: Routing logits. The concat requires its last two dimensions
            to be of size 1.

    Returns:
        The softmax output with the leading leak entry along axis -3 removed.
    """
    shape = cl.shape(logits)
    num_outputs = shape[-3]

    leak_logit = tf.zeros(shape[:-3] + [1, 1, 1])
    padded = tf.concat([leak_logit, logits], axis=-3)
    coefficients = cl.softmax(padded, axis=-3)

    # First slice is the leak, the remainder lines up with the input logits.
    _leak, routed = tf.split(coefficients, [1, num_outputs], axis=-3)
    return routed
Exemplo n.º 5
0
    def create_network(self, inputs, labels):
        """Build the capsule network graph, its decoder and its accuracy op.

        Besides returning the class capsules, this stores `raw_imgs`,
        `labels`, `poses`, `probs`, `labels_one_hoted`, `recon_imgs` and
        `accuracy` on the instance.

        Args:
            inputs: Tensor or array with shape [batch_size, height, width, channels] or [batch_size, height * width * channels].
            labels: Tensor or array with shape [batch_size].

        Returns:
            poses: [batch_size, num_label, 16, 1].
            probs: Tensor with shape [batch_size, num_label], the probability of entity presence.
        """
        self.raw_imgs = inputs
        self.labels = labels

        with tf.variable_scope('Conv1_layer'):
            # Plain 9x9 convolution over the images restored to NHWC layout.
            images = tf.reshape(
                self.raw_imgs,
                shape=[-1, self.height, self.width, self.channels])
            conv1 = tf.layers.conv2d(
                images,
                filters=256,
                kernel_size=9,
                strides=1,
                padding='VALID',
                activation=tf.nn.relu)

        with tf.variable_scope('PrimaryCaps_layer'):
            primaryCaps, activation = cl.layers.primaryCaps(
                conv1,
                filters=64,  # MNIST 32
                kernel_size=9,
                strides=2,
                out_caps_dims=[8, 1],
                method="norm")

        with tf.variable_scope('DigitCaps_layer'):
            routing_method = "EMRouting"
            # Flatten dims 1..3 of the primary capsules into a single axis.
            caps_count = np.prod(cl.shape(primaryCaps)[1:4])
            primaryCaps = tf.reshape(primaryCaps,
                                     shape=[-1, caps_count, 8, 1])
            activation = tf.reshape(activation, shape=[-1, caps_count])
            self.poses, self.probs = cl.layers.dense(
                primaryCaps,
                activation,
                num_outputs=self.num_label,
                out_caps_dims=[16, 1],
                routing_method=routing_method)
            cl.summary.histogram(
                'activation', self.probs, verbose=cfg.summary_verbose)

        # Decoder: reconstruct the inputs with three fully connected layers,
        # fed only with the capsule selected by the ground-truth label.
        with tf.variable_scope('Decoder'):
            one_hot = tf.one_hot(self.labels,
                                 depth=self.num_label,
                                 axis=-1,
                                 dtype=tf.float32)
            self.labels_one_hoted = tf.reshape(
                one_hot, (-1, self.num_label, 1, 1))
            # Zero out every capsule except the labelled one.
            masked_caps = tf.multiply(self.poses, self.labels_one_hoted)
            flat_dim = np.prod(masked_caps.get_shape().as_list()[1:])
            decoder_in = tf.reshape(masked_caps, shape=(-1, flat_dim))

            hidden1 = tf.layers.dense(
                decoder_in, units=512, activation=tf.nn.relu)
            hidden2 = tf.layers.dense(
                hidden1, units=1024, activation=tf.nn.relu)
            pixel_count = self.height * self.width * self.channels
            self.recon_imgs = tf.layers.dense(
                hidden2, units=pixel_count, activation=tf.sigmoid)

            recon_imgs = tf.reshape(
                self.recon_imgs,
                shape=[-1, self.height, self.width, self.channels])
            cl.summary.image(
                'reconstruction_img', recon_imgs, verbose=cfg.summary_verbose)

        with tf.variable_scope('accuracy'):
            predicted = tf.to_int32(
                tf.argmax(cl.softmax(self.probs, axis=1), axis=1))
            hits = tf.equal(tf.to_int32(self.labels), predicted)
            num_correct = tf.reduce_sum(tf.cast(hits, tf.float32))
            num_rows = tf.cast(tf.shape(self.probs)[0], tf.float32)
            self.accuracy = tf.reduce_mean(num_correct / num_rows)
            cl.summary.scalar(
                'accuracy', self.accuracy, verbose=cfg.summary_verbose)

        return self.poses, self.probs
Exemplo n.º 6
0
    def create_network(self, inputs, labels):
        """Build the convolutional capsule network, decoder and accuracy op.

        Besides returning the class capsules, this stores `raw_imgs`,
        `labels`, `poses`, `probs`, `labels_one_hoted`, `recon_imgs` and
        `accuracy` on the instance, and emits a "probs" scalar summary with
        the mean of the per-layer mean activations.

        Args:
            inputs: Tensor or array with shape [batch_size, height, width, channels] or [batch_size, height * width * channels].
            labels: Tensor or array with shape [batch_size].

        Returns:
            poses: Class-capsule poses. NOTE(review): presumably
                [batch_size, num_label, 4, 4] given out_caps_dims=[4, 4];
                confirm against cl.layers.dense.
            probs: Tensor with shape [batch_size, num_label], the probability of entity presence.
        """
        self.raw_imgs = inputs
        self.labels = labels
        layer_means = []

        images = tf.reshape(
            inputs, shape=[-1, self.height, self.width, self.channels])
        conv1 = tf.layers.conv2d(
            images,
            filters=32,
            kernel_size=5,
            strides=2,
            padding='VALID',
            activation=tf.nn.relu,
            name="Conv1_layer")

        convCaps, activation = cl.layers.primaryCaps(
            conv1,
            filters=32,
            kernel_size=1,
            strides=1,
            out_caps_dims=[4, 4],
            method="logistic",
            name="PrimaryCaps_layer")
        layer_means.append(tf.reduce_mean(activation))

        routing_method = 'EMRouting'
        # Two stacked convolutional capsule layers that differ only in their
        # stride and name.
        conv_caps_specs = (
            ("ConvCaps1_layer", (2, 2)),
            ("ConvCaps2_layer", (1, 1)),
        )
        for layer_name, layer_strides in conv_caps_specs:
            convCaps, activation = cl.layers.conv2d(
                convCaps,
                activation,
                filters=32,
                out_caps_dims=[4, 4],
                kernel_size=(3, 3),
                strides=layer_strides,
                routing_method=routing_method,
                name=layer_name)
            layer_means.append(tf.reduce_mean(activation))

        self.poses, self.probs = cl.layers.dense(
            convCaps,
            activation,
            num_outputs=self.num_label,
            out_caps_dims=[4, 4],
            routing_method=routing_method,
            coordinate_addition=True,
            name="ClassCaps_layer")
        layer_means.append(tf.reduce_mean(self.probs))
        tf.summary.scalar("probs", tf.reduce_mean(layer_means))

        # Decoder: reconstruct the inputs with three fully connected layers,
        # fed only with the capsule selected by the ground-truth label.
        with tf.variable_scope('Decoder'):
            one_hot = tf.one_hot(self.labels,
                                 depth=self.num_label,
                                 axis=-1,
                                 dtype=tf.float32)
            self.labels_one_hoted = tf.reshape(
                one_hot, (-1, self.num_label, 1, 1))
            # Zero out every capsule except the labelled one.
            masked_caps = tf.multiply(self.poses, self.labels_one_hoted)
            flat_dim = np.prod(masked_caps.get_shape().as_list()[1:])
            decoder_in = tf.reshape(masked_caps, shape=(-1, flat_dim))

            hidden1 = tf.layers.dense(
                decoder_in, units=512, activation=tf.nn.relu)
            hidden2 = tf.layers.dense(
                hidden1, units=1024, activation=tf.nn.relu)
            pixel_count = self.height * self.width * self.channels
            self.recon_imgs = tf.layers.dense(
                hidden2, units=pixel_count, activation=tf.sigmoid)

            recon_imgs = tf.reshape(
                self.recon_imgs,
                shape=[-1, self.height, self.width, self.channels])
            cl.summary.image(
                'reconstruction_img', recon_imgs, verbose=cfg.summary_verbose)

        with tf.variable_scope('accuracy'):
            cl.summary.histogram(
                'activation',
                tf.nn.softmax(self.probs, 1),
                verbose=cfg.summary_verbose)
            predicted = tf.to_int32(
                tf.argmax(cl.softmax(self.probs, axis=1), axis=1))
            hits = tf.equal(tf.to_int32(self.labels), predicted)
            num_correct = tf.reduce_sum(tf.cast(hits, tf.float32))
            num_rows = tf.cast(tf.shape(self.probs)[0], tf.float32)
            self.accuracy = tf.reduce_mean(num_correct / num_rows)
            cl.summary.scalar(
                'accuracy', self.accuracy, verbose=cfg.summary_verbose)

        return self.poses, self.probs