Example #1
    def routing(self, x, b_IJ, W, batch_size, routing_iter):
        # x: [batch, 256, 6, 6] -> tiled over the 10 digit capsules and
        # reshaped into [batch, 1152, 10, 8, 1] prediction inputs
        x1 = x.view(batch_size, 256, 1, 6, 6)
        x_tile = x1.repeat(1, 1, 10, 1, 1)
        x_view = x_tile.view(batch_size, 1152, 10, 8, 1)
        # W repeated over the batch: [batch, 1152, 10, 16, 8]
        stride_i = W.repeat(batch_size, 1, 1, 1, 1)
        stride_j = stride_i.view(batch_size, 1152, 10, 16, 8)
        # u_hat = W @ x: [batch, 1152, 10, 16, 1]
        dot_op = torch.matmul(stride_j, x_view)
        # detached copy used in the iterations that only update b_IJ
        dot_op_stopped = Variable(dot_op.data.clone(), requires_grad=False)

        for r_iter in range(routing_iter):
            id_capsule = F.softmax(b_IJ, dim=2)
            if r_iter == routing_iter - 1:
                route_I = torch.mul(id_capsule, dot_op)
                route_I_sum = torch.sum(route_I, dim=1, keepdim=True) + self.bias
                V_J = squash(route_I_sum, self.epsilon)
            if r_iter < routing_iter - 1:

                dot_op_stopped_tmp = dot_op_stopped.data.numpy()
                dot_op_stopped_tmp = np.reshape(dot_op_stopped_tmp, (batch_size, 1152, 10, 16, 1))
                id_capsule_tmp = id_capsule.data.numpy()
                route_I_tmp = id_capsule_tmp * dot_op_stopped_tmp
                route_I_tmp_sum = np.sum(route_I_tmp, axis=1, keepdims=True) + self.bias.data.numpy()
                V_J_tmp = squash(torch.Tensor(route_I_tmp_sum), self.epsilon)

                V_J_tmp_tiled = np.tile(V_J_tmp.numpy(), (1, 1152, 1, 1, 1))
                dot_op_stopped_tmp = np.reshape(dot_op_stopped_tmp, (batch_size, 1152, 10, 1, 16))

                u_produce_v = np.matmul(dot_op_stopped_tmp, V_J_tmp_tiled)

                b_IJ.data += torch.Tensor(u_produce_v)

        return V_J
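The squash nonlinearity itself is not shown in any of these examples. Below is a minimal PyTorch sketch compatible with the call squash(route_I_sum, self.epsilon) above, following the standard formulation from Sabour et al. (2017); the epsilon argument is assumed to be a small constant that protects the norm against division by zero.

import torch

def squash(s, epsilon=1e-9, dim=-2):
    # v = (|s|^2 / (1 + |s|^2)) * s / |s|, applied along the capsule-vector
    # dimension (dim=-2 here because the vectors above have shape [..., 16, 1]).
    squared_norm = (s ** 2).sum(dim=dim, keepdim=True)
    scale = squared_norm / (1.0 + squared_norm)
    return scale * s / torch.sqrt(squared_norm + epsilon)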
Example #2
    def forward(self, x):
        # x.size=[batch, in_num_caps, in_dim_caps]
        # expanded to    [batch, 1,            in_num_caps, in_dim_caps,  1]
        # weight.size   =[       out_num_caps, in_num_caps, out_dim_caps, in_dim_caps]
        # torch.matmul: [out_dim_caps, in_dim_caps] x [in_dim_caps, 1] -> [out_dim_caps, 1]
        # => x_hat.size =[batch, out_num_caps, in_num_caps, out_dim_caps]
        x_hat = torch.squeeze(torch.matmul(self.weight, x[:, None, :, :,
                                                          None]),
                              dim=-1)

        # In forward pass, `x_hat_detached` = `x_hat`;
        # In backward, no gradient can flow from `x_hat_detached` back to `x_hat`.
        x_hat_detached = x_hat.detach()

        # The prior for coupling coefficient, initialized as zeros.
        # b.size = [batch, out_num_caps, in_num_caps]
        if self.USE_CUDA:
            b = Variable(
                torch.zeros(x.size(0), self.out_num_caps,
                            self.in_num_caps)).cuda()
        else:
            b = Variable(
                torch.zeros(x.size(0), self.out_num_caps, self.in_num_caps))

        assert self.routings > 0, 'The \'routings\' should be > 0.'
        for i in range(self.routings):
            # c.size = [batch, out_num_caps, in_num_caps]
            c = F.softmax(b, dim=1)

            # At last iteration, use `x_hat` to compute `outputs` in order to backpropagate gradient
            if i == self.routings - 1:
                # c.size expanded to [batch, out_num_caps, in_num_caps, 1           ]
                # x_hat.size     =   [batch, out_num_caps, in_num_caps, out_dim_caps]
                # => outputs.size=   [batch, out_num_caps, 1,           out_dim_caps]
                outputs = utils.squash(
                    torch.sum(c[:, :, :, None] * x_hat, dim=-2, keepdim=True))
                # outputs = squash(torch.matmul(c[:, :, None, :], x_hat))  # alternative way
            else:  # Otherwise, use `x_hat_detached` to update `b`. No gradients flow on this path.
                outputs = utils.squash(
                    torch.sum(c[:, :, :, None] * x_hat_detached,
                              dim=-2,
                              keepdim=True))
                # outputs = squash(torch.matmul(c[:, :, None, :], x_hat_detached))  # alternative way

                # outputs.size       =[batch, out_num_caps, 1,           out_dim_caps]
                # x_hat_detached.size=[batch, out_num_caps, in_num_caps, out_dim_caps]
                # => b.size          =[batch, out_num_caps, in_num_caps]
                b = b + torch.sum(outputs * x_hat_detached, dim=-1)

        return torch.squeeze(outputs, dim=-2)
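The same routing loop, stripped of the surrounding class, can be sketched as a standalone function. This is a distilled sketch of the pattern above, not this repository's API; the name dynamic_routing is illustrative, and it assumes prediction vectors x_hat of shape [batch, out_num_caps, in_num_caps, out_dim_caps] and a squash like the one sketched earlier (called here with dim=-1).

import torch
import torch.nn.functional as F

def dynamic_routing(x_hat, routings=3):
    # x_hat: [batch, out_caps, in_caps, out_dim] prediction vectors (u_hat).
    b = torch.zeros(x_hat.size(0), x_hat.size(1), x_hat.size(2),
                    device=x_hat.device)
    x_hat_detached = x_hat.detach()
    for i in range(routings):
        c = F.softmax(b, dim=1)  # coupling coefficients
        # Only the last iteration lets gradients flow through x_hat.
        u = x_hat if i == routings - 1 else x_hat_detached
        v = squash(torch.sum(c[:, :, :, None] * u, dim=-2, keepdim=True),
                   dim=-1)
        if i != routings - 1:
            # Agreement update: b_ij += v_j . u_hat_j|i
            b = b + torch.sum(v * x_hat_detached, dim=-1)
    return v.squeeze(-2)  # [batch, out_caps, out_dim]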
Example #3
    def routing(self, x):

        # x : [None, 1152, 10, 16, 1]
        input_shape = tf.shape(x)

        # initialize b to zero
        # b : [None, 1152, 10, 1]
        b = tf.zeros((input_shape[0], input_shape[1], self.num_capsule, 1))

        # routing by agreement
        for _ in range(self.routing_iter):

            #normalize b so it sums to 1 for each capsule of primary layer
            # c : [None, 1152, 10, 1]
            c = tf.nn.softmax(b, axis=2)

            # compute mean capsule
            #s : [None, 10, 16, 1]
            s = tf.reduce_sum(tf.multiply(x, c), axis=1, keepdims=True)

            # normalize capsule so its length is < 1
            #v : [None, 10, 16, 1]
            v = squash(s, axis=2)

            # update b using the agreement between candidate capsules and the computed digit capsules
            b = b + tf.reduce_sum(tf.multiply(x, v), axis=-1, keepdims=True)

        return b
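The TensorFlow examples call squash with an explicit axis argument. A minimal sketch compatible with those calls (assuming TensorFlow 2.x, or a 1.x release that accepts the keepdims keyword):

import tensorflow as tf

def squash(s, axis=-1, epsilon=1e-9):
    # v = (|s|^2 / (1 + |s|^2)) * s / |s|, computed along `axis`.
    squared_norm = tf.reduce_sum(tf.square(s), axis=axis, keepdims=True)
    scale = squared_norm / (1.0 + squared_norm)
    return scale * s / tf.sqrt(squared_norm + epsilon)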
Example #4
 def update(self):
     state = self._state
     decay = -self.decay_rate * state
     self._I_syn = np.dot(self._weights, state)
     state = state + self.tau * (self._bias + decay + self._I_syn)
     state = squash(state)
     self._state = state
Example #5
    def forward(self, x):
        batch_size = x.size(0)
        # x -> [batch_size, primary_capsule_num, digit_channels, primary_vec_length, 1]
        x = torch.stack([x] * self.digit_channels, dim=2)
        # W -> [batch_size, primary_capsule_num, digit_channels, digit_vec_length, primary_vec_length]
        W = torch.stack([self.W]*batch_size, dim=0)
        # b_ij is re-initialized on every forward pass
        b_ij = Variable(torch.zeros(batch_size, self.primary_capsule_num, self.digit_channels, 1, 1)).cuda()

        # u_hat -> [batch_size, primary_capsule_num, digit_channels, digit_vec_length, 1]
        u_hat = torch.matmul(W, x)
        # In dynamic routing, b_ij is updated over 3 iterations. In the first two iterations v_j is computed
        # only to update b_ij; since b_ij is still in an unstable state, gradients should not be backpropagated
        # through these iterations, so u_hat_stopped is used to detach them. Several implementations ignore this.
        u_hat_stopped = u_hat.detach()

        num_iterations = 3
        for iteration in range(num_iterations):
            # c_ij -> [batch_size, primary_capsule_num, digit_channels, 1, 1]
            c_ij = softmax(b_ij, dim=2)  # softmax over the digit_channels dimension

            # s_j -> [batch_size, digit_channels, digit_vec_length, 1]
            if iteration == num_iterations-1:
                s_j = (c_ij * u_hat).sum(dim=1)
            else:
                s_j = (c_ij * u_hat_stopped).sum(dim=1)
            
            # v_j -> [batch_size, digit_channels, digit_vec_length, 1]
            v_j = squash(s_j, 2)

            if iteration != num_iterations -1:
                # update -> [batch_size, primary_capsule_num, digit_channels, 1, 1]
                update = torch.matmul(torch.transpose(u_hat, -1, -2) , torch.stack([v_j]*self.primary_capsule_num, dim=1))

                b_ij = b_ij + update

        return v_j
Example #6
    def _capsule(self, input, i_c, o_c, idx):
        """
        compute a capsule,
        conv op with kernel: 9x9, stride: 2,
        padding: VALID, output channels: 8 per capsule.
        :arg
            input: input for computing capsule, shape: [None, w, h, c]
            i_c: input channels
            o_c: output channels
            idx: index of the capsule being created

        :return
            capsule: computed capsule
        """
        with tf.variable_scope('cap_' + str(idx)):
            w = tf.get_variable('w', shape=[9, 9, i_c, o_c], dtype=tf.float32)
            cap = tf.nn.conv2d(input, w, [1, 2, 2, 1],
                               padding='VALID', name='cap_conv')
            if cfg.USE_BIAS:
                b = tf.get_variable('b', shape=[o_c, ], dtype=tf.float32,
                                    initializer=self._b_initializer)
                cap = cap + b
            # cap with shape [None, 6, 6, 8] for mnist dataset
            # use "squash" as its non-linearity.
            capsule = squash(cap)
            
            # capsule with shape: [None, 6, 6, 8]
            # expand the dimensions to [None, 1, 6, 6, 8] for following concat
            capsule = tf.expand_dims(capsule, axis=1)

            # return capsule with shape [None, 1, 6, 6, 8]
            return capsule
Example #7
    def call(self, u, **kwargs):
        # Wij @ ui for every batch and every i, j pair; Shapes are as follows:
        # shape(W) = [incaps(i), outcaps(o), outdim(m), indim(n)]
        # shape(u) = [B(b), incaps(i), indim(n)]
        # shape(uhat) = [B(b), incaps(i), outcaps(o), outdim(m)]
        uhat = tf.einsum('iomn,bin->biom', self.W, u)

        # Prepare logits for routing; shape of logits is [B, incaps, outcaps]
        u_shape = u.get_shape().as_list()
        batch_size = tf.shape(u)[0]
        prev_caps = u_shape[1]
        b = tf.zeros([batch_size, prev_caps, self.n_caps], tf.float32)
        for i in range(self.routing_iters):
            # We compute softmax over outcaps (i.e. c(i,j) = exp(b(i, j)) / sum_k{exp(b(i, k))})
            c = tf.nn.softmax(b, axis=-1)

            # Next, compute s(j) = sum_i{c(i,j)*uhat(j|i)}; shape of s is [b, outcaps, outdim]
            # NOTE: Adding self.bias is an extension not documented in the paper (probably an implementation detail)
            s = tf.einsum('biom,bio->bom', uhat, c) + self.bias

            # Squash s to get v; same shape as s
            v = U.squash(s)

            # If not last iteration add contribution to logits; b(i,j) += v(j) . uhat(j|i)
            if i != self.routing_iters - 1:
                b += tf.einsum('biom,bom->bio', uhat, v)

        return v
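The two einsum contractions above can be checked in isolation with random tensors. A quick sketch; the batch size of 32 and the 1152/10 capsule counts are placeholders, not values taken from this layer:

import tensorflow as tf

W = tf.random.normal([1152, 10, 16, 8])   # [incaps, outcaps, outdim, indim]
u = tf.random.normal([32, 1152, 8])       # [batch, incaps, indim]
uhat = tf.einsum('iomn,bin->biom', W, u)  # -> [32, 1152, 10, 16]
c = tf.nn.softmax(tf.zeros([32, 1152, 10]), axis=-1)  # [batch, incaps, outcaps]
s = tf.einsum('biom,bio->bom', uhat, c)   # -> [32, 10, 16]
print(uhat.shape, s.shape)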
Example #8
    def no_routing(self, x):
        """
        Get output for each unit.
        A unit has batch, channels, height, width.
        An example of a unit output shape is [128, 32, 6, 6]

        :return: vector output of capsule j
        """
        # Create 8 convolutional units.
        # A convolutional unit uses a normal convolutional layer with a non-linearity (squash).
        unit = [conv_unit(x) for conv_unit in self.conv_units]

        # Stack all unit outputs.
        # Stacked output of the 8 units has shape: [128, 8, 32, 6, 6]
        unit = torch.stack(unit, dim=1)

        batch_size = x.size(0)

        # Flatten the 32 of 6x6 grid into 1152.
        # Shape: [128, 8, 1152]
        unit = unit.view(batch_size, self.num_unit, -1)

        # Add non-linearity
        # Return squashed outputs of shape: [128, 8, 1152]
        return utils.squash(
            unit, dim=2)  # dim 2 is the third dim (1152D array) in our tensor
Example #9
    def __call__(self, inputs, kernel=None, strides=None):
        """
        Here the logic of capsule layers is applied.

        :param inputs: 4-D input tensor following `NHWC` format [batch, height, width, out_channels]
        :param kernel: 4-D tensor having shape: [height, width, in_channels, out_channels]
        :param strides: An integer, would be replicated for both H and W
        """
        if kernel is not None:
            # PrimaryCaps layer (low-level capsules)
            conv_out = tf.nn.conv2d(input=inputs,
                                    filter=kernel,
                                    strides=strides,
                                    padding='VALID',
                                    name='conv_out')
            conv_out = tf.nn.relu(conv_out)
            capsules = tf.reshape(
                conv_out,
                shape=[tf.shape(inputs)[0], -1, self.primary_caps_vec_len],
                name='capsules')  # (B,1152,8)
            squashed_capsules = squash(capsules)
            return squashed_capsules
        elif kernel is None and strides is None:
            # DigitCaps layer
            # Here we need to apply the routing mechanism
            return self.routing(inputs)
        else:
            raise ValueError(
                'kernel and strides params should be either both None '
                '(for DigitCaps) or both not None (for PrimaryCaps)')
Example #10
    def forward(self, x):
        # input x, shape = [batch_size, in_features, in_dim]
        # = [batch_size, 1152, 8]
        # (batch, in_features, in_dim) -> (batch, in_features, out_features, 1, in_dim)
        x = torch.stack([x] * self.out_features, dim=2).unsqueeze(3)

        W = torch.cat([self.W] * conf.batch_size, dim=0)
        # u_hat shape -> (batch_size, in_features, out_features, 1, out_dim) = (batch, 1152, 10, 1, 16)
        u_hat = torch.matmul(x, W)
        # b is used to generate the coupling weights c; shape -> [1, 1152, 10, 1]
        b = torch.zeros([1, self.in_features, self.out_features, 1]).double()
        if self.cuda:
            b = b.cuda()
        b = Variable(b)
        for i in range(3):
            c = F.softmax(b, dim=2)
            #c shape->[batch_size,1152,10,1,1]
            c = torch.cat([c] * conf.batch_size, dim=0).unsqueeze(dim=4)
            #s shape->[batch_size,1,10,1,16]
            s = (u_hat * c).sum(dim=1, keepdim=True)
            #output shape->[batch_size,1,10,1,16]
            v = utils.squash(s, dim=-1)
            v_1 = torch.cat([v] * self.in_features, dim=1)
            #(batch,1152,10,1,16)matmul(batch,1152,10,16,1)->(batch,1152,10,1,1)
            #squeeze
            #mean->(1,1152,10,1)
            #print u_hat.shape,v_1.shape
            update_b = torch.matmul(u_hat, v_1.transpose(
                3, 4)).squeeze(dim=4).mean(dim=0, keepdim=True)
            b = b + update_b
        return v.squeeze(1).transpose(2, 3)
Example #11
def get_primary_capsules(X):
    # print("Primary Capsules")
    # printShape(X)  # (?, 28, 28, 1)
    caps1_n_maps = 32
    caps1_n_dims = 8

    conv1 = tf.layers.conv2d(X, filters=256, kernel_size=9, strides=1,
                             padding="valid", activation=tf.nn.relu)
    # printShape(conv1)  # (?, 20, 20, 256)

    # stride of 2!
    conv2_n_filters = caps1_n_maps * caps1_n_dims
    conv2 = tf.layers.conv2d(conv1, filters=conv2_n_filters, kernel_size=9, strides=2,
                             padding="valid", activation=tf.nn.relu)
    # printShape(conv2)  # (?, 6, 6, 256)

    # what we have: 256 feature maps of 6 x 6 scalar values (total: 9216)
    # what we want: 32 maps of 6x6 vectors (8 dimensions a vector) (total: 9216)

    # BUT since we are going to be FULLY CONNECTING this to the next layer
    # we can just make it one long array [32 * 6 * 6, 8] = [1152, 8] = 1152 x 8 = 9216
    caps1_n_caps = caps1_n_maps * 6 * 6  # 1152 primary capsules
    caps1_raw = tf.reshape(conv2, [-1, caps1_n_caps, caps1_n_dims])
    # printShape(caps1_raw)  # (?, 1152, 8)

    # squash to keep the vectors under 1
    return squash(caps1_raw)
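A minimal way to exercise this function (a sketch, assuming TensorFlow 1.x graph mode, since the code above uses tf.layers; squash is assumed to be defined as in the other examples):

import tensorflow as tf

X = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name="X")
caps1 = get_primary_capsules(X)
print(caps1.shape)  # expected: (?, 1152, 8)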
Example #12
    def _build_net(self):
        """
        build the graph of the network
        arg:
            self
        return:
            none        
        """
        # reshape for conv ops
        with tf.name_scope('x_reshape'):
            x_image = tf.reshape(self._x, [-1, 28, 28, 1])

        # initial conv1 op
        # 1). conv1 with kernel 9x9, stride 1, output channels 256
        with tf.variable_scope('conv1'):
            # initialize it with the Xavier initializer (for no particular reason).
            w = tf.get_variable('w', shape=[9, 9, 1, 256], dtype=tf.float32,
                                initializer=tf.contrib.layers.xavier_initializer()
                                )
            # conv op
            conv1 = tf.nn.conv2d(x_image, w, [1, 1, 1, 1],
                                 padding='VALID', name='conv1')
            if cfg.USE_BIAS:
                b = tf.get_variable('b', shape=[256, ], dtype=tf.float32,
                                    initializer=self._b_initializer)
                conv1 = tf.nn.relu(conv1 + b)
            else:
                conv1 = tf.nn.relu(conv1)

            # update dimensions of feature map
            self._dim = (self._dim - 9) // 1 + 1
            assert self._dim == 20, "after conv1, dimensions of feature map " \
                                    "should be 20x20"

            # conv1 with shape [None, 20, 20, 256]

        # build up primary capsules
        with tf.variable_scope('PrimaryCaps'):

            # update dim of capsule grid
            self._dim = (self._dim - 9) // 2 + 1
            # number of primary caps: 6x6x32 = 1152
            self._num_caps.append(self._dim ** 2 * cfg.PRIMARY_CAPS_CHANNELS)
            assert self._dim == 6, "dims for primary caps grid should be 6x6."

            # build up PrimaryCaps with 32 channels and 8-D vector
            primary_caps = slim.conv2d(conv1, 32 * 8, 9, 2, padding='VALID', activation_fn=None)
            primary_caps = tf.reshape(primary_caps, [-1, 1, self._num_caps[1], 1, 8])
            primary_caps = squash(primary_caps)

        # dynamic routing
        with tf.variable_scope("digit_caps"):
            self._digit_caps = self._dynamic_routing(primary_caps, 1)

            self._digit_caps_norm = tf.norm(self._digit_caps, ord=2, axis=2,
                                            name='digit_caps_norm')
Example #13
    def forward(self, x):
        """it's necessary to mention that the output of convolution can be relued before sending to next layer, in capsnet paper, nothing has mentioned about this, so we just remind you here that it's a possible option, in the tensorflow implementation of capsnet written by naturomics, relu is used"""
        batch_size = x.size(0)
        conv = self.distributed_conv_fcn(torch.stack([x] * self.primary_vec_length, dim=1))

        # squ -> [batch, primary_capsule_num, primary_vec_length, 1]
        squ = torch.transpose(squash(conv, dim=1).view(batch_size, self.primary_vec_length, -1), 1, 2)
        squ = squ.unsqueeze(3)

        return squ
Example #14
 def call(self, inputs):
     batch_size = tf.shape(inputs)[0]
     conv2d_outputs = super(PrimaryCapsConv2D, self).call(inputs)
     conv2d_outputs_int_shape = conv2d_outputs.get_shape().as_list()
     caps_outputs = conv2d_outputs_int_shape[1] * conv2d_outputs_int_shape[
         2] * self.n_caps
     s = tf.reshape(conv2d_outputs,
                    [batch_size, caps_outputs, self.caps_dim])
     v = U.squash(s)
     return v
Example #15
    def forward(self, x):
        #input x with shape ->[batch_size,in_features,height,width]
        #output with shape->[batch_size,32,6,6]

        x = [self.conv[i](x) for i in range(self.out_dim)]
        #output with shape->[batch_size,8,32,6,6]
        x = torch.stack(x, dim=1)
        #return shape->[batch_size,1152,8]
        x = x.view(x.size(0), self.out_dim, -1).transpose(1, 2)
        #return shape->[batch_size,1152,8]
        x = utils.squash(x, dim=2)
        return x
Example #16
        def body(i, prior, cap_out):
            c = tf.nn.softmax(prior, axis=1)
            c_expand = tf.expand_dims(c, axis=-1)
            s_t = tf.multiply(cap_predictions, c_expand)
            s = tf.reduce_sum(s_t, axis=[2])
            cap_out = squash(s)
            delta_prior = tf.reduce_sum(tf.multiply(tf.expand_dims(cap_out, axis=2),
                                                    cap_predictions),
                                        axis=[-1])
            prior = prior + delta_prior

            return [i - 1, prior, cap_out]
Example #17
    def __call__(self, input):
        '''

        :param input: 4D tensor
        :return: squashed capsule tensor with shape [batch, n_capsules, capsule_length]
        '''
        conv_output = self._get_conv_output(input)
        # flatten
        flatten_caps = tf.reshape(conv_output,
                                  [-1, self.n_capsules, self.capsule_length])

        # squash to keep the vectors under 1
        return squash(flatten_caps)
Example #18
    def routing(self, x):
        batch_size = x.size(0)
        x = x.transpose(1, 2)
        if (not self.high_cap_conv) and (not self.noTM):
            x = torch.stack([x] * self.num_unit, dim=2).unsqueeze(4)
            batch_weight = torch.cat([self.weight] * batch_size, dim=0)
            u_hat = torch.matmul(batch_weight, x)
        elif self.high_cap_conv and (not self.noTM):
            if self.single_conv:
                x = torch.stack([x] * self.num_unit, dim=1).unsqueeze(4)
                x = x.view(batch_size, -1, self.in_unit, 1)
                u_hat = self.weight_conv(x)
                u_hat = (u_hat.view(batch_size, self.num_unit, self.in_channel,
                                    u_hat.size(2),
                                    1)).transpose(1, 2).contiguous()
            else:
                x = torch.stack([x] * self.num_unit, dim=1).unsqueeze(4)
                x = x.view(batch_size, -1, self.in_unit, 1)
                u_hat_1 = self.weight_conv_1(x)
                u_hat_2 = self.weight_conv_2(x)
                u_hat_3 = self.weight_conv_3(x)
                u_hat_4 = self.weight_conv_4(x)
                u_hat = torch.cat((u_hat_1, u_hat_2, u_hat_3, u_hat_4), 2)
                u_hat = (u_hat.view(batch_size, self.num_unit, self.in_channel,
                                    u_hat.size(2),
                                    1)).transpose(1, 2).contiguous()
        elif self.noTM:
            u_hat = x

        if not self.fc:
            b_ij = Variable(torch.zeros(1, self.in_channel, self.num_unit, 1))
            b_ij = b_ij.cuda()
            if self.penalty_attention:
                penalty = torch.cat([self.penalty] * batch_size, dim=0)
            for iteration in range(self.num_routing):
                c_ij = F.softmax(b_ij, dim=2)
                c_ij = torch.cat([c_ij] * batch_size, dim=0).unsqueeze(4)
                s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)
                v_j = utils.squash(s_j, dim=3)

                if self.penalty_attention:
                    v_j = v_j * penalty
                v_j1 = torch.cat([v_j] * self.in_channel, dim=1)
                u_vj1 = torch.matmul(u_hat.transpose(3, 4),
                                     v_j1).squeeze(4).mean(dim=0, keepdim=True)
                b_ij = b_ij + u_vj1
            return v_j.squeeze(1)
        else:
            v = u_hat.view(batch_size, -1)
            v = self.fully_connected(v)
            return v.unsqueeze(2).unsqueeze(3)
Example #19
 def forward(self, x):
     # Shape of x = [128 x 256 x 20 x 20]
     # Apply Convolutions to Input x to generate capsules.
     outputs = [capsule(x) for capsule in self.capsules]
     # Shape of each output in list = [128 x 8 x 6 x 6 x 1]
     outputs = torch.cat(outputs, dim=-1)
     # Shape of outputs = [128 x 8 x 6 x 6 x 32]
     outputs = outputs.view(outputs.shape[0], outputs.shape[1], -1)
     # Shape of outputs = [128 x 8 x 1152]
     outputs = outputs.transpose(1, len(outputs.shape) - 1)
     # Shape of outputs = [128 x 1152 x 8]
     outputs = squash(outputs)
     # Shape of outputs = [128 x 1152 x 8]
     return outputs
Example #20
    def routing(self, x, b_IJ, W, batch_size, routing_iter):
        x1 = x.view(batch_size, 256, 1, 6, 6)
        x_tile = x1.repeat(1, 1, 10, 1, 1)
        x_view = x_tile.view(batch_size, 1152, 10, 8, 1)
        stride_i = W.repeat(batch_size, 1, 1, 1, 1)
        stride_j = stride_i.view(batch_size, 1152, 10, 16, 8)
        dot_op = torch.matmul(stride_j, x_view)
        dot_op_stopped = Variable(dot_op.data.clone(), requires_grad=False)

        for r_iter in range(routing_iter):
            id_capsule = F.softmax(b_IJ, dim=2)
            if r_iter == routing_iter - 1:
                route_I = torch.mul(id_capsule, dot_op)
                route_I_sum = torch.sum(route_I, dim=1,
                                        keepdim=True) + self.bias
                V_J = squash(route_I_sum, self.epsilon)
            if r_iter < routing_iter - 1:

                dot_op_stopped_tmp = dot_op_stopped.data.numpy()
                dot_op_stopped_tmp = np.reshape(dot_op_stopped_tmp,
                                                (batch_size, 1152, 10, 16, 1))
                id_capsule_tmp = id_capsule.data.numpy()
                route_I_tmp = id_capsule_tmp * dot_op_stopped_tmp
                route_I_tmp_sum = np.sum(
                    route_I_tmp, axis=1,
                    keepdims=True) + self.bias.data.numpy()
                V_J_tmp = squash(torch.Tensor(route_I_tmp_sum), self.epsilon)

                V_J_tmp_tiled = np.tile(V_J_tmp.numpy(), (1, 1152, 1, 1, 1))
                dot_op_stopped_tmp = np.reshape(dot_op_stopped_tmp,
                                                (batch_size, 1152, 10, 1, 16))

                u_produce_v = np.matmul(dot_op_stopped_tmp, V_J_tmp_tiled)

                b_IJ.data += torch.Tensor(u_produce_v)

        return V_J
Example #21
    def forward(self, x):
        """ Forward pass

        Args:
            x (FloatTensor): Input image of shape [batch_size, in_channels, height_input, width_input]

        Returns:
            caps_raw (FloatTensor): Primary capsules in grid of shape
                [batch_size, out_channels, height grid, width grid, vec_len].
        """
        features = self.conv(x)
        _, _, h, w = features.shape
        caps_raw = features.contiguous().view(-1, self.out_channels, self.vector_length, h, w)  # [b, c, vec, h, w]
        caps_raw = caps_raw.permute(0, 1, 3, 4, 2)  # [b, c, h, w, vec]

        # squash on the vector dimension
        return squash(caps_raw)
Example #22
    def routing(self, x):
        """
        Routing algorithm for capsule.

        :return: vector output of capsule j
        """
        batch_size = x.size(0)

        x = x.transpose(1, 2)
        x = torch.stack([x] * self.num_unit, dim=2).unsqueeze(4)
        weight = torch.cat([self.weight] * batch_size, dim=0)

        # Transform inputs by weight matrix.
        u_hat = torch.matmul(weight, x)

        # All the routing logits (b_ij in the paper) are initialized to zero.
        b_ij = Variable(torch.zeros(1, self.in_channel, self.num_unit, 1))
        if self.cuda_enabled:
            b_ij = b_ij.cuda()

        # From the "Capsules on MNIST" section of the paper: the sample MNIST
        # test reconstructions come from a CapsNet with 3 routing iterations.
        num_iterations = self.num_routing

        for iteration in range(num_iterations):
            # Routing algorithm

            # Calculate the routing coefficients, also known as coupling coefficients (c_ij),
            # by applying softmax to the routing logits (b_ij) over the output capsules.
            c_ij = F.softmax(b_ij, dim=2)
            c_ij = torch.cat([c_ij] * batch_size, dim=0).unsqueeze(4)

            # Implement equation 2 in the paper.
            # u_hat is weighted inputs
            s_j = (c_ij * u_hat).sum(dim=1, keepdim=True)

            v_j = utils.squash(s_j)

            v_j1 = torch.cat([v_j] * self.in_channel, dim=1)

            u_vj1 = torch.matmul(u_hat.transpose(3, 4), v_j1).squeeze(4).mean(dim=0, keepdim=True)

            # Update routing (b_ij)
            b_ij = b_ij + u_vj1

        return v_j.squeeze(1)
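For reference, "equation 2 in the paper" mentioned in the comments above is the weighted sum of prediction vectors, and the subsequent squash is equation 1 of Sabour et al. (2017):

$$\hat{u}_{j|i} = W_{ij}\,u_i, \qquad s_j = \sum_i c_{ij}\,\hat{u}_{j|i}, \qquad v_j = \frac{\|s_j\|^2}{1+\|s_j\|^2}\,\frac{s_j}{\|s_j\|}$$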
Example #23
    def no_routing(self, x):
        """
        Get output for each unit.
        A unit has batch, channels, height, width.

        :return: vector output of capsule j
        """
        # unit = [self.conv_units[i](x) for i in range(self.num_unit)]
        unit = [self.conv_units[i](x) for i, l in enumerate(self.conv_units)]

        # Stack all unit outputs.
        unit = torch.stack(unit, dim=1)

        # Flatten
        unit = unit.view(x.size(0), self.num_unit, -1)

        # Return squashed outputs.
        return utils.squash(unit)
Example #24
def _routing_round(previous_weights, digit_caps_prediction):
    # print(": routing weights = softmax on previous weights")
    routing_weights = tf.nn.softmax(previous_weights, dim=2)
    # (?, 1152, 10, 1, 1)

    # print(": weighted predictions = routing weights x digit caps prediction")
    weighted_predictions = tf.multiply(routing_weights, digit_caps_prediction)
    # (?, 1152, 10, 16, 1)

    # Q: When getting weighted predictions why is there no bias ?

    # print(": reduce sum of all of them (collapse `rows`)")
    weighted_sum = tf.reduce_sum(weighted_predictions, axis=1, keep_dims=True)
    # (?, 1 , 10, 16, 1)

    # print(": squash to keep below 1")
    round_output = squash(weighted_sum, axis=-2)
    # (?, 1 , 10, 16, 1)
    return round_output
Example #25
 def forward(self, x):
   # Shape of x = [2 x 23936 x 8]
   # Shape of x = [1 x 2 x 23936 x 1 x 8], Shape of Weights = [40 x 1 x 23936 x 8 x 32]
   x_hat = torch.matmul(x[None, :, :, None, :], self.weights[:, None, :, :, :])
   # Shape of x_hat = [40 x 2 x 23936 x 1 x 32]
   # b stores the raw routing logits; the routing weights c are obtained from b via softmax, and b is gradually updated.
   b = torch.zeros(*x_hat.shape).to(device)
   for i in range(self.routing_iterations):
     # Routing weights for all capsules of layer l (i.e dim_2 = 23936)
     c = softmax(b, dim=2)
     # Weighted sum of x_hat and routing weights c across all capsules of layer l (i.e. Sum over dim_2 = 23936)
     outputs = squash((x_hat*c).sum(dim=2, keepdim=True))
     # Shape of outputs = [40 x 2 x 1 x 1 x 32]
     if(i != self.routing_iterations-1):
       # Weight Update Step: Update weight b using dot product similarity.
       db = (x_hat * outputs).sum(dim=-1, keepdim=True)
       # Shape of db = [40 x 2 x 23936 x 1 x 1]
       b = b + db
   return outputs
Example #26
    def forward(self, input):
        # TODO: make it work for batch sizes > 1
        _, in_channels, h, w = input.size()
        assert in_channels == self.num_shared * self.in_dim

        input = input.squeeze().view(self.num_shared, -1, self.in_dim)
        groups = input.chunk(self.num_shared)
        u = [group.squeeze().chunk(h * w) for group in groups]
        pred = [
            self.W[i](in_vec.squeeze()) for i, group in enumerate(u)
            for in_vec in group
        ]
        pred = torch.stack([torch.stack(p)
                            for p in pred]).view(self.num_shared * h * w, -1)

        c = F.softmax(self.b)
        s = torch.matmul(c, pred)
        v = squash(s.t())
        self.b = torch.add(self.b, torch.matmul(pred, v))
        return v
Example #27
    def forward(self, pose):
        # x: [b, AC, h, w]
        b, _, h, w = pose.shape
        # [b, ACkk, l]
        pose = F.unfold(pose, self.k, stride=self.stride, padding=self.pad)
        l = pose.shape[-1]
        # [b, A, C, kk, l]
        pose = pose.view(b, self.A, self.C, self.kk, l)
        # [b, l, kk, A, C]
        pose = pose.permute(0, 4, 3, 1, 2).contiguous()
        # [b, l, kkA, C, 1]
        pose = pose.view(b, l, self.kkA, self.C, 1)

        # [b, l, kkA, BD]
        pose_out = torch.matmul(self.W, pose).squeeze(-1)
        # [b, l, kkA, B, D]
        pose_out = pose_out.view(b, l, self.kkA, self.B, self.D)

        # [b, l, kkA, B, 1]
        b = pose.new_zeros(b, l, self.kkA, self.B, 1)
        for i in range(self.iters):
            c = torch.softmax(b, dim=3)

            # [b, l, 1, B, D]
            s = (c * pose_out).sum(dim=2, keepdim=True)
            # [b, l, 1, B, D]
            v = squash(s)

            b = b + (v * pose_out).sum(dim=-1, keepdim=True)

        # [b, l, B, D]
        v = v.squeeze(2)
        # [b, l, BD]
        v = v.view(v.shape[0], l, -1)
        # [b, BD, l]
        v = v.transpose(1, 2).contiguous()

        oh = ow = math.floor(l**(1 / 2))

        # [b, BD, oh, ow]
        return v.view(v.shape[0], -1, oh, ow)
Example #28
    def no_routing(self, x):
        batch_size = x.size(0)
        unit = self.conv_units(x)
        spatial_size = unit.size(2)
        if self.regrouping_type == 'local':
            unit = self.regrouping_local(unit)
        elif self.regrouping_type == 'adjacent':
            unit = self.regrouping_adjacent(unit)
        elif self.regrouping_type == 'shuffle':
            unit = self.regrouping_shuffle(unit, batch_size, spatial_size)

        if self.group_attention:
            attention_weight = self.attention(unit)
            attention_weight = torch.stack([attention_weight] * self.num_unit,
                                           dim=1)
        unit = unit.view(batch_size, self.group_num, self.num_unit,
                         spatial_size, spatial_size).transpose(1,
                                                               2).contiguous()
        # Only apply the attention weighting when group attention is enabled,
        # otherwise attention_weight would be undefined here.
        if self.group_attention:
            unit = attention_weight * unit
        unit = unit.view(batch_size, self.num_unit, -1)
        return utils.squash(unit, dim=2)
Example #29
    def _dynamic_routingV1(self, prior, cap_predictions):
        """
        Perform dynamic routing with a for loop (static implementation).
        :arg
            prior: log prior for scaling, with shape [10, num_caps]
            cap_predictions: predictions from the layer below, with shape [None, 10, num_caps, 16]
        :return
            digit_caps: digit capsules with shape [None, 10, 16]
        """
        prior = tf.expand_dims(prior, 0)
        # prior shape: [1, 10, num_caps]
        for idx in range(cfg.ROUTING_ITERS):
            with tf.name_scope('routing_%s' % idx):
                c = tf.nn.softmax(prior, dim=1)
                # c shape: [1, 10, num_caps]
                c_t = tf.expand_dims(c, axis=-1)
                # c_t shape: [1, 10, num_caps, 1]

                s_t = tf.multiply(cap_predictions, c_t)
                # s_t shape: [None, 10, num_caps, 16]
                # for each capsule in the layer after, add all the weighted capsules to get
                # the capsule input for it.
                # s_j = Sum_i (c_ij u_hat_j|i)
                s = tf.reduce_sum(s_t, axis=[2])

                # s shape: [None, 10, 16]
                digit_caps = squash(s)
                # digit_caps shape: [None, 10, 16]

                # u_hat_j|i * v_j
                delta_prior = tf.reduce_sum(tf.multiply(tf.expand_dims(digit_caps, axis=2),
                                                        cap_predictions),
                                            axis=[-1])
                # delta_prior shape: [None, 10, num_caps]

                prior = prior + delta_prior

        # shape [None, 10, 16]
        return digit_caps
Example #30
    def forward(self, obs, compute_pi=True, compute_log_pi=True):
        mu, log_std = self.trunk(obs).chunk(2, dim=-1)

        # constrain log_std inside [log_std_min, log_std_max]
        log_std = torch.tanh(log_std)
        log_std = self.log_std_min + 0.5 * (self.log_std_max -
                                            self.log_std_min) * (log_std + 1)

        if compute_pi:
            std = log_std.exp()
            noise = torch.randn_like(mu)
            pi = mu + noise * std
        else:
            pi = None

        if compute_log_pi:
            log_pi = gaussian_logprob(noise, log_std)
        else:
            log_pi = None

        mu, pi, log_pi = squash(mu, pi, log_pi)

        return mu, pi, log_pi, log_std
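The squash used here is not the capsule nonlinearity: it is the tanh-squashing step of a SAC-style actor. A sketch of the form it commonly takes in such implementations (an assumption; this repository's own squash may differ): the mean and sample are passed through tanh, and the log-probability receives the change-of-variables correction.

import torch
import torch.nn.functional as F

def squash(mu, pi, log_pi):
    # Tanh-squash the Gaussian mean/sample and correct the log-probability
    # for the change of variables (standard SAC trick; assumed, not from this repo).
    mu = torch.tanh(mu)
    if pi is not None:
        pi = torch.tanh(pi)
    if log_pi is not None:
        log_pi = log_pi - torch.log(F.relu(1 - pi.pow(2)) + 1e-6).sum(-1, keepdim=True)
    return mu, pi, log_pi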
Example #31
    def call(self, x):

        # shape [None, 1152, 8] -> [None, 1152, 1, 8, 1]
        x = tf.expand_dims(x, axis=2)
        x = tf.expand_dims(x, axis=-1)
        # compute a candidate capsule for every pair of primary and digit capsules
        # x : [None, 1152, 1, 8, 1], weight : [1, 1152, 10, 16, 8] -> u : [None, 1152, 10, 16]
        u = tf.squeeze(tf.matmul(self.weight, x), axis=-1)

        # stop the gradients on u to obtain the routing coefficients
        #b : [None, 1152, 10, 1]
        b = self.routing(tf.stop_gradient(u))

        # normalize b so it sums to 1 for each capsule of primary layer
        #c : [None, 1152, 10, 1]
        c = tf.nn.softmax(b, axis=2)

        # compute mean capsule
        #s : [None, 10, 16]
        s = tf.reduce_sum(tf.multiply(u, c), axis=1)

        # normalize capsule so its length is < 1
        return squash(s, axis=-1)