# Assumed module-level imports for all snippets below:
# from keras import backend as K
# import tensorflow as tf

def call(self, u_vecs):
    # Compute the prediction vectors u_hat = W * u for every
    # (input capsule, output capsule) pair.
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
    # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

    # Routing logits, initialized to zero.
    b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
    for i in range(self.routings):
        # Softmax over the num_capsule axis: permute it to the last
        # position, apply K.softmax, then permute both tensors back.
        b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
        c = K.softmax(b)
        c = K.permute_dimensions(c, (0, 2, 1))
        b = K.permute_dimensions(b, (0, 2, 1))
        # Weighted sum of prediction vectors, passed through the squashing activation.
        outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
        if i < self.routings - 1:
            # Agreement between outputs and predictions becomes the new logits.
            b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

    return outputs
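# The `self.activation` above is typically a squash nonlinearity. A minimal
# sketch of the standard squash from Sabour et al. (2017),
# squash(s) = (|s|^2 / (1 + |s|^2)) * (s / |s|); the exact variant this layer
# was configured with may differ (an assumption, not part of the original code):
def squash(vectors, axis=-1):
    # Scale each vector so its length lies in [0, 1) while keeping its direction.
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors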
def routing(self, u_vecs):
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
    # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

    # Routing logits, initialized to zero.
    b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
    for i in range(self.routings):
        c = K.softmax(b)                        # coupling coefficients
        s = K.batch_dot(c, u_hat_vecs, [2, 2])  # weighted sum of predictions
        v = self.squash(s)                      # squashed capsule outputs
        if i < self.routings - 1:
            # Accumulate the agreement into the routing logits.
            b = b + K.batch_dot(v, u_hat_vecs, [2, 3])

    poses = v
    # Activation of each capsule = length of its pose vector.
    activations = K.sqrt(K.sum(K.square(poses), 2))
    return poses, activations
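# The returned `activations` (capsule lengths) are usually trained with the
# margin loss of Sabour et al. (2017). A sketch, assuming one capsule per
# class and one-hot y_true (a standard companion, not part of the original code):
def margin_loss(y_true, y_pred):
    # y_pred: capsule activations in [0, 1), shape [batch, num_classes]
    L = y_true * K.square(K.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))
    return K.mean(K.sum(L, axis=1))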
def call(self, inputs, training=None):
    # inputs.shape=[None, input_num_capsule, input_dim_capsule]
    # inputs_expand.shape=[None, 1, input_num_capsule, input_dim_capsule]
    inputs_expand = K.expand_dims(inputs, 1)

    # Replicate the num_capsule dimension to prepare for multiplication by W.
    # inputs_tiled.shape=[None, num_capsule, input_num_capsule, input_dim_capsule]
    inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1])

    # Compute `inputs * W` by scanning inputs_tiled on dimension 0.
    # x.shape=[num_capsule, input_num_capsule, input_dim_capsule]
    # W.shape=[num_capsule, input_num_capsule, dim_capsule, input_dim_capsule]
    # Regard the first two dimensions as the `batch` dimension,
    # then matmul: [input_dim_capsule] x [dim_capsule, input_dim_capsule]^T -> [dim_capsule].
    # inputs_hat.shape = [None, num_capsule, input_num_capsule, dim_capsule]
    inputs_hat = K.map_fn(lambda x: K.batch_dot(x, self.W, [2, 3]), elems=inputs_tiled)

    # Begin: Routing algorithm ----------------------------------------------#
    # The prior for the coupling coefficients, initialized as zeros.
    # b.shape = [None, self.num_capsule, self.input_num_capsule]
    b = tf.zeros(shape=[K.shape(inputs_hat)[0], self.num_capsule, self.input_num_capsule])

    assert self.routings > 0, 'The routings should be > 0.'
    for i in range(self.routings):
        # c.shape=[batch_size, num_capsule, input_num_capsule]
        c = tf.nn.softmax(b, axis=1)  # `axis` replaces the deprecated `dim` argument

        # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
        # The first two dimensions as the `batch` dimension,
        # then matmul: [input_num_capsule] x [input_num_capsule, dim_capsule] -> [dim_capsule].
        # outputs.shape=[None, num_capsule, dim_capsule]
        outputs = squash(K.batch_dot(c, inputs_hat, [2, 2]))  # [None, 10, 16]

        if i < self.routings - 1:
            # outputs.shape = [None, num_capsule, dim_capsule]
            # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule]
            # The first two dimensions as the `batch` dimension,
            # then matmul: [dim_capsule] x [input_num_capsule, dim_capsule]^T -> [input_num_capsule].
            # b.shape=[batch_size, num_capsule, input_num_capsule]
            b += K.batch_dot(outputs, inputs_hat, [2, 3])
    # End: Routing algorithm ------------------------------------------------#

    return outputs
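# The shape comments above imply a transformation tensor
# W.shape = [num_capsule, input_num_capsule, dim_capsule, input_dim_capsule].
# A sketch of the `build` method that would create it (hypothetical; the
# initializer and weight name may differ in the original layer):
def build(self, input_shape):
    # input_shape = [None, input_num_capsule, input_dim_capsule]
    self.input_num_capsule = input_shape[1]
    self.input_dim_capsule = input_shape[2]
    self.W = self.add_weight(
        shape=[self.num_capsule, self.input_num_capsule,
               self.dim_capsule, self.input_dim_capsule],
        initializer='glorot_uniform', name='W')
    self.built = True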
def reed_soft_loss(y_true, y_pred):
    '''Expects a binary class matrix instead of a vector of scalar classes.'''
    # Per-sample entropy of the predictions: -sum_k p_k * log(p_k).
    return -K.batch_dot(y_pred, K.log(y_pred + 1e-8), axes=(1, 1))
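# If this is the consistency term of Reed et al.'s (2014) soft bootstrapping,
# the full loss blends it with crossentropy as
# L = -sum_k (beta*t_k + (1-beta)*p_k) * log(p_k). A sketch with beta=0.95
# as in the paper (the function name below is illustrative, not from the
# original code):
def soft_bootstrap_loss(y_true, y_pred, beta=0.95):
    log_p = K.log(y_pred + 1e-8)
    cross_entropy = -K.batch_dot(y_true, log_p, axes=(1, 1))
    return beta * cross_entropy + (1. - beta) * reed_soft_loss(y_true, y_pred)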