def discriminator(x, scope, actv=actv, use_sigmoid=use_sigmoid, ksize=ksize, reuse=reuse):
    """Score a batch with a capsule-network critic.

    Args:
        x: Input tensor; reshaped to ``[-1, 1, 512, 1024]`` and transposed to
            channels-last before the first convolution.
        scope: Value formatted into the ``'discriminator_{}'`` variable scope.
        actv: Not read by this implementation; kept for call-site compatibility.
        use_sigmoid: Not read by this implementation; kept for compatibility.
        ksize: Not read by this implementation; kept for compatibility.
        reuse: Forwarded to ``tf.variable_scope``.

    Returns:
        A rank-1 tensor holding the length of the single output capsule for
        each sample.
    """
    with tf.variable_scope('discriminator_{}'.format(scope), reuse=reuse):
        # Fix: use the `x` argument. The previous code reshaped a name
        # `inputs` that is not a parameter, so the tensor handed in by the
        # caller was silently ignored (or a NameError was raised).
        output = tf.reshape(x, [-1, 1, 512, 1024])
        output = tf.transpose(output, [0, 2, 3, 1])

        output_conv1 = tf.contrib.layers.conv2d(
            output, num_outputs=256, kernel_size=9, stride=2,
            activation_fn=tf.nn.relu, padding='VALID')

        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        output_caps1 = primaryCaps(output_conv1, kernel_size=9, stride=2,
                                   batchsize=batchsize)

        digitCaps = CapsLayer(num_outputs=1, vec_len=16,
                              with_routing=True, layer_type='FC')
        output_caps2 = digitCaps(output_caps1, batchsize=batchsize)

        # Drop the two singleton axes so that one 16-d capsule per sample
        # remains; the assert pins the resulting static shape.
        output_caps2 = tf.squeeze(output_caps2, axis=1)
        output_caps2 = tf.squeeze(output_caps2, axis=2)
        assert output_caps2.get_shape() == [batchsize, 16]

        # TODO: Try also removing the LeakyReLU from the CapsLayer file
        # TODO: Try also with 10 digitcaps outputs + thresholding
        #       (instead of just 1 output)
        # TODO: Adding batch normalization in capsules (See CapsLayer.py).
        # TODO: Try Changing the critic iteration count.

        # Capsule length; 1e-9 keeps the sqrt differentiable at zero.
        # No softmax is applied: per the original author's note, the squash
        # function already bounds the capsule length to [0, 1].
        output_v_length = tf.sqrt(
            tf.reduce_sum(tf.square(output_caps2), axis=1, keep_dims=True)
            + 1e-9)
        return tf.reshape(output_v_length, [-1])
def discriminator(self, input, feat, reuse):
    """Capsule discriminator whose capsules are weighted by a 10-way feature.

    Args:
        input: Image batch fed to the first convolution.
        feat: Tensor reshaped to ``[-1, 1, 10]`` and matrix-multiplied with
            the output capsules.
        reuse: When truthy, variables of the current variable scope are
            reused instead of created.

    Returns:
        A tuple ``(feat_logits, flat_caps_length)``: a one-unit dense
        projection of the feat-weighted capsule vector, and the capsule
        lengths reshaped to ``[-1, 10]``.
    """
    # The unused local `isTrain = self.isTrain` was removed.
    num_caps = 10   # number of output capsules
    caps_dim = 16   # length of each output capsule vector

    if reuse:
        tf.get_variable_scope().reuse_variables()

    with tf.variable_scope('Conv1_layer'):
        # Expected by the original author: [batch_size, 20, 20, 256]
        conv1 = tf.contrib.layers.conv2d(input, num_outputs=256,
                                         kernel_size=9, stride=1,
                                         padding='VALID')

    # 1st hidden layer
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        # Expected by the original author: [batch_size, 1152, 8, 1]
        caps1 = primaryCaps(conv1, kernel_size=9, stride=2)

    # 2nd hidden layer
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(num_outputs=num_caps, vec_len=caps_dim,
                              with_routing=True, layer_type='FC')
        # Expected by the original author: [batch_size, 10, 16, 1]
        caps2 = digitCaps(caps1)

    # Length of every capsule; 1e-9 guards the sqrt gradient at zero.
    caps_length = tf.sqrt(
        tf.reduce_sum(tf.square(caps2), axis=2, keep_dims=True) + 1e-9)
    flat_caps_length = tf.reshape(caps_length, [-1, num_caps])

    # [-1, 1, 10] x [-1, 10, 16] -> one 16-d vector per sample.
    res_caps = tf.reshape(caps2, [-1, num_caps, caps_dim])
    res_feat = tf.reshape(feat, [-1, 1, num_caps])
    res_flat = tf.matmul(res_feat, res_caps)
    flat = tf.reshape(res_flat, [-1, caps_dim])

    feat_logits = tf.layers.dense(flat, 1, name='feat_logits')
    return feat_logits, flat_caps_length
def capsnet(x, is_training, batch_size, output_size):
    """Build a three-stage capsule network and return the capsule lengths.

    Args:
        x: Input batch for the first convolution.
        is_training: Not read by this function.
        batch_size: Static batch size; passed to both capsule layers and used
            in the shape assertions.
        output_size: Number of capsules in the final layer.

    Returns:
        The length of every output capsule, reduced over axis 2 with the
        reduced axis kept.
    """
    with tf.variable_scope('Conv1_layer'):
        conv_features = tf.contrib.layers.conv2d(
            x, num_outputs=256, kernel_size=9, stride=1, padding='VALID')
        assert conv_features.get_shape() == [batch_size, 20, 20, 256]

    with tf.variable_scope('PrimaryCaps_layer'):
        primary_layer = CapsLayer(num_outputs=32, vec_len=8,
                                  with_routing=False, layer_type='CONV',
                                  batch_size=batch_size)
        primary_caps = primary_layer(conv_features, kernel_size=9, stride=2)
        assert primary_caps.get_shape() == [batch_size, 1152, 8, 1]

    with tf.variable_scope('DigitCaps_layer'):
        digit_layer = CapsLayer(num_outputs=output_size, vec_len=16,
                                with_routing=True, layer_type='FC',
                                batch_size=batch_size)
        digit_caps = digit_layer(primary_caps)

        # Vector length; `epsilon` comes from the enclosing module.
        squared_norm = tf.reduce_sum(tf.square(digit_caps), axis=2,
                                     keep_dims=True)
        out = tf.sqrt(squared_norm + epsilon)

    return out
def build_arch(self):
    """Assemble the graph: conv -> primary caps -> digit caps -> mask -> decoder.

    Reads ``self.X``, ``self.Y``, ``self.num_label`` and the image dimensions
    ``self.height`` / ``self.width`` / ``self.channels``. Sets ``self.caps2``,
    ``self.v_length``, ``self.softmax_v``, ``self.argmax_idx``,
    ``self.masked_v`` and ``self.decoded``.
    """
    with tf.variable_scope('Conv1_layer'):
        # Expected by the original author: [batch_size, 20, 20, 256]
        conv_out = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                            kernel_size=9, stride=1,
                                            padding='VALID')

    with tf.variable_scope('PrimaryCaps_layer'):
        # Expected by the original author: [batch_size, 1152, 8, 1]
        primary_layer = CapsLayer(num_outputs=32, vec_len=8,
                                  with_routing=False, layer_type='CONV')
        primary_caps = primary_layer(conv_out, kernel_size=9, stride=2)

    with tf.variable_scope('DigitCaps_layer'):
        # Expected by the original author: [batch_size, 10, 16, 1]
        digit_layer = CapsLayer(num_outputs=self.num_label, vec_len=16,
                                with_routing=True, layer_type='FC')
        self.caps2 = digit_layer(primary_caps)

    # Decoder structure, step 1: masking.
    with tf.variable_scope('Masking'):
        # a) capsule lengths ||v_c||, then softmax over the capsule axis.
        squared_norm = reduce_sum(tf.square(self.caps2), axis=2,
                                  keepdims=True)
        self.v_length = tf.sqrt(squared_norm + epsilon)
        self.softmax_v = softmax(self.v_length, axis=1)

        # b) index of the strongest capsule for every sample.
        strongest = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        self.argmax_idx = tf.reshape(strongest, shape=(cfg.batch_size, ))

        if cfg.mask_with_y:
            # Method 2 (default): mask with the true label.
            self.masked_v = tf.multiply(
                tf.squeeze(self.caps2),
                tf.reshape(self.Y, (-1, self.num_label, 1)))
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
                + epsilon)
        else:
            # Method 1: pick the predicted capsule, one sample at a time.
            picked = [
                tf.reshape(self.caps2[i][self.argmax_idx[i], :],
                           shape=(1, 1, 16, 1))
                for i in range(cfg.batch_size)
            ]
            self.masked_v = tf.concat(picked, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]

    # Step 2: reconstruct the image with three fully connected layers.
    with tf.variable_scope('Decoder'):
        flat_caps = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
        hidden1 = tf.contrib.layers.fully_connected(flat_caps,
                                                    num_outputs=512)
        hidden2 = tf.contrib.layers.fully_connected(hidden1,
                                                    num_outputs=1024)
        n_pixels = self.height * self.width * self.channels
        self.decoded = tf.contrib.layers.fully_connected(
            hidden2, num_outputs=n_pixels, activation_fn=tf.sigmoid)
def build_arch(self):
    """Build a conv1d + dense front end followed by two capsule layers.

    Reads ``self.X`` and the ``cfg`` capsule settings. Sets ``self.caps2``,
    ``self.v_length`` (one length per output capsule) and ``self.argmax_idx``
    (index of the longest capsule per sample).
    """
    conv1 = tf.layers.conv1d(
        # View every row as 179 steps with a single channel.
        tf.reshape(self.X, [-1, 179, 1]),
        filters=32,            # 32 output channels
        kernel_size=3,         # original note: 3 is usually the most stable
        padding='same',        # zero padding
        # L2 regularisation on kernel and bias to reduce overfitting.
        kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001),
        bias_regularizer=tf.contrib.layers.l2_regularizer(0.001),
        activation=tf.nn.relu)

    # NOTE(review): no `training` argument is passed to batch normalization,
    # so the layer's default mode is used - confirm this is intended.
    conv1_bn = tf.layers.batch_normalization(conv1)

    # Flatten the conv output to one 32 * 179 vector per row.
    conv1_reshape = tf.reshape(conv1_bn, [-1, 32 * 179])

    fc1 = tf.layers.dense(inputs=conv1_reshape, units=128,
                          activation=tf.nn.relu)
    fc2 = tf.layers.dense(inputs=fc1, units=64, activation=tf.nn.relu)

    # Primary capsules built from the dense features.
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(num_outputs=cfg.primaryCaps_out_num,
                                vec_len=cfg.primaryCaps_vec_num,
                                with_routing=False, layer_type='NN')
        caps1 = primaryCaps(fc2)

    # Output capsules, one per class, with routing.
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(num_outputs=cfg.out_size,
                              vec_len=cfg.outCaps_vec_num,
                              with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(caps1)

    with tf.variable_scope('Masking'):
        # a) capsule lengths ||v_c||.
        self.v_length = tf.sqrt(
            tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True)
            + epsilon)
        # Fix: flatten to one row per sample using the configured capsule
        # count. The previous hard-coded 2 only matched cfg.out_size == 2 and
        # would silently fold samples together for any other setting.
        self.v_length = tf.reshape(self.v_length, [-1, cfg.out_size])

        # b) index of the longest capsule for every sample.
        self.argmax_idx = tf.to_int32(tf.argmax(self.v_length, axis=1))
def _build_simple_caps_net(self):
    """Build a conv -> primary caps -> digit caps classifier.

    Reads ``self.x``, ``self.labels`` and ``self.batch_size``.

    Returns:
        A tuple ``(caps_digit, v_length, prediction, batch_accuracy)`` where
        ``batch_accuracy`` is the *sum* of correct predictions in the batch
        (it is not divided by the batch size here).
    """
    # One layer of local feature extraction, so that the capsules that follow
    # take vectors in and give vectors out.
    with tf.variable_scope('Conv1_layer'):
        # Expected by the original author: [?, 20, 20, 256]
        conv_features = tcl.conv2d(self.x, num_outputs=256, kernel_size=9,
                                   stride=1, padding='VALID')

    # Primary capsule layer: a stack of ordinary convolutions; 8 conv2d
    # outputs are concatenated into one neural unit (capsule) whose output is
    # an 8x1 vector. Expected by the original author: [?, 1152, 8, 1]
    with tf.variable_scope('PrimaryCaps_layer'):
        primary_layer = CapsLayer(self.batch_size, num_outputs=32, vec_len=8,
                                  with_routing=False, layer_type='CONV')
        primary_out = primary_layer(conv_features, kernel_size=9, stride=2)

    # Digit capsule layer. Expected by the original author: [?, 10, 16, 1]
    with tf.variable_scope('DigitCaps_layer'):
        digit_layer = CapsLayer(self.batch_size, num_outputs=10, vec_len=16,
                                with_routing=True, layer_type='FC')
        caps_digit = digit_layer(primary_out)

    with tf.variable_scope("Caps_prediction"):
        # Capsule lengths ||v_c||, then softmax and argmax over the capsules.
        epsilon = 1e-9
        squared_norm = tf.reduce_sum(tf.square(caps_digit), axis=2,
                                     keep_dims=True)
        v_length = tf.sqrt(squared_norm + epsilon)

        class_probs = tf.nn.softmax(v_length, dim=1)
        prediction = tf.to_int32(tf.argmax(class_probs, axis=1))
        prediction = tf.reshape(prediction, shape=(self.batch_size, ))

        hits = tf.equal(tf.to_int32(self.labels), prediction)
        batch_accuracy = tf.reduce_sum(tf.cast(hits, tf.float32))

    return caps_digit, v_length, prediction, batch_accuracy
def discriminator(self, input, feat, reuse):
    """Capsule discriminator whose capsules are weighted by a class feature.

    Args:
        input: Image batch fed to the first convolution.
        feat: Tensor reshaped to ``[-1, 1, self.num_class]`` and
            matrix-multiplied with the output capsules.
        reuse: When truthy, variables of the current variable scope are
            reused instead of created.

    Returns:
        ``feat_logits``: a one-unit dense projection of the feat-weighted
        capsule vector.
    """
    # Removed: the unused local `isTrain = self.isTrain` and a string-literal
    # block of disabled conv/dense code, neither of which affected the graph.
    caps_dim = 16  # length of each output capsule vector

    if reuse:
        tf.get_variable_scope().reuse_variables()

    with tf.variable_scope('Conv1_layer'):
        # Expected by the original author: [batch_size, 20, 20, 256]
        conv1 = tf.contrib.layers.conv2d(input, num_outputs=256,
                                         kernel_size=9, stride=1,
                                         padding='VALID')

    # 1st hidden layer
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        # Expected by the original author: [batch_size, 1152, 8, 1]
        caps1 = primaryCaps(conv1, kernel_size=9, stride=2)

    # 2nd hidden layer
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(num_outputs=self.num_class, vec_len=caps_dim,
                              with_routing=True, layer_type='FC')
        # Expected by the original author: [batch_size, num_class, 16, 1]
        caps2 = digitCaps(caps1)

    # [-1, 1, num_class] x [-1, num_class, 16] -> one 16-d vector per sample.
    res_caps = tf.reshape(caps2, [-1, self.num_class, caps_dim])
    res_feat = tf.reshape(feat, [-1, 1, self.num_class])
    res_flat = tf.matmul(res_feat, res_caps)
    flat = tf.reshape(res_flat, [-1, caps_dim])

    feat_logits = tf.layers.dense(flat, 1, name='feat_logits')
    return feat_logits
def build_arch(self):
    """Build embedding -> conv1d -> two capsule layers -> LSTM head.

    Reads ``self.X`` and the ``cfg`` hyper-parameters. Sets ``self.caps2``,
    ``self.prediction`` (4-way linear projection of the last LSTM output) and
    ``self.v_j`` (length of every second-layer capsule).
    """
    with tf.variable_scope('Embedding'):
        embedded = tf.contrib.layers.embed_sequence(
            self.X, vocab_size=251969, embed_dim=cfg.embed_dim)

    with tf.variable_scope('Conv1_layer'):
        conv_out = tf.layers.conv1d(embedded,
                                    filters=cfg.conv1_filters,
                                    kernel_size=cfg.conv1_kernel,
                                    strides=cfg.conv1_stride,
                                    padding=cfg.conv1_padding)

    with tf.variable_scope('First_caps_layer'):
        first_layer = CapsLayer(num_outputs=cfg.caps1_output,
                                vec_len=cfg.caps1_len,
                                layer_type=cfg.caps1_type,
                                with_routing=cfg.caps1_routing)
        first_caps = first_layer(conv_out, kernel_size=cfg.caps1_kernel,
                                 stride=cfg.caps1_stride)

    with tf.variable_scope('Second_caps_layer'):
        second_layer = CapsLayer(num_outputs=cfg.caps2_output,
                                 vec_len=cfg.caps2_len,
                                 layer_type='FC',
                                 with_routing=cfg.caps2_routing)
        self.caps2 = second_layer(first_caps, kernel_size=3, stride=1)

    with tf.variable_scope('LSTM_layer'):
        # Feed the capsules to the LSTM as a sequence of 4 time steps.
        sequence = tf.reshape(self.caps2, [cfg.batch_size, 4, -1])
        time_steps = tf.unstack(sequence, 4, 1)

        # Output projection; created before the LSTM cell, as in the
        # original, so variable creation order is unchanged.
        out_weights = tf.Variable(tf.random_normal([32, 4]))
        out_bias = tf.Variable(tf.random_normal([4]))

        lstm_cell = rnn.BasicLSTMCell(32, forget_bias=1)
        outputs, _ = rnn.static_rnn(lstm_cell, time_steps, dtype='float32')
        self.prediction = tf.matmul(outputs[-1], out_weights) + out_bias

    with tf.variable_scope('Out'):
        squared_norm = tf.reduce_sum(tf.square(self.caps2), axis=2,
                                     keep_dims=True)
        self.v_j = tf.sqrt(squared_norm + epsilon)
def buildArch(self):
    """Build conv -> primary caps -> digit caps for a single image.

    Reads ``self.img`` and stores the final capsules in ``self.caps2``. The
    shape assertions pin the batch size to 1.
    """
    with tf.variable_scope('Conv1_layer'):
        # Fix: call print as a function. The Python 2 statement form
        # (`print self.img`) is a syntax error on Python 3, while the
        # single-argument call prints the same text on both.
        print(self.img)
        conv1 = tf.contrib.layers.conv2d(self.img, num_outputs=256,
                                         kernel_size=9, stride=1,
                                         padding='VALID')
        assert conv1.get_shape() == [1, 20, 20, 256]
        print(conv1)

    with tf.variable_scope('PrimaryCaps_layer'):
        primary_caps = CapsLayer()
        # The string selects which kind of capsule layer is applied.
        caps1 = primary_caps(conv1, 'primary')
        assert caps1.get_shape() == [1, 1152, 8, 1]

    with tf.variable_scope('DigitCaps_layer'):
        digit_caps = CapsLayer()
        self.caps2 = digit_caps(caps1, 'digit')
def build_arch(self):
    """Build conv -> primary caps -> digit caps and store ``self.caps2``.

    Reads ``self.X``; the static shapes are checked against
    ``cfg.batch_size``.
    """
    with tf.variable_scope('Conv1_layer'):
        # Convolution output is asserted to be [batch_size, 20, 20, 256].
        conv_out = tf.contrib.layers.conv2d(
            self.X, num_outputs=256, kernel_size=9, stride=1,
            padding='VALID')
        assert conv_out.get_shape() == [cfg.batch_size, 20, 20, 256]

    # Primary capsule layer: 6 x 6 x 32 capsules of 8 dimensions each, i.e.
    # [cfg.batch_size, 1152, 8, 1].
    with tf.variable_scope('PrimaryCaps_layer'):
        primary_layer = CapsLayer(num_outputs=32, vec_len=8,
                                  with_routing=False, layer_type='CONV')
        primary_out = primary_layer(conv_out, kernel_size=9, stride=2)
        assert primary_out.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # Digit capsule layer: 10 capsules of 16 dimensions, with routing.
    with tf.variable_scope('DigitCaps_layer'):
        digit_layer = CapsLayer(num_outputs=10, vec_len=16,
                                with_routing=True, layer_type='FC')
        self.caps2 = digit_layer(primary_out)
def build_arch(self):
    """Build embedding -> conv1d -> two capsule layers -> LSTM -> softmax.

    Reads ``self.X`` and the ``cfg`` hyper-parameters. Sets ``self.caps2``
    and ``self.prediction`` (2-way softmax over the last LSTM output).
    """
    with tf.variable_scope('Embedding'):
        embedded = tf.contrib.layers.embed_sequence(
            self.X, vocab_size=80, embed_dim=cfg.embed_dim)

    with tf.variable_scope('Conv1_layer'):
        conv_out = tf.layers.conv1d(embedded,
                                    filters=cfg.conv1_filters,
                                    kernel_size=cfg.conv1_kernel,
                                    strides=cfg.conv1_stride,
                                    padding=cfg.conv1_padding)

    with tf.variable_scope('First_caps_layer'):
        first_layer = CapsLayer(num_outputs=cfg.caps1_output,
                                vec_len=cfg.caps1_len,
                                layer_type=cfg.caps1_type,
                                with_routing=cfg.caps1_routing)
        first_caps = first_layer(conv_out, kernel_size=cfg.caps1_kernel,
                                 stride=cfg.caps1_stride)

    with tf.variable_scope('Second_caps_layer'):
        second_layer = CapsLayer(num_outputs=cfg.caps2_output,
                                 vec_len=cfg.caps2_len,
                                 layer_type='FC',
                                 with_routing=cfg.caps2_routing)
        self.caps2 = second_layer(first_caps, kernel_size=3, stride=1)

    with tf.variable_scope('LSTM_layer'):
        # Feed the capsules to the LSTM as 4 time steps of 8 values each.
        sequence = tf.reshape(self.caps2, [cfg.batch_size, 4, 8])
        time_steps = tf.unstack(sequence, 4, 1)

        # NOTE(review): these two variables are created but no longer feed
        # the prediction (the dense layer below replaced the manual matmul).
        # They are kept so the set of graph variables is unchanged.
        W = tf.Variable(tf.random_normal([100, 1]))
        b = tf.Variable(tf.random_normal([1]))

        lstm_cell = rnn.BasicLSTMCell(100, forget_bias=1)
        outputs, _ = rnn.static_rnn(lstm_cell, time_steps, dtype='float32')

        last_output = outputs[-1]
        self.prediction = tf.layers.dense(inputs=last_output, units=2,
                                          activation=tf.nn.softmax)
def discriminator(input, isTrain=True, reuse=False):
    """Capsule discriminator that returns its own margin loss.

    The target label is derived from ``reuse``: the first (non-reusing) call
    is scored against label 1 and a reusing call against label 0.

    Args:
        input: Image batch fed to the first convolution.
        isTrain: Not read by this function.
        reuse: Whether to reuse the ``'discriminator'`` variables; also
            selects the target label as described above.

    Returns:
        Scalar margin loss averaged over the batch.
    """
    epsilon = 1e-9
    with tf.variable_scope('discriminator') as scope:
        target_class = 0 if reuse else 1
        labels = tf.constant(target_class, shape=[cfg.batch_size])
        Y = tf.one_hot(labels, depth=2, axis=1, dtype=tf.float32)

        if reuse:
            scope.reuse_variables()

        with tf.variable_scope('Conv1_layer'):
            conv_out = tf.contrib.layers.conv2d(
                input, num_outputs=256, kernel_size=9, stride=1,
                padding='VALID')

        with tf.variable_scope('PrimaryCaps_layer'):
            primary_layer = CapsLayer(num_outputs=32, vec_len=8,
                                      with_routing=False, layer_type='CONV')
            primary_out = primary_layer(conv_out, kernel_size=9, stride=2)

        with tf.variable_scope('DigitCaps_layer'):
            digit_layer = CapsLayer(num_outputs=2, vec_len=16,
                                    with_routing=True, layer_type='FC')
            # Per the original comment: batch size x 2 x 16 x 1
            digit_out = digit_layer(primary_out)

        v_length = tf.sqrt(
            reduce_sum(tf.square(digit_out), axis=2, keepdims=True)
            + epsilon)

        # Margin loss terms: present-class and absent-class penalties.
        present_term = tf.square(tf.maximum(0., cfg.m_plus - v_length))
        absent_term = tf.square(tf.maximum(0., v_length - cfg.m_minus))
        present_term = tf.reshape(present_term, shape=(cfg.batch_size, -1))
        absent_term = tf.reshape(absent_term, shape=(cfg.batch_size, -1))

        per_class_loss = (Y * present_term
                          + cfg.lambda_val * (1 - Y) * absent_term)
        margin_loss = tf.reduce_mean(tf.reduce_sum(per_class_loss, axis=1))
        return margin_loss
def __init__(self, privateCaps_dim=8, outputCaps_num=7, outputCaps_dim=16,
             r=3, **kwargs):
    """Create the capsule layer and the two convolutions of the model.

    Args:
        privateCaps_dim: First argument of ``CapsLayer``; also stored as
            ``self.primaryCapsLength``.
        outputCaps_num: Second argument of ``CapsLayer``.
        outputCaps_dim: Third argument of ``CapsLayer``.
        r: Fourth argument of ``CapsLayer``.
        **kwargs: Forwarded to the parent constructor.
    """
    super(CapsNet, self).__init__(name='capsnet', **kwargs)

    # Layers are created in the same order as before so that any
    # order-dependent bookkeeping in the parent class is unaffected.
    self.capsLayer = CapsLayer(privateCaps_dim, outputCaps_num,
                               outputCaps_dim, r)
    self.primaryCapsLength = privateCaps_dim
    self.lamda = 0.5
    self.conv1 = tf.keras.layers.Conv2D(
        256, (4, 25), activation='relu', name='conv1')
    self.conv2 = tf.keras.layers.Conv2D(
        256, (4, 256), strides=(1, 128), activation='relu', name='conv2')
    self.batch_size = 1
def build_arch(self): with tf.variable_scope('conv1_layer'): # conv1, return: (batch_size, 20, 20, 256) conv1 = tf.layers.conv2d(self.X, filters=256, kernel_size=9, strides=1, padding='VALID') assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256] # PrimaryCaps, return (batch_size, 1152, 8, 1) with tf.variabel_scope('PrimaryCaps_layer'): primaryCaps = CapsLayer(num_outputs_p=32, num_outputs_d=10, vec_len_p=8, vec_len_d=16) caps1 = primaryCaps.PrimaryCaps(conv1, kernel_size=9, stride=2) assert caps1.get_shape == [cfg.batch_size, 1152, 8, 1] # DigitCaps, return (batch_size, 10, 16, 1) with tf.variable_scope('DigitCaps_layer'): digitCaps = CapsLayer(num_outputs_p=32, num_outputs_d=10, vec_len_p=8, vec_len_d=16) self.caps2 = digitCaps.DigitCaps(caps1) assert self.caps2.get_shape == [cfg.batch_size, 10, 16, 1] with tf.variable_scope('masking'): self.maked_v = tf.multiply(tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1))) self.v_length =
def build_arch(self):
    """Assemble the CapsNet graph: conv -> caps -> masking -> decoder.

    Reads ``self.X`` and ``self.Y``. Sets ``self.caps2``, ``self.v_length``,
    ``self.softmax_v``, ``self.argmax_idx``, ``self.masked_v`` and
    ``self.decoded`` (784 sigmoid outputs per sample).
    """
    with tf.variable_scope('Conv1_layer'):
        conv1 = contrib.layers.conv2d(self.X, num_outputs=256, kernel_size=9,
                                      stride=1, padding="VALID")
        assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

    # Primary capsule layer, asserted to return [batch_size, 1152, 8, 1].
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
        assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # Digit capsule layer; expected by the original author:
    # [batch_size, 10, 16, 1].
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(num_outputs=10, vec_len=16,
                              with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(caps1)

    # Decoder structure, step 1: masking.
    with tf.variable_scope("Masking"):
        # Capsule lengths and their softmax over the 10 capsules.
        self.v_length = tf.sqrt(
            reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
            + epsilon)
        self.softmax_v = softmax(self.v_length, axis=1)
        assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

        # Index of the strongest capsule for every sample.
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
        self.argmax_idx = tf.reshape(self.argmax_idx,
                                     shape=(cfg.batch_size, ))

        if not cfg.mask_with_y:
            # Method 1: keep only the predicted capsule of each sample.
            masked_v = []
            for sample_idx in range(cfg.batch_size):
                v = self.caps2[sample_idx][self.argmax_idx[sample_idx], :]
                masked_v.append(tf.reshape(v, shape=(1, 1, 16, 1)))
            self.masked_v = tf.concat(masked_v, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
        else:
            # Method 2 (default): mask with the true label.
            self.masked_v = tf.multiply(tf.squeeze(self.caps2),
                                        tf.reshape(self.Y, (-1, 10, 1)))
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
                + epsilon)

    # Step 2: reconstruct the MNIST images with 3 fully connected layers.
    with tf.variable_scope('Decoder'):
        fully_connected = contrib.layers.fully_connected
        vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
        fc1 = fully_connected(vector_j, num_outputs=512)
        assert fc1.get_shape() == [cfg.batch_size, 512]
        # Fix: the keyword was misspelled `num_outpus`, which raises
        # TypeError as soon as the decoder is built.
        fc2 = fully_connected(fc1, num_outputs=1024)
        assert fc2.get_shape() == [cfg.batch_size, 1024]
        self.decoded = fully_connected(fc2, num_outputs=784,
                                       activation_fn=tf.sigmoid)
def build_arch(self):
    """Assemble the CapsNet graph with every conv tensor passed through ``fix``.

    Reads ``self.X`` and ``self.Y``. Sets ``self.W``, ``self.biases``,
    ``self.conv1``, ``self.primaryCaps``, ``self.caps1``, ``self.digitCaps``,
    ``self.caps2``, ``self.v_length``, ``self.softmax_v``,
    ``self.argmax_idx``, ``self.masked_v``, ``self.fc1``, ``self.fc2`` and
    ``self.decoded``.
    """
    with tf.variable_scope('Conv1_layer'):
        # Hand-built convolution; asserted output [batch_size, 20, 20, 256].
        # Weights, biases and activations each go through the module-level
        # `fix` helper.
        kernel = tf.get_variable(
            'W', shape=[9, 9, 1, 256],
            initializer=tf.contrib.layers.xavier_initializer())
        self.W = fix(kernel)

        bias = tf.get_variable('biases', shape=[256],
                               initializer=tf.zeros_initializer())
        self.biases = fix(bias)

        pre_activation = tf.nn.conv2d(
            self.X, self.W, strides=[1, 1, 1, 1],
            padding='VALID') + self.biases
        self.conv1 = fix(tf.nn.relu(pre_activation))
        assert self.conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

    # Primary capsule layer, asserted to return [batch_size, 1152, 8, 1].
    with tf.variable_scope('PrimaryCaps_layer'):
        self.primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                     with_routing=False, layer_type='CONV')
        self.caps1 = self.primaryCaps(self.conv1, kernel_size=9, stride=2)
        assert self.caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # Digit capsule layer; expected by the original author:
    # [batch_size, 10, 16, 1].
    with tf.variable_scope('DigitCaps_layer'):
        self.digitCaps = CapsLayer(num_outputs=10, vec_len=16,
                                   with_routing=True, layer_type='FC')
        self.caps2 = self.digitCaps(self.caps1)

    # Decoder structure, step 1: masking.
    with tf.variable_scope('Masking'):
        # a) capsule lengths ||v_c||, then softmax over the 10 capsules.
        squared_norm = reduce_sum(tf.square(self.caps2), axis=2,
                                  keepdims=True)
        self.v_length = tf.sqrt(squared_norm + epsilon)
        self.softmax_v = softmax(self.v_length, axis=1)
        assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

        # b) index of the strongest capsule for every sample.
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
        self.argmax_idx = tf.reshape(self.argmax_idx,
                                     shape=(cfg.batch_size, ))

        if cfg.mask_with_y:
            # Method 2 (default): mask with the true label.
            self.masked_v = tf.multiply(tf.squeeze(self.caps2),
                                        tf.reshape(self.Y, (-1, 10, 1)))
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
                + epsilon)
        else:
            # Method 1, c) indexing: keep the predicted capsule per sample.
            picked = [
                tf.reshape(self.caps2[i][self.argmax_idx[i], :],
                           shape=(1, 1, 16, 1))
                for i in range(cfg.batch_size)
            ]
            self.masked_v = tf.concat(picked, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]

    # Step 2: reconstruct the MNIST images with 3 fully connected layers.
    with tf.variable_scope('Decoder'):
        flat_caps = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))

        self.fc1 = tf.contrib.layers.fully_connected(flat_caps,
                                                     num_outputs=512)
        assert self.fc1.get_shape() == [cfg.batch_size, 512]

        self.fc2 = tf.contrib.layers.fully_connected(self.fc1,
                                                     num_outputs=1024)
        assert self.fc2.get_shape() == [cfg.batch_size, 1024]

        self.decoded = tf.contrib.layers.fully_connected(
            self.fc2, num_outputs=784, activation_fn=tf.sigmoid)
def build_arch(self):
    """Assemble a CapsNet whose decoder sees a partially clamped encoding.

    Reads ``self.X``, ``self.Y``, ``self.train_var`` and ``self.graph``.
    Sets ``self.caps2``, ``self.v_length``, ``self.softmax_v``,
    ``self.argmax_idx``, ``self.caps_batch_avg_tiled`` and ``self.decoded``.
    """
    uses_28px_input = cfg.img_dim == 28

    with tf.variable_scope('Conv1_layer'):
        # 28-pixel inputs give a 20x20 feature map; the other configuration
        # uses a strided 8x8 kernel and is asserted to give 17x17.
        if uses_28px_input:
            conv_kernel, conv_stride, conv_side = 9, 1, 20
        else:
            conv_kernel, conv_stride, conv_side = 8, 2, 17
        conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                         kernel_size=conv_kernel,
                                         stride=conv_stride,
                                         padding='VALID')
        assert conv1.get_shape() == [cfg.batch_size, conv_side, conv_side,
                                     256]

    # Primary capsule layer, asserted to return [batch_size, 1152, 8, 1].
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        caps_kernel = 9 if uses_28px_input else 7
        caps1 = primaryCaps(conv1, kernel_size=caps_kernel, stride=2)
        assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # Digit capsule layer, asserted to return [batch_size, 10, 16, 1].
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(num_outputs=10, vec_len=16,
                              with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(caps1)
        assert self.caps2.get_shape() == [cfg.batch_size, 10, 16, 1]

    # Decoder structure, step 1: masking.
    with tf.variable_scope('Masking'):
        # a) capsule lengths over dimensions cfg.num_ex_var..15 only (the
        #    first cfg.num_ex_var dimensions are left out), then softmax.
        kept_dims = self.caps2[:, :, cfg.num_ex_var:16, :]
        self.v_length = tf.sqrt(
            tf.reduce_sum(tf.square(kept_dims), axis=2, keep_dims=True)
            + epsilon)
        self.softmax_v = tf.nn.softmax(self.v_length, dim=1)
        assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

        # b) index of the strongest capsule for every sample.
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
        self.argmax_idx = tf.reshape(self.argmax_idx,
                                     shape=(cfg.batch_size, ))

    # Step 2: reconstruct the images with 3 fully connected layers.
    with tf.variable_scope('Decoder'):
        # Batch-mean capsules, tiled back to the batch size; used to replace
        # ("fix") selected capsule dimensions.
        batch_mean = tf.reduce_mean(self.caps2, axis=0, keep_dims=True)
        batch_mean = tf.reshape(batch_mean, (1, 10, 16))
        self.caps_batch_avg_tiled = tf.tile(batch_mean,
                                            (cfg.batch_size, 1, 1))

        def _clamp_leading_dims():
            # Batch mean for the first cfg.num_ex_var dimensions, real
            # capsule values for the remaining ones.
            return tf.concat([
                self.caps_batch_avg_tiled[:, :, 0:cfg.num_ex_var],
                tf.squeeze(self.caps2)[:, :, cfg.num_ex_var:16]
            ], axis=2)

        def _clamp_all_but_train_var():
            # Batch mean everywhere except dimension self.train_var, which
            # keeps its real capsule value.
            return tf.concat([
                tf.slice(self.caps_batch_avg_tiled, [0, 0, 0],
                         [-1, -1, self.train_var]),
                tf.slice(tf.squeeze(self.caps2), [0, 0, self.train_var],
                         [-1, -1, 1]),
                tf.slice(self.caps_batch_avg_tiled,
                         [0, 0, self.train_var + 1], [-1, -1, -1])
            ], axis=2)

        clamped_encoding = tf.cond(
            tf.greater_equal(self.train_var, cfg.num_ex_var),
            _clamp_leading_dims, _clamp_all_but_train_var)
        clamped_encoding = tf.reshape(clamped_encoding,
                                      (cfg.batch_size, 10, 16))
        assert clamped_encoding.get_shape() == [cfg.batch_size, 10, 16]

        # Route the gradient of this identity op through the custom
        # gradient registered under "CustomGrad_mean_diff".
        with self.graph.gradient_override_map(
                {"Identity": "CustomGrad_mean_diff"}):
            clamped_encoding_out = tf.identity(clamped_encoding,
                                               name="Identity")

        # Mask with the true label, then flatten for the decoder.
        masked_v = tf.multiply(clamped_encoding_out,
                               tf.reshape(self.Y, (-1, 10, 1)))
        vector_j = tf.reshape(masked_v, shape=(cfg.batch_size, -1))

        fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
        assert fc1.get_shape() == [cfg.batch_size, 512]
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        assert fc2.get_shape() == [cfg.batch_size, 1024]
        self.decoded = tf.contrib.layers.fully_connected(
            fc2, num_outputs=cfg.img_dim * cfg.img_dim,
            activation_fn=tf.sigmoid)
def build_arch(self):
    """Build the CapsNet classifier (no decoder) and its accuracy op.

    Reads ``self.X`` and ``self.Y``. Sets ``self.caps2``, ``self.v_length``,
    ``self.softmax_v``, ``self.argmax_idx``, ``self.labels`` and
    ``self.accuracy`` (percentage of correct predictions in the batch).
    """
    with tf.variable_scope('Conv1_layer'):
        # Asserted output: [batch_size, 20, 20, 256]
        conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                         kernel_size=9, stride=1,
                                         padding='VALID')
        assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

    # Primary capsule layer, asserted to return [batch_size, 1152, 8, 1].
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
        assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # Digit capsule layer; expected by the original author:
    # [batch_size, 10, 16, 1].
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(num_outputs=10, vec_len=16,
                              with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(caps1)

    with tf.variable_scope('Masking'):
        # a) capsule lengths ||v_c||, then softmax over the 10 capsules.
        self.v_length = tf.sqrt(
            reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
            + epsilon)
        self.softmax_v = softmax(self.v_length, axis=1)
        assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

        # b) index of the strongest capsule for every sample.
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
        self.argmax_idx = tf.reshape(self.argmax_idx,
                                     shape=(cfg.batch_size, ))

        # Removed dead code: with the decoder disabled, the
        # `cfg.mask_with_y` branches only (1) sliced one capsule per sample
        # into a local that was never used, or (2) recomputed `self.v_length`
        # with the identical expression above. Neither changed any output.

    with tf.variable_scope('acc'):
        self.labels = tf.to_int32(tf.argmax(self.Y, axis=1))
        correct_prediction = tf.equal(tf.to_int32(self.labels),
                                      self.argmax_idx)
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32)) * 100
def model_builder(self):
    """Build the 3-layer CapsNet plus a fully connected reconstruction head.

    Reads ``self.x`` and ``self.y``. Sets ``self.caps_layer2``,
    ``self.v_length``, ``self.softmax_res``, ``self.argmax_idx``,
    ``self.masked_res`` and ``self.decoded`` (784 sigmoid outputs).
    """
    # First convolution layer; per the original comment the output is
    # 20 * 20 * 256.
    wConv = tf.get_variable("wConv", shape=[9, 9, 1, 256])
    bConv = tf.get_variable("bConv", shape=[256])
    conv1 = tf.nn.conv2d(
        self.x, wConv, strides=[1, 1, 1, 1], padding='VALID') + bConv

    # Primary capsule layer; per the original comment: 6 * 6 * 8 * 32.
    primary_capslayer = CapsLayer(num_outputs=32, length=8,
                                  with_routing=False, layer_type='CONV')
    caps_layer1 = primary_capslayer(conv1, filter=9, stride=2)

    # Digit capsule layer.
    digit_capslayer = CapsLayer(num_outputs=10, length=16,
                                with_routing=True, layer_type='FC')
    self.caps_layer2 = digit_capslayer(caps_layer1)

    # Capsule lengths ||v_c||: [10, 16, 1] => [10, 1, 1] per sample.
    self.v_length = tf.sqrt(
        tf.reduce_sum(tf.square(self.caps_layer2), axis=2, keep_dims=True)
        + 1e-9)
    self.softmax_res = tf.nn.softmax(self.v_length, dim=1)

    # Index of the strongest of the 10 capsules for every sample.
    self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_res, axis=1))
    self.argmax_idx = tf.reshape(self.argmax_idx, shape=(cfg.batch_size, ))

    if not cfg.mask_with_y:
        masked_res = []
        for i in range(cfg.batch_size):
            v = self.caps_layer2[i][self.argmax_idx[i], :]
            masked_res.append(tf.reshape(v, shape=[1, 1, 16, 1]))
        # Fix: stack the per-sample tensors inside the graph. `np.vstack`
        # operates on NumPy arrays and cannot stack symbolic TF tensors.
        self.masked_res = tf.concat(masked_res, axis=0)
    else:
        self.masked_res = tf.multiply(tf.squeeze(self.caps_layer2),
                                      tf.reshape(self.y, (-1, 10, 1)))
        self.v_length = tf.sqrt(
            tf.reduce_sum(tf.square(self.caps_layer2), axis=2,
                          keep_dims=True) + 1e-9)

    # Reconstruct the image from the masked capsules with 3 FC layers.
    tmpv = tf.reshape(self.masked_res, shape=(cfg.batch_size, -1))

    # Fix: label masking keeps all 10 capsules (10 * 16 = 160 inputs) while
    # argmax masking keeps a single capsule (16 inputs); the first weight
    # matrix was hard-coded to 160 and broke the argmax path.
    fc_in_dim = 160 if cfg.mask_with_y else 16

    # Fully connected layer 1 (linear, as in the original).
    fw1 = tf.get_variable("fw1", shape=[fc_in_dim, 512])
    fb1 = tf.get_variable("fb1", shape=[512])
    fc_1 = tf.matmul(tmpv, fw1) + fb1

    # Fully connected layer 2 (linear, as in the original).
    fw2 = tf.get_variable("fw2", shape=[512, 1024])
    fb2 = tf.get_variable("fb2", shape=[1024])
    fc_2 = tf.matmul(fc_1, fw2) + fb2

    # Fully connected layer 3 with sigmoid output.
    self.decoded = tf.contrib.layers.fully_connected(
        fc_2, num_outputs=784, activation_fn=tf.sigmoid)
def build_arch(self):
    """Build the CapsNet encoder, the capsule masking step and the decoder.

    Reads ``self.X``.  Stores ``self.caps2``, ``self.v_length``,
    ``self.softmax_v``, ``self.masked_v`` and ``self.decoded`` on the
    instance.  Shapes are checked with asserts against ``cfg.batch_size``.
    """
    with tf.variable_scope('Conv1_layer'):
        # Conv1, [batch_size, 20, 20, 256]
        conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                         kernel_size=9, stride=1,
                                         padding='VALID')
        assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

    # Primary Capsules layer, return [batch_size, 1152, 8, 1]
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
        assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # DigitCaps layer, return [batch_size, 10, 16, 1]
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(num_outputs=10, vec_len=16,
                              with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(caps1)

    # Decoder structure in Fig. 2
    # 1. Masking
    with tf.variable_scope('Masking'):
        # a). calc ||v_c||, then do softmax(||v_c||)
        # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
        # The 1e-9 keeps the gradient of sqrt finite when a capsule is
        # exactly zero (d sqrt(x)/dx is unbounded at x == 0).
        self.v_length = tf.sqrt(
            tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True)
            + 1e-9)
        self.softmax_v = tf.nn.softmax(self.v_length, dim=1)
        assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

        # b). pick out the index of max softmax val of the 10 caps
        # [batch_size, 10, 1, 1] => [batch_size] (index)
        argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        assert argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
        argmax_idx = tf.reshape(argmax_idx, shape=(cfg.batch_size, ))

        # c). indexing: take, for every sample, the capsule at its argmax.
        # One gather over (sample, class) pairs replaces the former Python
        # loop that emitted a separate slice op per sample.
        sample_and_class = tf.stack(
            [tf.range(cfg.batch_size), argmax_idx], axis=1)
        # [batch_size, 10, 16, 1] gathered at [batch_size, 2] indices
        # => [batch_size, 16, 1]
        winners = tf.gather_nd(self.caps2, sample_and_class)
        self.masked_v = tf.reshape(winners,
                                   shape=(cfg.batch_size, 1, 16, 1))
        assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]

    # 2. Reconstruct the MNIST images with 3 FC layers
    # [batch_size, 1, 16, 1] => [batch_size, 16] => [batch_size, 512]
    with tf.variable_scope('Decoder'):
        vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
        fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
        assert fc1.get_shape() == [cfg.batch_size, 512]
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        assert fc2.get_shape() == [cfg.batch_size, 1024]
        self.decoded = tf.contrib.layers.fully_connected(
            fc2, num_outputs=784, activation_fn=tf.sigmoid)
def build_arch(self):
    """Build the CapsNet graph: conv, primary caps, digit caps, mask, decoder.

    Reads ``self.X``, ``self.num_label``, ``self.height``, ``self.width``,
    ``self.channels`` and (when ``cfg.mask_with_y`` is set) ``self.Y``.
    Stores ``self.caps2``, ``self.v_length``, ``self.softmax_v``,
    ``self.argmax_idx``, ``self.masked_v`` and ``self.decoded``.
    """
    with tf.variable_scope('Conv1_layer'):
        # Layer 1: convolution.
        #   Input:  28x28 single-channel image
        #   Output: 20x20x256 tensor
        #   Parameters: 20992
        # The conv layer detects basic 2-D image features.  In CapsNet it
        # has 256 kernels of size 9x9x1 with stride 1 and ReLU activation.
        conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                         kernel_size=9, stride=1,
                                         padding='VALID')

    with tf.variable_scope('PrimaryCaps_layer'):
        # Layer 2: PrimaryCaps, returns [batch_size, 1152, 8, 1].
        #   Input:  20x20x256 tensor
        #   Output: 6x6x8x32 tensor
        #   Parameters: 5308672
        # 32 primary capsules take the basic features found by the conv
        # layer and produce combinations of them.  Each capsule applies
        # eight 9x9x256 kernels to the 20x20x256 input, giving a 6x6x8
        # output; with 32 capsules the output is 6x6x8x32.
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        caps1 = primaryCaps(conv1, kernel_size=9, stride=2)

    with tf.variable_scope('DigitCaps_layer'):
        # Layer 3: DigitCaps, returns [batch_size, 10, 16, 1].
        #   Input:  6x6x8x32 tensor
        #   Output: 16x10 matrix
        #   Parameters: 1497600
        # One capsule per class.  Each capsule receives the 6x6x8x32 tensor,
        # i.e. 1152 input vectors of dimension 8, and maps every one of them
        # to the 16-dimensional output space through an 8x16 weight matrix.
        digitCaps = CapsLayer(num_outputs=self.num_label, vec_len=16,
                              with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(caps1)

    # Decoder structure in Fig. 2
    # 1. Masking
    with tf.variable_scope('Masking'):
        # a). calc ||v_c|| (the length of each capsule vector), then
        # softmax(||v_c||).  [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
        self.v_length = tf.sqrt(
            reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
            + epsilon)
        self.softmax_v = softmax(self.v_length, axis=1)

        # b). pick out the index of max softmax val of the 10 caps, i.e. the
        # predicted class id.  [batch_size, 10, 1, 1] => [batch_size]
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        self.argmax_idx = tf.reshape(self.argmax_idx,
                                     shape=(cfg.batch_size, ))

        if not cfg.mask_with_y:
            # Method 1: keep only the capsule selected by argmax.
            # c). indexing, one sample at a time.
            masked_v = [
                tf.reshape(self.caps2[sample][self.argmax_idx[sample], :],
                           shape=(1, 1, 16, 1))
                for sample in range(cfg.batch_size)
            ]
            self.masked_v = tf.concat(masked_v, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]
        else:
            # Method 2 (default): mask with the true label.  self.v_length
            # was already computed above from the same self.caps2, so it is
            # not recomputed here.
            self.masked_v = tf.multiply(
                tf.squeeze(self.caps2),
                tf.reshape(self.Y, (-1, self.num_label, 1)))

    # 2. Reconstruct the images with 3 FC layers
    with tf.variable_scope('Decoder'):
        vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
        # Layer 4: first fully connected layer.
        #   Input: 16x10, Output: 512, Parameters: 82432
        # Every output of the lower layer is weighted and fed to each of the
        # 512 neurons; every neuron also has a bias term.
        fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
        # Layer 5: second fully connected layer.
        #   Input: 512, Output: 1024, Parameters: 525312
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        # Final layer: one sigmoid output per pixel/channel.
        self.decoded = tf.contrib.layers.fully_connected(
            fc2,
            num_outputs=self.height * self.width * self.channels,
            activation_fn=tf.sigmoid)
def build_architecture(self):
    """Assemble the CapsNet graph: conv, capsule layers, masking, decoder.

    Reads ``self.X`` and, when ``cfg.mask_with_y`` is set, ``self.y``.
    Stores ``self.caps2``, ``self.v_length``, ``self.softmax_v``,
    ``self.argmax_idx``, ``self.masked_v`` and ``self.decoded``.
    """
    # [ 1st Convolution Layer ]  Conv1, [batch_size, 20, 20, 256]
    with tf.variable_scope('Conv1_layer'):
        conv1 = tf.contrib.layers.conv2d(
            self.X, num_outputs=256, kernel_size=9, stride=1,
            padding='VALID')
        assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

    # [ Primary Capsules Layer ]  PCL, [batch_size, 1152, 8, 1]
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(
            num_outputs=32, vec_len=8, with_routing=False,
            layer_type='CONV')
        caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
        assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # [ DigitCaps Layer ]  DGL, [batch_size, 10, 16, 1]
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(
            num_outputs=10, vec_len=16, with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(caps1)

    # [ Masking ]
    #   1. calc ||v_c||, then softmax(||v_c||):
    #      [batch_size, 10, 16, 1] ==> [batch_size, 10, 1, 1]
    #   2. index of the max softmax value among the 10 capsules:
    #      [batch_size, 10, 1, 1] ==> [batch_size]
    #   3. indexing with that predicted class, or
    #   4. masking with the true label (default mode)
    with tf.variable_scope('Masking'):
        # 1
        squared_norm = tf.reduce_sum(
            tf.square(self.caps2), axis=2, keep_dims=True)
        self.v_length = tf.sqrt(squared_norm + epsilon)
        self.softmax_v = tf.nn.softmax(self.v_length, dim=1)
        assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

        # 2
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
        self.argmax_idx = tf.reshape(
            self.argmax_idx, shape=(cfg.batch_size, ))

        if cfg.mask_with_y:
            # 4
            self.masked_v = tf.multiply(
                tf.squeeze(self.caps2), tf.reshape(self.y, (-1, 10, 1)))
            self.v_length = tf.sqrt(
                tf.reduce_sum(tf.square(self.caps2), axis=2, keep_dims=True)
                + epsilon)
        else:
            # 3
            picked = [
                tf.reshape(self.caps2[sample][self.argmax_idx[sample], :],
                           shape=(1, 1, 16, 1))
                for sample in range(cfg.batch_size)
            ]
            self.masked_v = tf.concat(picked, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]

    # [ Reconstruct the MNIST images with 3 FC layers ]
    # The masked capsules are flattened per sample, then passed through
    # 512 -> 1024 -> 784 fully connected layers.
    with tf.variable_scope('Decoder'):
        # 1st
        vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
        fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
        assert fc1.get_shape() == [cfg.batch_size, 512]

        # 2nd
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        assert fc2.get_shape() == [cfg.batch_size, 1024]

        # 3rd
        self.decoded = tf.contrib.layers.fully_connected(
            fc2, num_outputs=784, activation_fn=tf.sigmoid)
def build_arch(self):
    """Build the LSTM-augmented CapsNet graph up to the class prediction.

    Reads ``self.X``, ``self.n_hidden``, ``self.n_classes``,
    ``self.kernel_size1``, ``self.kernel_size2``, ``self.conv1_outa``,
    ``self.conv1_outb`` and ``self.cap1_out``, and calls ``self.RNN()``.
    Stores ``self.conv1``, ``self.dense1``, ``self.dense1_index``,
    ``self.caps1``, ``self.lstmpred``, ``self.dense2``,
    ``self.dense2_index``, ``self.caps2``, ``self.v_length``,
    ``self.softmax_v`` and ``self.argmax_idx``.

    NOTE(review): the indentation of this block was reconstructed; the
    placement of the two auxiliary dense heads inside the 'Conv1_layer' and
    'PrimaryCaps_layer' scopes is an assumption -- confirm against any
    checkpoints, since it determines the variable names.
    """

    def _aux_head(features):
        # Auxiliary classifier: 2x2 max-pool, two ReLU dense layers, then a
        # linear layer over the flattened result.  Returns (logits, argmax).
        pooled = tf.layers.max_pooling2d(inputs=features,
                                         pool_size=[2, 2], strides=2)
        hidden1 = tf.layers.dense(inputs=pooled, units=1024,
                                  activation=tf.nn.relu)
        hidden2 = tf.layers.dense(inputs=hidden1, units=512,
                                  activation=tf.nn.relu)
        logits = tf.layers.dense(
            inputs=tf.reshape(hidden2, (cfg.batch_size, -1)),
            units=self.n_classes, activation=None)
        index = tf.to_int32(
            tf.argmax(tf.nn.softmax(logits, axis=1), axis=1))
        return logits, index

    with tf.variable_scope('LSTM_layer'):
        # Original note: pred is batch*4d, out is batch*128d.
        # The RNN prediction is not used here; only its output features are.
        _rnn_pred, out = self.RNN()
        out = tf.reshape(out, (-1, 1, self.n_hidden, 1))

    with tf.variable_scope('Conv1_layer'):
        conv1 = tf.contrib.layers.conv2d(self.X, num_outputs=256,
                                         kernel_size=self.kernel_size1,
                                         stride=1, padding='VALID')
        assert conv1.get_shape() == [
            cfg.batch_size, self.conv1_outa, self.conv1_outb, 256
        ]
        # Repeat the LSTM features along axes 1 and 3 so they can be added
        # element-wise to the conv feature map.
        out = tf.tile(out, [1, self.conv1_outa, 1, 256])
        self.conv1 = tf.add(conv1, out)

        # Fully connected auxiliary head on the fused conv features.
        self.dense1, self.dense1_index = _aux_head(self.conv1)

    # Primary Capsules layer
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV',
                                conv1_outa=self.conv1_outa,
                                conv1_outb=self.conv1_outb,
                                cap1_out=self.cap1_out,
                                n_classes=self.n_classes)
        # This CapsLayer returns a pair; the second element is kept as
        # self.lstmpred.
        (self.caps1, pred) = primaryCaps(self.conv1,
                                         kernel_size=self.kernel_size2,
                                         stride=2)
        self.lstmpred = pred
        assert self.caps1.get_shape() == [
            cfg.batch_size, self.cap1_out, 8, 1
        ]

        # Auxiliary head on the primary capsules.
        self.dense2, self.dense2_index = _aux_head(self.caps1)

    # DigitCaps layer
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(num_outputs=self.n_classes, vec_len=8,
                              with_routing=True, layer_type='FC',
                              conv1_outa=self.conv1_outa,
                              conv1_outb=self.conv1_outb,
                              cap1_out=self.cap1_out,
                              n_classes=self.n_classes)
        self.caps2 = digitCaps(self.caps1)

    # Decoder structure in Fig. 2
    # 1. Masking
    with tf.variable_scope('Masking'):
        # a). calc ||v_c||, then do softmax(||v_c||)
        # The 1e-9 keeps the gradient of sqrt finite when a capsule is
        # exactly zero (d sqrt(x)/dx is unbounded at x == 0).
        self.v_length = tf.sqrt(
            tf.reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
            + 1e-9)
        self.softmax_v = tf.nn.softmax(self.v_length, axis=1)
        assert self.softmax_v.get_shape() == [
            cfg.batch_size, self.n_classes, 1, 1
        ]

        # b). pick out the index of the max softmax value per sample
        # [batch_size, n_classes, 1, 1] => [batch_size] (index)
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
        self.argmax_idx = tf.reshape(self.argmax_idx,
                                     shape=(cfg.batch_size, ))
def cnn(self):
    """Build the text CNN + capsule classifier, its loss and its metrics.

    Reads ``self.input_embedding()`` and ``self.input_y``.  Stores
    ``self.caps2``, ``self.v_length``, ``self.softmax_v``,
    ``self.argmax_idx``, ``self.masked_v``, ``self.logits``,
    ``self.logits_softmax``, ``self.y_pred``, ``self.margin_loss``,
    ``self.reconstruction_err``, ``self.loss``, ``self.optim`` and
    ``self.acc``.  Also rebinds the module-level name ``all_conv``.
    """
    embedded = self.input_embedding()
    # (filter height, number of filters) for every convolution branch.
    filter_sizes = [[1, 300], [2, 300], [3, 300], [5, 300]]
    # NOTE(review): all_conv is declared global, so the concatenated feature
    # map is visible at module level after this call -- confirm whether any
    # caller relies on that.
    global all_conv
    for i, (height, n_filters) in enumerate(filter_sizes):
        with tf.name_scope("cnn%s" % height):
            kernel_shape = [height, cfg.embedding_dim, 1, n_filters]
            W = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.1),
                            name='W')
            feature_map = tf.nn.conv2d(
                embedded,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # NOTE(review): this reshape moves the filter count ahead of the
            # position axis without a transpose -- confirm that the
            # resulting element order is the intended one.
            positions = feature_map.shape[1]
            feature_map = tf.reshape(
                feature_map, shape=[-1, n_filters, positions, 1])
            if i:
                all_conv = tf.concat([all_conv, feature_map], axis=2)
            else:
                all_conv = feature_map

    digitCaps = CapsLayer(num_outputs=cfg.num_classes, vec_len=cfg.vec_len,
                          with_routing=True, layer_type='FC')
    self.caps2 = digitCaps(all_conv)
    print("self.caps2",self.caps2)

    with tf.variable_scope('Masking'):
        # a). length of every capsule vector, ||v_c||, then softmax over the
        # class axis.
        self.v_length = tf.sqrt(
            reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
            + epsilon)
        self.softmax_v = softmax(self.v_length, axis=1)

        # b). index of the largest softmax value, i.e. the best predicted
        # class id, flattened to [batch_size].
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        self.argmax_idx = tf.reshape(self.argmax_idx,
                                     shape=(cfg.batch_size,))

        if cfg.mask_with_y:
            # Method 2 (default): mask with the true label.  During training
            # only the capsule vector of the correct class is kept; the
            # vectors of the other classes are zeroed out.
            self.masked_v = tf.multiply(
                tf.squeeze(self.caps2),
                tf.reshape(self.input_y, (-1, cfg.num_classes, 1)))
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
                + epsilon)
            print("self.masked_v2", self.masked_v)
        else:
            # Method 1: no masking, all capsule vectors are passed on.
            self.masked_v = tf.reshape(
                self.caps2, (-1, cfg.num_classes, cfg.vec_len))

    with tf.name_scope("score"):
        # Flatten the (masked) capsules and classify with one dense layer.
        vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
        self.logits = tf.layers.dense(vector_j, cfg.num_classes, name='fc2')
        self.logits_softmax = tf.nn.softmax(self.logits)
        # Predicted class.
        self.y_pred = tf.argmax(self.logits_softmax, 1)

    with tf.name_scope("loss"):
        # 1. The margin loss, on [batch_size, num_classes, 1, 1] lengths.
        # present term: max(0, m_plus - ||v_c||)^2
        present_term = tf.square(
            tf.maximum(0., cfg.m_plus - self.v_length))
        # absent term: max(0, ||v_c|| - m_minus)^2
        absent_term = tf.square(
            tf.maximum(0., self.v_length - cfg.m_minus))
        # The present term is zero once the capsule length exceeds m_plus
        # and non-zero below it.
        assert present_term.get_shape() == [
            cfg.batch_size, cfg.num_classes, 1, 1]

        # reshape: [batch_size, num_classes, 1, 1] => [batch_size, num_classes]
        present_term = tf.reshape(present_term, shape=(cfg.batch_size, -1))
        absent_term = tf.reshape(absent_term, shape=(cfg.batch_size, -1))

        # T_c is taken to be the label tensor, [batch_size, num_classes];
        # the products below are element-wise.
        T_c = self.input_y
        L_c = T_c * present_term + cfg.lambda_val * (1 - T_c) * absent_term
        self.margin_loss = tf.reduce_mean(tf.reduce_sum(L_c, axis=1))

        # 2. The "reconstruction" term: mean squared error between the
        # softmax scores and the labels.
        squared = tf.square(self.logits_softmax - self.input_y)
        self.reconstruction_err = tf.reduce_mean(squared)

        # 3. Total loss: margin loss plus the scaled squared-error term.
        self.loss = (self.margin_loss
                     + cfg.regularization_scale * self.reconstruction_err)

    with tf.name_scope("optimize"):
        # Optimizer
        self.optim = tf.train.AdamOptimizer(
            learning_rate=cfg.learning_rate).minimize(self.loss)

    with tf.name_scope("accuracy"):
        correct_pred = tf.equal(self.y_pred, tf.argmax(self.input_y, 1))
        self.acc = tf.reduce_mean(tf.cast(correct_pred, "float"),
                                  name="accuracy")
def Discriminator(inputs, reuse=False, batchsize=BATCH_SIZE):
    """Capsule-network critic: score each input by its capsule length.

    Args:
        inputs: tensor reshaped internally to [-1, 1, 28, 28].
        reuse: forwarded to the 'CapsDiscrim' variable scope.
        batchsize: batch size passed to both capsule layers and used in the
            shape assertion.

    Returns:
        A rank-1 tensor holding the length of the single output capsule
        for every sample.
    """
    with tf.variable_scope('CapsDiscrim', reuse=reuse):
        images = tf.reshape(inputs, [-1, 1, 28, 28])
        # Move the channel axis last so the CapsNet architecture can be
        # kept as is.
        images = tf.transpose(images, [0, 2, 3, 1])

        # Conv1, [batch_size, 20, 20, 256], with ReLU activation.
        conv_features = tf.contrib.layers.conv2d(
            images,
            num_outputs=256,
            kernel_size=9,
            stride=1,
            activation_fn=tf.nn.relu,
            padding='VALID')

        # Primary Capsules layer, return [batch_size, 1152, 8, 1]
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        primary_out = primaryCaps(conv_features, kernel_size=9, stride=2,
                                  batchsize=batchsize)

        # Single 16-dimensional output capsule, with routing.
        digitCaps = CapsLayer(num_outputs=1, vec_len=16,
                              with_routing=True, layer_type='FC')
        capsule = digitCaps(primary_out, batchsize=batchsize)

        # Drop the two size-1 axes: [batch_size, 1, 16, 1] => [batch_size, 16]
        capsule = tf.squeeze(capsule, axis=[1, 3])
        assert capsule.get_shape() == [batchsize, 16]

        # TODO: Try also removing the LeakyReLU from the CapsLayer file
        # TODO: Try also with 10 digitcaps outputs + thresholding (instead
        #       of just 1 output)
        # TODO: Adding batch normalization in capsules (See CapsLayer.py).
        # TODO: Try Changing the critic iteration count.
        squared_norm = tf.reduce_sum(tf.square(capsule), axis=1,
                                     keep_dims=True)
        capsule_length = tf.sqrt(squared_norm + 1e-9)

        # No softmax here: per the original note, the squash function
        # already keeps the capsule length in [0, 1].
        return tf.reshape(capsule_length, [-1])
def get_model(point_cloud, is_training, n_outputs, bn_decay=None):
    """Capsule classifier over a point cloud; returns per-class lengths.

    Args:
        point_cloud: tensor with a static shape whose first two dimensions
            are the batch size and the number of points (the original
            docstring gives the input as BxNx3).
        is_training: forwarded to the feature networks and the conv layer.
        n_outputs: number of output capsules (classes).
        bn_decay: batch-norm decay forwarded to the feature networks and
            the conv layer.

    Returns:
        Tensor of shape (batch_size, n_outputs) with the length of every
        output capsule.
    """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    input_image = tf.expand_dims(point_cloud, -2)

    # k nearest neighbours of every point, computed outside the graph.
    k = 10
    nearest_pts_id = tf.py_func(pointer_util.get_nearest_neighbors_id,
                                [input_image, k], tf.int32)
    # tf.py_func loses static shape information, so restore it explicitly.
    nearest_pts_id = tf.reshape(nearest_pts_id, (batch_size, num_point, k))

    global_edge_features = tf.py_func(pointer_util.get_global_features,
                                      [input_image, nearest_pts_id, k],
                                      tf.float32)
    local_edge_features = tf.py_func(pointer_util.get_local_features,
                                     [input_image, nearest_pts_id, k],
                                     tf.float32)
    global_edge_features = tf.reshape(global_edge_features,
                                      (batch_size, num_point, k, 3))
    local_edge_features = tf.reshape(local_edge_features,
                                     (batch_size, num_point, k, 3))

    global_feature_1 = pointer_util.feature_network(
        global_edge_features, mlp=[126], name='global_feature_1_',
        is_training=is_training, bn_decay=bn_decay)
    local_feature_1 = pointer_util.feature_network(
        local_edge_features, mlp=[126], name='local_feature_1_',
        is_training=is_training, bn_decay=bn_decay)

    # Fuse global and local edge features with a 1x1 convolution, then
    # max-reduce over axis -2 (the neighbour axis after the reshapes above).
    out_feature_1 = tf_util.conv2d(
        tf.concat([global_feature_1, local_feature_1], axis=-1),
        126, [1, 1],
        padding='VALID',
        stride=[1, 1],
        bn=True,
        is_training=is_training,
        scope='out_feature_1',
        bn_decay=bn_decay,
        is_dist=True)
    out_feature_1 = tf.reduce_max(out_feature_1, axis=-2, keepdims=True)
    # Original note: shape (10, 1000, 1, 126).

    primaryCaps = CapsLayer(num_outputs=40, vec_len=8,
                            with_routing=False, layer_type='CONV')
    caps1 = primaryCaps(out_feature_1, kernel_size=10, stride=2,
                        scope='caps_layer_1')

    # Output capsule layer with routing, one 16-d capsule per class.
    digitCaps = CapsLayer(num_outputs=n_outputs, vec_len=16,
                          with_routing=True, layer_type='FC')
    caps2 = digitCaps(caps1)
    # Original note: caps2 shape is 10x40x16x1.

    # Capsule length ||v_c||.  The softmax / argmax that used to follow
    # were never used by the return value and have been removed.
    v_length = tf.sqrt(
        reduce_sum(tf.square(caps2), axis=2, keepdims=True) + epsilon)

    return tf.reshape(v_length, (batch_size, n_outputs))
# Run configuration for the MNIST CapsNet script.
epoch_size = 100000
batch_size = 128
data_shape = (28, 28)
channel = 1
dataset_name = 'mnist'

# Inputs: X is [batch_size, 28, 28, 1]; label holds one class id per sample.
X = tf.placeholder(tf.float32,
                   shape=(batch_size,) + data_shape + (channel,))
label = tf.placeholder(tf.uint8, shape=(batch_size, ))
Y = tf.one_hot(label, depth=10, axis=1, dtype=tf.float32)

# Architecture
# conv1 = [batch_size, 20, 20, 256]
conv1 = slim.conv2d(X, 256, [9, 9], scope='Conv1_1', padding='VALID')

# capsLayer1 = [batch_size, 6 * 6 * 32, 8, 1]
caps1 = CapsLayer(layer_type='PrimaryCaps')
capsLayer1 = caps1(conv1, num_outputs=32, vec_len=8, kernel_size=9,
                   stride=2)

# capsLayer2 = [batch_size, 10, 16, 1]
caps2 = CapsLayer(layer_type='DigitCaps')
capsLayer2 = caps2(capsLayer1, num_outputs=10, vec_len=16)

# Decoder: capsule lengths, softmax over the class axis, predicted class.
v_length = tf.sqrt(
    tf.reduce_sum(tf.square(capsLayer2), axis=2, keepdims=True) + 1e-9)
# softmax_v = [batch_size, 10, 1, 1]
softmax_v = tf.nn.softmax(v_length, axis=1)
# argmax_idx = [batch_size] after the reshape
argmax_idx = tf.reshape(
    tf.to_int32(tf.argmax(softmax_v, axis=1)),
    shape=(batch_size, ))
print(argmax_idx)

masked_v = []
def build_arch(self):
    """Build the CapsNet graph and expose the routing internals.

    Reads ``self.X`` and, when ``cfg.mask_with_y`` is set, ``self.Y``.
    Stores ``self.testConst``, ``self.caps2``, the DigitCaps internals
    (``v_J``, ``W``, ``b_IJ``, ``s_J``, ``c_IJ``, ``u_hat``, ``biases``),
    ``self.v_length``, ``self.softmax_v``, ``self.argmax_idx``,
    ``self.masked_v`` and ``self.decoded``.
    """
    with tf.variable_scope('Test'):
        self.testConst = tf.constant(1.0, name='testConst')

    with tf.variable_scope('Conv1_layer'):
        # Conv1, [batch_size, 20, 20, 256].  The kernel size is derived
        # from the image size so that the VALID convolution yields 20x20.
        print('shape of self x : ', self.X.shape)
        conv1 = tf.contrib.layers.conv2d(
            self.X,
            num_outputs=256,
            kernel_size=cfg.image_size - 19,
            stride=1,
            padding='VALID')
        print('shape asdf asdf: ', conv1.get_shape())
        assert conv1.get_shape() == [cfg.batch_size, 20, 20, 256]

    # Primary Capsules layer, return [batch_size, 1152, 8, 1]
    with tf.variable_scope('PrimaryCaps_layer'):
        primaryCaps = CapsLayer(
            num_outputs=32, vec_len=8, with_routing=False,
            layer_type='CONV')
        caps1 = primaryCaps(conv1, kernel_size=9, stride=2)
        assert caps1.get_shape() == [cfg.batch_size, 1152, 8, 1]

    # DigitCaps layer, return [batch_size, 10, 16, 1]
    with tf.variable_scope('DigitCaps_layer'):
        digitCaps = CapsLayer(
            num_outputs=10, vec_len=16, with_routing=True, layer_type='FC')
        self.caps2 = digitCaps(caps1)

        # Expose the routing internals of the DigitCaps layer for
        # inspection.
        self.v_J = digitCaps.v_J
        self.W = digitCaps.W
        self.b_IJ = digitCaps.b_IJ
        self.s_J = digitCaps.s_J
        self.c_IJ = digitCaps.c_IJ
        self.u_hat = digitCaps.u_hat
        self.biases = digitCaps.biases

    # Decoder structure in Fig. 2
    # 1. Masking
    with tf.variable_scope('Masking'):
        # a). calc ||v_c||, then do softmax(||v_c||)
        # [batch_size, 10, 16, 1] => [batch_size, 10, 1, 1]
        squared_norm = reduce_sum(
            tf.square(self.caps2), axis=2, keepdims=True)
        self.v_length = tf.sqrt(squared_norm + epsilon)
        self.softmax_v = softmax(self.v_length, axis=1)
        assert self.softmax_v.get_shape() == [cfg.batch_size, 10, 1, 1]

        # b). pick out the index of max softmax val of the 10 caps
        # [batch_size, 10, 1, 1] => [batch_size] (index)
        self.argmax_idx = tf.to_int32(tf.argmax(self.softmax_v, axis=1))
        assert self.argmax_idx.get_shape() == [cfg.batch_size, 1, 1]
        self.argmax_idx = tf.reshape(
            self.argmax_idx, shape=(cfg.batch_size, ))

        if cfg.mask_with_y:
            # Method 2 (default mode): masking with the true label.
            self.masked_v = tf.multiply(
                tf.squeeze(self.caps2), tf.reshape(self.Y, (-1, 10, 1)))
            self.v_length = tf.sqrt(
                reduce_sum(tf.square(self.caps2), axis=2, keepdims=True)
                + epsilon)
        else:
            # Method 1: c). indexing -- per sample, take the capsule at its
            # argmax index.
            picked = [
                tf.reshape(self.caps2[sample][self.argmax_idx[sample], :],
                           shape=(1, 1, 16, 1))
                for sample in range(cfg.batch_size)
            ]
            self.masked_v = tf.concat(picked, axis=0)
            assert self.masked_v.get_shape() == [cfg.batch_size, 1, 16, 1]

    # 2. Reconstruct the images with 3 FC layers
    with tf.variable_scope('Decoder'):
        vector_j = tf.reshape(self.masked_v, shape=(cfg.batch_size, -1))
        fc1 = tf.contrib.layers.fully_connected(vector_j, num_outputs=512)
        assert fc1.get_shape() == [cfg.batch_size, 512]
        fc2 = tf.contrib.layers.fully_connected(fc1, num_outputs=1024)
        assert fc2.get_shape() == [cfg.batch_size, 1024]
        self.decoded = tf.contrib.layers.fully_connected(
            fc2,
            num_outputs=cfg.image_size_flatten,
            activation_fn=tf.sigmoid)
# Validation input pipeline.
# NOTE(review): val_iterator is created here but the init op below is built
# from data_iterator, not val_iterator -- confirm val_iterator is used
# elsewhere.
val_iterator = dataset_val.make_initializable_iterator()
val_init_op = data_iterator.make_initializer(dataset_val)

# Each batch carries four images plus the one-hot labels.
im1, im2, im3, im4, onehot_labels = data_iterator.get_next()

with tf.variable_scope('CNN'):
    # One 7x7 conv layer shared by all four images.
    conv77 = tf.keras.layers.Conv2D(16, (7, 7), activation='relu')
    convmaps = [conv77(im) for im in (im1, im2, im3, im4)]

from capsLayer import CapsLayer

with tf.variable_scope('QuadrantCaps'):
    # One primary capsule layer per conv map; outputs are concatenated
    # along axis 1.
    l1caps = []
    for cm in convmaps:
        quadrantCaps = CapsLayer(
            num_outputs=1,
            vec_len=8,
            iter_routing=0,
            batch_size=batch_size,
            input_shape=(batch_size, 16, 8, 8),
            layer_type='CONV')
        l1caps.append(quadrantCaps(cm, kernel_size=7, stride=1))
    caps1 = tf.keras.layers.Concatenate(axis=1)(l1caps)

with tf.variable_scope('ClassCaps'):
    # Class capsules: 10 capsules of length 16.
    digitCaps = CapsLayer(
        num_outputs=10,
        vec_len=16,
        iter_routing=iter_routing,
        batch_size=batch_size,
        input_shape=(batch_size, 16, 8, 1),
        layer_type='FC')
    caps2 = digitCaps(caps1)

ctx_batch_size = batch_size
ctx_nclasses = 10
def Generator(n_samples, noise=None, reuse=False):
    """Capsule-based generator: noise capsules -> flattened image.

    Args:
        n_samples: batch size used for noise generation and every reshape.
        noise: optional input capsules; when None they are drawn with
            ``caps_noise_generator(batch=n_samples)``.
        reuse: forwarded to the 'CapsGener' variable scope.

    Returns:
        Tensor reshaped to [-1, OUTPUT_DIM] holding sigmoid activations.
    """
    with tf.variable_scope('CapsGener', reuse=reuse):
        # TODO: Play around with different ways of generating noise, either
        #       masked, or unmasked.
        # Masked noise generation; per the original note the noise has
        # shape [batch, num_caps=10, vec_len=16, 1].
        if noise is None:
            noise = caps_noise_generator(batch=n_samples)

        # Inverted digitcaps -> primary caps fully connected layer with
        # dynamic routing.
        inverse_caps = CapsLayer(num_outputs=1152, vec_len=8,
                                 with_routing=True, layer_type='FC')
        primary_like = inverse_caps(noise, batchsize=n_samples)
        print(
            "Output_caps2 dimensions should be : (bs, 1152,8,1), and actually are: "
        )
        print(primary_like.get_shape())

        # TODO: Try adding convolutional capsule layers to the network.
        # Unpack the 1152 capsules into (32, 6, 6) with 8 components each,
        # then fold the components into the channel axis: 32 * 8 = 256.
        unpacked = tf.reshape(primary_like, (n_samples, 32, 6, 6, 8))
        # The original author was unsure whether the next two lines are
        # actually useful.
        channels_first = tf.transpose(unpacked, [0, 1, 4, 2, 3])
        deconv_input = tf.reshape(channels_first, (n_samples, 256, 6, 6))
        # TODO: Determine appropriate shapes
        print(
            "Reshaped_for_deconv caps2 dimensions should be : (b_s, 256, 6,6), and actually are: "
        )
        print(deconv_input.get_shape())

        # Deconvolution 1
        upsampled = lib.ops.deconv2d.Deconv2D('CapsDeconv1', 256, 256,
                                              MASK_SIZE, deconv_input)
        # Symmetric padding of one element on each side of the last two
        # axes.
        pad_widths = tf.constant([[0, 0], [0, 0], [1, 1], [1, 1]])
        upsampled = tf.pad(upsampled, pad_widths, "SYMMETRIC")
        # Make sure if we want to have this ReLU or not
        activated = tf.nn.relu(upsampled)
        # TODO: Get shape of deconv1 output

        # Deconvolution 2
        # TODO: Change the input accordingly if we want to have the ReLU or
        #       not
        image_logits = lib.ops.deconv2d.Deconv2D('CapsDeconv2', 256, 1,
                                                 MASK_SIZE, activated)
        print("The shape of deconv2 should be (b_s,1,28,28) :")
        print((image_logits.get_shape()))

        # No ReLU after the second deconvolution because a sigmoid follows.
        # TODO: Check if we want the sigmoid output or not
        image = tf.nn.sigmoid(image_logits)
        print("Shape at the output:")
        print(image.get_shape())

        # OUTPUT_DIM is the number of pixels in MNIST aka 784
        return tf.reshape(image, [-1, OUTPUT_DIM])