def __call__(self, input):
    """Run the fully connected classifier head on ``input``.

    Layers are created (or reused, depending on ``self.reuse``) under the
    variable scope ``self.name``: ``fc1`` (100 units) -> ``fc2`` (100 units)
    -> ``logits`` (10 units).

    Args:
      input: batch_size x width x height x num_feat_maps

    Returns:
      output: the result of ``ops.logits`` built with ``units=10``.
      NOTE(review): the earlier docstring described "one unit with domain
      tag (0/1)", which does not match ``units=10`` -- confirm the intended
      output width.

    Side effects:
      Sets ``self.reuse = True`` so the next call reuses the variables, and
      refreshes ``self.variables`` with the trainable variables collected
      under ``self.name``.
    """
    with tf.variable_scope(self.name):
        # Gradient reversal is currently disabled; the identity op is the
        # hook it used to be attached to:
        #   g = tf.get_default_graph()
        #   with g.gradient_override_map({"Identity": "ReverseGrad"}):
        idinput = tf.identity(input)
        fc_output = ops.fully_connected(idinput, reuse=self.reuse,
                                        name='fc1', units=100)
        # Fix: fc2 must consume fc1's output. It previously re-read
        # `idinput`, which left fc1 as dead, untrained weights.
        fc_output = ops.fully_connected(fc_output, reuse=self.reuse,
                                        name='fc2', units=100)
        output = ops.logits(fc_output, reuse=self.reuse,
                            name='logits', units=10)
    # set reuse=True for next call
    self.reuse = True
    self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope=self.name)
    return output
def fpn_classifier(ipt, pool_size, num_classes, is_training=True, fc_layers_size=1024):
    """Build the classification and box-regression heads.

    Every layer is applied through ``tf.map_fn`` over the leading axis of
    its input.

    Args:
      ipt: input tensor mapped over its first axis.
        # assumes pooled ROI features -- TODO confirm against caller
      pool_size: kernel size of the first convolution.
      num_classes: number of classes; the box head emits 4 values per class.
      is_training: forwarded to the batch-normalised convolutions.
      fc_layers_size: number of filters in both convolutions.

    Returns:
      Tuple ``(mrcnn_class_logits, mrcnn_probs, mrcnn_bbox)``.
    """
    initializer = 'glorot_uniform_tanh'

    def conv_relu(kernel, layer_name):
        # Per-element conv + batch norm + ReLU with the shared settings.
        def apply(x):
            return ops.conv2d(x, fc_layers_size, kernel, 0, 1,
                              norm='batch', activation=tf.nn.relu,
                              is_training=is_training, name=layer_name,
                              use_bias=True, kernel_initializer=initializer)
        return apply

    def dense(units, layer_name):
        # Per-element fully connected layer.
        def apply(x):
            return ops.fully_connected(x, units, name=layer_name,
                                       weights_initializer=initializer)
        return apply

    def softmax(x):
        return tf.nn.softmax(x, name='mrcnn_class')

    features = tf.map_fn(conv_relu(pool_size, 'mrcnn_class_conv1'),
                         elems=ipt, dtype=tf.float32)
    features = tf.map_fn(conv_relu(1, 'mrcnn_class_conv2'),
                         elems=features, dtype=tf.float32)

    # Drop axes 3 then 2 (both must have size 1 at this point).
    shared = tf.squeeze(tf.squeeze(features, 3), 2)

    mrcnn_class_logits = tf.map_fn(dense(num_classes, 'mrcnn_class_logits'),
                                   elems=shared, dtype=tf.float32)
    mrcnn_probs = tf.map_fn(softmax, elems=mrcnn_class_logits,
                            dtype=tf.float32)

    bbox_flat = tf.map_fn(dense(4 * num_classes, 'mrcnn_bbox_fc'),
                          elems=shared, dtype=tf.float32)
    bbox_dims = tf.shape(bbox_flat)
    mrcnn_bbox = tf.reshape(bbox_flat, [-1, bbox_dims[1], num_classes, 4])

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
def get_prob(input, params, num_class=1000, is_train=True):
    """Append the fc6/fc7/fc8 classifier and softmax to the pool5 features.

    Args:
      input: network input forwarded to ``get_vgg16_pool5``.
      params: parameter source forwarded to every layer constructor.
      num_class: width of the final ``fc8`` layer.
      is_train: when true, dropout (keep_prob=0.5) is applied to the fc6 and
        fc7 outputs before their ReLU.

    Returns:
      The ``layers`` object from ``get_vgg16_pool5`` with the attributes
      ``fc6``, ``fc6_relu``, ``fc7``, ``fc7_relu``, ``fc8`` and ``prob`` set.
    """
    # Get pool5
    layers = get_vgg16_pool5(input, params)

    fc6 = ops.fully_connected(input=layers.pool5, num_neuron=4096,
                              name='fc6', params=params)
    layers.fc6 = tf.nn.dropout(fc6, keep_prob=0.5) if is_train else fc6
    layers.fc6_relu = ops.activate(input=layers.fc6, act_type='relu',
                                   name='fc6_relu')

    fc7 = ops.fully_connected(input=layers.fc6_relu, num_neuron=4096,
                              name='fc7', params=params)
    layers.fc7 = tf.nn.dropout(fc7, keep_prob=0.5) if is_train else fc7
    layers.fc7_relu = ops.activate(input=layers.fc7, act_type='relu',
                                   name='fc7_relu')

    layers.fc8 = ops.fully_connected(input=layers.fc7_relu,
                                     num_neuron=num_class,
                                     name='fc8', params=params)
    layers.prob = tf.nn.softmax(layers.fc8)
    return layers
def build_generator(self, reuse=False):
    """Build the inference graph and store its endpoints on ``self``.

    Creates the placeholders ``self.g_question`` and
    ``self.g_image_features``, and sets ``self.g_prob1``, ``self.g_prob2``
    and ``self.g_predictions``. Returns nothing.

    Args:
      reuse: when true, the current variable scope is switched to reuse
        mode before any layer is built.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    opts = self.options
    img_dim = opts['img_dim']

    self.g_question = tf.placeholder('int32', [None, None], name="question")
    self.g_image_features = tf.placeholder(
        'float32', [None, img_dim, img_dim, opts['img_channels']],
        name="image_features")

    # L2-normalise along axis 3 of the image features.
    normed_features = tf.nn.l2_normalize(self.g_image_features, dim=3)
    question_code = self.encode_question(self.g_question,
                                         opts['text_model'], train=False)

    # Inference: keep probability 1.0 disables dropout in the attention.
    attended = self.attend_image(normed_features, question_code,
                                 dropout_keep_prob=1.0)
    context, self.g_prob1, self.g_prob2 = attended

    with tf.variable_scope("post_attention_fc"):
        hidden = ops.fully_connected(context, 1024, name="fc_1")
        hidden = tf.nn.relu(hidden)
        logits = ops.fully_connected(hidden, opts['ans_vocab_size'],
                                     name="logits")

    self.g_predictions = tf.argmax(logits, 1)
def _simple_generator(z, zy, igen):
    """Build a small fully connected generator (128 -> 128 -> 1).

    Args:
      z: input tensor to the first layer.
      zy: accepted but not used by this generator.
      igen: generator index; it selects the variable scope
        ``'<params.gen_scope>/<igen>'``.

    Returns:
      Tuple ``(h3, activations)`` where ``activations`` maps
      ``'h0'``..``'h3'`` to the corresponding tensors.
    """
    scope_name = '%s/%d' % (params.gen_scope, igen)
    with tf.variable_scope(scope_name):
        acts = {'h0': z}
        acts['h1'] = ops.lrelu(ops.fully_connected(acts['h0'], 128, 'h1'))
        acts['h2'] = ops.lrelu(ops.fully_connected(acts['h1'], 128, 'h2'))
        # The output layer is linear (no activation).
        acts['h3'] = ops.fully_connected(acts['h2'], 1, 'h3')
        return acts['h3'], acts
def _simple_discriminator(x, y, reuse_vars=False):
    """Build a small fully connected discriminator.

    The final layer has ``params.num_generators + 1`` outputs.

    Args:
      x: input tensor to the first layer.
      y: accepted but not used by this discriminator.
      reuse_vars: forwarded as ``reuse`` to the ``params.dis_scope``
        variable scope.

    Returns:
      Tuple ``(h3, activations)`` where ``activations`` maps
      ``'h0'``..``'h3'`` to the corresponding tensors.
    """
    with tf.variable_scope(params.dis_scope, reuse=reuse_vars):
        acts = {'h0': x}
        acts['h1'] = ops.lrelu(ops.fully_connected(acts['h0'], 128, 'h1'))
        acts['h2'] = ops.lrelu(ops.fully_connected(acts['h1'], 128, 'h2'))
        # The output layer is linear (no activation).
        n_outputs = params.num_generators + 1
        acts['h3'] = ops.fully_connected(acts['h2'], n_outputs, 'h3')
        return acts['h3'], acts
def encoder(x, scope="spade_encoder"):
    """Encoder that outputs global N(mu, sig) parameters.

    Six stride-2 spectral-norm convolutions, each followed by instance norm
    and leaky ReLU (0.2), feed two linear heads.

    Args:
      x: [B, H, W, 4] an RGBD image (usually the initial image) which is
        used to sample noise from a distribution to feed into the
        refinement network. Range [0, 1].
      scope: (str) variable scope

    Returns:
      (mu, logvar): outputs of the two fully connected heads, each with
      ``config.DIM_OF_STYLE_EMBEDDING`` units, defining a normal
      distribution to sample from.
    """
    # Rescale from [0, 1] to [-1, 1].
    x = 2 * x - 1
    num_channel = 16
    # Filter count of each stage, as a multiple of `num_channel`.
    stage_multipliers = (1, 2, 4, 8, 8, 8)

    with tf.compat.v1.variable_scope(scope, reuse=tf.compat.v1.AUTO_REUSE):
        for stage, multiplier in enumerate(stage_multipliers):
            x = ops.sn_conv(x, multiplier * num_channel, kernel_size=3,
                            stride=2, use_bias=True, use_spectral_norm=True,
                            scope="conv_%d" % stage)
            x = ops.instance_norm(x, scope="inst_norm_%d" % stage)
            x = ops.leaky_relu(x, 0.2)

        embedding_dim = config.DIM_OF_STYLE_EMBEDDING
        mu = ops.fully_connected(x, embedding_dim, scope="linear_mu")
        logvar = ops.fully_connected(x, embedding_dim, scope="linear_logvar")
    return mu, logvar
def _encode(self, training_data):
    """Encode a batch into latent means and standard deviations.

    Three ReLU convolutions ("1st_conv", "2nd_conv", "3rd_conv") are
    followed by a flatten to ``[batch_size, 13 * 13 * 200]`` and two
    fully connected heads with ``self.pref.n_z`` outputs each.

    Args:
      training_data: input tensor for the first convolution.
        # presumably single-channel images (conv2d is passed 1, 50) -- confirm

    Returns:
      Tuple ``(z_means, z_stddev)``.
    """
    pref = self.pref
    flat_size = 13 * 13 * 200

    with tf.variable_scope("encoder"):
        feat = training_data
        # (layer name, 4th conv2d arg, 5th conv2d arg)
        # presumably input/output channel counts -- confirm against conv2d
        for layer_name, arg_a, arg_b in (("1st_conv", 1, 50),
                                         ("2nd_conv", 50, 100),
                                         ("3rd_conv", 100, 200)):
            feat = tf.nn.relu(conv2d(layer_name, feat, arg_a, arg_b, pref))

        flattened = tf.reshape(feat, [pref.batch_size, flat_size])
        z_means = fully_connected("enc_means", flattened, flat_size, pref.n_z)
        z_stddev = fully_connected("enc_stddev", flattened, flat_size,
                                   pref.n_z)
    return z_means, z_stddev
def create_model(self):
    """Build the LSTM classifier graph and its training ops.

    Sets on ``self``: the placeholders ``x`` and ``y_target``,
    ``global_step``, ``loss``, ``accuracy``, ``train_step`` and
    ``all_summaries``. Returns nothing.
    """
    self.x = tf.placeholder(dtype=tf.float32,
                            shape=[None, INPUT_SIZE * INPUT_SIZE])
    self.y_target = tf.placeholder(dtype=tf.float32, shape=[None, 10])

    # Unflatten each input row into an INPUT_SIZE x INPUT_SIZE grid.
    grid = tf.reshape(self.x, [-1, INPUT_SIZE, INPUT_SIZE])
    recurrent_out = lstm(grid, INPUT_SIZE, INPUT_SIZE, INPUT_SIZE)
    logits = fully_connected(recurrent_out, 10)

    self.global_step = tf.get_variable('global_step', initializer=0)
    increment_step = tf.assign(self.global_step, self.global_step + 1)

    self.loss = loss_function(logits, self.y_target)
    self.accuracy = accuracy(logits, self.y_target)

    # The optimizer op is created under a control dependency on the step
    # increment, tying the counter update to the training step.
    with tf.control_dependencies([increment_step]):
        self.train_step = tf.train.AdamOptimizer().minimize(self.loss)

    self.all_summaries = tf.summary.merge([
        tf.summary.scalar('loss', self.loss),
        tf.summary.scalar('accuracy', self.accuracy),
    ])
def create_model(self):
    """Build the stacked bidirectional-LSTM classifier and training ops.

    The layer widths are fixed at ``[26, 52, 52]``: the input is passed
    through ``augment`` with the first width, then one ``bidirect_lstm``
    per consecutive width pair. Sets on ``self``: the placeholders ``x``
    and ``y_target``, ``global_step``, ``loss``, ``accuracy``,
    ``train_step`` and ``all_summaries``. Returns nothing.
    """
    widths = [26, 52, 52]

    self.x = tf.placeholder(dtype=tf.float32,
                            shape=[None, INPUT_SIZE * INPUT_SIZE])
    self.y_target = tf.placeholder(dtype=tf.float32, shape=[None, 10])

    # Unflatten each input row into an INPUT_SIZE x INPUT_SIZE grid.
    net = tf.reshape(self.x, [-1, INPUT_SIZE, INPUT_SIZE])
    net = augment(net, widths[0])

    # Layer k maps widths[k - 1] inputs to widths[k] hidden units.
    width_pairs = zip(widths[:-1], widths[1:])
    for index, (input_n, hidden_n) in enumerate(width_pairs, start=1):
        net = bidirect_lstm(net, hidden_n, input_n,
                            name="lstm_{}".format(index))

    net = get_last_row(net, widths[-1])
    logits = fully_connected(net, 10)

    self.global_step = tf.get_variable('global_step', initializer=0)
    increment_step = tf.assign(self.global_step, self.global_step + 1)

    self.loss = loss_function(logits, self.y_target)
    self.accuracy = accuracy(logits, self.y_target)

    # The optimizer op is created under a control dependency on the step
    # increment, tying the counter update to the training step.
    with tf.control_dependencies([increment_step]):
        self.train_step = tf.train.AdamOptimizer().minimize(self.loss)

    self.all_summaries = tf.summary.merge([
        tf.summary.scalar('loss', self.loss),
        tf.summary.scalar('accuracy', self.accuracy),
    ])
def _decode(self, latent_variables):
    """Decode latent variables back into an image batch.

    The latent vector is expanded by a fully connected layer, reshaped to
    ``[batch_size, 13, 13, 200]`` and passed through three transposed
    convolutions with declared output shapes 25x25x100, 50x50x50 and
    100x100x1. The first two use ReLU, the last a sigmoid.

    Args:
      latent_variables: tensor fed to the "dec_expansion" layer, which is
        declared with ``self.pref.n_z`` inputs.

    Returns:
      The sigmoid output of the last transposed convolution (declared
      shape ``[batch_size, 100, 100, 1]``).
    """
    pref = self.pref
    rfs = pref.rfs
    batch = pref.batch_size

    with tf.variable_scope("decoder"):
        z_expanded = fully_connected("dec_expansion", latent_variables,
                                     pref.n_z, 13 * 13 * 200)
        z_shaped = tf.reshape(z_expanded, [batch, 13, 13, 200])

        # Filter shapes below are [rfs, rfs, out_channels, in_channels].
        # NOTE(review): assumes conv_transpose uses the same filter layout
        # as tf.nn.conv2d_transpose, as the 2nd and 3rd calls imply.
        #
        # Fix: the first filter used to declare `n_z` output channels while
        # the requested output shape has 100. That only built when
        # n_z == 100; the count now matches the output shape.
        dec_f3 = tf.nn.relu(
            conv_transpose("1st_deconv", z_shaped,
                           [rfs, rfs, 100, z_shaped.get_shape()[-1]],
                           [batch, 25, 25, 100], pref))
        dec_f2 = tf.nn.relu(
            conv_transpose("2nd_deconv", dec_f3,
                           [rfs, rfs, 50, dec_f3.get_shape()[-1]],
                           [batch, 50, 50, 50], pref))
        dec_f1 = tf.nn.sigmoid(
            conv_transpose("3rd_deconv", dec_f2,
                           [rfs, rfs, 1, dec_f2.get_shape()[-1]],
                           [batch, 100, 100, 1], pref))
    return dec_f1
def densenet(image, options, reuse=False, name='densenet'):
    """Build a two-block dense network with a two-layer classifier.

    Layout: conv -> dense block -> max pool -> dense block -> flatten ->
    fc (``nk * nk`` units) -> fc (``n_pred`` units).

    Args:
      image: input tensor.
      options: configuration read for ``nk``, ``ks``, ``image_size`` and
        ``n_pred``; it is also forwarded to ``denseblock``.
      reuse: accepted but not used in this function.
      name: prefix for every layer name.

    Returns:
      Output tensor of the final fully connected layer.
    """
    def layer_name(suffix):
        return name + suffix

    # The flatten below assumes the side length is image_size / 2.
    # presumably because of the single max pool -- confirm against maxpool2d
    divide = 2

    net = conv2d(image, options.nk, ks=options.ks, name=layer_name('_conv1'))
    net = denseblock(net, options, name=layer_name('_db1'))
    net = maxpool2d(net, name=layer_name('_pool1'))
    net = denseblock(net, options, name=layer_name('_db2'))

    side = int(options.image_size / divide)
    flat = tf.reshape(net, [-1, side * side * options.nk])

    hidden = fully_connected(flat, options.nk * options.nk,
                             name=layer_name('_fc1'))
    return fully_connected(hidden, options.n_pred, name=layer_name('_fc2'))
def build_model(self):
    """Build the training graph and store its endpoints on ``self``.

    Creates the placeholders ``self.question``, ``self.image_features``
    and ``self.answers``, and sets ``self.loss`` and ``self.predictions``.
    Returns nothing.

    The loss is the sparse softmax cross-entropy of the logits against each
    of the ``num_answers`` answer columns, averaged over the columns and
    then over the batch.
    """
    opts = self.options
    img_dim = opts['img_dim']
    num_answers = opts['num_answers']
    keep_prob = opts['dropout_keep_prob']

    self.question = tf.placeholder('int32', [None, None], name="question")
    self.image_features = tf.placeholder(
        'float32', [None, img_dim, img_dim, opts['img_channels']],
        name="image_features")
    self.answers = tf.placeholder('int32', [None, num_answers],
                                  name="answer")

    # L2-normalise along axis 3 of the image features.
    normed_features = tf.nn.l2_normalize(self.image_features, dim=3)
    question_code = self.encode_question(self.question, opts['text_model'],
                                         train=True)
    context, prob1, prob2 = self.attend_image(normed_features,
                                              question_code, keep_prob)

    with tf.variable_scope("post_attention_fc"):
        hidden = tf.nn.relu(ops.fully_connected(context, 1024, name="fc_1"))
        hidden = tf.nn.dropout(hidden, keep_prob)
        logits = ops.fully_connected(hidden, opts['ans_vocab_size'],
                                     name="logits")

    # Accumulate the per-example loss against every answer column.
    summed = 0
    for column in range(num_answers):
        summed += tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.answers[:, column], logits=logits)
    summed /= num_answers

    self.loss = tf.reduce_mean(summed)
    self.predictions = tf.argmax(logits, 1)
def ds128(x, reuse=False, is_training=True, name='ds128', norm=None, activation=ops.leaky_relu):
    """Fully connected layer with 128 outputs and ``use_bias=False``.

    All keyword arguments are forwarded unchanged to
    ``ops.fully_connected``.
    """
    layer_kwargs = dict(
        use_bias=False,
        norm=norm,
        activation=activation,
        is_training=is_training,
        reuse=reuse,
        name=name,
    )
    return ops.fully_connected(x, 128, **layer_kwargs)
def d2(x, reuse=False, is_training=True, norm=None, name='d2', activation=tf.nn.sigmoid):
    """Fully connected layer with 2 outputs and ``use_bias=False``.

    All keyword arguments are forwarded unchanged to
    ``ops.fully_connected``.
    """
    layer_kwargs = dict(
        use_bias=False,
        norm=norm,
        activation=activation,
        is_training=is_training,
        reuse=reuse,
        name=name,
    )
    return ops.fully_connected(x, 2, **layer_kwargs)
def d7x7x128(x, reuse=False, is_training=True, norm='batch', name='d7x7x128', activation=tf.nn.relu):
    """Fully connected layer with 7 * 7 * 128 = 6272 outputs, no bias.

    All keyword arguments are forwarded unchanged to
    ``ops.fully_connected``.
    """
    units = 7 * 7 * 128
    layer_kwargs = dict(
        use_bias=False,
        norm=norm,
        activation=activation,
        is_training=is_training,
        reuse=reuse,
        name=name,
    )
    return ops.fully_connected(x, units, **layer_kwargs)
def d1(x, reuse=False, is_training=True, name='d1', norm=None, activation=None):
    """Fully connected layer with 1 output, with the last axis squeezed.

    The layer is built with ``use_bias=False``; every keyword argument is
    forwarded unchanged to ``ops.fully_connected``.
    """
    layer_kwargs = dict(
        use_bias=False,
        norm=norm,
        activation=activation,
        is_training=is_training,
        reuse=reuse,
        name=name,
    )
    single_unit = ops.fully_connected(x, 1, **layer_kwargs)
    # Drop the trailing size-1 axis produced by the single output unit.
    return tf.squeeze(single_unit, -1)
def d10(x, reuse=False, is_training=True, norm=None, name='d10', activation=None):
    """Fully connected layer with 10 outputs and ``use_bias=False``.

    All keyword arguments are forwarded unchanged to
    ``ops.fully_connected``.
    """
    layer_kwargs = dict(
        use_bias=False,
        norm=norm,
        activation=activation,
        is_training=is_training,
        reuse=reuse,
        name=name,
    )
    return ops.fully_connected(x, 10, **layer_kwargs)
def d1024(x, reuse=False, is_training=True, norm='batch', name='d1024', activation=tf.nn.relu):
    """Fully connected layer with 1024 outputs and ``use_bias=False``.

    All keyword arguments are forwarded unchanged to
    ``ops.fully_connected``.
    """
    layer_kwargs = dict(
        use_bias=False,
        norm=norm,
        activation=activation,
        is_training=is_training,
        reuse=reuse,
        name=name,
    )
    return ops.fully_connected(x, 1024, **layer_kwargs)
def _discriminator(x, y, reuse_vars=False):
    """Build the conditional convolutional discriminator.

    ``y`` is concatenated (via ``ops.concat``) to the input and to the
    output of every convolution stage. Batch norm is applied after each
    convolution when ``params.use_batch_norm`` is set.

    Args:
      x: input tensor.
      y: conditioning tensor concatenated at every stage.
      reuse_vars: forwarded as ``reuse`` to the ``params.dis_scope``
        variable scope.

    Returns:
      Tuple ``(h4, activations)``. ``h4`` is the single-unit fully
      connected output. ``activations`` maps ``'h0'``..``'h4'`` and
      ``'h1_pure'`` (the first convolution before norm and activation) to
      tensors.
    """
    ksize = params.dis_filters_size
    base_filters = params.dis_filters

    def finish_stage(tensor, bn_name):
        # Optional batch norm, leaky ReLU, then re-attach the condition.
        if params.use_batch_norm:
            tensor = ops.batch_norm(tensor, name=bn_name)
        return ops.concat(ops.lrelu(tensor), y)

    with tf.variable_scope(params.dis_scope, reuse=reuse_vars):
        h0 = ops.concat(x, y)

        h1_pure = ops.convolution(h0, ksize, base_filters, name='h1')
        h1 = finish_stage(h1_pure, 'bn1')

        h2 = ops.convolution(h1, ksize, base_filters * 2, name='h2')
        h2 = finish_stage(h2, 'bn2')

        h3 = ops.convolution(h2, ksize, base_filters * 4, name='h3')
        h3 = finish_stage(h3, 'bn3')

        flattened = tf.reshape(h3, [params.batch_size, -1])
        h4 = ops.fully_connected(flattened, 1, 'h4')

        activations = {
            'h0': h0,
            'h1': h1,
            'h1_pure': h1_pure,
            'h2': h2,
            'h3': h3,
            'h4': h4,
        }
        return h4, activations
def DenseNet(inputs, nums_out, growth_rate, train_phase, depth):
    """Build a three-block DenseNet classifier.

    Layout: preprocess -> conv1 -> three dense blocks with a transition
    layer between consecutive blocks -> batch norm -> ReLU -> global
    average pooling -> fully connected layer with ``nums_out`` outputs.

    Args:
      inputs: input tensor.
      nums_out: number of outputs of the final fully connected layer.
      growth_rate: forwarded to every dense block and used as the output
        width of each transition layer.
      train_phase: forwarded to the dense blocks, transitions and batch norm.
      depth: total depth; each dense block receives ``(depth - 4) // 3``.

    Returns:
      Output tensor of the "FC" layer.
    """
    num_blocks = 3
    per_block = (depth - 4) // 3

    net = preprocess(inputs)
    net = conv("conv1", net, nums_out=16, k_size=3)

    for stage in range(1, num_blocks + 1):
        net = DenseBlock("DenseBlock%d" % stage, net, per_block,
                         growth_rate, train_phase)
        # There is no transition layer after the last dense block.
        if stage < num_blocks:
            net = Transition("Transition_Layer%d" % stage, net,
                             nums_out=growth_rate, train_phase=train_phase)

    net = batchnorm(net, train_phase, "BN")
    net = relu(net)
    net = global_avg_pooling(net)
    return fully_connected("FC", net, nums_out)
def __call__(self, inputs, train_phase):
    """Apply the four-stage convolutional network with a 1-unit output.

    Each stage is a convolution with leaky ReLU (0.2). Stages 2-4 also
    apply batch norm between the convolution and the activation. Variables
    live under ``self.name`` with ``AUTO_REUSE``.

    Args:
      inputs: input tensor.
      train_phase: forwarded to the batch-norm layers.

    Returns:
      Output of the final single-unit fully connected layer.
    """
    # (conv name, filters, batch-norm name or None)
    stages = (
        ("conv1_1", 64, None),
        ("conv2_1", 128, "BN1"),
        ("conv3_1", 256, "BN2"),
        ("conv4_1", 512, "BN3"),
    )
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        net = inputs
        for conv_name, filters, bn_name in stages:
            net = conv(conv_name, net, filters, 3, 2)
            if bn_name is not None:
                net = batchnorm(net, train_phase, bn_name)
            net = leaky_relu(net, 0.2)
        output = fully_connected("output", net, 1)
    return output
def refinement_network(rgbd, mask, z, scope="spade_generator"):
    """Refines rgbd, mask based on noise z.

    H, W should be divisible by 2 ** num_up_layers (num_up_layers is 5).

    Args:
      rgbd: [B, H, W, 4] the rendered view to be refined
      mask: [B, H, W, 1] binary mask of unknown regions. 1 where known and
        0 where unknown
      z: [B, D] a noise vector to be used as noise for the generator
      scope: (str) variable scope

    Returns:
      [B, H, W, 4] refined rgbd image.
    """
    # Conditioning image: rgbd rescaled to [-1, 1], with the mask appended.
    img = tf.concat([2 * rgbd - 1, mask], axis=-1)

    num_channel = 32
    num_up_layers = 5
    out_channels = 4  # For RGBD
    use_sn = config.USE_SPECTRAL_NORMALIZATION

    batch_size, im_height, im_width, _ = rgbd.get_shape().as_list()
    downscale = 2 ** num_up_layers
    init_h = im_height // downscale
    init_w = im_width // downscale

    # (scope, channel multiplier, double the size before this block?)
    blocks = (
        ("head", 16, False),
        ("middle_0", 16, True),
        ("middle_1", 16, False),
        ("up_0", 8, True),
        ("up_1", 4, True),
        ("up_2", 2, True),
        ("up_3", 1, True),
    )

    with tf.compat.v1.variable_scope(scope, reuse=tf.compat.v1.AUTO_REUSE):
        x = ops.fully_connected(z, 16 * num_channel * init_h * init_w,
                                "fc_expand_z")
        x = tf.reshape(x, [batch_size, init_h, init_w, 16 * num_channel])

        for block_scope, multiplier, upsample_first in blocks:
            if upsample_first:
                x = ops.double_size(x)
            x = spade.spade_resblock(x, img, multiplier * num_channel,
                                     use_spectral_norm=use_sn,
                                     scope=block_scope)

        x = ops.leaky_relu(x, 0.2)
        # Pre-trained checkpoint uses default conv scoping.
        x = ops.sn_conv(x, out_channels, kernel_size=3)
        x = tf.tanh(x)
        # Map the tanh range [-1, 1] back to [0, 1].
        return 0.5 * (x + 1)
def build_model(self, train = True):
    """Build the VQA graph: fc7 image features x encoded question -> answer.

    The image and sentence embeddings (tanh + dropout) are multiplied
    element-wise, passed through dropout again and projected to
    ``ans_vocab_size`` logits.

    Args:
      train: when true, dropout keeps activations with probability 0.5;
        otherwise the keep probability is 1.0 (dropout disabled). It is
        also forwarded to the LSTM text encoder.

    Returns:
      Dict with keys ``'input_tensors'`` (the ``'fc7'``,
      ``'source_sentence'`` and ``'answer'`` placeholders), ``'loss'``,
      ``'accuracy'`` and ``'predictions'``.
    """
    # Despite its name this is the *keep* probability: it is passed as the
    # second positional argument of tf.nn.dropout.
    # Fix: the training branch used to assign an unused variable `dropout`,
    # so the rate stayed at 1.0 and dropout was never applied.
    dropout_rate = 0.5 if train else 1.0

    options = self.options

    fc7_features = tf.placeholder(
        'float32', [None, self.options['fc7_feature_length']], name = 'fc7')
    source_sentence = tf.placeholder(
        'float32',
        [None, options['text_length'], options['length_of_word_vector']],
        name = 'sentence')
    answer = tf.placeholder(
        'float32', [None, self.options['ans_vocab_size']], name = "answer")

    image_embedding = ops.fully_connected(
        fc7_features, 2 * options['residual_channels'],
        name = "image_embedding")
    image_embedding = tf.nn.dropout(tf.nn.tanh(image_embedding),
                                    dropout_rate)
    # %-formatting prints the same text under Python 2 and Python 3.
    print("image_embedding %s" % (image_embedding,))

    if options['text_model'] == "bytenet":
        text_tensors = text_model_v2.encoder_bytenet(source_sentence, options)
    else:
        text_tensors = text_model_v2.encoder_lstm(source_sentence, options,
                                                  train)

    encoded_sentence = text_tensors['last_seq_element']
    encoded_embedding = ops.fully_connected(
        encoded_sentence, 2 * options['residual_channels'],
        name = "encoded_embedding")
    encoded_embedding = tf.nn.dropout(tf.nn.tanh(encoded_embedding),
                                      dropout_rate)
    print("encoded_embedding %s" % (encoded_embedding,))

    # Fuse both modalities by element-wise multiplication.
    combined_features = encoded_embedding * image_embedding
    combined_features = tf.nn.dropout(combined_features, dropout_rate)
    print("combined %s" % (combined_features,))

    logits = ops.fully_connected(combined_features,
                                 options['ans_vocab_size'], name = "logits")
    print("logits %s" % (logits,))

    ce = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                 labels=answer, name = 'ce')
    answer_probab = tf.nn.softmax(logits, name='answer_probab')

    predictions = tf.argmax(answer_probab, 1)
    correct_predictions = tf.equal(tf.argmax(answer_probab, 1),
                                   tf.argmax(answer, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))
    loss = tf.reduce_mean(ce, name = 'loss')

    input_tensors = {
        'fc7' : fc7_features,
        'source_sentence' : source_sentence,
        'answer' : answer
    }
    vqa_model = {
        'input_tensors' : input_tensors,
        'loss' : loss,
        'accuracy' : accuracy,
        'predictions' : predictions,
    }
    return vqa_model
def _generator(z, zy):
    """Build the conditional deconvolutional generator.

    ``zy`` is concatenated to the noise input and (via ``ops.concat``) to
    the output of every hidden stage. Each hidden stage applies optional
    batch norm, leaky ReLU and dropout.

    NOTE(review): ``training`` is not defined in this function; presumably
    it is a module-level name -- confirm.

    Args:
      z: noise tensor.
      zy: conditioning tensor.

    Returns:
      Tuple ``(tanh(h4), activations)``. ``activations`` maps
      ``'h0'``..``'h4'`` and ``'h3_pure'`` (the third stage before norm and
      activation) to tensors; ``'h4'`` is the pre-tanh output.
    """
    def normalize(tensor, index):
        # Batch norm is optional and controlled by the global params.
        if params.use_batch_norm:
            return ops.batch_norm(tensor, name='bn%d' % index)
        return tensor

    def activate(tensor, index):
        # Leaky ReLU -> dropout -> re-attach the condition.
        tensor = ops.lrelu(tensor)
        tensor = ops.dropout(tensor, training=training,
                             keep=params.gen_keep_dropout,
                             name='dropout%d' % index)
        return ops.concat(tensor, zy)

    with tf.variable_scope(params.gen_scope):
        imh, imw = params.dataset.image_size, params.dataset.image_size
        hidden_layers_num = 3
        imdiv = 2**hidden_layers_num
        small_h, small_w = imh // imdiv, imw // imdiv
        ksize = params.gen_filters_size

        h0 = tf.concat([z, zy], axis=1)

        h1 = ops.fully_connected(
            h0, small_h * small_w * params.gen_filters * 4, 'h1')
        h1 = normalize(h1, 1)
        h1 = tf.reshape(h1, [-1, small_h, small_w, params.gen_filters * 4])
        h1 = activate(h1, 1)

        h2 = ops.deconvolution(h1, ksize, params.gen_filters * 2, name='h2')
        h2 = activate(normalize(h2, 2), 2)

        h3_pure = ops.deconvolution(h2, ksize, params.gen_filters, name='h3')
        h3 = activate(normalize(h3_pure, 3), 3)

        h4 = ops.deconvolution(h3, ksize, params.dataset.channels_size,
                               name='h4')

        activations = {
            'h0': h0,
            'h1': h1,
            'h2': h2,
            'h3': h3,
            'h3_pure': h3_pure,
            'h4': h4,
        }
        return tf.nn.tanh(h4), activations
def quat_inception(net, vp_mask):
    """Build the 'Viewpoint_Net' head that outputs 4 values per class.

    Intermediate tensors are recorded in ``net.quat_net``, which is reset
    to an empty dict at the start of the call.

    Args:
      net: model object read for ``norm``, ``mode``, ``keep_prob`` and
        ``num_classes``.
      vp_mask: input mask; a trailing channel axis is appended before the
        first convolution.

    Returns:
      Output of the final fully connected layer (``4 * net.num_classes``
      units).
    """
    net.quat_net = {}

    def conv(layer_name, tensor, ksize, channels, **extra):
        # Every convolution here is stride 1 with the model's norm/mode.
        return conv2d(layer_name, tensor, ksize, channels, stride=1,
                      norm=net.norm, mode=net.mode, **extra)

    with tf.variable_scope('Viewpoint_Net', reuse=tf.AUTO_REUSE):
        vp_mask = tf.expand_dims(vp_mask, -1)

        # Output (bs, 64, 64, ch)
        conv1 = conv('conv1', vp_mask, 3, 256, act=None)
        net.quat_net['conv1'] = conv1
        conv2 = conv('conv2', conv1, 1, 128)
        net.quat_net['conv2'] = conv2
        conv3 = conv('conv3', conv2, 1, 128)
        net.quat_net['conv3'] = conv3

        # Output (bs, 32, 32, ch)
        pool1 = tf.layers.max_pooling2d(conv3, 2, 2, padding='same',
                                        name='pool1')
        net.quat_net['pool1'] = pool1
        conv4 = conv('conv4', pool1, 3, 512)
        net.quat_net['conv4'] = conv4
        # The recorded 'conv5' and 'pool2' tensors are post-dropout.
        conv5 = dropout(conv('conv5', conv4, 1, 256), net.keep_prob)
        net.quat_net['conv5'] = conv5

        # Output (bs, 16, 16, ch)
        pool2 = tf.layers.max_pooling2d(conv5, 2, 2, padding='same',
                                        name='pool2')
        pool2 = dropout(pool2, net.keep_prob)
        net.quat_net['pool2'] = pool2

        fc1 = fully_connected('fc1', pool2, 1024)
        net.quat_net['fc1'] = fc1
        fc2 = fully_connected('fc2', fc1, 4 * net.num_classes)
        net.quat_net['fc2'] = fc2

        # No output non-linearity is applied.
        out = fc2
        net.quat_net['out'] = out
    return out
def quat_res(net, vp_mask):
    """Build the residual 'Quat_Net' head that outputs 4 values per class.

    Layout: 7x7 stride-2 convolution, 3x3 stride-2 max pool, then four
    groups of two residual blocks (64, 128, 256, 512 channels), dropout and
    two fully connected layers. The first block of groups 3-5 downsamples
    with stride 2 and uses a 1x1 stride-2 convolution on the shortcut.

    Main-path convolutions and the fc layers are recorded in
    ``net.quat_net``, which is reset at the start of the call. Shortcut
    convolutions and residual sums are not recorded.

    Args:
      net: model object read for ``norm``, ``mode``, ``keep_prob`` and
        ``num_classes``.
      vp_mask: input mask; a trailing channel axis is appended before the
        first convolution.

    Returns:
      Output of the final fully connected layer (``4 * net.num_classes``
      units).
    """
    net.quat_net = {}

    def conv(layer_name, tensor, ksize, channels, stride, record=True,
             **extra):
        # Convolution with the model's norm/mode; optionally recorded.
        result = conv2d(layer_name, tensor, ksize, channels, stride=stride,
                        norm=net.norm, mode=net.mode, **extra)
        if record:
            net.quat_net[layer_name] = result
        return result

    def residual(tensor, first_name, second_name, channels, stride,
                 shortcut_name, sum_name):
        # Two 3x3 convolutions plus a shortcut: identity when
        # `shortcut_name` is None, else a 1x1 projection at `stride`.
        branch = conv(first_name, tensor, 3, channels, stride)
        branch = conv(second_name, branch, 3, channels, 1)
        if shortcut_name is None:
            shortcut = tensor
        else:
            shortcut = conv(shortcut_name, tensor, 1, channels, stride,
                            record=False)
        return tf.add_n([branch, shortcut], name=sum_name)

    with tf.variable_scope('Quat_Net', reuse=tf.AUTO_REUSE):
        vp_mask = tf.expand_dims(vp_mask, -1)

        # Output (bs, 32, 32, 64)
        conv1 = conv('conv1', vp_mask, 7, 64, 2, act=None)

        # Output (bs, 16, 16, 64)
        pool1 = tf.layers.max_pooling2d(conv1, 3, 2, padding='same',
                                        name='pool1')
        net.quat_net['pool1'] = pool1

        # Output (bs, 16, 16, 64)
        res_2a = residual(pool1, 'conv2_1a', 'conv2_2a', 64, 1,
                          None, 'res_2a')
        res_2b = residual(res_2a, 'conv2_1b', 'conv2_2b', 64, 1,
                          None, 'res_2b')

        # Output (bs, 8, 8, 128)
        res_3a = residual(res_2b, 'conv3_1a', 'conv3_2a', 128, 2,
                          'res_2b_skip', 'res_3a')
        res_3b = residual(res_3a, 'conv3_1b', 'conv3_2b', 128, 1,
                          None, 'res_3b')

        # Output (bs, 4, 4, 256)
        res_4a = residual(res_3b, 'conv4_1a', 'conv4_2a', 256, 2,
                          'res_3b_skip', 'res_4a')
        res_4b = residual(res_4a, 'conv4_1b', 'conv4_2b', 256, 1,
                          None, 'res_4b')

        # Output (bs, 2, 2, 512)
        # The 'con5_*' spellings are existing layer names and dict keys;
        # they must stay as they are.
        res_5a = residual(res_4b, 'con5_1a', 'con5_2a', 512, 2,
                          'res_4b_skip', 'res_5a')
        res_5b = residual(res_5a, 'conv5_1b', 'conv5_2b', 512, 1,
                          None, 'res_5b')
        res_5b = dropout(res_5b, net.keep_prob)

        # Output (bs, 4*num_classes)
        fc1 = fully_connected('fc1', res_5b, 512)
        net.quat_net['fc1'] = fc1
        fc2 = fully_connected('fc2', fc1, 4 * net.num_classes)
        net.quat_net['fc2'] = fc2

        # No output non-linearity is applied.
        out = fc2
        net.quat_net['out'] = out
    return out