def resface36(images, keep_probability, phase_train=True,
              bottleneck_layer_size=512, weight_decay=0.0, reuse=None):
    '''
    conv name
    conv[conv_layer]_[block_index]_[block_layer_index]
    '''
    with tf.variable_scope('Conv1'):
        net = resface_pre(images, 64, scope='Conv1_pre')
        net = slim.repeat(net, 2, resface_block, 64, scope='Conv_1')
    with tf.variable_scope('Conv2'):
        net = resface_pre(net, 128, scope='Conv2_pre')
        net = slim.repeat(net, 4, resface_block, 128, scope='Conv_2')
    with tf.variable_scope('Conv3'):
        net = resface_pre(net, 256, scope='Conv3_pre')
        net = slim.repeat(net, 8, resface_block, 256, scope='Conv_3')
    with tf.variable_scope('Conv4'):
        net = resface_pre(net, 512, scope='Conv4_pre')
        # net = resface_block(Conv4_pre, 512, scope='Conv4_1')
        net = slim.repeat(net, 1, resface_block, 512, scope='Conv4')
    with tf.variable_scope('Logits'):
        # pylint: disable=no-member
        # net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
        #                       scope='AvgPool')
        net = slim.flatten(net)
        net = slim.dropout(net, keep_probability, is_training=phase_train,
                           scope='Dropout')
    net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
                               scope='Bottleneck', reuse=False)
    return net, ''
def localization_VGG16(self, inputs):
    with tf.variable_scope('localization_network'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_initializer=tf.constant_initializer(0.0)):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            shape = int(np.prod(net.get_shape()[1:]))
            net = slim.fully_connected(tf.reshape(net, [-1, shape]), 4096,
                                       scope='fc6')
            net = slim.fully_connected(net, 1024, scope='fc7')
            identity = np.array([[1., 0., 0.],
                                 [0., 1., 0.]])
            identity = identity.flatten()
            net = slim.fully_connected(
                net, 6,
                biases_initializer=tf.constant_initializer(identity),
                scope='fc8')
    return net
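# Note on the localization network above (a sketch, not from the original
# source): with the zero weights_initializer, fc8 initially outputs exactly its
# bias, i.e. the flattened 2x3 identity affine, so the spatial transformer
# starts as an identity warp. A quick standalone check of that initial output:
import numpy as np

initial_theta = np.array([[1., 0., 0.],
                          [0., 1., 0.]]).flatten()
print(initial_theta)  # [1. 0. 0. 0. 1. 0.] -- the no-op affine parameters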
def decoder(self, latent_var, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('decoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=self.batch_norm_params):
                net = slim.fully_connected(latent_var, 4096, activation_fn=None,
                                           normalizer_fn=None, scope='Fc_1')
                net = tf.reshape(net, [-1, 4, 4, 256], name='Reshape')

                net = tf.image.resize_nearest_neighbor(net, size=(8, 8),
                                                       name='Upsample_1')
                net = slim.conv2d(net, 128, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1b')

                net = tf.image.resize_nearest_neighbor(net, size=(16, 16),
                                                       name='Upsample_2')
                net = slim.conv2d(net, 64, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2b')

                net = tf.image.resize_nearest_neighbor(net, size=(32, 32),
                                                       name='Upsample_3')
                net = slim.conv2d(net, 32, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3b')

                net = tf.image.resize_nearest_neighbor(net, size=(64, 64),
                                                       name='Upsample_4')
                net = slim.conv2d(net, 3, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 3, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4b')
                net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None,
                                  scope='Conv2d_4c')
    return net
def LResnet50E_IR(images, keep_probability, phase_train=True,
                  bottleneck_layer_size=512, weight_decay=0.0, reuse=None):
    '''
    conv name
    conv[conv_layer]_[block_index]_[block_layer_index]

    for resnet50 n_units=[3,4,14,3]; counting one unit per stage as the
    dim_reduction_layer, the repeats become n_units=[2,3,13,2]
    '''
    with tf.variable_scope('Conv1'):
        net = slim.conv2d(images, 64, scope='Conv1_pre')
        net = slim.batch_norm(net, scope='Conv1_bn')
    with tf.variable_scope('Conv2'):
        net = resface_block(net, 64, stride=2, dim_match=False, scope='Conv2_pre')
        net = slim.repeat(net, 2, resface_block, 64, 1, True, scope='Conv2_main')
    with tf.variable_scope('Conv3'):
        net = resface_block(net, 128, stride=2, dim_match=False, scope='Conv3_pre')
        net = slim.repeat(net, 3, resface_block, 128, 1, True, scope='Conv3_main')
    with tf.variable_scope('Conv4'):
        net = resface_block(net, 256, stride=2, dim_match=False, scope='Conv4_pre')
        net = slim.repeat(net, 13, resface_block, 256, 1, True, scope='Conv4_main')
    with tf.variable_scope('Conv5'):
        net = resface_block(net, 512, stride=2, dim_match=False, scope='Conv5_pre')
        net = slim.repeat(net, 2, resface_block, 512, 1, True, scope='Conv5_main')
    with tf.variable_scope('Logits'):
        net = slim.batch_norm(net, activation_fn=None, scope='bn1')
        net = slim.dropout(net, keep_probability, is_training=phase_train,
                           scope='Dropout')
        net = slim.flatten(net)
        net = slim.fully_connected(
            net, bottleneck_layer_size,
            biases_initializer=tf.contrib.layers.xavier_initializer(),
            scope='fc1')
    net = slim.batch_norm(net, activation_fn=None, scope='Bottleneck')
    return net, ''
def encoder(self, images, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('encoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                    weights_regularizer=slim.l2_regularizer(weight_decay),
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=self.batch_norm_params):
                net = images
                net = slim.conv2d(net, 32, [4, 4], 2,
                                  activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1b')
                net = slim.conv2d(net, 64, [4, 4], 2,
                                  activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2b')
                net = slim.conv2d(net, 128, [4, 4], 2,
                                  activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3b')
                net = slim.conv2d(net, 256, [4, 4], 2,
                                  activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4b')
                net = slim.flatten(net)
                fc1 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None,
                                           normalizer_fn=None, scope='Fc_1')
                fc2 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None,
                                           normalizer_fn=None, scope='Fc_2')
    return fc1, fc2
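# The two heads above follow the usual VAE pattern: Fc_1 as the latent mean and
# Fc_2 as the log-variance (the surrounding class is not shown, so this pairing
# is an assumption). A minimal reparameterization step under that assumption:
import tensorflow as tf

def sample_latent(mean, log_var):
    # z = mean + sigma * eps, with eps ~ N(0, I) (reparameterization trick)
    eps = tf.random_normal(tf.shape(mean))
    return mean + tf.exp(0.5 * log_var) * eps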
def _build_network(self, sess, is_training=True):
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                          trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv5')
        self._act_summaries.append(net)
        self._layers['head'] = net

        # build the anchors for the image
        self._anchor_component()

        # region proposal network
        rois = self._region_proposal(net, is_training, initializer)

        # region of interest pooling
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net, rois, "pool5")
        else:
            raise NotImplementedError

        pool5_flat = slim.flatten(pool5, scope='flatten')
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True,
                               scope='dropout6')
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True,
                               scope='dropout7')

        # region classification
        cls_prob, bbox_pred = self._region_classification(
            fc7, is_training, initializer, initializer_bbox)

        self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def build_backbones(self):
    inputs = self.inputs
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            padding='SAME',
            weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.0005),
            activation_fn=tf.nn.relu):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        self.vgg_head = net
def _image_to_head(self, is_training, reuse=False):
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                          trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv5')
    self._act_summaries.append(net)
    self._layers['head'] = net
    return net
def inference(self):
    x = tf.reshape(self.x, shape=[-1, self.input_shape[0],
                                  self.input_shape[1], self.input_shape[2]])
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        model = slim.repeat(x, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        model = slim.max_pool2d(model, [2, 2], scope='pool1')
        model = slim.repeat(model, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        model = slim.max_pool2d(model, [2, 2], scope='pool2')
        model = slim.repeat(model, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        model = slim.max_pool2d(model, [2, 2], scope='pool3')
        model = slim.repeat(model, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        model = slim.max_pool2d(model, [2, 2], scope='pool4')
        model = slim.repeat(model, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        model = slim.max_pool2d(model, [2, 2], scope='pool5')
        model = slim.flatten(model, scope='flatten5')
        model = slim.fully_connected(model, 4096, scope='fc6')
        model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do6')
        model = slim.fully_connected(model, 4096, scope='fc7')
        model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do7')
        model = slim.fully_connected(model, self.nclasses, activation_fn=None,
                                     scope='fcX8')
    return model
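# A hypothetical loss hookup for the logits returned by inference(), written as
# a companion method; self.y (one-hot labels) is assumed and is not part of the
# original class.
def loss_op(self):
    logits = self.inference()
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=self.y, logits=logits)
    return tf.reduce_mean(cross_entropy)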
def build_model():
    #### build some layers
    def LeakyReLU(x, alpha):
        return tf.nn.relu(x) - alpha * tf.nn.relu(-x)

    def orientation_loss(y_true, y_pred):
        # Find number of anchors (bins with a non-zero ground-truth vector)
        anchors = tf.reduce_sum(tf.square(y_true), axis=2)
        anchors = tf.greater(anchors, tf.constant(0.5))
        anchors = tf.reduce_sum(tf.cast(anchors, tf.float32), 1)
        # Define the loss: the dot product of the (cos, sin) pairs is the
        # cosine of the angle difference between prediction and ground truth
        loss = (y_true[:, :, 0] * y_pred[:, :, 0] + y_true[:, :, 1] * y_pred[:, :, 1])
        loss = tf.reduce_sum((2 - 2 * tf.reduce_mean(loss, axis=0))) / anchors
        return tf.reduce_mean(loss)
    #####

    # Build Graph
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        conv5 = tf.contrib.layers.flatten(net)

        # dimension = slim.fully_connected(conv5, 512, scope='fc7_d')
        dimension = slim.fully_connected(conv5, 512, activation_fn=None, scope='fc7_d')
        dimension = LeakyReLU(dimension, 0.1)
        dimension = slim.dropout(dimension, 0.5, scope='dropout7_d')
        # dimension = slim.fully_connected(dimension, 3, scope='fc8_d')
        dimension = slim.fully_connected(dimension, 3, activation_fn=None, scope='fc8_d')
        # dimension = LeakyReLU(dimension, 0.1)
        # loss_d = tf.reduce_mean(tf.square(d_label - dimension))
        loss_d = tf.losses.mean_squared_error(d_label, dimension)

        # orientation = slim.fully_connected(conv5, 256, scope='fc7_o')
        orientation = slim.fully_connected(conv5, 256, activation_fn=None, scope='fc7_o')
        orientation = LeakyReLU(orientation, 0.1)
        orientation = slim.dropout(orientation, 0.5, scope='dropout7_o')
        # orientation = slim.fully_connected(orientation, BIN*2, scope='fc8_o')
        orientation = slim.fully_connected(orientation, BIN * 2, activation_fn=None,
                                           scope='fc8_o')
        # orientation = LeakyReLU(orientation, 0.1)
        orientation = tf.reshape(orientation, [-1, BIN, 2])
        orientation = tf.nn.l2_normalize(orientation, dim=2)
        loss_o = orientation_loss(o_label, orientation)

        # confidence = slim.fully_connected(conv5, 256, scope='fc7_c')
        confidence = slim.fully_connected(conv5, 256, activation_fn=None, scope='fc7_c')
        confidence = LeakyReLU(confidence, 0.1)
        confidence = slim.dropout(confidence, 0.5, scope='dropout7_c')
        confidence = slim.fully_connected(confidence, BIN, activation_fn=None,
                                          scope='fc8_c')
        loss_c = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=c_label, logits=confidence))
        confidence = tf.nn.softmax(confidence)
        # loss_c = tf.reduce_mean(tf.square(c_label - confidence))
        # loss_c = tf.losses.mean_squared_error(c_label, confidence)

        total_loss = 4. * loss_d + 8. * loss_o + loss_c
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)

    return dimension, orientation, confidence, total_loss, optimizer, loss_d, loss_o, loss_c
def inception_resnet_v2_base(inputs,
                             final_endpoint='Conv2d_7b_1x1',
                             output_stride=16,
                             align_feature_maps=False,
                             scope=None,
                             activation_fn=tf.nn.relu):
    """Inception model from http://arxiv.org/abs/1602.07261.

    Constructs an Inception Resnet v2 network from inputs to the given final
    endpoint. This method can construct the network up to the final inception
    block Conv2d_7b_1x1.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      final_endpoint: specifies the endpoint to construct the network up to.
        It can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
        'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
        'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1']
      output_stride: A scalar that specifies the requested ratio of input to
        output spatial resolution. Only supports 8 and 16.
      align_feature_maps: When true, changes all the VALID paddings in the
        network to SAME padding so that the feature maps are aligned.
      scope: Optional variable_scope.
      activation_fn: Activation function for block scopes.

    Returns:
      tensor_out: output tensor corresponding to the final_endpoint.
      end_points: a set of activations for external use, for example summaries
        or losses.

    Raises:
      ValueError: if final_endpoint is not set to one of the predefined values,
        or if the output_stride is not 8 or 16, or if the output_stride is 8
        and we request an end point after 'PreAuxLogits'.
    """
    if output_stride != 8 and output_stride != 16:
        raise ValueError('output_stride must be 8 or 16.')

    padding = 'SAME' if align_feature_maps else 'VALID'

    end_points = {}

    def add_and_check_final(name, net):
        end_points[name] = net
        return name == final_endpoint

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding='SAME'):
            # 149 x 149 x 32
            net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding,
                              scope='Conv2d_1a_3x3')
            if add_and_check_final('Conv2d_1a_3x3', net):
                return net, end_points
            # 147 x 147 x 32
            net = slim.conv2d(net, 32, 3, padding=padding, scope='Conv2d_2a_3x3')
            if add_and_check_final('Conv2d_2a_3x3', net):
                return net, end_points
            # 147 x 147 x 64
            net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
            if add_and_check_final('Conv2d_2b_3x3', net):
                return net, end_points
            # 73 x 73 x 64
            net = slim.max_pool2d(net, 3, stride=2, padding=padding,
                                  scope='MaxPool_3a_3x3')
            if add_and_check_final('MaxPool_3a_3x3', net):
                return net, end_points
            # 73 x 73 x 80
            net = slim.conv2d(net, 80, 1, padding=padding, scope='Conv2d_3b_1x1')
            if add_and_check_final('Conv2d_3b_1x1', net):
                return net, end_points
            # 71 x 71 x 192
            net = slim.conv2d(net, 192, 3, padding=padding, scope='Conv2d_4a_3x3')
            if add_and_check_final('Conv2d_4a_3x3', net):
                return net, end_points
            # 35 x 35 x 192
            net = slim.max_pool2d(net, 3, stride=2, padding=padding,
                                  scope='MaxPool_5a_3x3')
            if add_and_check_final('MaxPool_5a_3x3', net):
                return net, end_points

            # 35 x 35 x 320
            with tf.variable_scope('Mixed_5b'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                with tf.variable_scope('Branch_1'):
                    tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                                scope='Conv2d_0b_5x5')
                with tf.variable_scope('Branch_2'):
                    tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
                    tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                                scope='Conv2d_0c_3x3')
                with tf.variable_scope('Branch_3'):
                    tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                                 scope='AvgPool_0a_3x3')
                    tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                               scope='Conv2d_0b_1x1')
                net = tf.concat(
                    [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3)
            if add_and_check_final('Mixed_5b', net):
                return net, end_points
            # TODO(alemi): Register intermediate endpoints
            net = slim.repeat(net, 10, block35, scale=0.17,
                              activation_fn=activation_fn)

            # 17 x 17 x 1088 if output_stride == 8,
            # 33 x 33 x 1088 if output_stride == 16
            use_atrous = output_stride == 8

            with tf.variable_scope('Mixed_6a'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net, 384, 3,
                                             stride=1 if use_atrous else 2,
                                             padding=padding,
                                             scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                                stride=1 if use_atrous else 2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    tower_pool = slim.max_pool2d(net, 3,
                                                 stride=1 if use_atrous else 2,
                                                 padding=padding,
                                                 scope='MaxPool_1a_3x3')
                net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
            if add_and_check_final('Mixed_6a', net):
                return net, end_points

            # TODO(alemi): register intermediate endpoints
            with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1):
                net = slim.repeat(net, 20, block17, scale=0.10,
                                  activation_fn=activation_fn)
            if add_and_check_final('PreAuxLogits', net):
                return net, end_points

            if output_stride == 8:
                # TODO(gpapan): Properly support output_stride for the rest of the net.
                raise ValueError('output_stride==8 is only supported up to the '
                                 'PreAuxlogits end_point for now.')

            # 8 x 8 x 2080
            with tf.variable_scope('Mixed_7a'):
                with tf.variable_scope('Branch_0'):
                    tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                    tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                               padding=padding,
                                               scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_1'):
                    tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                    tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_2'):
                    tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                    tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                                scope='Conv2d_0b_3x3')
                    tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                                padding=padding,
                                                scope='Conv2d_1a_3x3')
                with tf.variable_scope('Branch_3'):
                    tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                 padding=padding,
                                                 scope='MaxPool_1a_3x3')
                net = tf.concat(
                    [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)
            if add_and_check_final('Mixed_7a', net):
                return net, end_points

            # TODO(alemi): register intermediate endpoints
            net = slim.repeat(net, 9, block8, scale=0.20,
                              activation_fn=activation_fn)
            net = block8(net, activation_fn=None)

            # 8 x 8 x 1536
            net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
            if add_and_check_final('Conv2d_7b_1x1', net):
                return net, end_points

        raise ValueError('final_endpoint (%s) not recognized' % final_endpoint)
def repeat(inputs, repetitions, layer, layer_dict={}, **kwargs):
    # Note: the mutable default for layer_dict is shared across calls; pass an
    # explicit dict if you rely on its contents. 'scope' must be in kwargs.
    outputs = slim.repeat(inputs, repetitions, layer, **kwargs)
    _update_dict(layer_dict, kwargs['scope'], outputs)
    return outputs
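# A usage sketch for the wrapper above, assuming _update_dict simply records
# each block's output under its scope name (the real helper is not shown, so
# the stand-in below is hypothetical).
import tensorflow as tf
import tensorflow.contrib.slim as slim

def _update_dict(layer_dict, scope, outputs):
    layer_dict[scope] = outputs  # hypothetical stand-in for the real helper

layers = {}
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net = repeat(images, 2, slim.conv2d, layers,
             num_outputs=64, kernel_size=[3, 3], scope='conv1')
# layers['conv1'] now holds the output of the two stacked 3x3 convolutions.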
def define(inputs, reuse, weightDecay, scope='InceptionResnetV2',
           trainFrom=None, freezeBatchNorm=False):
    """Creates the Inception Resnet V2 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      num_classes: number of predicted classes.
      is_training: whether is training or not.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    with tf.name_scope('preprocess'):
        # BGR -> RGB
        inputs = tf.reverse(inputs, axis=[3])
        # Normalize
        inputs = 2.0 * (inputs / 255.0 - 0.5)

    end_points = {}
    scopes = []

    trainBatchNormScope = slim.arg_scope([slim.batch_norm], is_training=True)
    weightDecayScope = slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_regularizer=slim.l2_regularizer(weightDecay),
        biases_regularizer=slim.l2_regularizer(weightDecay))

    nonlocalTemp = {"trainBnEntered": False, "currBlock": ""}

    def beginBlock(name):
        nonlocalTemp["currBlock"] = name
        if (trainFrom is not None) and (not nonlocalTemp["trainBnEntered"]) and \
                (trainFrom == name or trainFrom == "start"):
            print("Enabling training on " + trainFrom)
            if not freezeBatchNorm:
                trainBatchNormScope.__enter__()
            weightDecayScope.__enter__()
            nonlocalTemp["trainBnEntered"] = True

    def endBlock(net, scope=True, name=None):
        if name is None:
            name = nonlocalTemp["currBlock"]
        end_points[name] = net
        if scope:
            scopes.append(name)

    def endAll():
        if nonlocalTemp["trainBnEntered"]:
            if not freezeBatchNorm:
                trainBatchNormScope.__exit__(None, None, None)
            weightDecayScope.__exit__(None, None, None)

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs],
                           reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm], is_training=False):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                beginBlock('Conv2d_1a_3x3')
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                endBlock(net)
                # 147 x 147 x 32
                beginBlock('Conv2d_2a_3x3')
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                endBlock(net)
                # 147 x 147 x 64
                beginBlock('Conv2d_2b_3x3')
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                endBlock(net)
                # 73 x 73 x 64
                beginBlock('MaxPool_3a_3x3')
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                endBlock(net)
                # 73 x 73 x 80
                beginBlock('Conv2d_3b_1x1')
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                endBlock(net)
                # 71 x 71 x 192
                beginBlock('Conv2d_4a_3x3')
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                endBlock(net)
                # 35 x 35 x 192
                beginBlock('MaxPool_5a_3x3')
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_5a_3x3')
                endBlock(net)

                # 35 x 35 x 320
                beginBlock('Mixed_5b')
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 48, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net, 64, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net, 3, stride=1,
                                                     padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                                   scope='Conv2d_0b_1x1')
                    net = tf.concat([tower_conv, tower_conv1_1,
                                     tower_conv2_2, tower_pool_1], 3)
                endBlock(net)

                beginBlock('Repeat')
                net = slim.repeat(net, 10, InceptionResnetV2.block35, scale=0.17)
                endBlock(net)

                # 17 x 17 x 1024
                beginBlock('Mixed_6a')
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 384, 3, stride=2,
                                                 padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 256, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
                endBlock(net)

                beginBlock('Repeat_1')
                net = slim.repeat(net, 20, InceptionResnetV2.block17, scale=0.10)
                endBlock(net)
                endBlock(net, scope=False, name='aux')

                beginBlock('Mixed_7a')
                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 256, 1,
                                                 scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                                   padding='VALID',
                                                   scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net, 256, 1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net, 256, 1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv_1, tower_conv1_1,
                                     tower_conv2_2, tower_pool], 3)
                endBlock(net)

                beginBlock('Repeat_2')
                net = slim.repeat(net, 9, InceptionResnetV2.block8, scale=0.20)
                endBlock(net)

                beginBlock('Block8')
                net = InceptionResnetV2.block8(net, activation_fn=None)
                endBlock(net)

                beginBlock('Conv2d_7b_1x1')
                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                endBlock(net)
                endBlock(net, scope=False, name='PrePool')

                endAll()
                return end_points, scope, scopes
def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8,
                        bottleneck_layer_size=128, reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      num_classes: number of predicted classes.
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 5, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                # net = slim.conv2d(net, 32, 3, padding='VALID',
                #                   scope='Conv2d_2a_3x3')
                # end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 32, [1, 5], scope='Conv2d_2b_3x3')
                net = slim.conv2d(net, 32, [5, 1], scope='Conv2d_2c_3x3')
                end_points['Conv2d_2_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=3, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 64, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                net = slim.max_pool2d(net, 3, stride=3, padding='VALID',
                                      scope='MaxPool_3b_3x3')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                # net = slim.conv2d(net, 192, 3, padding='VALID',
                #                   scope='Conv2d_4a_3x3')
                # end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 1, block35, scale=0.27)
                end_points['Mixed_5a'] = net

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 96, 48, 96, 64)
                end_points['Mixed_6a'] = net

                # # 10 x Inception-Resnet-B
                # net = slim.repeat(net, 1, block17, scale=0.10)
                # end_points['Mixed_6b'] = net
                #
                # # Reduction-B
                # with tf.variable_scope('Mixed_7a'):
                #     net = reduction_b(net)
                # end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 1, block8, scale=0.20)
                end_points['Mixed_8a'] = net

                net = block8(net, activation_fn=None)
                end_points['Mixed_8b'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')  # TODO: change to max pool
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training, scope='Dropout')
                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points
def recursive_generator(label, sp):
    label_tmp = tf.image.resize_bilinear(label, (sp // 2, sp), align_corners=True)

    # encoder
    conv1_encoder = slim.repeat(tf.concat([label_tmp], axis=3), 2, slim.conv2d,
                                64, [3, 3], rate=1, normalizer_fn=slim.layer_norm,
                                activation_fn=lrelu, scope='g_encoder_conv1')
    pool1_encoder = slim.avg_pool2d(conv1_encoder, [3, 3], stride=2,
                                    padding='SAME', scope='g_encoder_pool1')
    conv2_encoder = slim.repeat(pool1_encoder, 2, slim.conv2d, 128, [3, 3],
                                rate=1, normalizer_fn=slim.layer_norm,
                                activation_fn=lrelu, scope='g_encoder_conv2')
    pool2_encoder = slim.avg_pool2d(conv2_encoder, [3, 3], stride=2,
                                    padding='SAME', scope='g_encoder_pool2')
    conv3_encoder = slim.repeat(pool2_encoder, 3, slim.conv2d, 256, [3, 3],
                                rate=1, normalizer_fn=slim.layer_norm,
                                activation_fn=lrelu, scope='g_encoder_conv3')
    pool3_encoder = slim.avg_pool2d(conv3_encoder, [3, 3], stride=2,
                                    padding='SAME', scope='g_encoder_pool3')
    conv4_encoder = slim.repeat(pool3_encoder, 3, slim.conv2d, 512, [3, 3],
                                rate=1, normalizer_fn=slim.layer_norm,
                                activation_fn=lrelu, scope='g_encoder_conv4')
    pool4_encoder = slim.avg_pool2d(conv4_encoder, [3, 3], stride=2,
                                    padding='SAME', scope='g_encoder_pool4')
    conv5_encoder = slim.repeat(pool4_encoder, 3, slim.conv2d, 512, [3, 3],
                                rate=1, normalizer_fn=slim.layer_norm,
                                activation_fn=lrelu, scope='g_encoder_conv5')
    pool5_encoder = slim.avg_pool2d(conv5_encoder, [3, 3], stride=2,
                                    padding='SAME', scope='g_encoder_pool5')
    conv6_encoder = slim.repeat(pool5_encoder, 3, slim.conv2d, 512, [3, 3],
                                rate=1, normalizer_fn=slim.layer_norm,
                                activation_fn=lrelu, scope='g_encoder_conv6')
    pool6_encoder = slim.avg_pool2d(conv6_encoder, [3, 3], stride=2,
                                    padding='SAME', scope='g_encoder_pool6')

    # decoder
    downsampled_6 = tf.image.resize_bilinear(label, (sp // 128, sp // 64),
                                             align_corners=True)
    input_6 = tf.concat([downsampled_6, pool6_encoder], 3)
    net_6 = slim.repeat(input_6, 2, slim.conv2d, 512, [3, 3], rate=1,
                        normalizer_fn=slim.layer_norm, activation_fn=lrelu,
                        scope='g_decoder_conv6')
    net_6 = tf.image.resize_bilinear(net_6, (sp // 64, sp // 32),
                                     align_corners=True)

    downsampled_5 = tf.image.resize_bilinear(label, (sp // 64, sp // 32),
                                             align_corners=True)
    input_5 = tf.concat([downsampled_5, pool5_encoder, net_6], 3)
    net_5 = slim.repeat(input_5, 2, slim.conv2d, 512, [3, 3], rate=1,
                        normalizer_fn=slim.layer_norm, activation_fn=lrelu,
                        scope='g_decoder_conv5')
    net_5 = tf.image.resize_bilinear(net_5, (sp // 32, sp // 16),
                                     align_corners=True)

    downsampled_4 = tf.image.resize_bilinear(label, (sp // 32, sp // 16),
                                             align_corners=True)
    input_4 = tf.concat([downsampled_4, pool4_encoder, net_5], 3)
    net_4 = slim.repeat(input_4, 2, slim.conv2d, 512, [3, 3], rate=1,
                        normalizer_fn=slim.layer_norm, activation_fn=lrelu,
                        scope='g_decoder_conv4')
    net_4 = tf.image.resize_bilinear(net_4, (sp // 16, sp // 8),
                                     align_corners=True)

    downsampled_3 = tf.image.resize_bilinear(label, (sp // 16, sp // 8),
                                             align_corners=True)
    input_3 = tf.concat([downsampled_3, pool3_encoder, net_4], 3)
    net_3 = slim.repeat(input_3, 2, slim.conv2d, 512, [3, 3], rate=1,
                        normalizer_fn=slim.layer_norm, activation_fn=lrelu,
                        scope='g_decoder_conv3')
    net_3 = tf.image.resize_bilinear(net_3, (sp // 8, sp // 4),
                                     align_corners=True)

    downsampled_2 = tf.image.resize_bilinear(label, (sp // 8, sp // 4),
                                             align_corners=True)
    input_2 = tf.concat([downsampled_2, pool2_encoder, net_3], 3)
    net_2 = slim.repeat(input_2, 2, slim.conv2d, 512, [3, 3], rate=1,
                        normalizer_fn=slim.layer_norm, activation_fn=lrelu,
                        scope='g_decoder_conv2')
    net_2 = tf.image.resize_bilinear(net_2, (sp // 4, sp // 2),
                                     align_corners=True)

    downsampled_1 = tf.image.resize_bilinear(label, (sp // 4, sp // 2),
                                             align_corners=True)
    input_1 = tf.concat([downsampled_1, pool1_encoder, net_2], 3)
    net_1 = slim.repeat(input_1, 2, slim.conv2d, 256, [3, 3], rate=1,
                        normalizer_fn=slim.layer_norm, activation_fn=lrelu,
                        scope='g_decoder_conv1')
    net_1 = tf.image.resize_bilinear(net_1, (sp // 2, sp), align_corners=True)

    input = tf.concat([label_tmp, net_1], 3)
    net = slim.repeat(input, 2, slim.conv2d, 256, [3, 3], rate=1,
                      normalizer_fn=slim.layer_norm, activation_fn=lrelu,
                      scope='g_decoder_final')
    net = tf.image.resize_bilinear(net, (sp, sp * 2), align_corners=True)

    input_up = tf.concat([label, net], 3)
    net_up = slim.repeat(input_up, 2, slim.conv2d, 128, [3, 3], rate=1,
                         normalizer_fn=slim.layer_norm, activation_fn=lrelu,
                         scope='g_decoder_512')
    net_final = slim.conv2d(net_up, 3, [1, 1], rate=1, activation_fn=None,
                            scope='g_512' + '_conv100')
    net_final = (net_final + 1.0) / 2.0 * 255.0
    net_semantic = slim.conv2d(net_up, 20, [1, 1], rate=1, activation_fn=None,
                               scope='g_semantic_512')
    return net_final, net_semantic
def inception_resnet_v2(inputs, is_training=True, dropout_keep_prob=0.8,
                        bottleneck_layer_size=128, reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      num_classes: number of predicted classes.
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 192
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_5a_3x3')
                end_points['MaxPool_5a_3x3'] = net

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 48, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net, 64, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net, 3, stride=1,
                                                     padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                                   scope='Conv2d_0b_1x1')
                    net = tf.concat([tower_conv, tower_conv1_1,
                                     tower_conv2_2, tower_pool_1], 3)
                end_points['Mixed_5b'] = net
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1024
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 384, 3, stride=2,
                                                 padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 256, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 256, 1,
                                                 scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                                   padding='VALID',
                                                   scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net, 256, 1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net, 256, 1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv_1, tower_conv1_1,
                                     tower_conv2_2, tower_pool], 3)
                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                end_points['Conv2d_7b_1x1'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training, scope='Dropout')
                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points
def vgg_16(inputs, num_classes=1000, is_training=True, dropout_keep_prob=0.5,
           spatial_squeeze=True, scope='vgg_16'):
    """Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not to squeeze the spatial dimensions of the
        outputs. Useful to remove unnecessary dimensions for classification.
      scope: Optional scope for the variables.

    Returns:
      the last op containing the log predictions and end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout7')
            net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                              normalizer_fn=None, scope='fc8')
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
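# A minimal call-site sketch for vgg_16 above (TF 1.x); the end_points key
# format follows slim's scope-based collection naming, so the exact key is an
# assumption and may vary with the enclosing scope.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = vgg_16(images, num_classes=1000, is_training=False)
pool5 = end_points['vgg_16/pool5']  # assumed key; [N, 7, 7, 512] features
predictions = tf.nn.softmax(logits)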
def vgg16_model(inputs, emb_size=128, is_training=True, img_shape=None,
                new_shape=None, dropout_keep_prob=0.5, l2_weight=0.0005,
                end_point=None, **kwargs):
    inputs = tf.cast(inputs, tf.float32)
    if new_shape is not None:
        shape = new_shape
        inputs = tf.image.resize_images(inputs, tf.constant(new_shape[:2]),
                                        method=tf.image.ResizeMethod.BILINEAR)
    else:
        shape = img_shape
    # net = inputs
    net = tf.image.resize_images(inputs, tf.constant([168, 168]),
                                 method=tf.image.ResizeMethod.BILINEAR)
    mean = tf.reduce_mean(net, [1, 2], True)
    std = tf.reduce_mean(tf.square(net - mean), [1, 2], True)
    net = (net - mean) / (std + 1e-5)
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            # weights_regularizer=slim.l2_regularizer(l2_weight)):
            weights_regularizer=None):
        with slim.arg_scope([slim.dropout], is_training=is_training):
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3], scope='conv1')  # 100
            net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 50
            print('--------pool1--------------')
            print(net)
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 25
            print('--------pool2--------------')
            print(net)
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 12
            print('--------pool3--------------')
            print(net)
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')  # 6
            print('--------pool4--------------')
            print(net)
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')  # 3
            print('--------pool5--------------')
            print(net)
            net = slim.flatten(net, scope='flatten')

            with slim.arg_scope([slim.fully_connected], normalizer_fn=None):
                net = slim.fully_connected(net, 4096, activation_fn=tf.nn.relu,
                                           scope='fc6')
                if end_point == 'fc6':
                    return net
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training, scope='dropout6')
                emb = slim.fully_connected(net, emb_size, activation_fn=None,
                                           scope='fc7')
    return emb
def vgg_19(inputs, num_classes=1000, is_training=False, dropout_keep_prob=0.5,
           spatial_squeeze=True, scope='vgg_19', reuse=False,
           fc_conv_padding='VALID'):
    with tf.variable_scope(scope, 'vgg_19', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, 3, scope='conv1',
                              reuse=reuse)
            net = slim.avg_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, 3, scope='conv2',
                              reuse=reuse)
            net = slim.avg_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 4, slim.conv2d, 256, 3, scope='conv3',
                              reuse=reuse)
            net = slim.avg_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv4',
                              reuse=reuse)
            net = slim.avg_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv5',
                              reuse=reuse)
            net = slim.avg_pool2d(net, [2, 2], scope='pool5')
            # The fully connected layers are omitted in this variant.
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
def create_model(inputs, labels=None, is_train=True):
    with slim.arg_scope(resnet_v2.resnet_arg_scope(batch_norm_decay=0.99)):
        conv1 = slim.conv2d(inputs, num_outputs=64, kernel_size=7, stride=1)
        pol1 = slim.max_pool2d(conv1, kernel_size=3, stride=2)
        # Stage 2
        conv2 = conv_block(pol1, 64, rate=2, stride=1)
        conv2 = slim.repeat(conv2, 2, identity_block, scope='block1',
                            in_depth=64, rate=2)
        # Stage 3
        conv3 = conv_block(conv2, 128, rate=2, stride=2)
        conv3 = slim.repeat(conv3, 3, identity_block, scope='block2',
                            in_depth=128, rate=2)
        # Stage 4
        conv4 = conv_block(conv3, 256, rate=2, stride=2)
        conv4 = slim.repeat(conv4, 22, identity_block, scope='block3',
                            in_depth=256, rate=2)
        # Stage 5
        conv5 = conv_block(conv4, 512, rate=2, stride=2)
        conv5 = slim.repeat(conv5, 2, identity_block, scope='block4',
                            in_depth=512, rate=4)

    svs = slim.get_variables_to_restore()

    # Side outputs from each stage, upsampled back to the input resolution
    cv1_1 = slim.conv2d(conv1, num_outputs=1, kernel_size=1, stride=1,
                        activation_fn=tf.nn.tanh)
    cv2_1 = slim.conv2d(conv2, num_outputs=1, kernel_size=1, stride=1,
                        activation_fn=None)
    dev2 = bliliner_additive_upsampleing(cv2_1, 1, 2)
    cv3_1 = slim.conv2d(conv3, num_outputs=1, kernel_size=1, stride=1,
                        activation_fn=None)
    dev3 = bliliner_additive_upsampleing(cv3_1, 1, 4)
    cv4_1 = slim.conv2d(conv4, num_outputs=1, kernel_size=1, stride=1,
                        activation_fn=None)
    dev4 = bliliner_additive_upsampleing(cv4_1, 1, 8)
    cv5_1 = slim.conv2d(conv5, num_outputs=1, kernel_size=1, stride=1,
                        activation_fn=None)
    dev5 = bliliner_additive_upsampleing(cv5_1, 1, 16)

    ct = tf.concat([cv1_1, dev2, dev3, dev4, dev5], 3)
    final_cv = slim.conv2d(ct, num_outputs=1, kernel_size=1, stride=1,
                           weights_initializer=tf.constant_initializer(0.2),
                           activation_fn=tf.nn.tanh)

    if is_train:
        loss1 = sigmoid_cross_entropy_balanced(labels=labels, logits=cv1_1)
        loss2 = sigmoid_cross_entropy_balanced(labels=labels, logits=dev2)
        loss3 = sigmoid_cross_entropy_balanced(labels=labels, logits=dev3)
        loss4 = sigmoid_cross_entropy_balanced(labels=labels, logits=dev4)
        loss5 = sigmoid_cross_entropy_balanced(labels=labels, logits=dev5)
        fuse_loss = sigmoid_cross_entropy_balanced(labels=labels, logits=final_cv)

        pred = tf.cast(tf.greater(final_cv, 0.5), tf.int32, name='predictions')
        ers = tf.cast(tf.not_equal(pred, tf.cast(labels, tf.int32)), tf.float32)
        ers = tf.reduce_mean(ers, name='pixel_error')

        return (loss1 * 1 + loss2 * 1 + loss3 * 1 + loss4 * 1 + loss5 * 1 +
                fuse_loss * 3, ers, final_cv, svs, dev2, dev5, cv1_1)
    else:
        return final_cv
def build_model():
    def leaky_relu(x, alpha):
        return tf.nn.relu(x) - alpha * tf.nn.relu(-x)

    #####
    # Build Graph
    def loss_angle_l2(y_true, y_pred, c_true):
        c_mask = tf.cast(tf.greater(c_true, 0), tf.float32)
        l2_loss = tf.square(tf.subtract(y_true, y_pred))
        l2_loss = tf.multiply(c_mask, l2_loss)
        l2_loss = tf.reduce_sum(l2_loss, axis=1)
        l2_loss_mean = tf.reduce_mean(l2_loss, axis=0)
        return l2_loss_mean

    def loss_angle_cos(y_true, y_pred, c_true):
        c_mask = tf.cast(tf.greater(c_true, 0), tf.float32)
        cos_loss = tf.reduce_sum(tf.multiply(y_true, y_pred), axis=2)
        cos_loss = tf.multiply(c_mask, cos_loss)
        cos_loss = tf.reduce_sum(cos_loss, axis=1)
        cos_loss_mean = -1 * tf.reduce_mean(cos_loss, axis=0)
        return cos_loss_mean

    def loss_confidence(c_true, c_pred):
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=c_true,
                                                       logits=c_pred)
        loss_mean = tf.reduce_mean(loss, axis=0)
        return loss_mean

    def loss_rs(rs_true, rs_pred, c_true):
        c_mask = tf.cast(tf.greater(c_true, 0), tf.float32)
        l2_loss = tf.square(tf.subtract(rs_true, rs_pred))
        l2_loss = tf.multiply(c_mask, l2_loss)
        l2_loss = tf.reduce_sum(l2_loss, axis=1)
        l2_loss_mean = tf.reduce_mean(l2_loss, axis=0)
        return l2_loss_mean

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
        conv5 = tf.contrib.layers.flatten(net)

        rs = slim.fully_connected(conv5, 512, activation_fn=None, scope='fc7_rs')
        rs = leaky_relu(rs, 0.1)
        rs = slim.dropout(rs, 0.5, scope='dropout7_rs')
        rs = slim.fully_connected(rs, DIRECTIONS, activation_fn=None,
                                  scope='fc8_rs')
        loss_r = loss_rs(rs_label, rs, c_label)

        theta1 = slim.fully_connected(conv5, 256, activation_fn=None,
                                      scope='fc7_theta1')
        theta1 = leaky_relu(theta1, 0.1)
        theta1 = slim.dropout(theta1, 0.5, scope='dropout7_theta1')
        theta1 = slim.fully_connected(theta1, DIRECTIONS * 2, activation_fn=None,
                                      scope='fc8_theta1')
        theta1 = tf.reshape(theta1, [-1, DIRECTIONS, 2])
        theta1 = tf.nn.l2_normalize(theta1, dim=2)
        loss_theta1 = loss_angle_cos(t1_label, theta1, c_label)

        theta2 = slim.fully_connected(conv5, 256, activation_fn=None,
                                      scope='fc7_theta2')
        theta2 = leaky_relu(theta2, 0.1)
        theta2 = slim.dropout(theta2, 0.5, scope='dropout7_theta2')
        theta2 = slim.fully_connected(theta2, DIRECTIONS * 2, activation_fn=None,
                                      scope='fc8_theta2')
        theta2 = tf.reshape(theta2, [-1, DIRECTIONS, 2])
        theta2 = tf.nn.l2_normalize(theta2, dim=2)
        loss_theta2 = loss_angle_cos(t2_label, theta2, c_label)

        confidence = slim.fully_connected(conv5, 256, activation_fn=None,
                                          scope='fc7_confidence')
        confidence = leaky_relu(confidence, 0.1)
        confidence = slim.dropout(confidence, 0.5, scope='dropout7_confidence')
        confidence = slim.fully_connected(confidence, DIRECTIONS,
                                          activation_fn=None,
                                          scope='fc8_confidence')
        loss_c = loss_confidence(c_label, confidence)
        probability = tf.nn.softmax(confidence)

        total_loss = loss_r + loss_theta1 + loss_theta2 + loss_c
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return (rs, theta1, theta2, probability, total_loss, optimizer,
            loss_r, loss_theta1, loss_theta2, loss_c)
def forward(self, inputs, is_training=False):
    def preprocessing(inputs):
        dims = inputs.get_shape()
        if len(dims) == 3:
            inputs = tf.expand_dims(inputs, dim=0)
        mean_BGR = tf.reshape(self.mean_BGR, [1, 1, 1, 3])
        inputs = inputs[:, :, :, ::-1] + mean_BGR
        return inputs

    # inputs = preprocessing(inputs)
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        activation_fn=tf.nn.relu, stride=1, padding='SAME'):
        with tf.variable_scope('fcn', reuse=tf.AUTO_REUSE):
            ## ----------------- vgg norm ---------------------------------
            self.conv1_norm = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                                          scope='conv1_norm')
            self.pool1_norm = slim.max_pool2d(self.conv1_norm, [3, 3], stride=2,
                                              padding='SAME', scope='pool1_norm')
            self.conv2_norm = slim.repeat(self.pool1_norm, 2, slim.conv2d, 128,
                                          [3, 3], scope='conv2_norm')
            self.pool2_norm = slim.max_pool2d(self.conv2_norm, [3, 3], stride=2,
                                              padding='SAME', scope='pool2_norm')
            self.conv3_norm = slim.repeat(self.pool2_norm, 3, slim.conv2d, 256,
                                          [3, 3], scope='conv3_norm')
            self.pool3_norm = slim.max_pool2d(self.conv3_norm, [3, 3], stride=2,
                                              padding='SAME', scope='pool3_norm')
            self.conv4_norm = slim.repeat(self.pool3_norm, 3, slim.conv2d, 512,
                                          [3, 3], scope='conv4_norm')
            # pool4/pool5 keep stride 1 while conv5 uses rate=2, so the output
            # stays at 1/8 input resolution (atrous, DeepLab-style trick).
            self.pool4_norm = slim.max_pool2d(self.conv4_norm, [3, 3], stride=1,
                                              padding='SAME', scope='pool4_norm')
            self.conv5_norm = slim.repeat(self.pool4_norm, 3, slim.conv2d, 512,
                                          [3, 3], rate=2, scope='conv5_norm')
            self.pool5_norm = slim.max_pool2d(self.conv5_norm, [3, 3], stride=1,
                                              padding='SAME', scope='pool5_norm')
def regression_features(x, scope, L2_reg=0.0, reuse=None, train_mode=True, act_func=tf.nn.relu, **kwargs): def _args_scope(): with slim.arg_scope([slim.conv2d, slim.fully_connected], activation_fn=act_func, weights_regularizer=slim.l2_regularizer(L2_reg)): with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc: return arg_sc with slim.arg_scope(_args_scope()): with tf.variable_scope(scope, scope, [x], reuse=reuse) as sc: end_points_collection = sc.name + '_end_points' # Collect outputs for conv2d, fully_connected and max_pool2d. with slim.arg_scope( [slim.conv2d, slim.fully_connected, slim.max_pool2d], outputs_collections=end_points_collection): net = slim.conv2d(x, 32, [3, 3], stride=1, scope='conv1') net = slim.batch_norm(net, is_training=train_mode, scope='bn1') net = slim.repeat(net, 3, slim.conv2d, 32, [3, 3], scope='conv11') net = slim.conv2d(net, 64, [3, 3], stride=2, padding='VALID', scope='conv2') net = slim.batch_norm(net, is_training=train_mode, scope='bn2') net = slim.repeat(net, 3, slim.conv2d, 64, [3, 3], scope='conv21') #x2 net = slim.conv2d(net, 128, [3, 3], stride=2, padding='VALID', scope='conv3') net = slim.batch_norm(net, is_training=train_mode, scope='bn3') net = slim.repeat(net, 3, slim.conv2d, 128, [3, 3], scope='conv31') net = slim.conv2d(net, 128, [3, 3], stride=2, padding='VALID', scope='conv4') net = slim.batch_norm(net, is_training=train_mode, scope='bn4') net = slim.repeat(net, 3, slim.conv2d, 128, [3, 3], scope='conv41') # Here we go down to quarter net = slim.conv2d(net, 256, [3, 3], scope='conv5') net = slim.batch_norm(net, is_training=train_mode, scope='bn5') net = slim.conv2d(net, 512, [3, 3], scope='conv6') net = slim.batch_norm(net, is_training=train_mode, scope='bn6') net = slim.conv2d(net, 1024, [3, 3], scope='conv7') net = slim.batch_norm(net, is_training=train_mode, scope='bn7') # TODO: custom size image support - make padding dynamic net = tf.pad(net, paddings=[[0, 0], [0, 1], [0, 1], [0, 0]], mode='CONSTANT', name='reg_features') return net
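# A hedged usage sketch for regression_features: calling it on a batch of
# images and retrieving the collected end points. The input size and the
# 'reg_net' scope name are illustrative assumptions, not from the original
# source; the collection name follows the sc.name + '_end_points' convention
# used inside the function.
import tensorflow as tf
import tensorflow.contrib.slim as slim

images = tf.placeholder(tf.float32, [None, 128, 128, 3], name='images')
features = regression_features(images, scope='reg_net', L2_reg=1e-4,
                               train_mode=True)
end_points = slim.utils.convert_collection_to_dict('reg_net_end_points')
print(features.get_shape())     # stride-8 feature map plus one padded row/col
for name in sorted(end_points):
    print(name)                 # inspect which layers were collected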
def vgg_19(inputs, num_classes=1000, is_training=False, dropout_keep_prob=0.5,
           spatial_squeeze=True, scope='vgg_19', reuse=False,
           fc_conv_padding='VALID'):
    """Oxford Net VGG 19-Layers version E, truncated after pool5.

    The fully connected classifier layers have been removed, so this variant
    returns convolutional features rather than class predictions. The
    num_classes, is_training, dropout_keep_prob, spatial_squeeze and
    fc_conv_padding arguments are unused here and are kept only for interface
    compatibility with the full slim vgg_19.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].
        scope: Optional scope for the variables.
        reuse: whether or not the network and its variables should be reused.

    Returns:
        the pool5 feature map and the end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_19', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, 3, scope='conv1', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, 3, scope='conv2', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 4, slim.conv2d, 256, 3, scope='conv3', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv4', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv5', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Convert end_points_collection into an end_points dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            return net, end_points
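# Because every conv/pool op is routed through outputs_collections, the
# end_points dict makes it easy to tap intermediate features, e.g. for a
# perceptual loss. A small, hedged usage sketch; the exact key strings depend
# on the outer scope, so printing them first is the safe way to discover them:
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = vgg_19(images)
for key in sorted(end_points):
    print(key)          # e.g. 'vgg_19/conv4/conv4_4', 'vgg_19/pool4', ...
# Pick a mid-level feature map (key name assumes the default 'vgg_19' scope):
mid_features = end_points['vgg_19/conv4/conv4_4']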
def classify(inputs, num_classes, dropout_keep_prob=0.5, middle_size=1, bottom_size=1,
             weight_decay=1e-5, fc_size=16, num_filter=16, scope=None, reuse=None,
             is_training=True):
    """Model used to make predictions.

    Args:
        inputs: tensor of shape [None, bands, frames]; a channel dimension is
            added internally so the convolutions see NHWC input.
    Returns:
        logits: tensor of shape [None, num_classes].
    """
    with slim.arg_scope(simple_arg_scope(weight_decay=weight_decay)):
        # with slim.arg_scope(batchnorm_arg_scope()):
        with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training):
            with tf.variable_scope(scope, 'model_v1', [inputs], reuse=reuse) as scope:
                # Input must be NHWC: with a single channel, expand by one dimension.
                net = tf.expand_dims(inputs, -1)
                with tf.variable_scope('bottom'):
                    # net = slim.conv2d(net, num_filter, [4, 4], rate=2, scope='convB1')
                    net = slim.repeat(net, bottom_size, slim.conv2d, num_filter, [3, 7], scope='convB2')
                    net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='poolB1')
                with tf.variable_scope('middle'):
                    net = slim.repeat(net, middle_size, slim.conv2d, num_filter, [3, 5], scope='convM')  # , reuse=i>0
                    net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='poolM')
                with tf.variable_scope('top'):
                    net = slim.flatten(net)
                    net = slim.fully_connected(net, fc_size, scope='fc1')
                    net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout1')
                    logits = slim.fully_connected(net, num_classes, scope='fc2', activation_fn=None)
    return logits
def generator(self, Z, BG, reuse=None): if (reuse): tf.get_variable_scope().reuse_variables() with tf.variable_scope('Generator_scope'): with slim.arg_scope( [slim.conv2d], padding='SAME', # weights_initializer=tf.random_normal_initializer(stddev=0.02), weights_initializer=tf.contrib.layers.xavier_initializer(), normalizer_fn=slim.batch_norm): Z_ = tf.image.resize_images( images=Z, size=[self.G_input_size, self.G_input_size]) BG_ = tf.image.resize_images( images=BG, size=[self.G_input_size, self.G_input_size]) Z_ = tf.concat([Z_, BG_], axis=-1) # encoder net_a_1 = slim.repeat(Z_, 2, slim.conv2d, 64, [3, 3], scope='conv1_ea') # net1 = slim.dropout(net1) net_a_1 = slim.max_pool2d(net_a_1, [2, 2], scope='pool1_ea') net_a_2 = slim.repeat(net_a_1, 2, slim.conv2d, 64, [3, 3], scope='conv2_ea') # net2 = slim.dropout(net2) net_a_2 = slim.max_pool2d(net_a_2, [2, 2], scope='pool2_ea') net_a_3 = slim.repeat(net_a_2, 2, slim.conv2d, 64, [3, 3], scope='conv3_ea') net_a_3 = slim.max_pool2d(net_a_3, [2, 2], scope='pool3_ea') net_a_4 = slim.repeat(net_a_3, 2, slim.conv2d, 64, [3, 3], scope='conv4_ea') net4 = slim.max_pool2d(net_a_4, [2, 2], scope='pool4_ea') # decoder net_1 = slim.repeat(net4, 2, slim.conv2d, 64, [5, 5], scope='conv1_d1') # net_1 = slim.conv2d_transpose(net_1, 256, [5, 5], stride=2) net_1 = tf.image.resize_images(images=net_1, size=[ int(self.G_input_size / 8), int(self.G_input_size / 8) ]) net_1 = net_1 + net_a_3 net_2 = slim.repeat(net_1, 2, slim.conv2d, 64, [5, 5], scope='conv2_d1') # net_2 = slim.conv2d_transpose(net_2, 128, [5, 5], stride=2) net_2 = tf.image.resize_images(images=net_2, size=[ int(self.G_input_size / 4), int(self.G_input_size / 4) ]) net_2 = net_2 + net_a_2 net_3 = slim.repeat(net_2, 2, slim.conv2d, 64, [5, 5], scope='conv3_d1') # net_3 = slim.conv2d_transpose(net_3, 64, [5, 5], stride=2) net_3 = tf.image.resize_images(images=net_3, size=[ int(self.G_input_size / 2), int(self.G_input_size / 2) ]) net_3 = net_3 + net_a_1 net_4 = slim.repeat(net_3, 2, slim.conv2d, 64, [5, 5], scope='conv4_d1') # net_4 = slim.conv2d_transpose(net_4, 64, [5, 5], stride=2) net_4 = tf.image.resize_images( images=net_4, size=[int(self.G_input_size), int(self.G_input_size)]) reflectance_ = slim.conv2d(net_4, 3, [3, 3], activation_fn=None) reflectance_ = tf.nn.sigmoid(reflectance_) # build high reso maps reflectance = tf.image.resize_images( images=reflectance_, size=[self.img_size, self.img_size]) reflectance = reflectance * Z * 2 reflectance = tf.clip_by_value(reflectance, clip_value_min=0, clip_value_max=1.) alpha = reflectance * ( 0.2 * tf.random_uniform([self.batch_size, 1, 1, 1]) + 1.) alpha = tf.clip_by_value(alpha, clip_value_min=0, clip_value_max=1.) sample = reflectance + (1 - alpha) * BG # sample = tf.clip_by_value(sample, clip_value_min=0, clip_value_max=1.) return sample, reflectance, alpha
def encoder(self, images, is_training): activation_fn = leaky_relu # tf.nn.relu weight_decay = 0.0 with tf.variable_scope('encoder'): with slim.arg_scope([slim.batch_norm], is_training=is_training): with slim.arg_scope( [slim.conv2d, slim.fully_connected], weights_initializer=tf.truncated_normal_initializer( stddev=0.1), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=self.batch_norm_params): net = images net = slim.conv2d(net, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1a') net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_1b') net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2a') net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_2b') net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3a') net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_3b') net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4a') net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_4b') net = slim.flatten(net) fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_1') fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None, normalizer_fn=None, scope='Fc_2') return fc1, fc2
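# The encoder above repeats a helper called conv2d_block and an activation
# called leaky_relu, neither of which is defined in this excerpt. One plausible
# pair of definitions, consistent with how they are called: slim.repeat passes
# the tensor first, then a residual scale, then ordinary slim.conv2d arguments
# (including the scope it generates). Treat this as an assumed reconstruction,
# not the original code:
import tensorflow as tf
import tensorflow.contrib.slim as slim

def leaky_relu(x, alpha=0.1):
    # Standard leaky ReLU expressed with two ReLUs.
    return tf.nn.relu(x) - alpha * tf.nn.relu(-x)

def conv2d_block(inp, scale, *args, **kwargs):
    # Residual convolution block: y = x + scale * conv(x). The conv keeps the
    # input channel count in every call site above, so the add is shape-safe.
    # slim.repeat(net, 3, conv2d_block, 0.1, 128, [3, 3], 1, ...) expands to
    # three of these with scope suffixes _1.._3.
    return inp + scale * slim.conv2d(inp, *args, **kwargs)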
import tensorflow as tf import tensorflow.contrib.slim as slim import graph_construction.graph_utils as gut import numpy as np import time inputs = tf.placeholder(tf.float32, shape=(None, 576, 576, 3), name = 'Input') label = tf.placeholder(tf.float32, shape = (None, 576, 576, 2), name="path0_label") # stride 1 path0 = slim.repeat(inputs, 2, slim.conv2d, 32, 3, scope='down_conv0', normalizer_fn=slim.batch_norm, activation_fn=tf.nn.leaky_relu) path1 = slim.max_pool2d(path0, 2, scope='pool0') # stride 2 path1 = slim.repeat(path1, 2, slim.conv2d, 64, 3, scope='down_conv1', normalizer_fn=slim.batch_norm, activation_fn=tf.nn.leaky_relu) path2 = slim.max_pool2d(path1, 2, scope='pool1') # stride 4 path2 = slim.repeat(path2, 2, slim.conv2d, 128, 3, scope='down_conv2', normalizer_fn=slim.batch_norm, activation_fn=tf.nn.leaky_relu) path3 = slim.max_pool2d(path2, 2, scope='pool2') # stride 8 path3 = slim.repeat(path3, 2, slim.conv2d, 256, 3, scope='down_conv3', normalizer_fn=slim.batch_norm, activation_fn=tf.nn.leaky_relu) path4 = slim.max_pool2d(path3, 2, scope='pool3') # stride 16 path4 = slim.repeat(path4, 2, slim.conv2d, 512, 3, scope='up_conv4', normalizer_fn=slim.batch_norm, activation_fn=tf.nn.leaky_relu) path4_out = slim.conv2d(path4, 2, 3, activation_fn=None) up_path4 = slim.conv2d_transpose(path4, 256, 2, 2, activation_fn=None, normalizer_fn=slim.batch_norm)
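# The snippet above stops after the first transposed convolution of the up
# path. A hedged sketch of how such a U-Net-style decoder is typically
# completed: merge each upsampled map with the matching encoder feature,
# convolve, and finish with a 2-channel logits map trained against `label`
# with per-pixel cross-entropy. This continuation is an assumption for
# illustration only, not the original network; all 'up_conv*' scopes below
# are hypothetical.
up3 = tf.concat([up_path4, path3], axis=-1)                         # stride 8
up3 = slim.repeat(up3, 2, slim.conv2d, 256, 3, scope='up_conv3',
                  normalizer_fn=slim.batch_norm, activation_fn=tf.nn.leaky_relu)
up2 = slim.conv2d_transpose(up3, 128, 2, 2, activation_fn=None,
                            normalizer_fn=slim.batch_norm)
up2 = tf.concat([up2, path2], axis=-1)                              # stride 4
up2 = slim.repeat(up2, 2, slim.conv2d, 128, 3, scope='up_conv2',
                  normalizer_fn=slim.batch_norm, activation_fn=tf.nn.leaky_relu)
up1 = slim.conv2d_transpose(up2, 64, 2, 2, activation_fn=None,
                            normalizer_fn=slim.batch_norm)
up1 = tf.concat([up1, path1], axis=-1)                              # stride 2
up1 = slim.repeat(up1, 2, slim.conv2d, 64, 3, scope='up_conv1',
                  normalizer_fn=slim.batch_norm, activation_fn=tf.nn.leaky_relu)
up0 = slim.conv2d_transpose(up1, 32, 2, 2, activation_fn=None,
                            normalizer_fn=slim.batch_norm)          # stride 1
logits = slim.conv2d(up0, 2, 3, activation_fn=None, scope='logits')
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits))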
def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8, bottleneck_layer_size=128, reuse=None, scope='InceptionResnetV1'): end_points = {} with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # 149 x 149 x 32 net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') end_points['Conv2d_1a_3x3'] = net # 147 x 147 x 32 net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3') end_points['Conv2d_2a_3x3'] = net # 147 x 147 x 64 net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') end_points['Conv2d_2b_3x3'] = net # 73 x 73 x 64 net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3') end_points['MaxPool_3a_3x3'] = net # 73 x 73 x 80 net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1') end_points['Conv2d_3b_1x1'] = net # 71 x 71 x 192 net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3') end_points['Conv2d_4a_3x3'] = net # 35 x 35 x 256 net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', scope='Conv2d_4b_3x3') end_points['Conv2d_4b_3x3'] = net # 5 x Inception-resnet-A net = slim.repeat(net, 5, block35, scale=0.17) end_points['Mixed_5a'] = net # Reduction-A with tf.variable_scope('Mixed_6a'): net = reduction_a(net, 192, 192, 256, 384) end_points['Mixed_6a'] = net # 10 x Inception-Resnet-B net = slim.repeat(net, 10, block17, scale=0.10) end_points['Mixed_6b'] = net # Reduction-B with tf.variable_scope('Mixed_7a'): net = reduction_b(net) end_points['Mixed_7a'] = net # 5 x Inception-Resnet-C net = slim.repeat(net, 5, block8, scale=0.20) end_points['Mixed_8a'] = net net = block8(net, activation_fn=None) end_points['Mixed_8b'] = net with tf.variable_scope('Logits'): end_points['PrePool'] = net # pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') end_points['PreLogitsFlatten'] = net net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) return net, end_points
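# The Inception-ResNet snippets in this section call block35/block17/block8
# and reduction_a/reduction_b without defining them. For reference, the
# facenet-style Inception-ResNet-A block that matches the
# slim.repeat(net, 5, block35, scale=0.17) call signature looks like the
# sketch below (reproduced from memory, so treat details as approximate;
# block17 and block8 follow the same residual-scaling pattern with different
# branch shapes):
def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
    """Builds the 35x35 residual block (Inception-ResNet-A)."""
    with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):
        with tf.variable_scope('Branch_0'):
            tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
        with tf.variable_scope('Branch_1'):
            tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
            tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
        with tf.variable_scope('Branch_2'):
            tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
            tower_conv2_1 = slim.conv2d(tower_conv2_0, 32, 3, scope='Conv2d_0b_3x3')
            tower_conv2_2 = slim.conv2d(tower_conv2_1, 32, 3, scope='Conv2d_0c_3x3')
        mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3)
        # 1x1 projection back to the input depth, then a scaled residual add.
        up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
                         activation_fn=None, scope='Conv2d_1x1')
        net += scale * up
        if activation_fn:
            net = activation_fn(net)
    return net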
def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8, bottleneck_layer_size=128, reuse=None, scope='InceptionResnetV1'): """Creates model Args: inputs: a 4-D tensor of size [batch_size, 32, 32, 3]. num_classes: number of predicted classes. is_training: whether is training or not. dropout_keep_prob: float, the fraction to keep before final layer. reuse: whether or not the network and its variables should be reused. scope: Optional variable_scope. Returns: logits: the logits outputs of the model. end_points: the set of end_points from the inception model. """ end_points = {} with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): #31 x 31 x 32 net = slim.conv2d(inputs, 32, 3, stride=1, padding='VALID', scope='conv_1_3x3') #15 * 15 * 64 net = slim.conv2d(net, 64, 3, stride=2, padding='VALID', scope='conv_2_3x3') #7 * 7 * 96 net = slim.conv2d(net, 96, 3, stride=2, padding='VALID', scope='conv_3_3x3') #7 * 7 * 96 net = slim.repeat(net, 4, block35, scale=0.17) #4 * 4 * 224 with tf.variable_scope('Mixed_6a'): net = reduction_a(net, 32, 32, 64, 96) net = slim.repeat(net, 4, block8, scale=0.20) with tf.variable_scope('Mixed_7a'): net = reduction_b(net) with tf.variable_scope('Logits'): #pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') end_points['PreLogitsFlatten'] = net net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) return net, end_points
def vgg_19(inputs, scope='vgg_19', reuse=False):
    """Oxford Net VGG 19-Layers version E, truncated after pool5.

    The fully connected classifier layers have been removed, so this variant
    returns convolutional features rather than class predictions.

    Args:
        inputs: a tensor of size [batch_size, height, width, channels].

    Returns:
        the pool5 feature map and the end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_19', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                            outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, 3, scope='conv1', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, 3, scope='conv2', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 4, slim.conv2d, 256, 3, scope='conv3', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv4', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv5', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Convert end_points_collection into an end_points dict.
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            return net, end_points
def build_network(self, sess, is_training=True):
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3], trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], trainable=is_training, scope='conv5')
        self._act_summaries.append(net)
        self._layers['head'] = net
        # build the anchors for the image
        self._anchor_component()
        # rpn
        rpn = slim.conv2d(net, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer, scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID',
                                    activation_fn=None, scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training,
                                    weights_initializer=initializer, padding='VALID',
                                    activation_fn=None, scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError
        # rcnn
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net, rois, "pool5")
        else:
            raise NotImplementedError
        pool5_flat = slim.flatten(pool5, scope='flatten')
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, scope='dropout6')
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, scope='dropout7')
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois
        self._score_summaries.update(self._predictions)
        return rois, cls_prob, bbox_pred
def build_model(): #### build some layer def LeakyReLU(x, alpha): return tf.nn.relu(x) - alpha * tf.nn.relu(-x) def orientation_loss(y_true, y_pred): # Find number of anchors anchors = tf.reduce_sum(tf.square(y_true), axis=2) anchors = tf.greater(anchors, tf.constant(0.5)) anchors = tf.reduce_sum(tf.cast(anchors, tf.float32), 1) # Define the loss loss = (y_true[:,:,0]*y_pred[:,:,0] + y_true[:,:,1]*y_pred[:,:,1]) loss = tf.reduce_sum((2 - 2 * tf.reduce_mean(loss,axis=0))) / anchors return tf.reduce_mean(loss) ##### #Build Graph with slim.arg_scope([slim.conv2d, slim.fully_connected], activation_fn=tf.nn.relu, weights_initializer=tf.truncated_normal_initializer(0.0, 0.01), weights_regularizer=slim.l2_regularizer(0.0005)): net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') net = slim.max_pool2d(net, [2, 2], scope='pool1') net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') net = slim.max_pool2d(net, [2, 2], scope='pool2') net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') net = slim.max_pool2d(net, [2, 2], scope='pool3') net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') net = slim.max_pool2d(net, [2, 2], scope='pool4') net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') net = slim.max_pool2d(net, [2, 2], scope='pool5') conv5 = tf.contrib.layers.flatten(net) #dimension = slim.fully_connected(conv5, 512, scope='fc7_d') dimension = slim.fully_connected(conv5, 512, activation_fn=None, scope='fc7_d') dimension = LeakyReLU(dimension, 0.1) dimension = slim.dropout(dimension, 0.5, scope='dropout7_d') #dimension = slim.fully_connected(dimension, 3, scope='fc8_d') dimension = slim.fully_connected(dimension, 3, activation_fn=None, scope='fc8_d') #dimension = LeakyReLU(dimension, 0.1) #loss_d = tf.reduce_mean(tf.square(d_label - dimension)) loss_d = tf.losses.mean_squared_error(d_label, dimension) #orientation = slim.fully_connected(conv5, 256, scope='fc7_o') orientation = slim.fully_connected(conv5, 256, activation_fn=None, scope='fc7_o') orientation = LeakyReLU(orientation, 0.1) orientation = slim.dropout(orientation, 0.5, scope='dropout7_o') #orientation = slim.fully_connected(orientation, BIN*2, scope='fc8_o') orientation = slim.fully_connected(orientation, BIN*2, activation_fn=None, scope='fc8_o') #orientation = LeakyReLU(orientation, 0.1) orientation = tf.reshape(orientation, [-1, BIN, 2]) orientation = tf.nn.l2_normalize(orientation, dim=2) loss_o = orientation_loss(o_label, orientation) #confidence = slim.fully_connected(conv5, 256, scope='fc7_c') confidence = slim.fully_connected(conv5, 256, activation_fn=None, scope='fc7_c') confidence = LeakyReLU(confidence, 0.1) confidence = slim.dropout(confidence, 0.5, scope='dropout7_c') confidence = slim.fully_connected(confidence, BIN, activation_fn=None, scope='fc8_c') loss_c = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=c_label, logits= confidence)) confidence = tf.nn.softmax(confidence) #loss_c = tf.reduce_mean(tf.square(c_label - confidence)) #loss_c = tf.losses.mean_squared_error(c_label, confidence) total_loss = 4. * loss_d + 8. * loss_o + loss_c optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss) return dimension, orientation, confidence, total_loss, optimizer, loss_d, loss_o, loss_c
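# The orientation head above regresses a (cos, sin) pair per angle bin, which
# is why fc8_o has BIN*2 outputs, is reshaped to [-1, BIN, 2] and L2-normalized
# along the last axis. A small numpy sketch of one common way such targets are
# encoded (illustrative and simplified; the original label pipeline is not
# shown in this excerpt and may only fill the bins that cover the angle):
import numpy as np

BIN = 2
def encode_orientation(angle, bin_centers):
    """Return a [BIN, 2] array of (cos, sin) residuals to each bin center."""
    target = np.zeros((len(bin_centers), 2), dtype=np.float32)
    for i, center in enumerate(bin_centers):
        residual = angle - center
        target[i] = [np.cos(residual), np.sin(residual)]
    return target

centers = np.array([0.0, np.pi])      # two bins covering the circle
print(encode_orientation(np.pi / 3, centers))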
def discriminator(self, x, reuse=False): if (reuse): tf.get_variable_scope().reuse_variables() with tf.variable_scope('Discriminator_scope'): with slim.arg_scope( [slim.conv2d], padding='SAME', # weights_initializer=tf.random_normal_initializer(stddev=0.02), weights_initializer=tf.contrib.layers.xavier_initializer(), normalizer_fn=slim.batch_norm, # weights_regularizer=slim.l2_regularizer(0.01) ): x = tf.image.resize_images( images=x, size=[self.D_input_size, self.D_input_size]) net = slim.repeat(x, 2, slim.conv2d, 128, [3, 3], scope='conv1') net = slim.max_pool2d(net, [2, 2], scope='pool1') net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv2') net = slim.max_pool2d(net, [2, 2], scope='pool2') net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3') net = slim.max_pool2d(net, [2, 2], scope='pool3') net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv4') net = slim.max_pool2d(net, [2, 2], scope='pool4') net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv5') net = slim.max_pool2d(net, [2, 2], scope='pool5') net = slim.flatten(net) net = slim.fully_connected(net, 512) D_logit = slim.fully_connected(net, 1, activation_fn=None) D_prob = tf.nn.sigmoid(D_logit) return D_logit, D_prob
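# A hedged sketch of how this discriminator pairs with the generator above in
# a standard non-saturating GAN objective. Names like `self.real`, `self.Z`
# and `self.BG`, and the optimizer settings, are illustrative assumptions, not
# from the original source:
sample, reflectance, alpha = self.generator(self.Z, self.BG)
D_logit_real, _ = self.discriminator(self.real)
D_logit_fake, _ = self.discriminator(sample, reuse=True)

# Discriminator: real -> 1, fake -> 0. Generator: fake -> 1.
d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=D_logit_real, labels=tf.ones_like(D_logit_real)) +
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(
        logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))

# Split the trainable variables by the scopes used above.
d_vars = [v for v in tf.trainable_variables() if 'Discriminator_scope' in v.name]
g_vars = [v for v in tf.trainable_variables() if 'Generator_scope' in v.name]
d_opt = tf.train.AdamOptimizer(2e-4, beta1=0.5).minimize(d_loss, var_list=d_vars)
g_opt = tf.train.AdamOptimizer(2e-4, beta1=0.5).minimize(g_loss, var_list=g_vars)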
def irv1_small2(inputs, is_training=True, dropout_keep_prob=0.8, reuse=None, scope='InceptionResnetV1'): """Creates the Inception Resnet V1 model. Args: inputs: a 4-D tensor of size [batch_size, height, width, 3]. num_classes: number of predicted classes. is_training: whether is training or not. dropout_keep_prob: float, the fraction to keep before final layer. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: logits: the logits outputs of the model. end_points: the set of end_points from the inception model. """ end_points = {} with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # 149 x 149 x 32 net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') end_points['Conv2d_1a_3x3'] = net # 147 x 147 x 32 net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3') end_points['Conv2d_2a_3x3'] = net # 147 x 147 x 64 net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') end_points['Conv2d_2b_3x3'] = net # 73 x 73 x 64 net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3') end_points['MaxPool_3a_3x3'] = net # 73 x 73 x 80 net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1') end_points['Conv2d_3b_1x1'] = net # 71 x 71 x 192 net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3') end_points['Conv2d_4a_3x3'] = net # 35 x 35 x 256 net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', scope='Conv2d_4b_3x3') end_points['Conv2d_4b_3x3'] = net # 3 x Inception-resnet-A net = slim.repeat(net, 3, block35, scale=0.17) # Reduction-A with tf.variable_scope('Mixed_6a'): net = reduction_a(net, 192, 192, 256, 384) end_points['Mixed_6a'] = net # 5 x Inception-Resnet-B net = slim.repeat(net, 5, block17, scale=0.10) with tf.variable_scope('Logits'): end_points['PrePool'] = net #pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') end_points['PreLogitsFlatten'] = net return net, end_points
def build_model(self):
    self.variables_collections = {'weights': ['weights'], 'biases': ['biases']}

    def summarize_channels(tensor, size, family):
        # Log the first four feature-map channels of `tensor` as images.
        for c in range(4):
            tf.summary.image("channel%d" % (c + 1),
                             tf.slice(tensor, [0, 0, 0, c], [-1, size, size, 1]),
                             max_outputs=3, family=family)

    layer_args = dict(
        weights_initializer=initializers.xavier_initializer(uniform=False),
        biases_initializer=None,
        variables_collections=self.variables_collections)

    # pre_conv, output shape: 320x320x3
    pre_conv = slim.conv2d(self.input_data, 3, [3, 3], scope='pre_conv',
                           activation_fn=None, **layer_args)
    tf.summary.image("pre_conv_out", pre_conv, max_outputs=3, family="pre_conv")

    # vgg_16, output shape: 10x10x512
    with slim.arg_scope(vgg.vgg_arg_scope()):
        net, argmax, __ = vgg_16(pre_conv,
                                 variables_collections=self.variables_collections)
    summarize_channels(net, 10, "vgg_16")

    # Decoder: transposed-convolution blocks alternating with argmax unpooling,
    # mirroring the VGG encoder back up to the input resolution.
    deconv_1 = slim.repeat(net, 3, slim.conv2d_transpose, 512, [3, 3],
                           scope='deconv1', **layer_args)          # 10x10x512
    summarize_channels(deconv_1, 10, "deconv1")
    unpool_1 = unpool(deconv_1, argmax[4], shape=[-1, 20, 20, 512], scope='unpool1')
    summarize_channels(unpool_1, 20, "unpool1")

    deconv_2 = slim.repeat(unpool_1, 3, slim.conv2d_transpose, 512, [3, 3],
                           scope='deconv2', **layer_args)          # 20x20x512
    summarize_channels(deconv_2, 20, "deconv2")
    unpool_2 = unpool(deconv_2, argmax[3], shape=[-1, 40, 40, 512], scope='unpool2')
    summarize_channels(unpool_2, 40, "unpool2")

    deconv_3 = slim.repeat(unpool_2, 3, slim.conv2d_transpose, 256, [3, 3],
                           scope='deconv3', **layer_args)          # 40x40x256
    summarize_channels(deconv_3, 40, "deconv3")
    unpool_3 = unpool(deconv_3, argmax[2], shape=[-1, 80, 80, 256], scope='unpool3')
    summarize_channels(unpool_3, 80, "unpool3")

    deconv_4 = slim.repeat(unpool_3, 2, slim.conv2d_transpose, 128, [3, 3],
                           scope='deconv4', **layer_args)          # 80x80x128
    summarize_channels(deconv_4, 80, "deconv4")
    unpool_4 = unpool(deconv_4, argmax[1], shape=[-1, 160, 160, 128], scope='unpool4')
    summarize_channels(unpool_4, 160, "unpool4")

    deconv_5 = slim.repeat(unpool_4, 2, slim.conv2d_transpose, 64, [3, 3],
                           scope='deconv5', **layer_args)          # 160x160x64
    summarize_channels(deconv_5, 160, "deconv5")
    unpool_5 = unpool(deconv_5, argmax[0], shape=[-1, 320, 320, 64], scope='unpool5')
    summarize_channels(unpool_5, 320, "unpool5")

    # conv, output shape: 320x320x2
    conv = slim.conv2d(unpool_5, 64, [3, 3], scope='conv1', **layer_args)
    conv = slim.dropout(conv, keep_prob=0.8, is_training=self.is_training,
                        scope='dropout1')
    conv = slim.conv2d(conv, 2, [3, 3], scope='conv2', **layer_args)
    conv_1 = tf.slice(conv, [0, 0, 0, 0], [-1, self.image_height, self.image_height, 1])
    conv_2 = tf.slice(conv, [0, 0, 0, 1], [-1, self.image_height, self.image_height, 1])
    tf.summary.image("channel1", conv_1, max_outputs=3, family="conv")
    tf.summary.image("channel2", conv_2, max_outputs=3, family="conv")

    # final result
    with tf.name_scope("result"):
        output = tf.nn.softmax(conv)
        output = tf.expand_dims(tf.argmax(output, axis=3, output_type=tf.int32), axis=3)
        result = tf.cast(output, tf.uint8)
        tf.summary.image("segmentation", result, max_outputs=3)
    self.logits = conv
    self.output = output

    weights = ops.get_collection("weights")
    for weight in weights:
        L = weight.name.split('/')
        name = L[-2] + '/' + L[-1]
        family = L[0]
        tf.summary.histogram(name=name, values=weight, family=family)
def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8, reuse=None, scope='InceptionResnetV1'): """Creates the Inception Resnet V1 model. Args: inputs: a 4-D tensor of size [batch_size, height, width, 3]. num_classes: number of predicted classes. is_training: whether is training or not. dropout_keep_prob: float, the fraction to keep before final layer. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: logits: the logits outputs of the model. end_points: the set of end_points from the inception model. """ end_points = {} with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # 64 x 64 x 16 net = slim.conv2d(inputs, 16, 3, stride=2, padding='SAME', scope='Conv2d_1a_3x3') end_points['Conv2d_1a_3x3'] = net # 64 x 64 x 16 # net = slim.conv2d(net, 16, 3, padding='SAME', # scope='Conv2d_2a_3x3') # end_points['Conv2d_2a_3x3'] = net # 64 x 64 x 32 net = slim.conv2d(net, 32, 3, scope='Conv2d_2b_3x3') end_points['Conv2d_2b_3x3'] = net # 32 x 32 x 32 net = slim.max_pool2d(net, 3, stride=2, padding='SAME', scope='MaxPool_3a_3x3') end_points['MaxPool_3a_3x3'] = net # 32 x 32 x 40 net = slim.conv2d(net, 40, 1, padding='SAME', scope='Conv2d_3b_1x1') end_points['Conv2d_3b_1x1'] = net # 32 x 32 x 96 net = slim.conv2d(net, 96, 3, padding='SAME', scope='Conv2d_4a_3x3') end_points['Conv2d_4a_3x3'] = net # 32 x 32 x 128 net = slim.conv2d(net, 128, 3, stride=2, padding='SAME', scope='Conv2d_4b_3x3') end_points['Conv2d_4b_3x3'] = net # 1 x Inception-resnet-A net = slim.repeat(net, 2, block35, scale=0.17) # Reduction-A with tf.variable_scope('Mixed_6a'): net = reduction_a(net, 96, 96, 128, 192) end_points['Mixed_6a'] = net # 1 x Inception-Resnet-B net = slim.repeat(net, 2, block17, scale=0.10) # Reduction-B with tf.variable_scope('Mixed_7a'): net = reduction_b(net, 128) end_points['Mixed_7a'] = net # 1 x Inception-Resnet-C net = slim.repeat(net, 2, block8, scale=0.20) net = block8(net, kernel_size=96, activation_fn=None) with tf.variable_scope('Logits'): end_points['PrePool'] = net # pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') end_points['PreLogitsFlatten'] = net return net, end_points
def text_net(inputs,
             feat_layers=TextboxNet.default_params.feat_layers,
             anchor_sizes=TextboxNet.default_params.anchor_sizes,
             anchor_ratios=TextboxNet.default_params.anchor_ratios,
             normalizations=TextboxNet.default_params.normalizations,
             is_training=True,
             dropout_keep_prob=0.5,
             reuse=None,
             scope='text_box_384'):
    end_points = {}
    with tf.compat.v1.variable_scope(scope, 'text_box_300', [inputs], reuse=reuse):
        # Spatial sizes in the comments below are for 300x300 / 384x384 inputs.
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')   # 300 / 384
        end_points['conv1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')                      # 150 / 192
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')     # 150 / 192
        end_points['conv2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')                      # 75 / 96
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')     # 75 / 96
        end_points['conv3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')                      # 38 / 48
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')     # 38 / 48
        end_point = 'conv4'
        end_points[end_point] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')                      # 19 / 24
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')     # 19 / 24
        end_points['conv5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')            # 19 / 24

        # Additional SSD blocks.
        # Block 6: 3x3 conv with a large dilation rate.
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')            # 19 / 24
        end_points['conv6'] = net
        # Block 7: 1x1 conv.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')                    # 19 / 24
        end_point = 'conv7'
        end_points[end_point] = net

        # Blocks 8/9/10/11: 1x1 and 3x3 convolutions with stride 2.
        end_point = 'conv8'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net  # 10
        end_point = 'conv9'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net  # 5
        end_point = 'conv10'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1', padding='VALID')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net  # 3
        end_point = 'conv11'
        with tf.compat.v1.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net  # 1

        # Inception-style multi-branch blocks on each feature layer.
        end_point = feat_layers[0]
        with tf.compat.v1.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [3, 3], stride=1, scope='dilation1')
            # net_dilation2 = custom_layers.pad2d(net, pad=(0, 4))
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 9], padding='SAME', stride=1, scope='dilation2')
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [9, 1], stride=1, padding='SAME', scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(4, 0))
            net_inception = tf.concat(values=[net_dilation1, net_dilation2, net_dilation3], axis=3)
            end_points[end_point] = net_inception
        end_point = feat_layers[1]
        with tf.compat.v1.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 1024, [1, 1], stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 1024, [1, 7], stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))
            net_dilation3 = slim.conv2d(end_points[end_point], 1024, [7, 1], stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
            end_points[end_point] = net_inception
        end_point = 'conv8'
        with tf.compat.v1.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1], stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7], stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1], stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
            end_points[end_point] = net_inception
        end_point = feat_layers[3]
        with tf.compat.v1.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1], stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7], stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1], stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
            end_points[end_point] = net_inception
        # 5
        end_point = 'conv10'
        with tf.compat.v1.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1], stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 7], stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 3))
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [7, 1], stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(3, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
            end_points[end_point] = net_inception
        # 3
        end_point = 'conv11'
        with tf.compat.v1.variable_scope(end_point):
            net_dilation1 = slim.conv2d(end_points[end_point], 128, [1, 1], stride=1, scope='dilation1')
            net_dilation2 = slim.conv2d(end_points[end_point], 128, [1, 5], stride=1, scope='dilation2')
            # net_dilation2 = custom_layers.pad2d(net_dilation2, pad=(0, 2))
            net_dilation3 = slim.conv2d(end_points[end_point], 128, [5, 1], stride=1, scope='dilation3')
            # net_dilation3 = custom_layers.pad2d(net_dilation3, pad=(2, 0))
            net_inception = tf.concat([net_dilation1, net_dilation2, net_dilation3], axis=3)
            end_points[end_point] = net_inception
        # 1
        # Prediction and localisation layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.compat.v1.variable_scope(layer + '_box'):
                p, loc = text_multibox_layer(layer, end_points[layer],
                                             anchor_sizes[i], anchor_ratios[i],
                                             normalizations[i])
            prediction_fn = slim.softmax
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(loc)
        return predictions, localisations, logits, end_points
def refine_by_decoder(features, end_points, decoder_height, decoder_width, decoder_use_separable_conv=False, model_variant=None, weight_decay=0.0001, reuse=None, is_training=False, fine_tune_batch_norm=False): """Adds the decoder to obtain sharper segmentation results. Args: features: A tensor of size [batch, features_height, features_width, features_channels]. end_points: A dictionary from components of the network to the corresponding activation. decoder_height: The height of decoder feature maps. decoder_width: The width of decoder feature maps. decoder_use_separable_conv: Employ separable convolution for decoder or not. model_variant: Model variant for feature extraction. weight_decay: The weight decay for model variables. reuse: Reuse the model variables or not. is_training: Is training or not. fine_tune_batch_norm: Fine-tune the batch norm parameters or not. Returns: Decoder output with size [batch, decoder_height, decoder_width, decoder_channels]. """ batch_norm_params = { 'is_training': is_training and fine_tune_batch_norm, 'decay': 0.9997, 'epsilon': 1e-5, 'scale': True, } with slim.arg_scope( [slim.conv2d, slim.separable_conv2d], weights_regularizer=slim.l2_regularizer(weight_decay), activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, padding='SAME', stride=1, reuse=reuse): with slim.arg_scope([slim.batch_norm], **batch_norm_params): with tf.variable_scope(_DECODER_SCOPE, _DECODER_SCOPE, [features]): feature_list = feature_extractor.networks_to_feature_maps[ model_variant][feature_extractor.DECODER_END_POINTS] if feature_list is None: tf.logging.info('Not found any decoder end points.') return features else: decoder_features = features for i, name in enumerate(feature_list): decoder_features_list = [decoder_features] # MobileNet variants use different naming convention. if 'mobilenet' in model_variant: feature_name = name else: feature_name = '{}/{}'.format( feature_extractor.name_scope[model_variant], name) decoder_features_list.append( slim.conv2d(end_points[feature_name], 48, 1, scope='feature_projection'+str(i))) # Resize to decoder_height/decoder_width. for j, feature in enumerate(decoder_features_list): decoder_features_list[j] = tf.image.resize_bilinear( feature, [decoder_height, decoder_width], align_corners=True) decoder_features_list[j].set_shape( [None, decoder_height, decoder_width, None]) decoder_depth = 256 if decoder_use_separable_conv: decoder_features = _split_separable_conv2d( tf.concat(decoder_features_list, 3), filters=decoder_depth, rate=1, weight_decay=weight_decay, scope='decoder_conv0') decoder_features = _split_separable_conv2d( decoder_features, filters=decoder_depth, rate=1, weight_decay=weight_decay, scope='decoder_conv1') else: num_convs = 2 decoder_features = slim.repeat( tf.concat(decoder_features_list, 3), num_convs, slim.conv2d, decoder_depth, 3, scope='decoder_conv'+str(i)) return decoder_features
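# refine_by_decoder relies on _split_separable_conv2d, which is not shown in
# this excerpt. In the DeepLab codebase that helper splits a separable conv
# into an explicit depthwise 3x3 followed by a pointwise 1x1 so the two stages
# can use different initializers and regularization. A sketch along those
# lines; the initializer stddevs are assumptions:
def _split_separable_conv2d(inputs, filters, rate=1, weight_decay=0.00004,
                            scope=None):
    """Depthwise 3x3 followed by pointwise 1x1 convolution."""
    # num_outputs=None makes slim.separable_conv2d skip its pointwise stage.
    depthwise = slim.separable_conv2d(
        inputs, None, 3, depth_multiplier=1, rate=rate,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.33),
        weights_regularizer=None, scope=scope + '_depthwise')
    # Pointwise projection to the requested depth, with L2 regularization.
    return slim.conv2d(
        depthwise, filters, 1,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.06),
        weights_regularizer=slim.l2_regularizer(weight_decay),
        scope=scope + '_pointwise')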