def inference(image_batch, keep_probability, phase_train=True,
              bottleneck_layer_size=512, weight_decay=0.0):
    with tf.variable_scope('LResnetE_IR'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_initializer=tf.contrib.layers.xavier_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            biases_initializer=None,  # default: no biases
                            activation_fn=None,
                            normalizer_fn=None):
            with slim.arg_scope([slim.conv2d], kernel_size=3):
                with slim.arg_scope([slim.batch_norm],
                                    decay=0.995,
                                    epsilon=1e-5,
                                    scale=True,
                                    is_training=phase_train,
                                    activation_fn=prelu,
                                    updates_collections=None,
                                    variables_collections=[tf.GraphKeys.TRAINABLE_VARIABLES]):
                    return LResnet50E_IR(images=image_batch,
                                         keep_probability=keep_probability,
                                         phase_train=phase_train,
                                         bottleneck_layer_size=bottleneck_layer_size,
                                         reuse=None)
def get_network_byname(net_name, inputs, num_classes=None, is_training=True,
                       global_pool=True, output_stride=None, spatial_squeeze=True):
    if net_name not in ['resnet_v1_50', 'mobilenet_224', 'inception_resnet',
                        'vgg16', 'resnet_v1_101']:
        raise ValueError('unsupported network: {}, net_name must be in '
                         '[resnet_v1_50, mobilenet_224, inception_resnet, '
                         'vgg16, resnet_v1_101]'.format(net_name))
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=cfgs.WEIGHT_DECAY[net_name])):
            logits, end_points = resnet_v1.resnet_v1_50(inputs=inputs,
                                                        num_classes=num_classes,
                                                        is_training=is_training,
                                                        global_pool=global_pool,
                                                        output_stride=output_stride,
                                                        spatial_squeeze=spatial_squeeze)
        return logits, end_points
    if net_name == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=cfgs.WEIGHT_DECAY[net_name])):
            logits, end_points = resnet_v1.resnet_v1_101(inputs=inputs,
                                                         num_classes=num_classes,
                                                         is_training=is_training,
                                                         global_pool=global_pool,
                                                         output_stride=output_stride,
                                                         spatial_squeeze=spatial_squeeze)
        return logits, end_points
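# Minimal usage sketch (an illustration, not part of the source): assumes
# TF1.x with tf.contrib.slim, the slim resnet_v1 nets, and a cfgs module whose
# WEIGHT_DECAY dict maps net names to floats. The input shape is illustrative.
import tensorflow as tf
slim = tf.contrib.slim
images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
logits, end_points = get_network_byname('resnet_v1_50', images,
                                        num_classes=1000, is_training=True)
# end_points maps layer names to activations, handy for feature extraction.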
def mobilenet_v1_arg_scope(is_training=True, stddev=0.09):
    batch_norm_params = {
        'is_training': False,
        'center': True,
        'scale': True,
        'decay': 0.9997,
        'epsilon': 0.001,
        'trainable': False,
    }

    # Set weight_decay for weights in Conv and DepthSepConv layers.
    weights_init = tf.truncated_normal_initializer(stddev=stddev)
    regularizer = tf.contrib.layers.l2_regularizer(cfg.MOBILENET.WEIGHT_DECAY)
    if cfg.MOBILENET.REGU_DEPTH:
        depthwise_regularizer = regularizer
    else:
        depthwise_regularizer = None

    with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                        trainable=is_training,
                        weights_initializer=weights_init,
                        activation_fn=tf.nn.relu6,
                        normalizer_fn=slim.batch_norm,
                        padding='SAME'):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            with slim.arg_scope([slim.conv2d], weights_regularizer=regularizer):
                with slim.arg_scope([slim.separable_conv2d],
                                    weights_regularizer=depthwise_regularizer) as sc:
                    return sc
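# The function above returns an arg_scope (a dict of layer defaults); callers
# re-enter it with slim.arg_scope. A minimal sketch, assuming cfg.MOBILENET.*
# is configured as above; the input shape is illustrative.
import tensorflow as tf
slim = tf.contrib.slim
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
with slim.arg_scope(mobilenet_v1_arg_scope(is_training=True)):
    # relu6, frozen batch norm, and SAME padding are inherited from the scope.
    net = slim.separable_conv2d(images, 64, [3, 3], depth_multiplier=1,
                                stride=2, scope='conv_ds_example')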
def inference(image_batch, keep_probability, phase_train=True,
              bottleneck_layer_size=512, weight_decay=0.0):
    batch_norm_params = {
        'decay': 0.995,
        'epsilon': 0.001,
        'scale': True,
        'is_training': phase_train,
        'updates_collections': None,
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
    }
    with tf.variable_scope('Resface'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_initializer=tf.contrib.layers.xavier_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            activation_fn=prelu,
                            normalizer_fn=slim.batch_norm,
                            # normalizer_fn=None,
                            normalizer_params=batch_norm_params):
            with slim.arg_scope([slim.conv2d], kernel_size=3):
                return resface20(images=image_batch,
                                 keep_probability=keep_probability,
                                 phase_train=phase_train,
                                 bottleneck_layer_size=bottleneck_layer_size,
                                 reuse=None)
def resnet_arg_scope(is_training=True,
                     weight_decay=cfg.TRAIN.WEIGHT_DECAY,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    batch_norm_params = {
        # NOTE: 'is_training' here does not work because inside resnet it gets
        # reset: https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py#L187
        'is_training': False,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'trainable': cfg.RESNET.BN_TRAIN,
        'updates_collections': ops.GraphKeys.UPDATE_OPS,
    }
    with arg_scope(
            [slim.conv2d],
            weights_regularizer=regularizers.l2_regularizer(weight_decay),
            weights_initializer=initializers.variance_scaling_initializer(),
            trainable=is_training,
            activation_fn=nn_ops.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=batch_norm_params):
        with arg_scope([layers.batch_norm], **batch_norm_params) as arg_sc:
            return arg_sc
def conv_tower_fn(self, images, is_training=True, reuse=None):
    """Computes convolutional features using the InceptionV3 model.

    Args:
      images: A tensor of shape [batch_size, height, width, channels].
      is_training: whether we are training or not.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.

    Returns:
      A tensor of shape [batch_size, OH, OW, N], where OWxOH is the resolution
      of the output feature map and N is the number of output features (depends
      on the network architecture).
    """
    mparams = self._mparams['conv_tower_fn']
    logging.debug('Using final_endpoint=%s', mparams.final_endpoint)
    with tf.variable_scope('conv_tower_fn/INCE'):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            with slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=is_training):
                net, _ = inception.inception_v3_base(
                    images, final_endpoint=mparams.final_endpoint)
        return net
def _image_to_head(self, is_training, reuse=None):
    # Base bottleneck
    assert (0 <= cfg.MOBILENET.FIXED_LAYERS <= 12)
    net_conv = self._image
    if cfg.MOBILENET.FIXED_LAYERS > 0:
        with slim.arg_scope(mobilenet_v1_arg_scope(is_training=False)):
            net_conv = mobilenet_v1_base(net_conv,
                                         _CONV_DEFS[:cfg.MOBILENET.FIXED_LAYERS],
                                         starting_layer=0,
                                         depth_multiplier=self._depth_multiplier,
                                         reuse=reuse,
                                         scope=self._scope)
    if cfg.MOBILENET.FIXED_LAYERS < 12:
        with slim.arg_scope(mobilenet_v1_arg_scope(is_training=is_training)):
            net_conv = mobilenet_v1_base(net_conv,
                                         _CONV_DEFS[cfg.MOBILENET.FIXED_LAYERS:12],
                                         starting_layer=cfg.MOBILENET.FIXED_LAYERS,
                                         depth_multiplier=self._depth_multiplier,
                                         reuse=reuse,
                                         scope=self._scope)
    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv
    return net_conv
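# The split above freezes the first FIXED_LAYERS of the backbone (weights not
# trainable, batch norm in inference mode) and trains the remainder. A minimal
# sketch of the same freezing idea with plain slim layers; names and shapes
# here are illustrative only.
import tensorflow as tf
slim = tf.contrib.slim
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
frozen = slim.conv2d(x, 16, [3, 3], trainable=False, scope='frozen_conv')
head = slim.conv2d(frozen, 32, [3, 3], trainable=True, scope='train_conv')
# Only 'train_conv' variables receive gradients during optimization.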
def encoder(self, images, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('encoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                weights_regularizer=slim.l2_regularizer(weight_decay),
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=self.batch_norm_params):
                net = images
                net = slim.conv2d(net, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1b')
                net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2b')
                net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3b')
                net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4b')
                net = slim.flatten(net)
                fc1 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None, normalizer_fn=None, scope='Fc_1')
                fc2 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None, normalizer_fn=None, scope='Fc_2')
    return fc1, fc2
def decoder(self, latent_var, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('decoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                weights_regularizer=slim.l2_regularizer(weight_decay),
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=self.batch_norm_params):
                net = slim.fully_connected(latent_var, 4096, activation_fn=None,
                                           normalizer_fn=None, scope='Fc_1')
                net = tf.reshape(net, [-1, 4, 4, 256], name='Reshape')

                net = tf.image.resize_nearest_neighbor(net, size=(8, 8), name='Upsample_1')
                net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1b')

                net = tf.image.resize_nearest_neighbor(net, size=(16, 16), name='Upsample_2')
                net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2b')

                net = tf.image.resize_nearest_neighbor(net, size=(32, 32), name='Upsample_3')
                net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3b')

                net = tf.image.resize_nearest_neighbor(net, size=(64, 64), name='Upsample_4')
                net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 3, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4b')
                net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_4c')
    return net
def _image_to_head(self, is_training, reuse=None):
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)
    # Now the base is always fixed during training
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv = self._build_base()
    if cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                              self._blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                              global_pool=False,
                                              include_root_block=False,
                                              reuse=reuse,
                                              scope=self._scope)
    if cfg.RESNET.FIXED_BLOCKS < 3:
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                              self._blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                              global_pool=False,
                                              include_root_block=False,
                                              reuse=reuse,
                                              scope=self._scope)
    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv
    return net_conv
def content_extractor(self, images, reuse=False):
    # images: (batch, 32, 32, 3) or (batch, 32, 32, 1)
    if images.get_shape()[3] == 1:
        # For mnist dataset, replicate the gray scale image 3 times.
        images = tf.image.grayscale_to_rgb(images)
    with tf.variable_scope('content_extractor', reuse=reuse):
        with slim.arg_scope([slim.conv2d], padding='SAME', activation_fn=None,
                            stride=2,
                            weights_initializer=tf.contrib.layers.xavier_initializer()):
            with slim.arg_scope([slim.batch_norm], decay=0.95, center=True,
                                scale=True, activation_fn=tf.nn.relu,
                                is_training=(self.mode == 'train' or self.mode == 'pretrain')):
                net = slim.conv2d(images, 64, [3, 3], scope='conv1')    # (batch_size, 16, 16, 64)
                net = slim.batch_norm(net, scope='bn1')
                net = slim.conv2d(net, 128, [3, 3], scope='conv2')      # (batch_size, 8, 8, 128)
                net = slim.batch_norm(net, scope='bn2')
                net = slim.conv2d(net, 256, [3, 3], scope='conv3')      # (batch_size, 4, 4, 256)
                net = slim.batch_norm(net, scope='bn3')
                net = slim.conv2d(net, 128, [4, 4], padding='VALID', scope='conv4')  # (batch_size, 1, 1, 128)
                net = slim.batch_norm(net, activation_fn=tf.nn.tanh, scope='bn4')
                if self.mode == 'pretrain':
                    net = slim.conv2d(net, 10, [1, 1], padding='VALID', scope='out')
                    net = slim.flatten(net)
                return net
def factory_fn(image, reuse):
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=False):
        with slim.arg_scope([slim.conv2d, slim.fully_connected,
                             slim.batch_norm, slim.layer_norm],
                            reuse=reuse):
            features, logits = _create_network(
                image, reuse=reuse, weight_decay=weight_decay)
            return features, logits
def _build_network(self, sess, is_training=True):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    # Base bottleneck
    assert (0 <= cfg.MOBILENET.FIXED_LAYERS <= 12)
    net_conv = self._image
    if cfg.MOBILENET.FIXED_LAYERS > 0:
        with slim.arg_scope(mobilenet_v1_arg_scope(is_training=False)):
            net_conv = mobilenet_v1_base(net_conv,
                                         _CONV_DEFS[:cfg.MOBILENET.FIXED_LAYERS],
                                         starting_layer=0,
                                         depth_multiplier=self._depth_multiplier,
                                         scope=self._scope)
    if cfg.MOBILENET.FIXED_LAYERS < 12:
        with slim.arg_scope(mobilenet_v1_arg_scope(is_training=is_training)):
            net_conv = mobilenet_v1_base(net_conv,
                                         _CONV_DEFS[cfg.MOBILENET.FIXED_LAYERS:12],
                                         starting_layer=cfg.MOBILENET.FIXED_LAYERS,
                                         depth_multiplier=self._depth_multiplier,
                                         scope=self._scope)
    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv

    with tf.variable_scope(self._scope, 'MobilenetV1'):
        # build the anchors for the image
        self._anchor_component()
        # region proposal network
        rois = self._region_proposal(net_conv, is_training, initializer)
        # region of interest pooling
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net_conv, rois, "pool5")
        else:
            raise NotImplementedError

    with slim.arg_scope(mobilenet_v1_arg_scope(is_training=is_training)):
        fc7 = mobilenet_v1_base(pool5,
                                _CONV_DEFS[12:],
                                starting_layer=12,
                                depth_multiplier=self._depth_multiplier,
                                scope=self._scope)

    with tf.variable_scope(self._scope, 'MobilenetV1'):
        # average pooling done by reduce_mean
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        # region classification
        cls_prob, bbox_pred = self._region_classification(fc7, is_training,
                                                          initializer,
                                                          initializer_bbox)
    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred
def construct_embedding(self):
    """Builds a conv -> spatial softmax -> FC adaptation network."""
    is_training = self._is_training
    normalizer_params = {'is_training': is_training}
    with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
        self._adaptation_scope = vs.name
        with slim.arg_scope(
                [slim.layers.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=normalizer_params,
                weights_regularizer=slim.regularizers.l2_regularizer(self._l2_reg_weight),
                biases_regularizer=slim.regularizers.l2_regularizer(self._l2_reg_weight)):
            with slim.arg_scope(
                    [slim.layers.fully_connected],
                    activation_fn=tf.nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=normalizer_params,
                    weights_regularizer=slim.regularizers.l2_regularizer(self._l2_reg_weight),
                    biases_regularizer=slim.regularizers.l2_regularizer(self._l2_reg_weight)):

                # Input to embedder is pre-trained inception output.
                net = self._pretrained_output

                # Optionally add more conv layers.
                for num_filters in self._additional_conv_sizes:
                    net = slim.layers.conv2d(net, num_filters,
                                             kernel_size=[3, 3], stride=[1, 1])
                    net = slim.dropout(net, keep_prob=self._conv_hidden_keep_prob,
                                       is_training=is_training)

                # Take the spatial soft arg-max of the last convolutional layer.
                # This is a form of spatial attention over the activations.
                # See more here: http://arxiv.org/abs/1509.06113.
                net = tf.contrib.layers.spatial_softmax(net)
                self.spatial_features = net

                # Add fully connected layers.
                net = slim.layers.flatten(net)
                for fc_hidden_size in self._fc_hidden_sizes:
                    net = slim.layers.fully_connected(net, fc_hidden_size)
                    if self._fc_hidden_keep_prob < 1.0:
                        net = slim.dropout(net, keep_prob=self._fc_hidden_keep_prob,
                                           is_training=is_training)

                # Connect last FC layer to embedding.
                net = slim.layers.fully_connected(net, self._embedding_size,
                                                  activation_fn=None)

                # Optionally L2 normalize the embedding.
                if self._embedding_l2:
                    net = tf.nn.l2_normalize(net, dim=1)

                return net
def factory_fn(image, reuse, l2_normalize):
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
        with slim.arg_scope([slim.conv2d, slim.fully_connected,
                             slim.batch_norm, slim.layer_norm],
                            reuse=reuse):
            features, logits = _create_network(
                image, num_classes, l2_normalize=l2_normalize,
                reuse=reuse, create_summaries=is_training,
                weight_decay=weight_decay)
            return features, logits
def image_embedding(images,
                    model_fn=resnet_v1_152,
                    trainable=True,
                    is_training=True,
                    weight_decay=0.0001,
                    batch_norm_decay=0.997,
                    batch_norm_epsilon=1e-5,
                    batch_norm_scale=True,
                    add_summaries=False,
                    reuse=False):
    """Extract image features from pretrained resnet model."""

    is_resnet_training = trainable and is_training

    batch_norm_params = {
        "is_training": is_resnet_training,
        "trainable": trainable,
        "decay": batch_norm_decay,
        "epsilon": batch_norm_epsilon,
        "scale": batch_norm_scale,
    }

    if trainable:
        weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    with tf.variable_scope(model_fn.__name__, [images], reuse=reuse) as scope:
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=weights_regularizer,
                            trainable=trainable):
            with slim.arg_scope([slim.conv2d],
                                weights_initializer=slim.variance_scaling_initializer(),
                                activation_fn=tf.nn.relu,
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=batch_norm_params):
                with slim.arg_scope([slim.batch_norm],
                                    is_training=is_resnet_training,
                                    trainable=trainable):
                    with slim.arg_scope([slim.max_pool2d], padding="SAME"):
                        net, end_points = model_fn(images,
                                                   num_classes=None,
                                                   global_pool=False,
                                                   is_training=is_resnet_training,
                                                   reuse=reuse,
                                                   scope=scope)

    if add_summaries:
        for v in end_points.values():
            tf.contrib.layers.summaries.summarize_activation(v)

    return net
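# Minimal usage sketch, assuming slim's resnet_v1_152 (from the slim nets
# package) is in scope; the input shape is illustrative.
import tensorflow as tf
slim = tf.contrib.slim
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
features = image_embedding(images, trainable=False, is_training=False)
# global_pool=False gives a spatial map; pool it to get a vector embedding.
embedding = tf.reduce_mean(features, axis=[1, 2])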
def model(images, weight_decay=1e-5, is_training=True):
    '''
    Define the model; we use slim's implementation of resnet.
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images,
                                                    is_training=is_training,
                                                    scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # Here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range; the same
            # is done for the angle map.
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2  # angle is in [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
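# Quick numeric check of the angle mapping above: a sigmoid output in (0, 1)
# is shifted and scaled to (-pi/4, pi/4), i.e. roughly [-45, 45] degrees.
import numpy as np
s = np.array([0.0, 0.5, 1.0])     # limiting/midpoint sigmoid outputs
angle = (s - 0.5) * np.pi / 2
print(np.degrees(angle))          # -> [-45.   0.  45.]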
def build_inceptionv3_graph(images, endpoint, is_training, checkpoint, reuse=False):
    """Builds an InceptionV3 model graph.

    Args:
      images: A 4-D float32 `Tensor` of batch images.
      endpoint: String, name of the InceptionV3 endpoint.
      is_training: Boolean, whether or not to build a training or inference graph.
      checkpoint: String, path to the pretrained model checkpoint.
      reuse: Boolean, whether or not we are reusing the embedder.

    Returns:
      inception_output: `Tensor` holding the InceptionV3 output.
      inception_variables: List of inception variables.
      init_fn: Function to initialize the weights (if not reusing, then None).
    """
    with slim.arg_scope(inception.inception_v3_arg_scope()):
        _, endpoints = inception.inception_v3(
            images, num_classes=1001, is_training=is_training)
        inception_output = endpoints[endpoint]
    inception_variables = slim.get_variables_to_restore()
    inception_variables = [
        i for i in inception_variables if 'global_step' not in i.name]
    if is_training and not reuse:
        init_saver = tf.train.Saver(inception_variables)

        def init_fn(scaffold, sess):
            del scaffold
            init_saver.restore(sess, checkpoint)
    else:
        init_fn = None
    return inception_output, inception_variables, init_fn
def generator(self, inputs, reuse=False):
    # inputs: (batch, 1, 1, 128)
    with tf.variable_scope('generator', reuse=reuse):
        with slim.arg_scope([slim.conv2d_transpose], padding='SAME',
                            activation_fn=None, stride=2,
                            weights_initializer=tf.contrib.layers.xavier_initializer()):
            with slim.arg_scope([slim.batch_norm], decay=0.95, center=True,
                                scale=True, activation_fn=tf.nn.relu,
                                is_training=(self.mode == 'train')):
                net = slim.conv2d_transpose(inputs, 512, [4, 4], padding='VALID',
                                            scope='conv_transpose1')  # (batch_size, 4, 4, 512)
                net = slim.batch_norm(net, scope='bn1')
                net = slim.conv2d_transpose(net, 256, [3, 3],
                                            scope='conv_transpose2')  # (batch_size, 8, 8, 256)
                net = slim.batch_norm(net, scope='bn2')
                net = slim.conv2d_transpose(net, 128, [3, 3],
                                            scope='conv_transpose3')  # (batch_size, 16, 16, 128)
                net = slim.batch_norm(net, scope='bn3')
                net = slim.conv2d_transpose(net, 1, [3, 3], activation_fn=tf.nn.tanh,
                                            scope='conv_transpose4')  # (batch_size, 32, 32, 1)
                return net
def inference(images, train=True, resnet_stride=8):
    with slim.arg_scope(resnet_v1.resnet_arg_scope(train)):
        net, end_points = resnet_v1_slim(images,
                                         num_classes=None,
                                         global_pool=False,
                                         output_stride=resnet_stride)
    # Replace resnet_v1_slim above with resnet_v1.resnet_v1_50/101/... to use
    # standard architectures.
    # num_classes: Number of predicted classes for classification tasks. If
    #   None we return the features before the logit layer.
    # global_pool: If True, we perform global average pooling before computing
    #   the logits. Set to True for image classification, False for dense
    #   prediction.
    # output_stride: If None, then the output will be computed at the nominal
    #   network stride. If output_stride is not None, it specifies the
    #   requested ratio of input to output spatial resolution.
    resnet_depth = utils.last_dimension(net.get_shape(), min_rank=4)
    # tf.unpack/tf.pack were renamed tf.unstack/tf.stack in TF 1.0.
    shape = tf.unstack(tf.shape(images))
    shape.pop()
    shape.append(tf.constant(FLAGS.out_channels, dtype=tf.int32))
    filters = tf.Variable(
        tf.truncated_normal(
            [resnet_stride * 2 + 1, resnet_stride * 2 + 1,
             FLAGS.out_channels, resnet_depth],
            dtype=tf.float32, stddev=0.01),
        name='filters')
    logits = tf.nn.conv2d_transpose(net, filters, tf.stack(shape),
                                    [1, resnet_stride, resnet_stride, 1],
                                    padding='SAME', name='upscale')
    return logits
def conv_net_kelz(inputs):
    """Builds the ConvNet from Kelz 2016."""
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=2.0, mode='FAN_AVG', uniform=True)):
        net = slim.conv2d(inputs, 32, [3, 3], scope='conv1',
                          normalizer_fn=slim.batch_norm)
        net = slim.conv2d(net, 32, [3, 3], scope='conv2',
                          normalizer_fn=slim.batch_norm)
        net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool2')
        net = slim.dropout(net, 0.25, scope='dropout2')

        net = slim.conv2d(net, 64, [3, 3], scope='conv3',
                          normalizer_fn=slim.batch_norm)
        net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool3')
        net = slim.dropout(net, 0.25, scope='dropout3')

        # Flatten while preserving batch and time dimensions.
        dims = tf.shape(net)
        net = tf.reshape(net,
                         (dims[0], dims[1],
                          net.shape[2].value * net.shape[3].value),
                         'flatten4')

        net = slim.fully_connected(net, 512, scope='fc5')
        net = slim.dropout(net, 0.5, scope='dropout5')

        return net
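# Minimal usage sketch for conv_net_kelz, assuming TF1.x / tf.contrib.slim;
# the [batch, time, 229, 1] spectrogram layout is an illustrative assumption.
import tensorflow as tf
slim = tf.contrib.slim
spec = tf.placeholder(tf.float32, [None, None, 229, 1])
net = conv_net_kelz(spec)  # -> [batch, time, 512] features for an RNN head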
def create_network(self, name):
    with tf.variable_scope(name) as scope:
        inputs = tf.placeholder(fl32, [None, self.state_dim], 'inputs')
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=relu,
                            weights_initializer=uniform,
                            weights_regularizer=None):
            net = slim.fully_connected(inputs, 1024)
            res = net = slim.fully_connected(net, 128)
            net = slim.fully_connected(net, 256)
            net = slim.fully_connected(net, 128, activation_fn=None)
            net = relu(net + res)
            res = net = slim.fully_connected(net, 128)
            net = slim.fully_connected(net, 256)
            net = slim.fully_connected(net, 128, activation_fn=None)
            net = relu(net + res)
            res = net = slim.fully_connected(net, 128)
            net = slim.fully_connected(net, 256)
            net = slim.fully_connected(net, 128, activation_fn=None)
            net = relu(net + res)
            outputs = slim.fully_connected(net, self.action_dim,
                                           activation_fn=tanh)
            # tf.mul was removed in TF 1.0; tf.multiply is the replacement.
            outputs = tf.multiply(outputs, self.bound)
    return (inputs, outputs, scope.name)
def create_model(self, images, num_classes, weight_decay=0.00004,
                 scope='Flowers', reuse=None, is_training=True):
    """Creates a base part of the Model (no gradients, no loss, no summaries).

    Args:
      images: A tensor of size [batch_size, height, width, channels].
      num_classes: The number of predicted classes.
      scope: Optional variable_scope.
      reuse: Whether or not the network or its variables should be reused. To
        be able to reuse 'scope' must be given.
      is_training: Whether is training or not.

    Returns:
      A named tuple OutputEndpoints.
    """
    with tf.variable_scope(scope, [images], reuse=reuse):
        with slim.arg_scope(inception_v3.inception_v3_arg_scope(weight_decay=weight_decay)):
            logits, endpoints = inception_v3.inception_v3(
                inputs=images,
                num_classes=num_classes,
                is_training=is_training)
            return logits, endpoints
def _extra_conv_arg_scope(weight_decay=0.00001, activation_fn=None, normalizer_fn=None):
    with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
            padding='SAME',
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn):
        with slim.arg_scope(
                [slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(weight_decay),
                weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
                activation_fn=activation_fn,
                normalizer_fn=normalizer_fn) as arg_sc:
            return arg_sc
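# Nested arg_scopes compose, so the inner `as arg_sc` captures the defaults
# for conv2d/conv2d_transpose and fully_connected together and a single `with`
# suffices at the call site. A minimal sketch, assuming TF1.x / tf.contrib.slim:
import tensorflow as tf
slim = tf.contrib.slim
x = tf.placeholder(tf.float32, [None, 8, 8, 3])
with slim.arg_scope(_extra_conv_arg_scope(activation_fn=tf.nn.relu)):
    y = slim.conv2d(x, 16, [3, 3])  # inherits padding, init, and regularizer
    z = slim.fully_connected(slim.flatten(y), 10)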
def build_feature_pyramid(self):
    '''
    reference: https://github.com/CharlesShang/FastMaskRCNN
    build P2, P3, P4, P5, P6
    :return: multi-scale feature map
    '''
    feature_pyramid = {}
    with tf.variable_scope('feature_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(self.rpn_weight_decay)):
            feature_pyramid['P5'] = slim.conv2d(self.feature_maps_dict['C5'],
                                                num_outputs=256,
                                                kernel_size=[1, 1],
                                                stride=1,
                                                scope='build_P5')
            feature_pyramid['P6'] = slim.max_pool2d(feature_pyramid['P5'],
                                                    kernel_size=[2, 2],
                                                    stride=2,
                                                    scope='build_P6')
            # P6 is a down sample of P5
            for layer in range(4, 1, -1):
                p, c = feature_pyramid['P' + str(layer + 1)], self.feature_maps_dict['C' + str(layer)]
                up_sample_shape = tf.shape(c)
                up_sample = tf.image.resize_nearest_neighbor(
                    p, [up_sample_shape[1], up_sample_shape[2]],
                    name='build_P%d/up_sample_nearest_neighbor' % layer)
                c = slim.conv2d(c, num_outputs=256, kernel_size=[1, 1], stride=1,
                                scope='build_P%d/reduce_dimension' % layer)
                p = up_sample + c
                p = slim.conv2d(p, 256, kernel_size=[3, 3], stride=1,
                                padding='SAME',
                                scope='build_P%d/avoid_aliasing' % layer)
                feature_pyramid['P' + str(layer)] = p
    return feature_pyramid
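# Each top-down step above is "upsample, project, add, smooth":
# P_l = conv3x3(upsample(P_{l+1}) + conv1x1(C_l)). A shape-only sketch of one
# fusion step, assuming TF1.x / tf.contrib.slim; shapes are illustrative.
import tensorflow as tf
slim = tf.contrib.slim
c4 = tf.placeholder(tf.float32, [None, 32, 32, 1024])  # finer backbone map
p5 = tf.placeholder(tf.float32, [None, 16, 16, 256])   # coarser pyramid map
up = tf.image.resize_nearest_neighbor(p5, tf.shape(c4)[1:3])
p4 = up + slim.conv2d(c4, 256, [1, 1])
p4 = slim.conv2d(p4, 256, [3, 3])  # 3x3 conv to reduce upsampling aliasing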
def build_graph(self, image, label):
    image = tf.expand_dims(image, 3)
    image = image * 2 - 1
    is_training = get_current_tower_context().is_training
    with slim.arg_scope([slim.layers.fully_connected],
                        weights_regularizer=slim.l2_regularizer(1e-5)):
        l = slim.layers.conv2d(image, 32, [3, 3], scope='conv0')
        l = slim.layers.max_pool2d(l, [2, 2], scope='pool0')
        l = slim.layers.conv2d(l, 32, [3, 3], padding='SAME', scope='conv1')
        l = slim.layers.conv2d(l, 32, [3, 3], scope='conv2')
        l = slim.layers.max_pool2d(l, [2, 2], scope='pool1')
        l = slim.layers.conv2d(l, 32, [3, 3], scope='conv3')
        l = slim.layers.flatten(l, scope='flatten')
        l = slim.layers.fully_connected(l, 512, scope='fc0')
        l = slim.layers.dropout(l, is_training=is_training)
        logits = slim.layers.fully_connected(l, 10, activation_fn=None, scope='fc1')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    acc = tf.to_float(tf.nn.in_top_k(logits, label, 1))
    acc = tf.reduce_mean(acc, name='accuracy')
    summary.add_moving_summary(acc)

    summary.add_moving_summary(cost)
    summary.add_param_summary(('.*/weights', ['histogram', 'rms']))  # slim uses different variable names
    return cost + regularize_cost_from_collection()
def create_network(self, name):
    with tf.variable_scope(name) as scope:
        inputs = tf.placeholder(fl32, [None, self.state_dim], 'inputs')
        actions = tf.placeholder(fl32, [None, self.action_dim], 'actions')
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=relu,
                            weights_initializer=uniform,
                            weights_regularizer=None):
            # tf.concat(axis, values) predates TF 1.0; values come first now.
            net = tf.concat([inputs, actions], 1)
            net = slim.fully_connected(net, 400)
            net = slim.fully_connected(net, 300)
            '''net = slim.fully_connected(inputs, 400)
            w1 = tf.get_variable("w1", shape=[400, 300], initializer=uniform)
            w2 = tf.get_variable("w2", shape=[self.action_dim, 300], initializer=uniform)
            b = tf.get_variable("b", shape=[300], initializer=constant)
            net = relu(tf.matmul(net, w1) + tf.matmul(actions, w2) + b)'''
            out = slim.fully_connected(net, 1, activation_fn=None)
    return (inputs, actions, out, scope.name)
def build_arch(input, is_train, num_classes):
    data_size = int(input.get_shape()[1])
    # initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
    # bias_initializer = tf.constant_initializer(0.0)
    # weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    # Leftover options (activation_fn, biases_initializer, weights_regularizer)
    # were commented out of this arg_scope in the original.
    with slim.arg_scope([slim.conv2d], trainable=is_train):
        with tf.variable_scope('conv1') as scope:
            output = slim.conv2d(input, num_outputs=256, kernel_size=[9, 9],
                                 stride=1, padding='VALID', scope=scope)
            data_size = data_size - 8
            assert output.get_shape() == [cfg.batch_size, data_size, data_size, 256]
            tf.logging.info('conv1 output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('primary_caps_layer') as scope:
            output = slim.conv2d(output, num_outputs=32 * 8, kernel_size=[9, 9],
                                 stride=2, padding='VALID', scope=scope)
            output = tf.reshape(output, [cfg.batch_size, -1, 8])
            output = squash(output)
            data_size = int(np.floor((data_size - 8) / 2))
            assert output.get_shape() == [cfg.batch_size, data_size * data_size * 32, 8]
            tf.logging.info('primary capsule output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('digit_caps_layer') as scope:
            with tf.variable_scope('u') as scope:
                u_hats = vec_transform(output, num_classes, 16)
                assert u_hats.get_shape() == [cfg.batch_size, num_classes,
                                              data_size * data_size * 32, 16]
                tf.logging.info('digit_caps_layer u_hats shape: {}'.format(u_hats.get_shape()))
            with tf.variable_scope('routing') as scope:
                output = dynamic_routing(u_hats)
                assert output.get_shape() == [cfg.batch_size, num_classes, 16]
        tf.logging.info('the output capsule has shape: {}'.format(output.get_shape()))

    output_len = tf.norm(output, axis=-1)
    return output, output_len
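# `squash` above is the capsule nonlinearity from Sabour et al. 2017,
# v = (|s|^2 / (1 + |s|^2)) * s / |s|. A common implementation (a sketch, not
# necessarily the author's exact version):
import tensorflow as tf
def squash_sketch(s, axis=-1, eps=1e-9):
    # Short vectors shrink toward 0; long vectors saturate toward unit length.
    sq_norm = tf.reduce_sum(tf.square(s), axis=axis, keepdims=True)
    return (sq_norm / (1.0 + sq_norm)) * s / tf.sqrt(sq_norm + eps)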
def build_single_inceptionv1(train_tfdata, is_train, dropout_keep_prob):
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        identity, end_points = inception.inception_v1(
            train_tfdata, dropout_keep_prob=dropout_keep_prob, is_training=is_train)
        net = slim.avg_pool2d(end_points['Mixed_5c'], [7, 7], stride=1,
                              scope='MaxPool_0a_7x7')
        net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b')
        feature = tf.squeeze(net, [1, 2])
    return identity, feature
def network_det(self, inputs, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
        conv1 = slim.conv2d(inputs, 96, [11, 11], 4, padding='VALID', scope='conv1')
        max1 = slim.max_pool2d(conv1, [3, 3], 2, padding='VALID', scope='max1')

        conv2 = slim.conv2d(max1, 256, [5, 5], 1, scope='conv2')
        max2 = slim.max_pool2d(conv2, [3, 3], 2, padding='VALID', scope='max2')

        conv3 = slim.conv2d(max2, 384, [3, 3], 1, scope='conv3')
        conv4 = slim.conv2d(conv3, 384, [3, 3], 1, scope='conv4')
        conv5 = slim.conv2d(conv4, 256, [3, 3], 1, scope='conv5')
        pool5 = slim.max_pool2d(conv5, [3, 3], 2, padding='VALID', scope='pool5')

        shape = int(np.prod(pool5.get_shape()[1:]))
        fc6 = slim.fully_connected(tf.reshape(pool5, [-1, shape]), 4096, scope='fc6')

        fc_detection = slim.fully_connected(fc6, 512, scope='fc_det1')
        out_detection = slim.fully_connected(fc_detection, 2, scope='fc_det2',
                                             activation_fn=None)
    return out_detection
def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8,
                        bottleneck_layer_size=128, reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: int, size of the bottleneck (embedding) layer.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the bottleneck output of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 128, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                # net = slim.conv2d(net, 32, 3, padding='VALID',
                #                   scope='Conv2d_2a_3x3')
                # end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                # net = slim.conv2d(net, 32, [1, 3], scope='Conv2d_2b_3x3')
                # net = slim.conv2d(net, 32, [3, 1], scope='Conv2d_2c_3x3')
                # end_points['Conv2d_2_3x3'] = net
                # # 73 x 73 x 64
                # net = slim.max_pool2d(net, 3, stride=3, padding='VALID',
                #                       scope='MaxPool_3a_3x3')
                # end_points['MaxPool_3a_3x3'] = net
                # # 73 x 73 x 80
                # net = slim.conv2d(net, 64, 1, padding='VALID',
                #                   scope='Conv2d_3b_1x1')
                net = slim.max_pool2d(net, 3, stride=3, padding='VALID',
                                      scope='MaxPool_3b_3x3')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                # net = slim.conv2d(net, 192, 3, padding='VALID',
                #                   scope='Conv2d_4a_3x3')
                # end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 1, block35, scale=0.17)
                end_points['Mixed_5a'] = net

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 96, 48, 96, 64)
                end_points['Mixed_6a'] = net

                # # 10 x Inception-Resnet-B
                # net = slim.repeat(net, 1, block17, scale=0.10)
                # end_points['Mixed_6b'] = net
                #
                # # Reduction-B
                # with tf.variable_scope('Mixed_7a'):
                #     net = reduction_b(net)
                # end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 1, block8, scale=0.20)
                end_points['Mixed_8a'] = net

                net = block8(net, activation_fn=None)
                end_points['Mixed_8b'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')  # TODO: change to max pool
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net
                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)
    return net, end_points
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=True,
              reuse=None,
              scope=None):
    """Generator for v1 ResNet models.

    This function generates a family of ResNet v1 models. See the resnet_v1_*()
    methods for specific model instantiations, obtained by selecting different
    block instantiations that produce ResNets of various depths.

    Training for image classification on Imagenet is usually done with [224, 224]
    inputs, resulting in [7, 7] feature maps at the output of the last ResNet
    block for the ResNets defined in [1] that have nominal stride equal to 32.
    However, for dense prediction tasks we advise that one uses inputs with
    spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
    this case the feature maps at the ResNet output will have spatial shape
    [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
    and corners exactly aligned with the input image corners, which greatly
    facilitates alignment of the features to the image. Using as input [225, 225]
    images results in [8, 8] feature maps at the output of the last ResNet block.

    For dense prediction tasks, the ResNet needs to run in fully-convolutional
    (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2]
    all have nominal stride equal to 32 and a good choice in FCN mode is to use
    output_stride=16 in order to increase the density of the computed features
    at small computational and memory overhead, cf.
    http://arxiv.org/abs/1606.00915.

    Args:
      inputs: A tensor of size [batch, height_in, width_in, channels].
      blocks: A list of length equal to the number of ResNet blocks. Each
        element is a resnet_utils.Block object describing the units in the
        block.
      num_classes: Number of predicted classes for classification tasks. If
        None we return the features before the logit layer.
      is_training: whether is training or not.
      global_pool: If True, we perform global average pooling before computing
        the logits. Set to True for image classification, False for dense
        prediction.
      output_stride: If None, then the output will be computed at the nominal
        network stride. If output_stride is not None, it specifies the
        requested ratio of input to output spatial resolution.
      include_root_block: If True, include the initial convolution followed by
        max-pooling, if False excludes it.
      spatial_squeeze: if True, logits is of shape [B, C], if false logits is
        of shape [B, 1, 1, C], where B is batch_size and C is number of
        classes.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
        If global_pool is False, then height_out and width_out are reduced by a
        factor of output_stride compared to the respective height_in and
        width_in, else both height_out and width_out equal one. If num_classes
        is None, then net is the output of the last ResNet block, potentially
        after global average pooling. If num_classes is not None, net contains
        the pre-softmax activations.
      end_points: A dictionary from components of the network to the
        corresponding activation.

    Raises:
      ValueError: If the target output_stride is not valid.
    """
    with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
                [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError('The output_stride needs to be a multiple of 4.')
                        output_stride /= 4
                    net = resnet_utils.conv2d_same(net, 32, 3, stride=1, rate=1, scope='conv0')
                    net = slim.utils.collect_named_outputs(end_points_collection, 'pool0', net)
                    net = resnet_utils.conv2d_same(net, 64, 7, stride=2, rate=1, scope='conv1')
                    net = slim.utils.collect_named_outputs(end_points_collection, 'pool1', net)
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                    net = slim.utils.collect_named_outputs(end_points_collection, 'pool2', net)
                net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
                end_points = slim.utils.convert_collection_to_dict(end_points_collection)

                # end_points['pool2'] = end_points['resnet_v1_50/pool1/MaxPool:0']
                try:
                    end_points['pool3'] = end_points['resnet_v1_50/block1']
                    end_points['pool4'] = end_points['resnet_v1_50/block2']
                except KeyError:  # the scope may carry a 'Detection/' prefix
                    end_points['pool3'] = end_points['Detection/resnet_v1_50/block1']
                    end_points['pool4'] = end_points['Detection/resnet_v1_50/block2']
                end_points['pool5'] = net
                # if global_pool:
                #     # Global average pooling.
                #     net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
                # if num_classes is not None:
                #     net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                #                       normalizer_fn=None, scope='logits')
                #     if spatial_squeeze:
                #         logits = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                #     else:
                #         logits = net
                # # Convert end_points_collection into a dictionary of end_points.
                # end_points = slim.utils.convert_collection_to_dict(end_points_collection)
                # if num_classes is not None:
                #     end_points['predictions'] = slim.softmax(logits, scope='predictions')
                return net, end_points
def interface_vgg16(self, inputs, reuse=None, is_training=True):
    endpoints = {}
    with slim.arg_scope(vgg_arg_scope()):
        _, vgg_end_points = vgg_16(inputs, is_training=is_training, reuse=reuse,
                                   spatial_squeeze=False, num_classes=None)
        endpoints['net1'] = vgg_end_points['vgg_16/conv1/conv1_2']
        endpoints['net2'] = vgg_end_points['vgg_16/conv2/conv2_2']
        endpoints['net3'] = vgg_end_points['vgg_16/conv3/conv3_3']
        endpoints['net4'] = vgg_end_points['vgg_16/conv4/conv4_3']
        endpoints['net5'] = vgg_end_points['vgg_16/conv5/conv5_3']
    with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
        with tf.variable_scope('cloud_net', 'cloud_net', [inputs], reuse=reuse):
            with tf.variable_scope('feature_exatraction'):
                nets = vgg_end_points['vgg_16/conv5/conv5_3']
                nets = slim.conv2d(nets, 512, [3, 3], stride=2, scope='pool5')
                nets = slim.repeat(nets, 2, slim.conv2d, 512, [3, 3], scope='conv6')
                endpoints['net6'] = nets
                nets = slim.conv2d(nets, 512, [3, 3], stride=2, scope='pool6')
                nets = slim.conv2d(nets, 512, [3, 3], scope='conv7')
                endpoints['net7'] = nets
            with tf.variable_scope('alpha_prediction'):
                # alpha prediction
                nets = endpoints['net7']
                nets = slim.conv2d_transpose(nets, 512, [3, 3], stride=2,
                                             scope='conv_trans1') + endpoints['net6']
                nets = slim.conv2d_transpose(nets, 512, [3, 3], stride=2,
                                             scope='conv_trans2') + endpoints['net5']
                nets = slim.conv2d_transpose(nets, 512, [3, 3], stride=2,
                                             scope='conv_trans3') + endpoints['net4']
                nets = slim.conv2d_transpose(nets, 256, [3, 3], stride=2,
                                             scope='conv_trans4') + endpoints['net3']
                nets = slim.conv2d_transpose(nets, 128, [3, 3], stride=2,
                                             scope='conv_trans5') + endpoints['net2']
                nets = slim.conv2d_transpose(nets, 64, [3, 3], stride=2,
                                             scope='conv_trans6') + endpoints['net1']
                alpha_logits = slim.conv2d(nets, self.alpha_channel, [3, 3],
                                           scope='pred', activation_fn=None)
            with tf.variable_scope('reflectance_prediction'):
                # reflectance prediction
                nets = endpoints['net7']
                nets = slim.conv2d_transpose(nets, 512, [3, 3], stride=2,
                                             scope='conv_trans1') + endpoints['net6']
                nets = slim.conv2d_transpose(nets, 512, [3, 3], stride=2,
                                             scope='conv_trans2') + endpoints['net5']
                nets = slim.conv2d_transpose(nets, 512, [3, 3], stride=2,
                                             scope='conv_trans3') + endpoints['net4']
                nets = slim.conv2d_transpose(nets, 256, [3, 3], stride=2,
                                             scope='conv_trans4') + endpoints['net3']
                nets = slim.conv2d_transpose(nets, 128, [3, 3], stride=2,
                                             scope='conv_trans5') + endpoints['net2']
                nets = slim.conv2d_transpose(nets, 64, [3, 3], stride=2,
                                             scope='conv_trans6') + endpoints['net1']
                reflectance_logits = slim.conv2d(nets, self.reflectance_channel, [3, 3],
                                                 scope='pred', activation_fn=None)
    return alpha_logits, reflectance_logits
def inference(input_tensor, regularizer=None):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d], stride=1, padding='SAME'):
        with tf.variable_scope("layer1-initconv"):
            data = slim.conv2d(input_tensor, CONV_DEEP, [7, 7])
            data = slim.max_pool2d(data, [2, 2], stride=2)

        with tf.variable_scope("resnet_layer"):
            data = res_block(input_tensor=data, kshape=[CONV_SIZE, CONV_SIZE],
                             deph=CONV_DEEP, layer=6, half=False,
                             name="layer4-9-conv")
            data = res_block(input_tensor=data, kshape=[CONV_SIZE, CONV_SIZE],
                             deph=CONV_DEEP * 2, layer=8, half=True,
                             name="layer10-15-conv")
            data = res_block(input_tensor=data, kshape=[CONV_SIZE, CONV_SIZE],
                             deph=CONV_DEEP * 4, layer=12, half=True,
                             name="layer16-27-conv")
            data = res_block(input_tensor=data, kshape=[CONV_SIZE, CONV_SIZE],
                             deph=CONV_DEEP * 8, layer=6, half=True,
                             name="layer28-33-conv")
            data = slim.avg_pool2d(data, [2, 2], stride=2)

        # Get the dimensions of the output, used as the input size of the
        # fully connected layer.
        data_shape = data.get_shape().as_list()
        nodes = data_shape[1] * data_shape[2] * data_shape[3]
        reshaped = tf.reshape(data, [data_shape[0], nodes])

        # Final fully connected layer.
        with tf.variable_scope('layer34-fc'):
            fc_weights = tf.get_variable(
                "weight", [nodes, NUM_LABELS],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            # if regularizer != None:
            #     tf.add_to_collection('losses', regularizer(fc_weights))
            fc_biases = tf.get_variable(
                "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
            fc = tf.nn.relu(tf.matmul(reshaped, fc_weights) + fc_biases)
            # if train:
            #     fc = tf.nn.dropout(fc, 0.5)
        return fc
def _build_network(images, num_outputs, alpha, keep_prob=1.0,
                   is_training=True, scope='yolo'):
    with tf.variable_scope(scope):
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=leaky_relu(alpha),
                weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                weights_regularizer=slim.l2_regularizer(0.0005),
                variables_collections='Variables'):
            net = tf.pad(images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
                         name='pad_1')
            net = slim.conv2d(net, 64, 7, 2, padding='VALID', scope='conv_2',
                              trainable=False)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
            net = slim.conv2d(net, 192, 3, scope='conv_4', trainable=False)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')
            net = slim.conv2d(net, 128, 1, scope='conv_6')
            net = slim.conv2d(net, 256, 3, scope='conv_7')
            net = slim.conv2d(net, 256, 1, scope='conv_8')
            net = slim.conv2d(net, 512, 3, scope='conv_9')
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')
            net = slim.conv2d(net, 256, 1, scope='conv_11')
            net = slim.conv2d(net, 512, 3, scope='conv_12')
            net = slim.conv2d(net, 256, 1, scope='conv_13')
            net = slim.conv2d(net, 512, 3, scope='conv_14')
            net = slim.conv2d(net, 256, 1, scope='conv_15')
            net = slim.conv2d(net, 512, 3, scope='conv_16')
            net = slim.conv2d(net, 256, 1, scope='conv_17')
            net = slim.conv2d(net, 512, 3, scope='conv_18')
            net = slim.conv2d(net, 512, 1, scope='conv_19')
            # tf.summary.histogram('conv19', net)
            net = slim.conv2d(net, 1024, 3, scope='conv_20')
            # tf.summary.histogram('conv20', net)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
            net = slim.conv2d(net, 512, 1, scope='conv_22')
            net = slim.conv2d(net, 1024, 3, scope='conv_23')
            net = slim.conv2d(net, 512, 1, scope='conv_24')
            net = slim.conv2d(net, 1024, 3, scope='conv_25')
            net = slim.conv2d(net, 1024, 3, scope='conv_26')
            # tf.summary.histogram('conv26', net)
            net = tf.pad(net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),
                         name='pad_27')
            net = slim.conv2d(net, 1024, 3, 2, padding='VALID', scope='conv_28')
            net = slim.conv2d(net, 1024, 3, scope='conv_29')
            net = slim.conv2d(net, 1024, 3, scope='conv_30')
            net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
            net = slim.flatten(net, scope='flat_32')
            net = slim.fully_connected(net, 512, scope='fc_33')
            net = slim.fully_connected(net, 4096, scope='fc_34')
            net = slim.dropout(net, keep_prob=keep_prob,
                               is_training=is_training, scope='dropout_35')
            net = slim.fully_connected(net, num_outputs, activation_fn=None,
                                       scope='fc_36')
            # net ~ batch * 7 * 7 * 30
    return net
def generator(self, inputs, content_extractor_layers, reuse=False):
    # inputs: (batch, 1, 1, 128)
    with tf.variable_scope('generator', reuse=reuse):
        with slim.arg_scope([slim.conv2d_transpose], padding='SAME',
                            activation_fn=None, stride=2,
                            weights_initializer=tf.contrib.layers.xavier_initializer()):
            with slim.arg_scope([slim.batch_norm], decay=0.95, center=True,
                                scale=True, activation_fn=tf.nn.relu,
                                is_training=(self.mode == 'train')):
                with slim.arg_scope([slim.conv2d], padding='SAME',
                                    activation_fn=None, stride=1,
                                    weights_initializer=tf.contrib.layers.xavier_initializer()):
                    net = slim.conv2d_transpose(inputs, 512, [4, 4], padding='VALID',
                                                scope='conv_transpose1_1')  # (batch_size, 4, 4, 512)
                    net = slim.batch_norm(net, scope='bn1_1')
                    net = slim.conv2d(net, 512, [3, 3],
                                      scope='conv_transpose1_2')            # (batch_size, 4, 4, 512)
                    net = slim.batch_norm(net, scope='bn1_2')
                    # tf.concat(axis, values) predates TF 1.0; values come first now.
                    concat = tf.concat((net, content_extractor_layers['conv4_1']), 3)
                    net = slim.conv2d_transpose(concat, 256, [3, 3],
                                                scope='conv_transpose2_1')  # (batch_size, 8, 8, 256)
                    net = slim.batch_norm(net, scope='bn2')
                    net = slim.conv2d(net, 256, [3, 3],
                                      scope='conv_transpose2_2')            # (batch_size, 8, 8, 256)
                    net = slim.batch_norm(net, scope='bn2_2')
                    concat = tf.concat((net, content_extractor_layers['conv3_1']), 3)
                    net = slim.conv2d_transpose(concat, 128, [3, 3],
                                                scope='conv_transpose3_1')  # (batch_size, 16, 16, 128)
                    net = slim.batch_norm(net, scope='bn3')
                    net = slim.conv2d(net, 128, [3, 3],
                                      scope='conv_transpose3_2')            # (batch_size, 16, 16, 128)
                    net = slim.batch_norm(net, scope='bn3_2')
                    concat = tf.concat((net, content_extractor_layers['conv2_1']), 3)
                    net = slim.conv2d_transpose(concat, 3, [3, 3],
                                                activation_fn=tf.nn.tanh,
                                                scope='conv_transpose4')    # (batch_size, 32, 32, 3)
                    return net
def content_extractor(self, images, reuse=False):
    # images: (batch, 32, 32, 3) or (batch, 32, 32, 1)
    if images.get_shape()[3] == 1:
        # For mnist dataset, replicate the gray scale image 3 times.
        images = tf.image.grayscale_to_rgb(images)
    with tf.variable_scope('content_extractor', reuse=reuse):
        with slim.arg_scope([slim.conv2d], padding='SAME', activation_fn=None,
                            weights_initializer=tf.contrib.layers.xavier_initializer()):
            with slim.arg_scope([slim.batch_norm], decay=0.95, center=True,
                                scale=True, activation_fn=tf.nn.relu,
                                is_training=(self.mode == 'train' or self.mode == 'pretrain')):
                layers = {}
                conv1_1 = slim.conv2d(images, 64, [3, 3], stride=1, scope='conv1_1')    # (batch_size, 32, 32, 64)
                conv1_1 = slim.batch_norm(conv1_1, scope='bn1_1')
                layers['conv1_1'] = conv1_1
                conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], stride=2, scope='conv1_2')   # (batch_size, 16, 16, 64)
                conv1_2 = slim.batch_norm(conv1_2, scope='bn1_2')
                layers['conv1_2'] = conv1_2
                conv2_1 = slim.conv2d(conv1_2, 128, [3, 3], stride=1, scope='conv2_1')  # (batch_size, 16, 16, 128)
                conv2_1 = slim.batch_norm(conv2_1, scope='bn2_1')
                layers['conv2_1'] = conv2_1
                conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], stride=2, scope='conv2_2')  # (batch_size, 8, 8, 128)
                conv2_2 = slim.batch_norm(conv2_2, scope='bn2_2')
                layers['conv2_2'] = conv2_2
                conv3_1 = slim.conv2d(conv2_2, 256, [3, 3], stride=1, scope='conv3_1')  # (batch_size, 8, 8, 256)
                conv3_1 = slim.batch_norm(conv3_1, scope='bn3_1')
                layers['conv3_1'] = conv3_1
                conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], stride=2, scope='conv3_2')  # (batch_size, 4, 4, 256)
                conv3_2 = slim.batch_norm(conv3_2, scope='bn3_2')
                layers['conv3_2'] = conv3_2
                conv4_1 = slim.conv2d(conv3_2, 512, [3, 3], stride=1, scope='conv4_1')  # (batch_size, 4, 4, 512)
                conv4_1 = slim.batch_norm(conv4_1, scope='bn4_1')
                layers['conv4_1'] = conv4_1
                net = slim.conv2d(conv4_1, 512, [4, 4], stride=2, padding='VALID',
                                  scope='conv4_2')                                      # (batch_size, 1, 1, 512)
                net = slim.batch_norm(net, activation_fn=tf.nn.tanh, scope='bn4_2')
                layers['conv4_2'] = net
                if self.mode == 'pretrain':
                    net = slim.conv2d(net, self.num_classes, [1, 1],
                                      padding='VALID', scope='out')
                    net = slim.flatten(net)
                return net, layers
def resnet_base(img_batch, scope_name, is_training=True):
    if scope_name.endswith('b'):
        get_resnet_fn = get_resnet_v1_b_base
    elif scope_name.endswith('d'):
        get_resnet_fn = get_resnet_v1_d_base
    else:
        raise ValueError("scope name error: expected scope_name to end with 'b' or 'd'")

    _, feature_dict = get_resnet_fn(input_x=img_batch,
                                    scope=scope_name,
                                    bottleneck_nums=BottleNeck_NUM_DICT[scope_name],
                                    base_channels=BASE_CHANNELS_DICT[scope_name],
                                    is_training=is_training,
                                    freeze_norm=True,
                                    freeze=cfgs.FREEZE_BLOCKS)

    pyramid_dict = {}
    with tf.variable_scope('build_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY),
                            activation_fn=None,
                            normalizer_fn=None):
            P5 = slim.conv2d(feature_dict['C5'],
                             num_outputs=256, kernel_size=[1, 1], stride=1,
                             scope='build_P5')
            pyramid_dict['P5'] = P5

            for level in range(4, 2, -1):  # build [P4, P3]
                pyramid_dict['P%d' % level] = fusion_two_layer(
                    C_i=feature_dict["C%d" % level],
                    P_j=pyramid_dict["P%d" % (level + 1)],
                    scope='build_P%d' % level)

            for level in range(5, 2, -1):
                pyramid_dict['P%d' % level] = slim.conv2d(
                    pyramid_dict['P%d' % level],
                    num_outputs=256, kernel_size=[3, 3], padding="SAME",
                    stride=1, scope="fuse_P%d" % level)

            p6 = slim.conv2d(pyramid_dict['P5'] if cfgs.USE_P5 else feature_dict['C5'],
                             num_outputs=256, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p6_conv')
            pyramid_dict['P6'] = p6

            p7 = tf.nn.relu(p6, name='p6_relu')
            p7 = slim.conv2d(p7,
                             num_outputs=256, kernel_size=[3, 3], padding="SAME",
                             stride=2, scope='p7_conv')
            pyramid_dict['P7'] = p7

    # for level in range(7, 1, -1):
    #     add_heatmap(pyramid_dict['P%d' % level], name='Layer%d/P%d_heat' % (level, level))

    return pyramid_dict
def interface_unet(self, inputs, reuse=None, is_training=True):
    endpoints = {}
    with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
        with tf.variable_scope('cloud_net', 'cloud_net', [inputs], reuse=reuse):
            with tf.variable_scope('feature_exatraction'):
                nets = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3])     # 508*508*64
                endpoints['net1'] = nets
                nets = slim.max_pool2d(nets, [2, 2])                       # 254*254*64
                nets = slim.repeat(nets, 2, slim.conv2d, 128, [3, 3])      # 250*250*128
                endpoints['net2'] = nets
                nets = slim.max_pool2d(nets, [2, 2])                       # 125*125*128
                nets = slim.repeat(nets, 2, slim.conv2d, 256, [3, 3])      # 121*121*256
                endpoints['net3'] = nets
                nets = slim.max_pool2d(nets, [2, 2])                       # 61*61*256
                nets = slim.repeat(nets, 2, slim.conv2d, 512, [3, 3])      # 57*57*512
                endpoints['net4'] = nets
                nets = slim.max_pool2d(nets, [2, 2])                       # 29*29*512
                nets = slim.repeat(nets, 2, slim.conv2d, 1024, [3, 3])     # 25*25*1024
                endpoints['net5'] = nets
            with tf.variable_scope('alpha_prediction'):
                nets = endpoints['net5']
                nets = slim.conv2d_transpose(nets, 512, [3, 3], stride=2)  # 50*50*512
                nets = self.crop_and_concat(endpoints['net4'], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, 512, [3, 3])      # 46*46*512
                nets = slim.conv2d_transpose(nets, 256, [3, 3], stride=2)  # 92*92*256
                nets = self.crop_and_concat(endpoints['net3'], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, 256, [3, 3])      # 88*88*256
                nets = slim.conv2d_transpose(nets, 128, [3, 3], stride=2)  # 176*176*128
                nets = self.crop_and_concat(endpoints['net2'], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, 128, [3, 3])      # 172*172*128
                nets = slim.conv2d_transpose(nets, 64, [3, 3], stride=2)   # 344*344*64
                nets = self.crop_and_concat(endpoints['net1'], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, 64, [3, 3])       # 340*340*64
                logits = slim.conv2d(nets, self.alpha_channel, [3, 3],
                                     padding='SAME', activation_fn=None)
                alpha_logits = tf.image.resize_images(
                    logits, [self.img_size, self.img_size])
            with tf.variable_scope('reflectance_prediction'):
                nets = endpoints['net5']
                nets = slim.conv2d_transpose(nets, 512, [3, 3], stride=2)  # 50*50*512
                nets = self.crop_and_concat(endpoints['net4'], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, 512, [3, 3])      # 46*46*512
                nets = slim.conv2d_transpose(nets, 256, [3, 3], stride=2)  # 92*92*256
                nets = self.crop_and_concat(endpoints['net3'], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, 256, [3, 3])      # 88*88*256
                nets = slim.conv2d_transpose(nets, 128, [3, 3], stride=2)  # 176*176*128
                nets = self.crop_and_concat(endpoints['net2'], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, 128, [3, 3])      # 172*172*128
                nets = slim.conv2d_transpose(nets, 64, [3, 3], stride=2)   # 344*344*64
                nets = self.crop_and_concat(endpoints['net1'], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, 64, [3, 3])       # 340*340*64
                logits = slim.conv2d(nets, self.reflectance_channel, [3, 3],
                                     padding='SAME', activation_fn=None)
                reflectance_logits = tf.image.resize_images(
                    logits, [self.img_size, self.img_size])
    return alpha_logits, reflectance_logits
def interface_resnet50(self, inputs, reuse=None, is_training=False):
    endpoints = {}
    with slim.arg_scope(resnet_arg_scope(use_batch_norm=True)):
        _, resnet_endpoints = resnet_v2_50(inputs, reuse=reuse,
                                           is_training=is_training)
        endpoints['net1'] = resnet_endpoints['resnet_v2_50/block1/unit_2/bottleneck_v2']  # 128*128 256
        endpoints['net2'] = resnet_endpoints['resnet_v2_50/block2/unit_3/bottleneck_v2']  # 64*64 512
        endpoints['net3'] = resnet_endpoints['resnet_v2_50/block3/unit_5/bottleneck_v2']  # 32*32 1024
        endpoints['net4'] = resnet_endpoints['resnet_v2_50/block4/unit_3/bottleneck_v2']  # 16*16 2048
    with slim.arg_scope(self.fcn_arg_scope(is_training=is_training,
                                           normalizer_fn=None)):
        with tf.variable_scope('cloud_net', 'cloud_net', [inputs], reuse=reuse):
            with tf.variable_scope('alpha_prediction'):
                # alpha prediction
                nets = resnet_endpoints['resnet_v2_50/block4']  # 64*64*2048
                nets = (slim.conv2d_transpose(nets, 512, kernel_size=[3, 3], stride=2)
                        + resnet_endpoints['resnet_v2_50/block2/unit_2/bottleneck_v2'])
                nets = (slim.conv2d_transpose(nets, 256, kernel_size=[3, 3], stride=2)
                        + resnet_endpoints['resnet_v2_50/block1/unit_2/bottleneck_v2'])
                nets = (slim.conv2d_transpose(nets, 64, kernel_size=[3, 3], stride=2)
                        + resnet_endpoints['resnet_v2_50/conv1'])
                alpha_logits = slim.conv2d(nets, self.alpha_channel, [3, 3],
                                           scope='pred', activation_fn=None)
            with tf.variable_scope('reflectance_prediction'):
                # reflectance prediction
                nets = resnet_endpoints['resnet_v2_50/block4']  # 64*64*2048
                nets = (slim.conv2d_transpose(nets, 512, kernel_size=[3, 3], stride=2)
                        + resnet_endpoints['resnet_v2_50/block2/unit_2/bottleneck_v2'])
                nets = (slim.conv2d_transpose(nets, 256, kernel_size=[3, 3], stride=2)
                        + resnet_endpoints['resnet_v2_50/block1/unit_2/bottleneck_v2'])
                nets = (slim.conv2d_transpose(nets, 64, kernel_size=[3, 3], stride=2)
                        + resnet_endpoints['resnet_v2_50/conv1'])
                reflectance_logits = slim.conv2d(nets, self.reflectance_channel, [3, 3],
                                                 scope='pred', activation_fn=None)
    return alpha_logits, reflectance_logits
def create_model(input, landmark, phase_train, args): batch_norm_params = { 'decay': 0.995, 'epsilon': 0.001, 'updates_collections': None, #tf.GraphKeys.UPDATE_OPS, 'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES], 'is_training': phase_train } landmark_dim = int(landmark.get_shape()[-1]) features, landmarks_pre = pfld_inference(input, args.weight_decay, batch_norm_params) # loss landmarks_loss = tf.reduce_sum(tf.square(landmarks_pre - landmark), axis=1) landmarks_loss = tf.reduce_mean(landmarks_loss) # add the auxiliary net # : finish the loss function print('\nauxiliary net') with slim.arg_scope([slim.convolution2d, slim.fully_connected], \ activation_fn=tf.nn.relu,\ weights_initializer=tf.truncated_normal_initializer(stddev=0.01), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(args.weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params): pfld_input = features['auxiliary_input'] net_aux = slim.convolution2d(pfld_input, 128, [3, 3], stride=2, scope='pfld_conv1') print(net_aux.name, net_aux.get_shape()) # net = slim.max_pool2d(net, kernel_size=[3, 3], stride=1, scope='pool1', padding='SAME') net_aux = slim.convolution2d(net_aux, 128, [3, 3], stride=1, scope='pfld_conv2') print(net_aux.name, net_aux.get_shape()) net_aux = slim.convolution2d(net_aux, 32, [3, 3], stride=2, scope='pfld_conv3') print(net_aux.name, net_aux.get_shape()) net_aux = slim.convolution2d(net_aux, 128, [7, 7], stride=1, scope='pfld_conv4') print(net_aux.name, net_aux.get_shape()) net_aux = slim.max_pool2d(net_aux, kernel_size=[3, 3], stride=1, scope='pool1', padding='SAME') print(net_aux.name, net_aux.get_shape()) net_aux = slim.flatten(net_aux) print(net_aux.name, net_aux.get_shape()) fc1 = slim.fully_connected(net_aux, num_outputs=32, activation_fn=None, scope='pfld_fc1') print(fc1.name, fc1.get_shape()) euler_angles_pre = slim.fully_connected(fc1, num_outputs=3, activation_fn=None, scope='pfld_fc2') print(euler_angles_pre.name, euler_angles_pre.get_shape()) # return landmarks_loss, landmarks, heatmap_loss, HeatMaps return landmarks_pre, landmarks_loss, euler_angles_pre
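`create_model` returns the raw landmark loss and the predicted Euler angles separately. In the PFLD paper the two are combined by weighting each sample's landmark error with its head-pose error; a sketch of that combination, where `euler_angle_gt` is a hypothetical ground-truth placeholder that is not part of the code above:

# Hypothetical ground-truth pitch/yaw/roll for each sample.
euler_angle_gt = tf.placeholder(tf.float32, [None, 3], name='euler_angle_gt')
# Weight each sample by 1 - cos(angle error), summed over the three angles,
# so samples with large pose error contribute more (PFLD-style weighting).
angle_weight = tf.reduce_sum(1.0 - tf.cos(euler_angles_pre - euler_angle_gt), axis=1)
weighted_landmark_loss = tf.reduce_mean(
    angle_weight * tf.reduce_sum(tf.square(landmarks_pre - landmark), axis=1))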
def resnet_v2(inputs, blocks, num_classes=None, is_training=True, global_pool=True,
              output_stride=None, include_root_block=True, spatial_squeeze=True,
              reuse=None, scope=None):
    with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
                [slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense],
                outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if include_root_block:
                    if output_stride is not None:
                        if output_stride % 4 != 0:
                            raise ValueError('The output_stride needs to be a multiple of 4.')
                        output_stride /= 4
                    # We do not include batch normalization or activation functions in
                    # conv1 because the first ResNet unit will perform these. Cf.
                    # Appendix of [2].
                    with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None):
                        net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
                    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
                net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
                # This is needed because the pre-activation variant does not have batch
                # normalization or activation functions in the residual unit output. See
                # Appendix of [2].
                net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
                # Convert end_points_collection into a dictionary of end_points.
                end_points = slim.utils.convert_collection_to_dict(end_points_collection)
                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(input_tensor=net, axis=[1, 2], name='pool5', keepdims=True)
                    end_points['global_pool'] = net
                if num_classes:
                    # A single 1x1 convolution acts as the classifier head.
                    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                      normalizer_fn=None, scope='logits')
                    end_points[sc.name + '/logits'] = net
                    if spatial_squeeze:
                        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
                        end_points[sc.name + '/spatial_squeeze'] = net
                    end_points['predictions'] = slim.softmax(net, scope='predictions')
                return net, end_points
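For reference, a 50-layer instantiation of the function above. The block configuration follows the standard ResNet-50 unit counts, built with the stock `resnet_v2_block` helper from `tf.contrib.slim.nets`; using it here assumes the module-level `bottleneck` referenced above is the slim one:

from tensorflow.contrib.slim.nets import resnet_v2 as slim_resnet_v2

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
blocks = [
    slim_resnet_v2.resnet_v2_block('block1', base_depth=64, num_units=3, stride=2),
    slim_resnet_v2.resnet_v2_block('block2', base_depth=128, num_units=4, stride=2),
    slim_resnet_v2.resnet_v2_block('block3', base_depth=256, num_units=6, stride=2),
    slim_resnet_v2.resnet_v2_block('block4', base_depth=512, num_units=3, stride=1),
]
net, end_points = resnet_v2(inputs, blocks, num_classes=1001,
                            is_training=True, scope='resnet_v2_50')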
def pfld_inference(input, weight_decay, batch_norm_params): coefficient = 1 with tf.variable_scope('pfld_inference'): features = {} with slim.arg_scope([slim.convolution2d, slim.separable_conv2d], \ activation_fn=tf.nn.relu6,\ weights_initializer=tf.truncated_normal_initializer(stddev=0.01), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, padding='SAME'): print('PFLD input shape({}): {}'.format(input.name, input.get_shape())) # 112*112*3 conv1 = slim.convolution2d(input, 64 * coefficient, [3, 3], stride=2, scope='conv_1') print(conv1.name, conv1.get_shape()) # 56*56*64 conv2 = slim.separable_convolution2d(conv1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv2/dwise') print(conv2.name, conv2.get_shape()) # 56*56*64 conv3_1 = slim.convolution2d(conv2, 128, [1, 1], stride=2, scope='conv3_1/expand') print(conv3_1.name, conv3_1.get_shape()) conv3_1 = slim.separable_convolution2d(conv3_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_1/dwise') print(conv3_1.name, conv3_1.get_shape()) conv3_1 = slim.convolution2d(conv3_1, 64 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_1/linear') print(conv3_1.name, conv3_1.get_shape()) conv3_2 = slim.convolution2d(conv3_1, 128, [1, 1], stride=1, scope='conv3_2/expand') print(conv3_2.name, conv3_2.get_shape()) conv3_2 = slim.separable_convolution2d(conv3_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_2/dwise') print(conv3_2.name, conv3_2.get_shape()) conv3_2 = slim.convolution2d(conv3_2, 64 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_2/linear') print(conv3_2.name, conv3_2.get_shape()) block3_2 = conv3_1 + conv3_2 print(block3_2.name, block3_2.get_shape()) conv3_3 = slim.convolution2d(block3_2, 128, [1, 1], stride=1, scope='conv3_3/expand') print(conv3_3.name, conv3_3.get_shape()) conv3_3 = slim.separable_convolution2d(conv3_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_3/dwise') print(conv3_3.name, conv3_3.get_shape()) conv3_3 = slim.convolution2d(conv3_3, 64 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_3linear') print(conv3_3.name, conv3_3.get_shape()) block3_3 = block3_2 + conv3_3 print(block3_3.name, block3_3.get_shape()) conv3_4 = slim.convolution2d(block3_3, 128, [1, 1], stride=1, scope='conv3_4/expand') print(conv3_4.name, conv3_4.get_shape()) conv3_4 = slim.separable_convolution2d(conv3_4, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_4/dwise') print(conv3_4.name, conv3_4.get_shape()) conv3_4 = slim.convolution2d(conv3_4, 64 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_4/linear') print(conv3_4.name, conv3_4.get_shape()) block3_4 = block3_3 + conv3_4 print(block3_4.name, block3_4.get_shape()) conv3_5 = slim.convolution2d(block3_4, 128, [1, 1], stride=1, scope='conv3_5/expand') print(conv3_5.name, conv3_5.get_shape()) conv3_5 = slim.separable_convolution2d(conv3_5, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_5/dwise') print(conv3_5.name, conv3_5.get_shape()) conv3_5 = slim.convolution2d(conv3_5, 64 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_5/linear') print(conv3_5.name, conv3_5.get_shape()) block3_5 = block3_4 + conv3_5 print(block3_5.name, block3_5.get_shape()) features['auxiliary_input'] = block3_5 #28*28*64 conv4_1 = 
slim.convolution2d(block3_5, 128, [1, 1], stride=2, scope='conv4_1/expand') print(conv4_1.name, conv4_1.get_shape()) conv4_1 = slim.separable_convolution2d(conv4_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv4_1/dwise') print(conv4_1.name, conv4_1.get_shape()) conv4_1 = slim.convolution2d(conv4_1, 128 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv4_1/linear') print(conv4_1.name, conv4_1.get_shape()) #14*14*128 conv5_1 = slim.convolution2d(conv4_1, 512, [1, 1], stride=1, scope='conv5_1/expand') print(conv5_1.name, conv5_1.get_shape()) conv5_1 = slim.separable_convolution2d(conv5_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_1/dwise') print(conv5_1.name, conv5_1.get_shape()) conv5_1 = slim.convolution2d(conv5_1, 128 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_1/linear') print(conv5_1.name, conv5_1.get_shape()) conv5_2 = slim.convolution2d(conv5_1, 512, [1, 1], stride=1, scope='conv5_2/expand') print(conv5_2.name, conv5_2.get_shape()) conv5_2 = slim.separable_convolution2d(conv5_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_2/dwise') print(conv5_2.name, conv5_2.get_shape()) conv5_2 = slim.convolution2d(conv5_2, 128 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_2/linear') print(conv5_2.name, conv5_2.get_shape()) block5_2 = conv5_1 + conv5_2 print(block5_2.name, block5_2.get_shape()) conv5_3 = slim.convolution2d(block5_2, 512, [1, 1], stride=1, scope='conv5_3/expand') print(conv5_3.name, conv5_3.get_shape()) conv5_3 = slim.separable_convolution2d(conv5_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_3/dwise') print(conv5_3.name, conv5_3.get_shape()) conv5_3 = slim.convolution2d(conv5_3, 128 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_3/linear') print(conv5_3.name, conv5_3.get_shape()) block5_3 = block5_2 + conv5_3 print(block5_3.name, block5_3.get_shape()) conv5_4 = slim.convolution2d(block5_3, 512, [1, 1], stride=1, scope='conv5_4/expand') print(conv5_4.name, conv5_4.get_shape()) conv5_4 = slim.separable_convolution2d(conv5_4, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_4/dwise') print(conv5_4.name, conv5_4.get_shape()) conv5_4 = slim.convolution2d(conv5_4, 128 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_4/linear') print(conv5_4.name, conv5_4.get_shape()) block5_4 = block5_3 + conv5_4 print(block5_4.name, block5_4.get_shape()) conv5_5 = slim.convolution2d(block5_4, 512, [1, 1], stride=1, scope='conv5_5/expand') print(conv5_5.name, conv5_5.get_shape()) conv5_5 = slim.separable_convolution2d(conv5_5, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_5/dwise') print(conv5_5.name, conv5_5.get_shape()) conv5_5 = slim.convolution2d(conv5_5, 128 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_5/linear') print(conv5_5.name, conv5_5.get_shape()) block5_5 = block5_4 + conv5_5 print(block5_5.name, block5_5.get_shape()) conv5_6 = slim.convolution2d(block5_5, 512, [1, 1], stride=1, scope='conv5_6/expand') print(conv5_6.name, conv5_6.get_shape()) conv5_6 = slim.separable_convolution2d(conv5_6, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_6/dwise') print(conv5_6.name, conv5_6.get_shape()) conv5_6 = slim.convolution2d(conv5_6, 128 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_6/linear') print(conv5_6.name, 
conv5_6.get_shape()) block5_6 = block5_5 + conv5_6 print(block5_6.name, block5_6.get_shape()) #14*14*128 conv6_1 = slim.convolution2d(block5_6, 256, [1, 1], stride=1, scope='conv6_1/expand') print(conv6_1.name, conv6_1.get_shape()) conv6_1 = slim.separable_convolution2d(conv6_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv6_1/dwise') print(conv6_1.name, conv6_1.get_shape()) conv6_1 = slim.convolution2d(conv6_1, 16 * coefficient, [1, 1], stride=1, activation_fn=None, scope='conv6_1/linear') print(conv6_1.name, conv6_1.get_shape()) #14*14*16 conv7 = slim.convolution2d(conv6_1, 32 * coefficient, [3, 3], stride=2, scope='conv7') print(conv7.name, conv7.get_shape()) #7*7*32 conv8 = slim.convolution2d(conv7, 128 * coefficient, [7, 7], stride=1, scope='conv8', padding='VALID') print(conv8.name, conv8.get_shape()) avg_pool1 = slim.avg_pool2d( conv6_1, [conv6_1.get_shape()[1], conv6_1.get_shape()[2]], stride=1) print(avg_pool1.name, avg_pool1.get_shape()) avg_pool2 = slim.avg_pool2d( conv7, [conv7.get_shape()[1], conv7.get_shape()[2]], stride=1) print(avg_pool2.name, avg_pool2.get_shape()) s1 = slim.flatten(avg_pool1) s2 = slim.flatten(avg_pool2) #1*1*128 s3 = slim.flatten(conv8) multi_scale = tf.concat([s1, s2, s3], 1) landmarks = slim.fully_connected(multi_scale, num_outputs=196, activation_fn=None, scope='fc') return features, landmarks
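Every `convN_M` triple in `pfld_inference` is a MobileNetV2-style inverted residual: a 1x1 expansion, a 3x3 depthwise convolution, and a 1x1 linear projection, with a skip connection whenever the block keeps resolution and channel count. The repetition could be folded into one helper; a sketch with illustrative names, intended to be called inside the same `slim.arg_scope` as above:

def inverted_residual(net, expand_ch, out_ch, stride, scope):
    """Expand -> depthwise -> linear projection, with a residual connection
    when the block preserves resolution and channels (a sketch)."""
    with tf.variable_scope(scope):
        branch = slim.convolution2d(net, expand_ch, [1, 1], stride=stride, scope='expand')
        branch = slim.separable_convolution2d(branch, None, [3, 3], depth_multiplier=1,
                                              stride=1, scope='dwise')
        branch = slim.convolution2d(branch, out_ch, [1, 1], stride=1,
                                    activation_fn=None, scope='linear')
        if stride == 1 and net.get_shape().as_list()[-1] == out_ch:
            return net + branch
        return branch

# e.g. the conv3_2/block3_2 pair above would collapse to:
# net = inverted_residual(conv3_1, 128, 64 * coefficient, stride=1, scope='conv3_2')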
def build_network(self, sess, is_training=True): # pyramid network scales changes at different levels of pyramid self._anchor_scales = {} # select initializers if cfg.TRAIN.TRUNCATED: initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01) initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001) else: initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01) initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001) bottleneck = resnet_v1.bottleneck # choose different blocks for different number of layers if self._num_layers == 50: blocks = [ resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), # Use stride-1 for the last conv4 layer resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]), resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3) ] elif self._num_layers == 101: blocks = [ resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), # Use stride-1 for the last conv4 layer resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]), resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3) ] elif self._num_layers == 152: blocks = [ resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]), # Use stride-1 for the last conv4 layer resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]), resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3) ] else: # other numbers are not supported raise NotImplementedError assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 4) if cfg.RESNET.FIXED_BLOCKS == 4: with slim.arg_scope(resnet_arg_scope(is_training=False)): net = self.build_base() net_conv4, endpoints = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS], global_pool=False, include_root_block=False, scope=self._resnet_scope) elif cfg.RESNET.FIXED_BLOCKS > 0: with slim.arg_scope(resnet_arg_scope(is_training=False)): net = self.build_base() net, endpoints = resnet_v1.resnet_v1(net, blocks[0:cfg.RESNET.FIXED_BLOCKS], global_pool=False, include_root_block=False, scope=self._resnet_scope) with slim.arg_scope(resnet_arg_scope(is_training=is_training)): net_conv4, endpoints = resnet_v1.resnet_v1(net, blocks[cfg.RESNET.FIXED_BLOCKS:-1], global_pool=False, include_root_block=False, scope=self._resnet_scope) else: # cfg.RESNET.FIXED_BLOCKS == 0 with slim.arg_scope(resnet_arg_scope(is_training=is_training)): net = self.build_base() net_conv4, endpoints = resnet_v1.resnet_v1(net, blocks[0:-1], global_pool=False, include_root_block=False, scope=self._resnet_scope) pyramid = self.build_pyramid(endpoints) self._layers['head'] = net_conv4 # not sure what to do with this with tf.variable_scope(self._resnet_scope, self._resnet_scope): for i in range(5, 1, -1): p = i self._act_summaries.append(pyramid[p]) self._feat_stride[p] = [2 ** i] shape = tf.shape(pyramid[p]) h, w = shape[1], shape[2] # in the paper they use only one anchor per layer of pyramid. But when I tried that we were frequently receiving no overlaps in anchor_target_proposal(...) 
self._anchor_scales[p] = [2**(i-j) for j in range(self._num_scales-1, -1, -1)] self._anchor_component(p, h, w) # build the anchors for the image # rpn rpn = slim.conv2d(pyramid[p], 256, [3, 3], trainable=is_training, weights_initializer=initializer, scope="rpn_conv/3x3", stride=1) self._act_summaries.append(rpn) rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_cls_score') # change it so that the score has 2 as its channel size rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape') rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape") rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob") rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_bbox_pred') if is_training: rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p) rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor", p) # Try to have a determinestic order for the computing graph, for reproducibility with tf.control_dependencies([rpn_labels]): rois, roi_scores = self._proposal_target_layer(rois, roi_scores, "rpn_rois", p) else: if cfg.TEST.MODE == 'nms': rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p) elif cfg.TEST.MODE == 'top': rois, roi_scores = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p) else: raise NotImplementedError self._predictions[p]['rois'] = rois self._predictions[p]['roi_scores'] = roi_scores self._predictions[p]['rpn_cls_score'] = rpn_cls_score self._predictions[p]['rpn_cls_score_reshape'] = rpn_cls_score_reshape self._predictions[p]['rpn_cls_prob'] = rpn_cls_prob self._predictions[p]['rpn_bbox_pred'] = rpn_bbox_pred all_roi_scores = tf.concat(values=[self._predictions[p]['roi_scores'] for p in pyramid], axis=0) all_rois = tf.concat(values=[self._predictions[p]['rois'] for p in pyramid], axis=0) p_vals = [tf.fill([tf.shape(self._predictions[p]['roi_scores'])[0], 1], float(p)) for p in pyramid] p_roi = tf.concat(values=[tf.reshape(p_vals, [-1, 1]), all_rois], axis=1) if is_training: all_proposal_target_labels = tf.concat(values=[self._proposal_targets[p]['labels'] for p in pyramid], axis=0) all_proposal_target_bbox = tf.concat(values=[self._proposal_targets[p]['bbox_targets'] for p in pyramid], axis=0) all_proposal_target_inside_w = tf.concat(values=[self._proposal_targets[p]['bbox_inside_weights'] for p in pyramid], axis=0) all_proposal_target_outside_w = tf.concat(values=[self._proposal_targets[p]['bbox_outside_weights'] for p in pyramid], axis=0) cfg_key = self._mode if type(cfg_key) == bytes: cfg_key = cfg_key.decode('utf-8') nms_top_n = all_roi_scores.shape[0] \ if all_roi_scores.shape[0] < cfg[cfg_key].RPN_POST_NMS_TOP_N \ else cfg[cfg_key].RPN_POST_NMS_TOP_N _, top_indices = tf.nn.top_k(tf.reshape(all_roi_scores, [-1]), k=nms_top_n) p_roi = tf.gather(p_roi, top_indices) [assigned_rois, _, _] = \ assign_boxes(all_rois, [all_rois, top_indices], [2, 3, 4, 5], 'assign_boxes') for p in range(5, 1, -1): splitted_rois = assigned_rois[p-2] # rcnn if cfg.POOLING_MODE == 'crop': cropped_roi = self._crop_pool_layer(pyramid[p], splitted_rois, "cropped_roi", p) self._predictions[p]['cropped_roi'] = cropped_roi else: raise NotImplementedError cropped_rois = [self._predictions[p_layer]['cropped_roi'] for 
p_layer in self._predictions] cropped_rois = tf.concat(values=cropped_rois, axis=0) cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME') refine = slim.flatten(cropped_regions) refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu) refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training) refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu) refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training) cls_score = slim.fully_connected(refine, self._num_classes, activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.01)) cls_prob = self._softmax_layer(cls_score, "cls_prob") bbox_pred = slim.fully_connected(refine, self._num_classes*4, activation_fn=my_sigmoid, weights_initializer=tf.truncated_normal_initializer(stddev=0.001)) self._predictions["cls_score"] = cls_score self._predictions["cls_prob"] = cls_prob self._predictions["bbox_pred"] = bbox_pred self._predictions["rois"] = tf.gather(all_rois, top_indices) if is_training: self._proposal_targets['labels'] = all_proposal_target_labels self._proposal_targets['bbox_targets'] = all_proposal_target_bbox self._proposal_targets['bbox_inside_weights'] = all_proposal_target_inside_w self._proposal_targets['bbox_outside_weights'] = all_proposal_target_outside_w #self._score_summaries.update(self._predictions) # score summaries not compatible w/ dict return self._predictions["rois"], cls_prob, bbox_pred
def get_model(self, inputs, weight_decay=0.0005, is_training=False): # End_points collect relevant activations for external use. arg_scope = self.__arg_scope(weight_decay=weight_decay) self.img_shape = tfe.get_shape(inputs)[1:3] with slim.arg_scope(arg_scope): end_points = {} channels = {} with tf.variable_scope('vgg_16', [inputs]): # Original VGG-16 blocks. net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') end_points['block1'] = net net = slim.max_pool2d(net, [2, 2], scope='pool1') # Block 2. net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') end_points['block2'] = net net = slim.max_pool2d(net, [2, 2], scope='pool2') # Block 3. net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') end_points['block3'] = net channels['block3'] = 256 self.layer_shape.append(tfe.get_shape(net)[1:3]) net = slim.max_pool2d(net, [2, 2], scope='pool3') # Block 4. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') end_points['block4'] = net channels['block4'] = 512 self.layer_shape.append(tfe.get_shape(net)[1:3]) net = slim.max_pool2d(net, [2, 2], scope='pool4') # Block 5. net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') end_points['block5'] = net channels['block5'] = 512 self.layer_shape.append(tfe.get_shape(net)[1:3]) net = slim.max_pool2d(net, [2, 2], scope='pool5') # Additional SSD blocks. #with slim.arg_scope([slim.conv2d], #activation_fn=None): #with slim.arg_scope([slim.batch_norm], #activation_fn=tf.nn.relu, is_training=is_training,updates_collections=None): #with slim.arg_scope([slim.dropout], #is_training=is_training,keep_prob=0.8): with tf.variable_scope(self.model_name): return self.__additional_ssd_block(end_points, channels, net, is_training=is_training)
def interface_cloudMattingNet(self, inputs, reuse=None, is_training=True): endpoints = {} with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)): with tf.variable_scope('cloud_net', 'cloud_net', [inputs], reuse=reuse): with tf.variable_scope('feature_exatraction'): nets = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') endpoints['net1'] = nets nets = slim.conv2d(nets, 64, [3, 3], stride=2, scope='pool1') nets = slim.repeat(nets, 2, slim.conv2d, 128, [3, 3], scope='conv2') endpoints['net2'] = nets nets = slim.conv2d(nets, 128, [3, 3], stride=2, scope='pool2') nets = slim.repeat(nets, 2, slim.conv2d, 128, [3, 3], scope='conv3') endpoints['net3'] = nets nets = slim.conv2d(nets, 128, [3, 3], stride=2, scope='pool3') nets = slim.repeat(nets, 2, slim.conv2d, 256, [3, 3], scope='conv4') endpoints['net4'] = nets nets = slim.conv2d(nets, 256, [3, 3], stride=2, scope='pool4') nets = slim.repeat(nets, 2, slim.conv2d, 512, [3, 3], scope='conv5') endpoints['net5'] = nets nets = slim.conv2d(nets, 512, [3, 3], stride=2, scope='pool5') nets = slim.repeat(nets, 2, slim.conv2d, 512, [3, 3], scope='conv6') endpoints['net6'] = nets nets = slim.conv2d(nets, 512, [3, 3], stride=2, scope='pool6') nets = slim.conv2d(nets, 512, [3, 3], scope='conv7') endpoints['net7'] = nets with tf.variable_scope('alpha_prediction'): # alpha prediction nets = endpoints['net7'] nets = slim.conv2d_transpose( nets, 512, [3, 3], stride=2, scope='conv_trans1') + endpoints['net6'] nets = slim.conv2d_transpose( nets, 512, [3, 3], stride=2, scope='conv_trans2') + endpoints['net5'] nets = slim.conv2d_transpose( nets, 256, [3, 3], stride=2, scope='conv_trans3') + endpoints['net4'] nets = slim.conv2d_transpose( nets, 128, [3, 3], stride=2, scope='conv_trans4') + endpoints['net3'] nets = slim.conv2d_transpose( nets, 128, [3, 3], stride=2, scope='conv_trans5') + endpoints['net2'] nets = slim.conv2d_transpose( nets, 64, [3, 3], stride=2, scope='conv_trans6') + endpoints['net1'] alpha_logits = slim.conv2d(nets, self.alpha_channel, [3, 3], scope='pred', activation_fn=None) with tf.variable_scope('reflectance_prediction'): # reflectance prediction nets = endpoints['net7'] nets = slim.conv2d_transpose( nets, 512, [3, 3], stride=2, scope='conv_trans1') + endpoints['net6'] nets = slim.conv2d_transpose( nets, 512, [3, 3], stride=2, scope='conv_trans2') + endpoints['net5'] nets = slim.conv2d_transpose( nets, 256, [3, 3], stride=2, scope='conv_trans3') + endpoints['net4'] nets = slim.conv2d_transpose( nets, 128, [3, 3], stride=2, scope='conv_trans4') + endpoints['net3'] nets = slim.conv2d_transpose( nets, 128, [3, 3], stride=2, scope='conv_trans5') + endpoints['net2'] nets = slim.conv2d_transpose( nets, 64, [3, 3], stride=2, scope='conv_trans6') + endpoints['net1'] reflectance_logits = slim.conv2d(nets, self.reflectance_channel, [3, 3], scope='pred', activation_fn=None) return alpha_logits, reflectance_logits
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1])

# LeNet variant:
# - replaced sigmoid with ReLU
# - added dropout
keep_prob = tf.placeholder(tf.float32)

with slim.arg_scope([slim.conv2d], padding='SAME',
                    weights_initializer=tf.contrib.layers.xavier_initializer(),  # the slim.conv2d default
                    weights_regularizer=slim.l2_regularizer(0.005)):
    # conv1: 28*28*1 -> 28*28*16 (SAME padding, stride 1)
    # pool1: 2x2, stride 1, VALID -> 27*27*16
    conv1 = slim.conv2d(x_image, 16, [3, 3], stride=1, scope='conv1')
    pool1 = slim.max_pool2d(conv1, [2, 2], stride=1, scope='pool1')
    # lrn1 = tf.nn.lrn(pool1, 2, 1, 1e-4, 0.75, name='lrn1')
    # conv2: 27*27*16 -> 27*27*64; pool2: -> 26*26*64
    conv2 = slim.conv2d(pool1, 64, [3, 3], stride=1, scope='conv2')
    pool2 = slim.max_pool2d(conv2, [2, 2], stride=1, scope='pool2')
    # lrn2 = tf.nn.lrn(pool2, 2, 1, 1e-4, 0.75, name='lrn2')
    # conv3: 26*26*64 -> 13*13*384 (2x2 kernel, stride 2)
    conv3 = slim.conv2d(pool2, 384, [2, 2], stride=2, scope='conv3')
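The snippet breaks off after conv3. A minimal sketch of a typical head and training loop for it; the 500-unit hidden layer, learning rate, and step count are assumptions, not the author's values:

# Head: flatten, one hidden FC layer with dropout, then a 10-way softmax.
flat = slim.flatten(conv3, scope='flatten')
fc1 = slim.fully_connected(flat, 500, scope='fc1')
fc1_drop = tf.nn.dropout(fc1, keep_prob)
logits = slim.fully_connected(fc1_drop, 10, activation_fn=None, scope='logits')

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

tf.global_variables_initializer().run()
for i in range(2000):
    batch_xs, batch_ys = mnist.train.next_batch(50)
    train_step.run(feed_dict={x: batch_xs, y: batch_ys, keep_prob: 0.5})
print(accuracy.eval(feed_dict={x: mnist.test.images,
                               y: mnist.test.labels, keep_prob: 1.0}))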
crop_size = [224, 224]
batch_size = 120
output_size = 1001
mean_file = './input/meanIm.npy'
train_dataset = './input/train_by_hotel.txt'
train_data = CombinatorialTripletSet(train_dataset, mean_file, img_size, crop_size,
                                     isTraining=False)

image_batch = tf.placeholder(tf.float32,
                             shape=[None, crop_size[0], crop_size[1], 3])

print("Preparing network...")
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    _, layers = resnet_v2.resnet_v2_50(image_batch, num_classes=output_size,
                                       is_training=False)
featLayer = 'resnet_v2_50/logits'
feat = tf.squeeze(tf.nn.l2_normalize(layers[featLayer], 3))

c = tf.ConfigProto()
c.gpu_options.visible_device_list = str(whichGPU)
sess = tf.Session(config=c)
saver = tf.train.Saver()
saver.restore(sess, pretrained_net)

train_ims = []
train_classes = []
for ims, cls in zip(train_data.files, train_data.classes):
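The loop is cut off here. A plausible completion, under the assumption that `CombinatorialTripletSet` exposes a `getBatchFromImageList` helper (an assumed name, as is the `np` import):

import numpy as np

# Flatten the per-class file lists, then run the network over the images in
# batches and stack the embeddings. `getBatchFromImageList` is an assumed
# helper, not confirmed by the snippet above.
for ims, cls in zip(train_data.files, train_data.classes):
    train_ims.extend(ims)
    train_classes.extend([cls] * len(ims))

train_feats = []
for start in range(0, len(train_ims), batch_size):
    batch_ims = train_data.getBatchFromImageList(train_ims[start:start + batch_size])
    train_feats.append(sess.run(feat, feed_dict={image_batch: batch_ims}))
train_feats = np.vstack(train_feats)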
def alexnet_model(inputs, is_training=True, augmentation_function=None, emb_size=128, l2_weight=1e-4, img_shape=None, new_shape=None, image_summary=False, batch_norm_decay=0.99): """Mostly identical to slim.nets.alexnt, except for the reverted fc layers""" from tensorflow.contrib import layers from tensorflow.contrib.framework.python.ops import arg_scope from tensorflow.contrib.layers.python.layers import layers as layers_lib from tensorflow.contrib.layers.python.layers import regularizers from tensorflow.python.ops import init_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import variable_scope trunc_normal = lambda stddev: init_ops.truncated_normal_initializer( 0.0, stddev) def alexnet_v2_arg_scope(weight_decay=0.0005): with arg_scope( [layers.conv2d, layers_lib.fully_connected], activation_fn=nn_ops.relu, biases_initializer=init_ops.constant_initializer(0.1), weights_regularizer=regularizers.l2_regularizer(weight_decay)): with arg_scope([layers.conv2d], padding='SAME'): with arg_scope([layers_lib.max_pool2d], padding='VALID') as arg_sc: return arg_sc def alexnet_v2(inputs, is_training=True, emb_size=4096, dropout_keep_prob=0.5, scope='alexnet_v2'): inputs = tf.cast(inputs, tf.float32) if new_shape is not None: shape = new_shape inputs = tf.image.resize_images( inputs, tf.constant(new_shape[:2]), method=tf.image.ResizeMethod.BILINEAR) else: shape = img_shape if is_training and augmentation_function is not None: inputs = augmentation_function(inputs, shape) if image_summary: tf.summary.image('Inputs', inputs, max_outputs=3) net = inputs mean = tf.reduce_mean(net, [1, 2], True) std = tf.reduce_mean(tf.square(net - mean), [1, 2], True) net = (net - mean) / (std + 1e-5) inputs = net with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: end_points_collection = sc.original_name_scope + '_end_points' # Collect outputs for conv2d, fully_connected and max_pool2d. with arg_scope([ layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d ], outputs_collections=[end_points_collection]): net = layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1') net = layers.conv2d(net, 192, [5, 5], scope='conv2') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2') net = layers.conv2d(net, 384, [3, 3], scope='conv3') net = layers.conv2d(net, 384, [3, 3], scope='conv4') net = layers.conv2d(net, 256, [3, 3], scope='conv5') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5') net = slim.flatten(net, scope='flatten') # Use conv2d instead of fully_connected layers. with arg_scope( [slim.fully_connected], weights_initializer=trunc_normal(0.005), biases_initializer=init_ops.constant_initializer(0.1)): net = layers.fully_connected(net, 4096, scope='fc6') net = layers_lib.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6') net = layers.fully_connected(net, emb_size, scope='fc7') return net with slim.arg_scope(alexnet_v2_arg_scope()): return alexnet_v2(inputs, is_training, emb_size)
def inception_v3_base(inputs, scope=None):
    end_points = {}
    with tf.variable_scope(scope, "Inception_v3", [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding="VALID"):
            net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=2, scope="Conv2d_1a_3x3")
            net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], scope="Conv2d_2a_3x3")
            net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], padding="SAME", scope="Conv2d_2b_3x3")
            net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="MaxPool_3a_3x3")
            net = slim.conv2d(net, num_outputs=80, kernel_size=[1, 1], scope="Conv2d_3b_1x1")
            net = slim.conv2d(net, num_outputs=192, kernel_size=[3, 3], scope="Conv2d_4a_3x3")
            net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="MaxPool_5a_3x3")

        # First group of Inception modules
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                            stride=1, padding="SAME"):
            # First Inception module of the first group
            with tf.variable_scope("Mixed_5b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=48, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=64, kernel_size=[5, 5], scope="Conv2d_0b_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=32, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Second Inception module of the first group
            with tf.variable_scope("Mixed_5c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=48, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=64, kernel_size=[5, 5], scope="Conv2d_0c_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Third Inception module of the first group
            with tf.variable_scope("Mixed_5d"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=48, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=64, kernel_size=[5, 5], scope="Conv2d_0c_5x5")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = slim.conv2d(batch_2, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0c_3x3")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # First Inception module of the second group
            with tf.variable_scope("Mixed_6a"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=384, kernel_size=[3, 3], stride=2, padding="VALID", scope="Conv2d_1a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=64, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=96, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_1 = slim.conv2d(batch_1, num_outputs=96, kernel_size=[3, 3], stride=2, padding="VALID", scope="Conv2d_1a_1x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, padding="VALID", scope="MaxPool_1a_3x3")
                net = tf.concat([batch_0, batch_1, batch_2], 3)

            # Second Inception module of the second group
            with tf.variable_scope("Mixed_6b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=128, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=128, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=128, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=128, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Third Inception module of the second group
            with tf.variable_scope("Mixed_6c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Fourth Inception module of the second group
            with tf.variable_scope("Mixed_6d"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Fifth Inception module of the second group
            with tf.variable_scope("Mixed_6e"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=160, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0b_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[1, 7], scope="Conv2d_0c_1x7")
                    batch_2 = slim.conv2d(batch_2, num_outputs=160, kernel_size=[7, 1], scope="Conv2d_0d_7x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0e_1x7")
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)
            end_points["Mixed_6e"] = net

            # First Inception module of the third group
            with tf.variable_scope("Mixed_7a"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_0 = slim.conv2d(batch_0, num_outputs=320, kernel_size=[3, 3], stride=2, padding="VALID", scope="Conv2d_1a_3x3")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[1, 7], scope="Conv2d_0b_1x7")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[7, 1], scope="Conv2d_0c_7x1")
                    batch_1 = slim.conv2d(batch_1, num_outputs=192, kernel_size=[3, 3], stride=2, padding="VALID", scope="Conv2d_1a_3x3")
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, padding="VALID", scope="MaxPool_1a_3x3")
                net = tf.concat([batch_0, batch_1, batch_2], 3)

            # Second Inception module of the third group
            with tf.variable_scope("Mixed_7b"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=320, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=384, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = tf.concat([
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0b_1x3"),
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0b_3x1")], axis=3)
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=448, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = tf.concat([
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0c_1x3"),
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0d_3x1")], axis=3)
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

            # Third Inception module of the third group
            with tf.variable_scope("Mixed_7c"):
                with tf.variable_scope("Branch_0"):
                    batch_0 = slim.conv2d(net, num_outputs=320, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                with tf.variable_scope("Branch_1"):
                    batch_1 = slim.conv2d(net, num_outputs=384, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_1 = tf.concat([
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0b_1x3"),
                        slim.conv2d(batch_1, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0b_3x1")], axis=3)
                with tf.variable_scope("Branch_2"):
                    batch_2 = slim.conv2d(net, num_outputs=448, kernel_size=[1, 1], scope="Conv2d_0a_1x1")
                    batch_2 = slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 3], scope="Conv2d_0b_3x3")
                    batch_2 = tf.concat([
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[1, 3], scope="Conv2d_0c_1x3"),
                        slim.conv2d(batch_2, num_outputs=384, kernel_size=[3, 1], scope="Conv2d_0d_3x1")], axis=3)
                with tf.variable_scope("Branch_3"):
                    batch_3 = slim.avg_pool2d(net, kernel_size=[3, 3], scope="AvgPool_0a_3x3")
                    batch_3 = slim.conv2d(batch_3, num_outputs=192, kernel_size=[1, 1], scope="Conv2d_0b_1x1")
                net = tf.concat([batch_0, batch_1, batch_2, batch_3], 3)

    return net, end_points
def get_model(model_in, dropout_keeprate_node, train_config, scope): net = model_in with tf.variable_scope(name_or_scope=scope, values=[model_in]): # batch norm arg_scope with slim.arg_scope([train_config.normalizer_fn], decay=train_config.batch_norm_decay, fused=train_config.batch_norm_fused, is_training=train_config.is_trainable, activation_fn=train_config.activation_fn): if train_config.normalizer_fn == None: conv_activation_fn = train_config.activation_fn else: conv_activation_fn = None # max_pool arg_scope with slim.arg_scope([slim.max_pool2d], stride=model_config['maxpool_stride'], kernel_size=model_config['maxpool_ksize'], padding='VALID'): # convolutional layer arg_scope with slim.arg_scope( [slim.conv2d], kernel_size=model_config['conv_ksize'], stride=model_config['conv_stride'], weights_initializer=train_config.weights_initializer, weights_regularizer=train_config.weights_regularizer, biases_initializer=train_config.biases_initializer, trainable=train_config.is_trainable, activation_fn=conv_activation_fn, normalizer_fn=train_config.normalizer_fn): net = slim.conv2d(inputs=net, num_outputs=model_chout_num['c1'], padding='SAME', scope='c1_conv') net = slim.max_pool2d(inputs=net, scope='s2_pool') net = slim.conv2d(inputs=net, num_outputs=model_chout_num['c3'], padding='VALID', scope='c3_conv') net = slim.max_pool2d(inputs=net, scope='s4_pool') net = slim.conv2d(inputs=net, num_outputs=model_chout_num['c5'], padding='VALID', scope='c5_conv') # output layer by fully-connected layer with slim.arg_scope([slim.fully_connected], trainable=train_config.is_trainable): with slim.arg_scope([slim.dropout], keep_prob=dropout_keeprate_node, is_training=train_config.is_trainable): net = slim.fully_connected( inputs=net, num_outputs=model_chout_num['f6'], activation_fn=train_config.activation_fn, scope='f6_fc') net = slim.dropout(inputs=net, scope='f6_dropout') net = slim.fully_connected(inputs=net, num_outputs=model_chout_num['out'], activation_fn=None, scope='out_fc') out_logit = slim.dropout(inputs=net, scope='out_dropout') out_logit = tf.reshape(out_logit, shape=[-1, model_chout_num['out']]) return out_logit
def mnist_model_dropout(inputs,
                        is_training=True,
                        emb_size=128,
                        l2_weight=1e-3,
                        batch_norm_decay=None,
                        img_shape=None,
                        new_shape=None,
                        dropout_keep_prob=0.8,
                        augmentation_function=None,
                        image_summary=False):  # pylint: disable=unused-argument
    """Construct the image-to-embedding vector model."""
    inputs = tf.cast(inputs, tf.float32)  # / 255.0
    if new_shape is not None:
        shape = new_shape
        inputs = tf.image.resize_images(inputs,
                                        tf.constant(new_shape[:2]),
                                        method=tf.image.ResizeMethod.BILINEAR)
    else:
        shape = img_shape
    net = inputs
    # Both map_fn results must be assigned back, otherwise the augmentation
    # and per-image standardization are silently dropped from the graph.
    if is_training and augmentation_function is not None:
        net = tf.map_fn(lambda frame: augmentation_function(frame), net)
    if augmentation_function is not None:
        net = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame), net)
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.elu,
                        weights_regularizer=slim.l2_regularizer(l2_weight)):
        with slim.arg_scope([slim.dropout], is_training=is_training):
            net = slim.conv2d(net, 32, [3, 3], scope='conv1_1')
            net = slim.conv2d(net, 32, [3, 3], scope='conv1_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 14
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout1')
            net = slim.conv2d(net, 64, [3, 3], scope='conv2_1')
            net = slim.conv2d(net, 64, [3, 3], scope='conv2_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 7
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout2')
            net = slim.conv2d(net, 128, [3, 3], scope='conv3_1')
            net = slim.conv2d(net, 128, [3, 3], scope='conv3_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 3
            net = slim.flatten(net, scope='flatten')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout3')
            emb = slim.fully_connected(net, emb_size, scope='fc1')
    return emb
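`augmentation_function` is supplied by the caller and is mapped over individual frames by the `tf.map_fn` call above. A minimal compatible example; the flip/pad/crop choices and the 28x28x1 shape are illustrative assumptions:

def simple_augment(image):
    """Per-image augmentation for 28x28x1 inputs (illustrative choices)."""
    image = tf.image.random_flip_left_right(image)
    image = tf.pad(image, [[2, 2], [2, 2], [0, 0]], mode='REFLECT')
    image = tf.random_crop(image, [28, 28, 1])
    return image

# `images` is a hypothetical batch tensor of shape [N, 28, 28, 1].
emb = mnist_model_dropout(images, is_training=True,
                          augmentation_function=simple_augment)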
def yolo2(net, is_training, num_anchors, classes, channel=32, name='yolo2'): def batch_norm(net): net = slim.batch_norm(net, center=True, scale=True, epsilon=1e-5, is_training=is_training) return net # Use 1*1 filters to compress the feature representation between 3*3 convolutions # Use batch normalization to stabilize training, speed up convergence, regularize the model with tf.variable_scope(name): with slim.arg_scope([slim.layers.conv2d], kernel_size=[3, 3], stride=1, padding='SAME', normalizer_fn=batch_norm, activation_fn=leaky_relu), slim.arg_scope( [slim.layers.max_pool2d], kernel_size=[2, 2], stride=2, padding='SAME'): layer_index = 0 for _ in range(2): net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) print(net.get_shape().as_list()) net = slim.layers.max_pool2d(net, scope='max_pool2d_%d' % layer_index) print(net.get_shape().as_list()) channel *= 2 layer_index += 1 # channel=128, layer_index=2 for _ in range(2): net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) print(net.get_shape().as_list()) layer_index += 1 net = slim.layers.conv2d(net, channel / 2, kernel_size=[1, 1], scope='conv2d_%d' % layer_index) print(net.get_shape().as_list()) layer_index += 1 net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) print(net.get_shape().as_list()) net = slim.layers.max_pool2d(net, scope='max_pool2d_%d' % layer_index) print(net.get_shape().as_list()) layer_index += 1 channel *= 2 # channel=512 net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel / 2, kernel_size=[1, 1], scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel / 2, kernel_size=[1, 1], scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) print(net.get_shape().as_list()) ''' For passthrough, we copy 26*26 resolution, (26,26,512) For localizing smaller objects, simply adding a passthrough layer that brings features from an earlier layer ''' pt = tf.identity(net, name='passthrough') net = slim.layers.max_pool2d(net, scope='max_pool2d_%d' % layer_index) layer_index += 1 channel *= 2 # channel=1024 net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel / 2, kernel_size=[1, 1], scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel / 2, kernel_size=[1, 1], scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) # Add three 3*3 convoultional layers with 1024 filters net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) layer_index += 1 print(net.get_shape().as_list()) # passthrough layer concatenates (26,26,512) with (13,13,1024) by stacking adjacent features into different channels instead of spatial location ''' 
(6,6) -> (3,2,3,2) -> (3,3,4) [[1,2,3,4,5,6], [[[1,2,5,6], [6,5,4,3,2,1], [3,4,4,3], [1,2,3,4,5,6], [5,6,2,1]]], [6,5,4,3,2,1], -> ... [1,2,3,4,5,6], [[3,4,4,3], [6,5,4,3,2,1]] [5,6,2,1]] ''' pt_shape = pt.get_shape().as_list() print('passthrough', pt_shape) with tf.name_scope('pass_through'): pt_net = tf.reshape(pt, [ pt_shape[0], int(pt_shape[1] / 2), 2, int(pt_shape[2] / 2), 2, pt_shape[3] ]) pt_net = tf.transpose(pt_net, [0, 1, 3, 2, 4, 5]) pt_net = tf.reshape(pt_net, [ pt_shape[0], int(pt_shape[1] / 2), int(pt_shape[2] / 2), pt_shape[3] * 2 * 2 ]) print(pt_net.get_shape().as_list()) # pt_net: (13,13,2048) net = tf.concat([net, pt_net], axis=3, name='concat_pt') # Add a passthrough layer to the second to last convolutional layer net = slim.layers.conv2d(net, channel, scope='conv2d_%d' % layer_index) print(net.get_shape().as_list()) # Remove fully connected layers, instead add final 1*1 convolustional layers with the number of outputs we need for detection # Predict boxes with 5 coordinates each and 20 classes per box -> 125 filters net = slim.layers.conv2d(net, num_anchors * (5 + classes), kernel_size=[1, 1], activation_fn=None, scope='final') print(net.get_shape().as_list()) return net
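The reshape/transpose/reshape sequence in `pass_through` performs exactly the rearrangement of the built-in `tf.space_to_depth`, which additionally works without a static batch size:

# Equivalent passthrough: (N, 26, 26, 512) -> (N, 13, 13, 2048),
# same channel ordering as the hand-rolled version above.
pt_net = tf.space_to_depth(pt, block_size=2)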
def train(): faster_rcnn = build_whole_network.DetectionNetwork( base_network_name=cfgs.NET_NAME, is_training=True) with tf.name_scope('get_batch'): img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \ next_batch(dataset_name=cfgs.DATASET_NAME, # 'pascal', 'coco' batch_size=cfgs.BATCH_SIZE, shortside_len=cfgs.IMG_SHORT_SIDE_LEN, is_training=True) gtboxes_and_label = tf.py_func( back_forward_convert, inp=[tf.squeeze(gtboxes_and_label_batch, 0)], Tout=tf.float32) gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6]) with tf.name_scope('draw_gtboxes'): gtboxes_in_img = draw_box_with_color_rotate( img_batch, tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1], text=tf.shape(gtboxes_and_label)[0]) biases_regularizer = tf.no_regularizer weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY) # list as many types of layers as possible, even if they are not used now with slim.arg_scope([ slim.conv2d, slim.conv2d_in_plane, slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected ], weights_regularizer=weights_regularizer, biases_regularizer=biases_regularizer, biases_initializer=tf.constant_initializer(0.0)): final_boxes, final_scores, final_category, loss_dict = \ faster_rcnn.build_whole_detection_network(input_img_batch=img_batch, gtboxes_batch=gtboxes_and_label) dets_in_img = draw_boxes_with_categories_and_scores_rotate( img_batch=img_batch, boxes=final_boxes, labels=final_category, scores=final_scores) # ----------------------------------------------------------------------------------------------------build loss weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses()) rpn_location_loss = loss_dict['rpn_loc_loss'] rpn_cls_loss = loss_dict['rpn_cls_loss'] rpn_total_loss = rpn_location_loss + rpn_cls_loss fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss'] fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss'] fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss # ____________________________________________________________________________________________________build loss # ---------------------------------------------------------------------------------------------------add summary tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss) tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss) tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss) tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss) tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss) tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss) tf.summary.scalar('LOSS/total_loss', total_loss) tf.summary.scalar('LOSS/regular_weights', weight_decay_loss) tf.summary.image('img/gtboxes', gtboxes_in_img) tf.summary.image('img/dets', dets_in_img) # ___________________________________________________________________________________________________add summary global_step = slim.get_or_create_global_step() lr = tf.train.piecewise_constant( global_step, boundaries=[ np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1]) ], values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.]) tf.summary.scalar('lr', lr) optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM) # ---------------------------------------------------------------------------------------------compute gradients gradients = faster_rcnn.get_gradients(optimizer, total_loss) # enlarge_gradients for bias if cfgs.MUTILPY_BIAS_GRADIENT: gradients = 
faster_rcnn.enlarge_gradients_for_bias(gradients) if cfgs.GRADIENT_CLIPPING_BY_NORM: with tf.name_scope('clip_gradients'): gradients = slim.learning.clip_gradient_norms( gradients, cfgs.GRADIENT_CLIPPING_BY_NORM) # _____________________________________________________________________________________________compute gradients # train_op train_op = optimizer.apply_gradients(grads_and_vars=gradients, global_step=global_step) summary_op = tf.summary.merge_all() init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) restorer, restore_ckpt = faster_rcnn.get_restorer() saver = tf.train.Saver(max_to_keep=10) config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init_op) if not restorer is None: restorer.restore(sess, restore_ckpt) print('restore model') coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION) tools.mkdir(summary_path) summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph) for step in range(cfgs.MAX_ITERATION): training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0: _, global_stepnp = sess.run([train_op, global_step]) else: if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0: start = time.time() _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \ _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \ _fast_rcnn_total_loss, _total_loss, _ = \ sess.run([global_step, img_name_batch, rpn_location_loss, rpn_cls_loss, rpn_total_loss, fastrcnn_loc_loss, fastrcnn_cls_loss, fastrcnn_total_loss, total_loss, train_op]) # final_boxes_r, _final_scores_r, _final_category_r = sess.run([final_boxes_r, final_scores_r, final_category_r]) # print('*'*100) # print(_final_boxes_r) # print(_final_scores_r) # print(_final_category_r) end = time.time() print(""" {}: step{} image_name:{} |\t rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} | fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |\t total_loss:{} |\t pre_cost_time:{}s""" \ .format(training_time, _global_step, str(_img_name_batch[0]), _rpn_location_loss, _rpn_classification_loss, _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, _fast_rcnn_total_loss, _total_loss, (end - start))) else: if step % cfgs.SMRY_ITER == 0: _, global_stepnp, summary_str = sess.run( [train_op, global_step, summary_op]) summary_writer.add_summary(summary_str, global_stepnp) summary_writer.flush() if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1): save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION) if not os.path.exists(save_dir): os.makedirs(save_dir) save_ckpt = os.path.join( save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt') saver.save(sess, save_ckpt) print(' weights had been saved') coord.request_stop() coord.join(threads)
def resnet_v1_200(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v1_200'):
    """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
    blocks = [
        resnet_utils.Block('block1', bottleneck,
                           [(256, 64, 1)] * 2 + [(256, 64, 2)]),
        resnet_utils.Block('block2', bottleneck,
                           [(512, 128, 1)] * 23 + [(512, 128, 2)]),
        resnet_utils.Block('block3', bottleneck,
                           [(1024, 256, 1)] * 35 + [(1024, 256, 2)]),
        resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
    ]
    return resnet_v1(inputs, blocks, num_classes, is_training,
                     global_pool=global_pool, output_stride=output_stride,
                     include_root_block=True, spatial_squeeze=spatial_squeeze,
                     reuse=reuse, scope=scope)


resnet_v1_200.default_image_size = resnet_v1.default_image_size


if __name__ == '__main__':
    inputs = tf.placeholder(tf.float32, shape=(None, 224, 224, 3), name='input')
    with slim.arg_scope(resnet_arg_scope()) as sc:
        logits = resnet_v1_50(inputs)
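# Sanity check for the block spec above (standalone, no TF needed): each
# (depth, depth_bottleneck, stride) tuple is one 3-conv bottleneck unit, so the
# blocks hold 3 + 24 + 36 + 3 = 66 units, and 66 * 3 convs plus the root conv
# and the logits layer give the 200 layers the name promises.
units_per_block = [2 + 1, 23 + 1, 35 + 1, 3]  # as constructed in resnet_v1_200
assert sum(units_per_block) * 3 + 2 == 200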
def STsingle(inputs, outputs, loss_weight, labels):
    # Mean subtraction (BGR) for Flying Chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32,
                       name="img_global_mean")
    # tf.tile(mean, [4,192,256,1])
    inputs = inputs - mean
    outputs = outputs - mean

    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Add local response normalization (ACROSS_CHANNELS) for computing photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
                        activation_fn=tf.nn.elu):
        # Shared conv layers
        # (The original used the pre-1.0 argument order tf.concat(3, values);
        # TF 1.x takes the values first.)
        conv1_1 = slim.conv2d(tf.concat([inputs, outputs], axis=3), 64, [3, 3], scope='conv1_1')
        # conv1_1 = slim.conv2d(inputs, 64, [3, 3], scope='conv1_1')
        conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], scope='conv1_2')
        pool1 = slim.max_pool2d(conv1_2, [2, 2], scope='pool1')
        conv2_1 = slim.conv2d(pool1, 128, [3, 3], scope='conv2_1')
        conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], scope='conv2_2')
        pool2 = slim.max_pool2d(conv2_2, [2, 2], scope='pool2')
        conv3_1 = slim.conv2d(pool2, 256, [3, 3], scope='conv3_1')
        conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], scope='conv3_2')
        conv3_3 = slim.conv2d(conv3_2, 256, [3, 3], scope='conv3_3')
        pool3 = slim.max_pool2d(conv3_3, [2, 2], scope='pool3')
        conv4_1 = slim.conv2d(pool3, 512, [3, 3], scope='conv4_1')
        conv4_2 = slim.conv2d(conv4_1, 512, [3, 3], scope='conv4_2')
        conv4_3 = slim.conv2d(conv4_2, 512, [3, 3], scope='conv4_3')
        pool4 = slim.max_pool2d(conv4_3, [2, 2], scope='pool4')
        conv5_1 = slim.conv2d(pool4, 512, [3, 3], scope='conv5_1')
        conv5_2 = slim.conv2d(conv5_1, 512, [3, 3], scope='conv5_2')
        conv5_3 = slim.conv2d(conv5_2, 512, [3, 3], scope='conv5_3')
        pool5 = slim.max_pool2d(conv5_3, [2, 2], scope='pool5')
        # print(pool5.get_shape())

        # Spatial branch
        flatten5 = slim.flatten(pool5, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None, scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=fc8)
        actionLoss = tf.reduce_mean(cross_entropy)

        # Temporal branch
        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.3
        alpha_s = 0.3
        lambda_smooth = 0.8
        FlowDeltaWeights = tf.constant([0, 0, 0, 0, 1, -1, 0, 0, 0,
                                        0, 0, 0, 0, 1, 0, 0, -1, 0],
                                       dtype=tf.float32, shape=[3, 3, 2, 2],
                                       name="FlowDeltaWeights")
        scale = 2  # for deconvolution

        # Expanding part
        pr5 = slim.conv2d(pool5, 2, [3, 3], activation_fn=None, scope='pr5')
        h5 = pr5.get_shape()[1].value
        w5 = pr5.get_shape()[2].value
        pr5_input = tf.image.resize_bilinear(inputs_norm, [h5, w5])
        pr5_output = tf.image.resize_bilinear(outputs_norm, [h5, w5])
        flow_scale_5 = 0.625  # (*20/32)
        loss5, _ = loss_interp(pr5, pr5_input, pr5_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_5, FlowDeltaWeights)

        upconv4 = slim.conv2d_transpose(pool5, 256, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv4')
        pr5to4 = slim.conv2d_transpose(pr5, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr5to4')
        concat4 = tf.concat([pool4, upconv4, pr5to4], axis=3)
        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None, scope='pr4')
        h4 = pr4.get_shape()[1].value
        w4 = pr4.get_shape()[2].value
        pr4_input = tf.image.resize_bilinear(inputs_norm, [h4, w4])
        pr4_output = tf.image.resize_bilinear(outputs_norm, [h4, w4])
        flow_scale_4 = 1.25  # (*20/16)
        loss4, _ = loss_interp(pr4, pr4_input, pr4_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_4, FlowDeltaWeights)

        upconv3 = slim.conv2d_transpose(concat4, 128, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv3')
        pr4to3 = slim.conv2d_transpose(pr4, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr4to3')
        concat3 = tf.concat([pool3, upconv3, pr4to3], axis=3)
        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None, scope='pr3')
        h3 = pr3.get_shape()[1].value
        w3 = pr3.get_shape()[2].value
        pr3_input = tf.image.resize_bilinear(inputs_norm, [h3, w3])
        pr3_output = tf.image.resize_bilinear(outputs_norm, [h3, w3])
        flow_scale_3 = 2.5  # (*20/8)
        loss3, _ = loss_interp(pr3, pr3_input, pr3_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_3, FlowDeltaWeights)

        upconv2 = slim.conv2d_transpose(concat3, 64, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv2')
        pr3to2 = slim.conv2d_transpose(pr3, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr3to2')
        concat2 = tf.concat([pool2, upconv2, pr3to2], axis=3)
        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None, scope='pr2')
        h2 = pr2.get_shape()[1].value
        w2 = pr2.get_shape()[2].value
        pr2_input = tf.image.resize_bilinear(inputs_norm, [h2, w2])
        pr2_output = tf.image.resize_bilinear(outputs_norm, [h2, w2])
        flow_scale_2 = 5.0  # (*20/4)
        loss2, _ = loss_interp(pr2, pr2_input, pr2_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_2, FlowDeltaWeights)

        upconv1 = slim.conv2d_transpose(concat2, 32, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv1')
        pr2to1 = slim.conv2d_transpose(pr2, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr2to1')
        concat1 = tf.concat([pool1, upconv1, pr2to1], axis=3)
        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None, scope='pr1')
        h1 = pr1.get_shape()[1].value
        w1 = pr1.get_shape()[2].value
        pr1_input = tf.image.resize_bilinear(inputs_norm, [h1, w1])
        pr1_output = tf.image.resize_bilinear(outputs_norm, [h1, w1])
        flow_scale_1 = 10.0  # (*20/2)
        loss1, prev1 = loss_interp(pr1, pr1_input, pr1_output, epsilon,
                                   alpha_c, alpha_s, lambda_smooth,
                                   flow_scale_1, FlowDeltaWeights)

        # Adding intermediate losses
        all_loss = loss_weight[0] * loss1["total"] + loss_weight[1] * loss2["total"] + \
                   loss_weight[2] * loss3["total"] + loss_weight[3] * loss4["total"] + \
                   loss_weight[4] * loss5["total"] + loss_weight[0] * actionLoss
        slim.losses.add_loss(all_loss)

        losses = [loss1, loss2, loss3, loss4, loss5, actionLoss]
        flows_all = [pr1 * flow_scale_1, pr2 * flow_scale_2, pr3 * flow_scale_3,
                     pr4 * flow_scale_4, pr5 * flow_scale_5]
        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions
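# A hedged usage sketch for STsingle: the 4x192x256 shape comes from the
# tf.tile comment above; the per-scale loss weights are hypothetical, and
# loss_interp is assumed to be defined elsewhere in this codebase.
frame_t = tf.placeholder(tf.float32, [4, 192, 256, 3], name='frame_t')
frame_t1 = tf.placeholder(tf.float32, [4, 192, 256, 3], name='frame_t1')
action_labels = tf.placeholder(tf.int64, [4], name='action_labels')
loss_weights = [0.32, 0.08, 0.02, 0.01, 0.005]  # hypothetical, indices 0-4 are used
st_losses, st_flows, st_preds = STsingle(frame_t, frame_t1, loss_weights, action_labels)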
def mobilenet_v2(input, weight_decay, batch_norm_params):
    features = {}

    def _inverted_residual(net, expand_ch, out_ch, stride, scope):
        # Expansion (1x1) -> depthwise (3x3) -> linear bottleneck (1x1).
        # Scope names match the original layer-by-layer code exactly, so this
        # factoring changes nothing about the graph or the printed shapes.
        net = slim.convolution2d(net, expand_ch, [1, 1], stride=1, scope=scope + '/expand')
        print(net.name, net.get_shape())
        net = slim.separable_convolution2d(net, num_outputs=None, stride=stride,
                                           depth_multiplier=1, kernel_size=[3, 3],
                                           scope=scope + '/dwise')
        print(net.name, net.get_shape())
        net = slim.convolution2d(net, out_ch, [1, 1], stride=1,
                                 activation_fn=None, scope=scope + '/linear')
        print(net.name, net.get_shape())
        return net

    with tf.variable_scope('Mobilenet'):
        with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                            activation_fn=tf.nn.relu6,
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            padding='SAME'):
            print('Mobilenet input shape({}): {}'.format(input.name, input.get_shape()))

            # 96*96*3  112*112*3
            conv_1 = slim.convolution2d(input, 32, [3, 3], stride=2, scope='conv_1')
            print(conv_1.name, conv_1.get_shape())

            # 48*48*32  56*56*32
            # The first bottleneck has no expansion layer.
            conv2_1 = slim.separable_convolution2d(conv_1, num_outputs=None, stride=1,
                                                   depth_multiplier=1, kernel_size=[3, 3],
                                                   scope='conv2_1/dwise')
            print(conv2_1.name, conv2_1.get_shape())
            conv2_1 = slim.convolution2d(conv2_1, 16, [1, 1], stride=1,
                                         activation_fn=None, scope='conv2_1/linear')
            print(conv2_1.name, conv2_1.get_shape())
            features['feature2'] = conv2_1

            # 48*48*16  56*56*16
            conv3_1 = _inverted_residual(conv2_1, 96, 24, stride=2, scope='conv3_1')
            conv3_2 = _inverted_residual(conv3_1, 144, 24, stride=1, scope='conv3_2')
            block_3_2 = conv3_1 + conv3_2
            print(block_3_2.name, block_3_2.get_shape())
            features['feature3'] = block_3_2
            features['pfld'] = block_3_2

            # 24*24*24  28*28*24
            conv4_1 = _inverted_residual(block_3_2, 144, 32, stride=2, scope='conv4_1')
            conv4_2 = _inverted_residual(conv4_1, 192, 32, stride=1, scope='conv4_2')
            block_4_2 = conv4_1 + conv4_2
            print(block_4_2.name, block_4_2.get_shape())
            conv4_3 = _inverted_residual(block_4_2, 192, 32, stride=1, scope='conv4_3')
            block_4_3 = block_4_2 + conv4_3
            print(block_4_3.name, block_4_3.get_shape())
            # 12*12*32  14*14*32
            features['feature4'] = block_4_3

            conv5_1 = _inverted_residual(block_4_3, 192, 64, stride=2, scope='conv5_1')
            conv5_2 = _inverted_residual(conv5_1, 384, 64, stride=1, scope='conv5_2')
            block_5_2 = conv5_1 + conv5_2
            print(block_5_2.name, block_5_2.get_shape())
            conv5_3 = _inverted_residual(block_5_2, 384, 64, stride=1, scope='conv5_3')
            block_5_3 = block_5_2 + conv5_3
            print(block_5_3.name, block_5_3.get_shape())
            conv5_4 = _inverted_residual(block_5_3, 384, 64, stride=1, scope='conv5_4')
            block_5_4 = block_5_3 + conv5_4
            print(block_5_4.name, block_5_4.get_shape())

            # 6*6*64  7*7*64
            conv6_1 = _inverted_residual(block_5_4, 384, 96, stride=1, scope='conv6_1')
            conv6_2 = _inverted_residual(conv6_1, 576, 96, stride=1, scope='conv6_2')
            block_6_2 = conv6_1 + conv6_2
            print(block_6_2.name, block_6_2.get_shape())
            conv6_3 = _inverted_residual(block_6_2, 576, 96, stride=1, scope='conv6_3')
            block_6_3 = block_6_2 + conv6_3
            print(block_6_3.name, block_6_3.get_shape())
            features['feature5'] = block_6_3

            # 6*6*96  7*7*96
            conv7_1 = _inverted_residual(block_6_3, 576, 160, stride=2, scope='conv7_1')
            conv7_2 = _inverted_residual(conv7_1, 960, 160, stride=1, scope='conv7_2')
            block_7_2 = conv7_1 + conv7_2
            print(block_7_2.name, block_7_2.get_shape())
            conv7_3 = _inverted_residual(block_7_2, 960, 160, stride=1, scope='conv7_3')
            block_7_3 = block_7_2 + conv7_3
            print(block_7_3.name, block_7_3.get_shape())
            conv7_4 = _inverted_residual(block_7_3, 960, 320, stride=1, scope='conv7_4')
            features['feature6'] = conv7_4

    return features
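# A minimal driving sketch for mobilenet_v2 above; the batch-norm parameters,
# weight decay, and 112x112 input size are assumptions for illustration, not
# values taken from this project's config.
bn_params = {'is_training': True, 'center': True, 'scale': True,
             'decay': 0.997, 'epsilon': 1e-3}
images = tf.placeholder(tf.float32, [None, 112, 112, 3], name='images')
pfld_features = mobilenet_v2(images, weight_decay=5e-5, batch_norm_params=bn_params)
print(sorted(pfld_features.keys()))  # feature2 ... feature6, pfld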
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the TensorFlow graph.

    This will only be called once in the life of a training model, because
    after the graph is created the model will be restored from a meta graph
    file rather than being recreated.

    Args:
      reader: The data file reader. It should inherit from BaseReader.
      model: The core model (e.g. logistic or neural net). It should inherit
        from BaseModel.
      train_data_pattern: glob path to the training data files.
      label_loss_fn: What kind of loss to apply to the model. It should
        inherit from BaseLoss.
      batch_size: How many examples to process at a time.
      base_learning_rate: What learning rate to initialize the optimizer with.
      learning_rate_decay_examples: How many examples to process between
        learning-rate decay steps.
      learning_rate_decay: Multiplicative decay factor for the learning rate.
      optimizer_class: Which optimization algorithm to use.
      clip_gradient_norm: Magnitude of the gradient to clip to.
      regularization_penalty: How much weight to give the regularization loss
        compared to the label loss.
      num_readers: How many threads to use for I/O operations.
      num_epochs: How many passes to make over the data. 'None' means an
        unlimited number of passes.
    """
    global_step = tf.Variable(0, trainable=False, name="global_step")

    local_device_protos = device_lib.list_local_devices()
    gpus = [x.name for x in local_device_protos if x.device_type == 'GPU']
    num_gpus = len(gpus)

    if num_gpus > 0:
        logging.info("Using the following GPUs to train: " + str(gpus))
        num_towers = num_gpus
        device_string = '/gpu:%d'
    else:
        logging.info("No GPUs found. Training on CPU.")
        num_towers = 1
        device_string = '/cpu:%d'

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step * batch_size * num_towers,
                                               learning_rate_decay_examples,
                                               learning_rate_decay,
                                               staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    unused_video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(reader,
                               train_data_pattern,
                               batch_size=batch_size * num_towers,
                               num_readers=num_readers,
                               num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1
    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    tower_inputs = tf.split(model_input, num_towers)
    tower_labels = tf.split(labels_batch, num_towers)
    tower_num_frames = tf.split(num_frames, num_towers)
    tower_gradients = []
    tower_predictions = []
    tower_label_losses = []
    tower_reg_losses = []

    # PRCCConcat
    phase = tf.constant(True)

    for i in range(num_towers):
        # For some reason these 'with' statements can't be combined onto the
        # same line. They have to be nested.
        with tf.device(device_string % i):
            with tf.variable_scope("tower", reuse=True if i > 0 else None):
                with slim.arg_scope([slim.model_variable, slim.variable],
                                    device="/cpu:0" if num_gpus != 1 else "/gpu:0"):
                    result = model.create_model(tower_inputs[i],
                                                num_frames=tower_num_frames[i],
                                                vocab_size=reader.num_classes,
                                                labels=tower_labels[i],
                                                # PRCCConcat
                                                is_training=phase)
                    for variable in slim.get_model_variables():
                        tf.summary.histogram(variable.op.name, variable)

                    predictions = result["predictions"]
                    tower_predictions.append(predictions)

                    if "loss" in result.keys():
                        label_loss = result["loss"]
                    else:
                        label_loss = label_loss_fn.calculate_loss(predictions, tower_labels[i])

                    if "regularization_loss" in result.keys():
                        reg_loss = result["regularization_loss"]
                    else:
                        reg_loss = tf.constant(0.0)

                    reg_losses = tf.losses.get_regularization_losses()
                    if reg_losses:
                        reg_loss += tf.add_n(reg_losses)
                    tower_reg_losses.append(reg_loss)

                    # Adds update_ops (e.g., moving-average updates in batch
                    # normalization) as a dependency to the train_op.
                    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                    if "update_ops" in result.keys():
                        update_ops += result["update_ops"]
                    if update_ops:
                        with tf.control_dependencies(update_ops):
                            barrier = tf.no_op(name="gradient_barrier")
                            with tf.control_dependencies([barrier]):
                                label_loss = tf.identity(label_loss)
                    tower_label_losses.append(label_loss)

                    # Incorporate the L2 weight penalties etc.
                    final_loss = regularization_penalty * reg_loss + label_loss
                    gradients = optimizer.compute_gradients(final_loss,
                                                            colocate_gradients_with_ops=False)
                    tower_gradients.append(gradients)

    label_loss = tf.reduce_mean(tf.stack(tower_label_losses))
    tf.summary.scalar("label_loss", label_loss)
    if regularization_penalty != 0:
        reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses))
        tf.summary.scalar("reg_loss", reg_loss)

    merged_gradients = utils.combine_gradients(tower_gradients)
    if clip_gradient_norm > 0:
        with tf.name_scope('clip_grads'):
            merged_gradients = utils.clip_gradient_norms(merged_gradients, clip_gradient_norm)
    train_op = optimizer.apply_gradients(merged_gradients, global_step=global_step)

    tf.add_to_collection("global_step", global_step)
    tf.add_to_collection("loss", label_loss)
    tf.add_to_collection("predictions", tf.concat(tower_predictions, 0))
    tf.add_to_collection("input_batch_raw", model_input_raw)
    tf.add_to_collection("input_batch", model_input)
    tf.add_to_collection("num_frames", num_frames)
    tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
    tf.add_to_collection("train_op", train_op)
    # PRCCConcat
    tf.add_to_collection("phase", phase)
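# utils.combine_gradients is external to this snippet; a plausible sketch of
# what it does (an assumption, not the project's actual code) is a
# variable-wise average of the per-tower gradients:
def combine_gradients_sketch(tower_grads):
    """Average (gradient, variable) pairs from all towers, per variable."""
    combined = []
    for grads_and_vars in zip(*tower_grads):
        grads = [g for g, _ in grads_and_vars if g is not None]
        var = grads_and_vars[0][1]
        if grads:
            # Stack the tower gradients along a new axis and average them.
            combined.append((tf.reduce_mean(tf.stack(grads), axis=0), var))
    return combined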
def create_architecture(self, mode, num_classes, tag=None,
                        anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2)):
    self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3 + 18])
    self._im_info = tf.placeholder(tf.float32, shape=[3])
    self._gt_boxes = tf.placeholder(tf.float32, shape=[None, 5])
    self._tag = tag

    self._num_classes = num_classes
    self._mode = mode
    self._anchor_scales = anchor_scales
    self._num_scales = len(anchor_scales)
    self._anchor_ratios = anchor_ratios
    self._num_ratios = len(anchor_ratios)
    self._num_anchors = self._num_scales * self._num_ratios

    training = mode == 'TRAIN'
    testing = mode == 'TEST'
    print('Training', training, 'Testing', testing)

    assert tag is not None

    # Handle most of the regularizers here.
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)
    if cfg.TRAIN.BIAS_DECAY:
        biases_regularizer = weights_regularizer
    else:
        biases_regularizer = tf.no_regularizer

    # List as many types of layers as possible, even if they are not used now.
    with arg_scope([slim.conv2d, slim.conv2d_in_plane,
                    slim.conv2d_transpose, slim.separable_conv2d,
                    slim.fully_connected],
                   weights_regularizer=weights_regularizer,
                   biases_regularizer=biases_regularizer,
                   biases_initializer=tf.constant_initializer(0.0)):
        rois, cls_prob, bbox_pred = self._build_network(training)

    layers_to_output = {'rois': rois}

    for var in tf.trainable_variables():
        self._train_summaries.append(var)

    if testing:
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (self._num_classes))
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (self._num_classes))
        self._predictions["bbox_pred"] *= stds
        self._predictions["bbox_pred"] += means
    else:
        self._add_losses()
        layers_to_output.update(self._losses)

        val_summaries = []
        with tf.device("/cpu:0"):
            # val_summaries.append(self._add_gt_image_summary())
            val_summaries.extend(list(self._add_gt_image_summary()))
            for key, var in self._event_summaries.items():
                val_summaries.append(tf.summary.scalar(key, var))
            for key, var in self._score_summaries.items():
                self._add_score_summary(key, var)
            for var in self._act_summaries:
                self._add_act_summary(var)
            for var in self._train_summaries:
                self._add_train_summary(var)

        self._summary_op = tf.summary.merge_all()
        self._summary_op_val = tf.summary.merge(val_summaries)

    layers_to_output.update(self._predictions)

    return layers_to_output
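# The TEST branch above undoes the training-time box-target normalization:
# bbox_pred = bbox_pred * stds + means, tiled once per class. A standalone
# numpy sketch of the same arithmetic; the std/mean values and 21 classes are
# assumptions (common defaults), not read from cfg:
_stds = np.tile(np.array([0.1, 0.1, 0.2, 0.2]), 21)  # 21 classes * 4 coords = 84
_means = np.tile(np.array([0.0, 0.0, 0.0, 0.0]), 21)
_pred = np.zeros((1, 84), dtype=np.float32)           # normalized network output
_denorm = _pred * _stds + _means                      # per-class denormalized deltas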
def STbaseline(inputs, outputs, loss_weight, labels):
    """
    Spatial stream based on VGG16.
    Temporal stream based on FlowNet-simple.
    """
    # Mean subtraction (BGR) for Flying Chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    # tf.tile(mean, [4,192,256,1])
    inputs = inputs - mean
    outputs = outputs - mean

    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Add local response normalization (ACROSS_CHANNELS) for computing photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)

    # The original FlowNet uses leaky ReLU; elu is used here instead.
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], activation_fn=tf.nn.elu):
        # Contracting part
        Tconv1 = slim.conv2d(tf.concat([inputs, outputs], axis=3), 64, [7, 7], stride=2, scope='Tconv1')
        Tconv2 = slim.conv2d(Tconv1, 128, [5, 5], stride=2, scope='Tconv2')
        Tconv3_1 = slim.conv2d(Tconv2, 256, [5, 5], stride=2, scope='Tconv3_1')
        Tconv3_2 = slim.conv2d(Tconv3_1, 256, [3, 3], scope='Tconv3_2')
        Tconv4_1 = slim.conv2d(Tconv3_2, 512, [3, 3], stride=2, scope='Tconv4_1')
        Tconv4_2 = slim.conv2d(Tconv4_1, 512, [3, 3], scope='Tconv4_2')
        Tconv5_1 = slim.conv2d(Tconv4_2, 512, [3, 3], stride=2, scope='Tconv5_1')
        Tconv5_2 = slim.conv2d(Tconv5_1, 512, [3, 3], scope='Tconv5_2')
        Tconv6_1 = slim.conv2d(Tconv5_2, 1024, [3, 3], stride=2, scope='Tconv6_1')
        Tconv6_2 = slim.conv2d(Tconv6_1, 1024, [3, 3], scope='Tconv6_2')

        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.25
        alpha_s = 0.37
        lambda_smooth = 1.0
        FlowDeltaWeights = tf.constant([0, 0, 0, 0, 1, -1, 0, 0, 0,
                                        0, 0, 0, 0, 1, 0, 0, -1, 0],
                                       dtype=tf.float32, shape=[3, 3, 2, 2],
                                       name="FlowDeltaWeights")
        scale = 2  # for deconvolution

        # Expanding part
        pr6 = slim.conv2d(Tconv6_2, 2, [3, 3], activation_fn=None, scope='pr6')
        h6 = pr6.get_shape()[1].value
        w6 = pr6.get_shape()[2].value
        pr6_input = tf.image.resize_bilinear(inputs_norm, [h6, w6])
        pr6_output = tf.image.resize_bilinear(outputs_norm, [h6, w6])
        flow_scale_6 = 0.3125  # (*20/64)
        loss6, _ = loss_interp(pr6, pr6_input, pr6_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_6, FlowDeltaWeights)

        upconv5 = slim.conv2d_transpose(Tconv6_2, 512, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv5')
        pr6to5 = slim.conv2d_transpose(pr6, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr6to5')
        concat5 = tf.concat([Tconv5_2, upconv5, pr6to5], axis=3)
        pr5 = slim.conv2d(concat5, 2, [3, 3], activation_fn=None, scope='pr5')
        h5 = pr5.get_shape()[1].value
        w5 = pr5.get_shape()[2].value
        pr5_input = tf.image.resize_bilinear(inputs_norm, [h5, w5])
        pr5_output = tf.image.resize_bilinear(outputs_norm, [h5, w5])
        flow_scale_5 = 0.625  # (*20/32)
        loss5, _ = loss_interp(pr5, pr5_input, pr5_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_5, FlowDeltaWeights)

        upconv4 = slim.conv2d_transpose(concat5, 256, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv4')
        pr5to4 = slim.conv2d_transpose(pr5, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr5to4')
        concat4 = tf.concat([Tconv4_2, upconv4, pr5to4], axis=3)
        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None, scope='pr4')
        h4 = pr4.get_shape()[1].value
        w4 = pr4.get_shape()[2].value
        pr4_input = tf.image.resize_bilinear(inputs_norm, [h4, w4])
        pr4_output = tf.image.resize_bilinear(outputs_norm, [h4, w4])
        flow_scale_4 = 1.25  # (*20/16)
        loss4, _ = loss_interp(pr4, pr4_input, pr4_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_4, FlowDeltaWeights)

        upconv3 = slim.conv2d_transpose(concat4, 128, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv3')
        pr4to3 = slim.conv2d_transpose(pr4, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr4to3')
        concat3 = tf.concat([Tconv3_2, upconv3, pr4to3], axis=3)
        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None, scope='pr3')
        h3 = pr3.get_shape()[1].value
        w3 = pr3.get_shape()[2].value
        pr3_input = tf.image.resize_bilinear(inputs_norm, [h3, w3])
        pr3_output = tf.image.resize_bilinear(outputs_norm, [h3, w3])
        flow_scale_3 = 2.5  # (*20/8)
        loss3, _ = loss_interp(pr3, pr3_input, pr3_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_3, FlowDeltaWeights)

        upconv2 = slim.conv2d_transpose(concat3, 64, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv2')
        pr3to2 = slim.conv2d_transpose(pr3, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr3to2')
        concat2 = tf.concat([Tconv2, upconv2, pr3to2], axis=3)
        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None, scope='pr2')
        h2 = pr2.get_shape()[1].value
        w2 = pr2.get_shape()[2].value
        pr2_input = tf.image.resize_bilinear(inputs_norm, [h2, w2])
        pr2_output = tf.image.resize_bilinear(outputs_norm, [h2, w2])
        flow_scale_2 = 5.0  # (*20/4)
        loss2, _ = loss_interp(pr2, pr2_input, pr2_output, epsilon,
                               alpha_c, alpha_s, lambda_smooth,
                               flow_scale_2, FlowDeltaWeights)

        upconv1 = slim.conv2d_transpose(concat2, 32, [2 * scale, 2 * scale],
                                        stride=scale, scope='upconv1')
        pr2to1 = slim.conv2d_transpose(pr2, 2, [2 * scale, 2 * scale], stride=scale,
                                       activation_fn=None, scope='up_pr2to1')
        concat1 = tf.concat([Tconv1, upconv1, pr2to1], axis=3)
        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None, scope='pr1')
        h1 = pr1.get_shape()[1].value
        w1 = pr1.get_shape()[2].value
        pr1_input = tf.image.resize_bilinear(inputs_norm, [h1, w1])
        pr1_output = tf.image.resize_bilinear(outputs_norm, [h1, w1])
        flow_scale_1 = 10.0  # (*20/2)
        loss1, prev1 = loss_interp(pr1, pr1_input, pr1_output, epsilon,
                                   alpha_c, alpha_s, lambda_smooth,
                                   flow_scale_1, FlowDeltaWeights)

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        # conv1_1 = slim.conv2d(tf.concat([inputs, outputs], axis=3), 64, [3, 3], scope='conv1_1')
        conv1_1 = slim.conv2d(inputs, 64, [3, 3], scope='conv1_1')
        conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], scope='conv1_2')
        pool1 = slim.max_pool2d(conv1_2, [2, 2], scope='pool1')
        conv2_1 = slim.conv2d(pool1, 128, [3, 3], scope='conv2_1')
        conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], scope='conv2_2')
        pool2 = slim.max_pool2d(conv2_2, [2, 2], scope='pool2')
        conv3_1 = slim.conv2d(pool2, 256, [3, 3], scope='conv3_1')
        conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], scope='conv3_2')
        conv3_3 = slim.conv2d(conv3_2, 256, [3, 3], scope='conv3_3')
        pool3 = slim.max_pool2d(conv3_3, [2, 2], scope='pool3')
        conv4_1 = slim.conv2d(pool3, 512, [3, 3], scope='conv4_1')
        conv4_2 = slim.conv2d(conv4_1, 512, [3, 3], scope='conv4_2')
        conv4_3 = slim.conv2d(conv4_2, 512, [3, 3], scope='conv4_3')
        pool4 = slim.max_pool2d(conv4_3, [2, 2], scope='pool4')
        conv5_1 = slim.conv2d(pool4, 512, [3, 3], scope='conv5_1')
        conv5_2 = slim.conv2d(conv5_1, 512, [3, 3], scope='conv5_2')
        conv5_3 = slim.conv2d(conv5_2, 512, [3, 3], scope='conv5_3')
        pool5 = slim.max_pool2d(conv5_3, [2, 2], scope='pool5')

        # Incorporate temporal features
        concatST = tf.concat([pool5, Tconv5_2], axis=3)
        poolST = slim.max_pool2d(concatST, [2, 2])
        # print(poolST.get_shape())
        concat2ST = tf.concat([poolST, Tconv6_2], axis=3)
        # print(concat2ST.get_shape())
        concatDR = slim.conv2d(concat2ST, 512, [1, 1])
        # print(concatDR.get_shape())

        flatten5 = slim.flatten(concatDR, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None, scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=fc8)
        actionLoss = tf.reduce_mean(cross_entropy)

    # Adding intermediate losses
    all_loss = loss_weight[0] * loss1["total"] + loss_weight[1] * loss2["total"] + \
               loss_weight[2] * loss3["total"] + loss_weight[3] * loss4["total"] + \
               loss_weight[4] * loss5["total"] + loss_weight[5] * loss6["total"] + \
               loss_weight[0] * actionLoss
    slim.losses.add_loss(all_loss)

    losses = [loss1, loss2, loss3, loss4, loss5, loss6, actionLoss]
    # pr1 = tf.multiply(tf.constant(20.0), pr1)
    flows_all = [pr1 * flow_scale_1, pr2 * flow_scale_2, pr3 * flow_scale_3,
                 pr4 * flow_scale_4, pr5 * flow_scale_5, pr6 * flow_scale_6]
    predictions = [prev1, actionPredictions]
    return losses, flows_all, predictions
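# The per-level flow scales in both ST networks follow flow_scale = 20 / stride
# (the "*20/64" ... "*20/2" comments above). A quick standalone check of the
# constants used:
for _stride, _scale in [(64, 0.3125), (32, 0.625), (16, 1.25),
                        (8, 2.5), (4, 5.0), (2, 10.0)]:
    assert abs(20.0 / _stride - _scale) < 1e-9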