def get_real_model(self):
  """Get (and lazily build) the real TF initializer for this config.

  The built initializer is cached in `self.model`, so repeated calls
  return the same object.

  Returns:
    A tf/slim initializer built according to `self.type`.

  Raises:
    ValueError: If `self.type` is not a supported initializer type.
  """
  if self.model:
    return self.model
  if self.type == 'truncated_normal_initializer':
    self.model = tf.truncated_normal_initializer(
        mean=self.mean, stddev=self.stddev)
  elif self.type == 'random_normal_initializer':
    self.model = tf.random_normal_initializer(
        mean=self.mean, stddev=self.stddev)
  elif self.type == 'variance_scaling_initializer':
    enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                       DESCRIPTOR.enum_types_by_name['Mode'])
    mode = self.mode
    # Accept either the string name or the proto enum number for mode;
    # normalize to the canonical enum name expected by slim.
    if mode == 'FAN_IN':
      mode = 0
    elif mode == 'FAN_OUT':
      mode = 1
    elif mode == 'FAN_AVG':
      mode = 2
    mode = enum_descriptor.values_by_number[mode].name
    # BUG FIX: this was previously assigned to `self.mode` instead of
    # `self.model`, so the branch returned None and clobbered the
    # configured mode string with an initializer object.
    self.model = slim.variance_scaling_initializer(
        factor=self.factor, mode=mode, uniform=self.uniform)
  else:
    self.model = None
    raise ValueError('Unknown initializer type: {}'.format(self.type))
  return self.model
def get_initializer(desc):
  """Build an initializer function from a descriptor.

  Args:
    desc: Config object with a `type` field plus type-specific fields
      (`mean`, `stddev`, `factor`, `mode`, `uniform`). `mean`/`stddev`
      default to 0.0 / 0.01 when absent.

  Returns:
    A tf/slim initializer.

  Raises:
    ValueError: If `desc.type` names an unknown initializer.
  """
  mu = desc.mean if 'mean' in desc else 0.0
  sigma = desc.stddev if 'stddev' in desc else 0.01
  kind = desc.type
  if kind == 'truncated_normal_initializer':
    return tf.truncated_normal_initializer(mean=mu, stddev=sigma)
  if kind == 'random_normal_initializer':
    return tf.random_normal_initializer(mean=mu, stddev=sigma)
  if kind == 'variance_scaling_initializer':
    mode_enum = (hyperparams_pb2.VarianceScalingInitializer.
                 DESCRIPTOR.enum_types_by_name['Mode'])
    mode = desc.mode
    # Map a string mode name to its proto enum number; a numeric mode
    # passes through unchanged.
    name_to_number = {'FAN_IN': 0, 'FAN_OUT': 1, 'FAN_AVG': 2}
    mode = name_to_number.get(mode, mode)
    mode = mode_enum.values_by_number[mode].name
    return slim.variance_scaling_initializer(
        factor=desc.factor, mode=mode, uniform=desc.uniform)
  raise ValueError('Unknown initializer type: {}'.format(kind))
def nasnet_large_arg_scope(weight_decay=5e-5,
                           batch_norm_decay=0.9997,
                           batch_norm_epsilon=1e-3):
  """Defines the default arg scope for the NASNet-A Large ImageNet model.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by
      zero in batch norm.

  Returns:
    An `arg_scope` to use for the NASNet Large Model.
  """
  # Batch-norm settings shared by every normalized layer in the scope.
  bn_params = {
      'decay': batch_norm_decay,      # decay for the moving averages
      'epsilon': batch_norm_epsilon,  # epsilon to prevent 0s in variance
      'scale': True,
      'fused': True,
  }
  regularizer = slim.l2_regularizer(weight_decay)
  initializer = slim.variance_scaling_initializer(mode='FAN_OUT')
  with arg_scope(
      [slim.fully_connected, slim.conv2d, slim.separable_conv2d],
      weights_regularizer=regularizer,
      weights_initializer=initializer):
    with arg_scope([slim.fully_connected], activation_fn=None, scope='FC'):
      with arg_scope(
          [slim.conv2d, slim.separable_conv2d],
          activation_fn=None,
          biases_initializer=None):
        with arg_scope([slim.batch_norm], **bn_params) as sc:
          return sc
def resnet_arg_scope(
    weight_decay=0.0001,
    batch_norm_decay=0.997,
    batch_norm_epsilon=1e-5,
    batch_norm_scale=True,
    activation_fn=tf.nn.relu,
    use_batch_norm=True,
    batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS):
  """Defines the default ResNet arg scope.

  TODO(gpapan): The batch-normalization related default values above are
    appropriate for use in conjunction with the reference ResNet models
    released at https://github.com/KaimingHe/deep-residual-networks. When
    training ResNets from scratch, they might need to be tuned.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    batch_norm_decay: The moving average decay when estimating layer
      activation statistics in batch normalization.
    batch_norm_epsilon: Small constant to prevent division by zero when
      normalizing activations by their variance in batch normalization.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale
      the activations in the batch normalization layer.
    activation_fn: The activation function which is used in ResNet.
    use_batch_norm: Whether or not to use batch normalization.
    batch_norm_updates_collections: Collection for the update ops for
      batch norm.

  Returns:
    An `arg_scope` to use for the resnet152 models.
  """
  bn_params = {
      'decay': batch_norm_decay,
      'epsilon': batch_norm_epsilon,
      'scale': batch_norm_scale,
      'updates_collections': batch_norm_updates_collections,
      'fused': None,  # Use fused batch norm if possible.
  }
  normalizer = slim.batch_norm if use_batch_norm else None
  with slim.arg_scope(
      [slim.conv2d],
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=slim.variance_scaling_initializer(),
      activation_fn=activation_fn,
      normalizer_fn=normalizer,
      normalizer_params=bn_params):
    with slim.arg_scope([slim.batch_norm], **bn_params):
      # padding='SAME' for pool1 makes feature alignment easier for dense
      # prediction tasks; this matches
      # https://github.com/facebook/fb.resnet.torch. The accompanying code
      # of 'Deep Residual Learning for Image Recognition' uses
      # padding='VALID' for pool1 — switch to that choice by setting
      # slim.arg_scope([slim.max_pool2d], padding='VALID').
      with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
        return arg_sc
def attention_inception_v3_arg_scope(
    weight_decay=0.00004,
    use_batch_norm=True,
    batch_norm_decay=0.9997,
    batch_norm_epsilon=0.001,
    activation_fn=tf.nn.relu,
    batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
    batch_norm_scale=False):
  """Defines the default arg scope for inception models.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    use_batch_norm: If `True`, batch_norm is applied after each convolution.
    batch_norm_decay: Decay for batch norm moving average.
    batch_norm_epsilon: Small float added to variance to avoid dividing by
      zero in batch norm.
    activation_fn: Activation function for conv2d.
    batch_norm_updates_collections: Collection for the update ops for
      batch norm.
    batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale
      the activations in the batch normalization layer.

  Returns:
    An `arg_scope` to use for the inception models.
  """
  bn_params = {
      'decay': batch_norm_decay,      # decay for the moving averages
      'epsilon': batch_norm_epsilon,  # epsilon to prevent 0s in variance
      'updates_collections': batch_norm_updates_collections,
      'fused': None,                  # use fused batch norm if possible
      'scale': batch_norm_scale,
  }
  # Normalization is optional; an empty params dict keeps slim happy when
  # batch norm is disabled.
  normalizer_fn = slim.batch_norm if use_batch_norm else None
  normalizer_params = bn_params if use_batch_norm else {}
  # Set weight_decay for weights in Conv and FC layers.
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope(
        [slim.conv2d],
        weights_initializer=slim.variance_scaling_initializer(),
        activation_fn=activation_fn,
        normalizer_fn=normalizer_fn,
        normalizer_params=normalizer_params) as sc:
      return sc
def resnet_arg_scope():
  """Returns the default arg scope for ResNet conv/batch-norm layers."""
  bn_params = dict(
      decay=0.997,
      epsilon=1e-5,
      scale=True,
      is_training=tfu.is_training(),
      fused=True,
      data_format=tfu.data_format())
  with slim.arg_scope(
      [slim.conv2d, slim.conv3d],
      weights_regularizer=slim.l2_regularizer(1e-4),
      weights_initializer=slim.variance_scaling_initializer(),
      activation_fn=tf.nn.relu,
      normalizer_fn=slim.batch_norm,
      normalizer_params=bn_params):
    with slim.arg_scope([slim.batch_norm], **bn_params):
      with slim.arg_scope([slim.max_pool2d], padding='SAME') as scope:
        return scope
def conv_net(inputs, hparams):
  """Builds the ConvNet from Kelz 2016.

  Stacks conv/pool/dropout layers described by the parallel lists in
  `hparams`, then flattens the frequency/filter axes and applies a final
  fully-connected layer with dropout.
  """
  with slim.arg_scope(
      [slim.conv2d, slim.fully_connected],
      activation_fn=tf.nn.relu,
      weights_initializer=slim.variance_scaling_initializer(
          factor=2.0, mode='FAN_AVG', uniform=True)):
    net = inputs
    # The per-layer hyperparameters are parallel lists; zip them together.
    layer_specs = zip(hparams.temporal_sizes, hparams.freq_sizes,
                      hparams.num_filters, hparams.pool_sizes,
                      hparams.dropout_keep_amts)
    for idx, (temporal_size, freq_size, filters, pool_size,
              keep_amt) in enumerate(layer_specs):
      suffix = str(idx)
      net = slim.conv2d(
          net,
          filters, [temporal_size, freq_size],
          scope='conv' + suffix,
          normalizer_fn=slim.batch_norm)
      if pool_size > 1:
        # Pool only along the frequency axis.
        net = slim.max_pool2d(
            net, [1, pool_size],
            stride=[1, pool_size],
            scope='pool' + suffix)
      if keep_amt < 1:
        net = slim.dropout(net, keep_amt, scope='dropout' + suffix)

    # Flatten while preserving batch and time dimensions.
    dims = tf.shape(net)
    net = tf.reshape(
        net, (dims[0], dims[1], net.shape[2] * net.shape[3]), 'flatten_end')

    net = slim.fully_connected(net, hparams.fc_size, scope='fc_end')
    net = slim.dropout(net, hparams.fc_dropout_keep_amt, scope='dropout_end')

    return net
def _build_initializer(initializer):
  """Build a tf initializer from config.

  Args:
    initializer: hyperparams_pb2.Hyperparams.regularizer proto.

  Returns:
    tf initializer.

  Raises:
    ValueError: On unknown initializer.
  """
  oneof = initializer.WhichOneof('initializer_oneof')
  if oneof == 'truncated_normal_initializer':
    cfg = initializer.truncated_normal_initializer
    return tf.truncated_normal_initializer(mean=cfg.mean, stddev=cfg.stddev)
  if oneof == 'random_normal_initializer':
    cfg = initializer.random_normal_initializer
    return tf.random_normal_initializer(mean=cfg.mean, stddev=cfg.stddev)
  if oneof == 'variance_scaling_initializer':
    cfg = initializer.variance_scaling_initializer
    # Translate the proto enum number into the mode name slim expects
    # ('FAN_IN' / 'FAN_OUT' / 'FAN_AVG').
    mode_enum = (hyperparams_pb2.VarianceScalingInitializer.
                 DESCRIPTOR.enum_types_by_name['Mode'])
    mode_name = mode_enum.values_by_number[cfg.mode].name
    return slim.variance_scaling_initializer(
        factor=cfg.factor, mode=mode_name, uniform=cfg.uniform)
  if oneof == 'glorot_normal_initializer':
    return tf.glorot_normal_initializer()
  if oneof == 'glorot_uniform_initializer':
    return tf.glorot_uniform_initializer()
  raise ValueError('Unknown initializer function: {}'.format(oneof))
def _build_initializer(initializer, build_for_keras=False):
  """Build a tf initializer from config.

  Args:
    initializer: hyperparams_pb2.Hyperparams.regularizer proto.
    build_for_keras: Whether the initializers should be built for Keras
      operators. If false builds for Slim.

  Returns:
    tf initializer, or None if no initializer oneof is set.

  Raises:
    ValueError: On unknown initializer.
  """
  initializer_oneof = initializer.WhichOneof('initializer_oneof')
  if initializer_oneof == 'truncated_normal_initializer':
    return tf.truncated_normal_initializer(
        mean=initializer.truncated_normal_initializer.mean,
        stddev=initializer.truncated_normal_initializer.stddev)
  if initializer_oneof == 'random_normal_initializer':
    return tf.random_normal_initializer(
        mean=initializer.random_normal_initializer.mean,
        stddev=initializer.random_normal_initializer.stddev)
  if initializer_oneof == 'variance_scaling_initializer':
    # Convert the proto enum number to its name ('FAN_IN'/'FAN_OUT'/'FAN_AVG').
    enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
                       DESCRIPTOR.enum_types_by_name['Mode'])
    mode = enum_descriptor.values_by_number[
        initializer.variance_scaling_initializer.mode].name
    if build_for_keras:
      # Keras variants use lowercase mode names and a `scale` kwarg.
      if initializer.variance_scaling_initializer.uniform:
        return tf.variance_scaling_initializer(
            scale=initializer.variance_scaling_initializer.factor,
            mode=mode.lower(),
            distribution='uniform')
      else:
        # In TF 1.9 release and earlier, the truncated_normal distribution was
        # not supported correctly. So, in these earlier versions of tensorflow,
        # the ValueError will be raised, and we manually truncate the
        # distribution scale.
        #
        # It is insufficient to just set distribution to `normal` from the
        # start, because the `normal` distribution in newer Tensorflow versions
        # creates a truncated distribution, whereas it created untruncated
        # distributions in older versions.
        try:
          return tf.variance_scaling_initializer(
              scale=initializer.variance_scaling_initializer.factor,
              mode=mode.lower(),
              distribution='truncated_normal')
        except ValueError:
          # Scale correction so the truncated-at-2-stddev normal matches the
          # requested variance (0.879... is the stddev of that truncation).
          truncate_constant = 0.87962566103423978
          truncated_scale = initializer.variance_scaling_initializer.factor / (
              truncate_constant * truncate_constant)
          return tf.variance_scaling_initializer(
              scale=truncated_scale,
              mode=mode.lower(),
              distribution='normal')
    else:
      return slim.variance_scaling_initializer(
          factor=initializer.variance_scaling_initializer.factor,
          mode=mode,
          uniform=initializer.variance_scaling_initializer.uniform)
  if initializer_oneof is None:
    return None
  raise ValueError(
      'Unknown initializer function: {}'.format(initializer_oneof))
def discriminator(x,
                  progress,
                  num_filters_fn,
                  resolution_schedule,
                  num_blocks=None,
                  kernel_size=3,
                  simple_arch=False,
                  scope='progressive_gan_discriminator',
                  reuse=None):
  """Discriminator network for the progressive GAN model.

  Args:
    x: A `Tensor` of NHWC format representing images of size `resolution`.
    progress: A scalar float `Tensor` of training progress.
    num_filters_fn: A function that maps `block_id` to # of filters for the
      block.
    resolution_schedule: An object of `ResolutionSchedule`.
    num_blocks: An integer of number of blocks. None means maximum number of
      blocks, i.e. `resolution.schedule.num_resolutions`. Defaults to None.
    kernel_size: An integer of convolution kernel size.
    simple_arch: Bool, use a simple architecture.
    scope: A string or variable scope.
    reuse: Whether to reuse `scope`. Defaults to None which means to inherit
      the reuse option of the parent scope.

  Returns:
    A `Tensor` of model output and a dictionary of model end points.
  """
  he_init = tf_slim.variance_scaling_initializer()

  if num_blocks is None:
    num_blocks = resolution_schedule.num_resolutions

  def _conv2d(scope, x, kernel_size, filters, padding='SAME'):
    # Custom conv with leaky-relu activation and He-style initialization.
    return layers.custom_conv2d(
        x=x,
        filters=filters,
        kernel_size=kernel_size,
        padding=padding,
        activation=tf.nn.leaky_relu,
        he_initializer_slope=0.0,
        scope=scope)

  def _from_rgb(x, block_id):
    # 1x1 conv projecting RGB input into the block's feature space.
    return _conv2d('from_rgb', x, 1, num_filters_fn(block_id))

  if resolution_schedule.scale_mode == 'H':
    strides = (resolution_schedule.scale_base, 1)
  else:
    strides = (resolution_schedule.scale_base,
               resolution_schedule.scale_base)

  end_points = {}

  with tf.variable_scope(scope, reuse=reuse):
    x0 = x
    end_points['rgb'] = x0

    # First pass: build a from-rgb input and blend weight for every block,
    # from highest resolution down to lowest.
    lods = []
    for block_id in range(num_blocks, 0, -1):
      with tf.variable_scope(block_name(block_id)):
        scale = resolution_schedule.scale_factor(block_id)
        lod = resolution_schedule.downscale(x0, scale)
        end_points['downscaled_rgb_{}'.format(block_id)] = lod
        if simple_arch:
          lod = tf.layers.conv2d(
              lod,
              num_filters_fn(block_id),
              kernel_size=1,
              padding='SAME',
              name='from_rgb',
              kernel_initializer=he_init)
          lod = tf.nn.relu(lod)
        else:
          lod = _from_rgb(lod, block_id)
        # alpha_i is used to replace lod_select.
        alpha = _discriminator_alpha(block_id, progress)
        end_points['alpha_{}'.format(block_id)] = alpha
        lods.append((lod, alpha))

    # Second pass: run conv blocks top-down, blending in each lower-resolution
    # from-rgb branch weighted by its alpha.
    lods_iter = iter(lods)
    x, _ = next(lods_iter)
    for block_id in range(num_blocks, 1, -1):
      with tf.variable_scope(block_name(block_id)):
        if simple_arch:
          x = tf.layers.conv2d(
              x,
              num_filters_fn(block_id-1),
              strides=strides,
              kernel_size=kernel_size,
              padding='SAME',
              name='conv',
              kernel_initializer=he_init)
          x = tf.nn.relu(x)
        else:
          x = _conv2d('conv0', x, kernel_size, num_filters_fn(block_id))
          x = _conv2d('conv1', x, kernel_size, num_filters_fn(block_id - 1))
          x = resolution_schedule.downscale(x, resolution_schedule.scale_base)
        lod, alpha = next(lods_iter)
        x = alpha * lod + (1.0 - alpha) * x

    # Final block: minibatch-stddev feature, last conv, and logits head.
    with tf.variable_scope(block_name(1)):
      x = layers.scalar_concat(x, layers.minibatch_mean_stddev(x))
      if simple_arch:
        x = tf.reshape(x, [tf.shape(x)[0], -1])  # flatten
        x = tf.layers.dense(x, num_filters_fn(0), name='last_conv',
                            kernel_initializer=he_init)
        x = tf.reshape(x, [tf.shape(x)[0], 1, 1, num_filters_fn(0)])
        x = tf.nn.relu(x)
      else:
        x = _conv2d('conv0', x, kernel_size, num_filters_fn(1))
        x = _conv2d('conv1', x, resolution_schedule.start_resolutions,
                    num_filters_fn(0), 'VALID')
      end_points['last_conv'] = x
      if simple_arch:
        logits = tf.layers.dense(x, 1, name='logits',
                                 kernel_initializer=he_init)
      else:
        logits = layers.custom_dense(x=x, units=1, scope='logits')
      end_points['logits'] = logits

  return logits, end_points
def generator(z,
              progress,
              num_filters_fn,
              resolution_schedule,
              num_blocks=None,
              kernel_size=3,
              colors=3,
              to_rgb_activation=None,
              simple_arch=False,
              scope='progressive_gan_generator',
              reuse=None):
  """Generator network for the progressive GAN model.

  Args:
    z: A `Tensor` of latent vector. The first dimension must be batch size.
    progress: A scalar float `Tensor` of training progress.
    num_filters_fn: A function that maps `block_id` to # of filters for the
      block.
    resolution_schedule: An object of `ResolutionSchedule`.
    num_blocks: An integer of number of blocks. None means maximum number of
      blocks, i.e. `resolution.schedule.num_resolutions`. Defaults to None.
    kernel_size: An integer of convolution kernel size.
    colors: Number of output color channels. Defaults to 3.
    to_rgb_activation: Activation function applied when output rgb.
    simple_arch: Architecture variants for lower memory usage and faster speed
    scope: A string or variable scope.
    reuse: Whether to reuse `scope`. Defaults to None which means to inherit
      the reuse option of the parent scope.

  Returns:
    A `Tensor` of model output and a dictionary of model end points.
  """
  if num_blocks is None:
    num_blocks = resolution_schedule.num_resolutions
  start_h, start_w = resolution_schedule.start_resolutions
  final_h, final_w = resolution_schedule.final_resolutions

  def _conv2d(scope, x, kernel_size, filters, padding='SAME'):
    # Custom conv with pixel-norm(leaky-relu) activation, as in the
    # progressive GAN paper.
    return layers.custom_conv2d(
        x=x,
        filters=filters,
        kernel_size=kernel_size,
        padding=padding,
        activation=lambda x: layers.pixel_norm(tf.nn.leaky_relu(x)),
        he_initializer_slope=0.0,
        scope=scope)

  def _to_rgb(x):
    # 1x1 conv projecting features back to color channels.
    return layers.custom_conv2d(
        x=x,
        filters=colors,
        kernel_size=1,
        padding='SAME',
        activation=to_rgb_activation,
        scope='to_rgb')

  he_init = tf_slim.variance_scaling_initializer()

  end_points = {}

  with tf.variable_scope(scope, reuse=reuse):
    with tf.name_scope('input'):
      x = tf_slim.flatten(z)
      end_points['latent_vector'] = x

    # Block 1 turns the latent vector into a start_h x start_w feature map.
    with tf.variable_scope(block_name(1)):
      if simple_arch:
        x_shape = tf.shape(x)
        x = tf.layers.dense(x, start_h*start_w*num_filters_fn(1),
                            kernel_initializer=he_init)
        x = tf.nn.relu(x)
        x = tf.reshape(x, [x_shape[0], start_h, start_w, num_filters_fn(1)])
      else:
        x = tf.expand_dims(tf.expand_dims(x, 1), 1)
        x = layers.pixel_norm(x)
        # Pad the 1 x 1 image to 2 * (start_h - 1) x 2 * (start_w - 1)
        # with zeros for the next conv.
        x = tf.pad(x, [[0] * 2, [start_h - 1] * 2, [start_w - 1] * 2,
                       [0] * 2])
        # The output is start_h x start_w x num_filters_fn(1).
        x = _conv2d('conv0', x, (start_h, start_w), num_filters_fn(1),
                    'VALID')
        x = _conv2d('conv1', x, kernel_size, num_filters_fn(1))
      lods = [x]

    if resolution_schedule.scale_mode == 'H':
      strides = (resolution_schedule.scale_base, 1)
    else:
      strides = (resolution_schedule.scale_base,
                 resolution_schedule.scale_base)

    # Upscaling blocks: each doubles (per scale_base) the resolution and
    # records the block output for blending below.
    for block_id in range(2, num_blocks + 1):
      with tf.variable_scope(block_name(block_id)):
        if simple_arch:
          x = tf.layers.conv2d_transpose(
              x,
              num_filters_fn(block_id),
              kernel_size=kernel_size,
              strides=strides,
              padding='SAME',
              kernel_initializer=he_init)
          x = tf.nn.relu(x)
        else:
          x = resolution_schedule.upscale(x, resolution_schedule.scale_base)
          x = _conv2d('conv0', x, kernel_size, num_filters_fn(block_id))
          x = _conv2d('conv1', x, kernel_size, num_filters_fn(block_id))
        lods.append(x)

    # Convert every block output to RGB at the final resolution and blend
    # them with the per-block alpha weights.
    outputs = []
    for block_id in range(1, num_blocks + 1):
      with tf.variable_scope(block_name(block_id)):
        if simple_arch:
          lod = lods[block_id - 1]
          lod = tf.layers.conv2d(
              lod,
              colors,
              kernel_size=1,
              padding='SAME',
              name='to_rgb',
              kernel_initializer=he_init)
          lod = to_rgb_activation(lod)
        else:
          lod = _to_rgb(lods[block_id - 1])
        scale = resolution_schedule.scale_factor(block_id)
        lod = resolution_schedule.upscale(lod, scale)
        end_points['upscaled_rgb_{}'.format(block_id)] = lod

        # alpha_i is used to replace lod_select. Note sum(alpha_i) is
        # guaranteed to be 1.
        alpha = _generator_alpha(block_id, progress)
        end_points['alpha_{}'.format(block_id)] = alpha

        outputs.append(lod * alpha)

    predictions = tf.add_n(outputs)
    batch_size = int(z.shape[0])
    predictions.set_shape([batch_size, final_h, final_w, colors])
    end_points['predictions'] = predictions

  return predictions, end_points
def convolutional_alexnet_arg_scope(embed_config,
                                    trainable=True,
                                    is_training=False):
  """Defines the default arg scope.

  Args:
    embed_config: A dictionary which contains configurations for the embedding
      function.
    trainable: If the weights in the embedding function is trainable.
    is_training: If the embedding function is built for training.

  Returns:
    An `arg_scope` to use for the convolutional_alexnet models.
  """
  # Only consider the model to be in training mode if it's trainable.
  # This is vital for batch_norm since moving_mean and moving_variance
  # will get updated even if not trainable.
  is_model_training = trainable and is_training

  if get(embed_config, 'use_bn', True):
    batch_norm_scale = get(embed_config, 'bn_scale', True)
    batch_norm_decay = 1 - get(embed_config, 'bn_momentum', 3e-4)
    batch_norm_epsilon = get(embed_config, 'bn_epsilon', 1e-6)
    batch_norm_params = {
        "scale": batch_norm_scale,
        # Decay for the moving averages.
        "decay": batch_norm_decay,
        # Epsilon to prevent 0s in variance.
        "epsilon": batch_norm_epsilon,
        "trainable": trainable,
        "is_training": is_model_training,
        # Collection containing the moving mean and moving variance.
        "variables_collections": {
            "beta": None,
            "gamma": None,
            "moving_mean": ["moving_vars"],
            "moving_variance": ["moving_vars"],
        },
        'updates_collections': None,  # Ensure that updates are done within a frame
    }
    normalizer_fn = slim.batch_norm
  else:
    batch_norm_params = {}
    normalizer_fn = None

  weight_decay = get(embed_config, 'weight_decay', 5e-4)
  if trainable:
    weights_regularizer = slim.l2_regularizer(weight_decay)
  else:
    weights_regularizer = None

  init_method = get(embed_config, 'init_method', 'kaiming_normal')
  if is_model_training:
    logging.info('embedding init method -- {}'.format(init_method))
  if init_method == 'kaiming_normal':
    # The same setting as siamese-fc
    initializer = slim.variance_scaling_initializer(
        factor=2.0, mode='FAN_OUT', uniform=False)
  else:
    initializer = slim.xavier_initializer()

  with slim.arg_scope(
      [slim.conv2d],
      weights_regularizer=weights_regularizer,
      weights_initializer=initializer,
      padding='VALID',
      trainable=trainable,
      activation_fn=tf.nn.relu,
      normalizer_fn=normalizer_fn,
      normalizer_params=batch_norm_params):
    with slim.arg_scope([slim.batch_norm], **batch_norm_params):
      # NOTE(review): this inner scope forces is_training=True for batch
      # norm, overriding the is_model_training flag computed above — looks
      # intentional (always use batch statistics?) but worth confirming.
      with slim.arg_scope([slim.batch_norm], is_training=True) as arg_sc:
        return arg_sc
def run(self, inputs, trainable=True):
  """Runs model.

  Builds the FlowNetC graph: siamese conv streams over input_a/input_b,
  a cross-correlation layer, contracting convs, and a refinement
  (deconv + flow-prediction) network. Returns intermediate flow
  predictions and the final upsampled flow.
  """
  _, height, width, _ = inputs["input_a"].shape.as_list()
  with tf.variable_scope("FlowNetC"):
    with slim.arg_scope(
        [slim.conv2d, slim.conv2d_transpose],
        # Only backprop this network if trainable.
        trainable=trainable,
        # He (aka MSRA) weight initialization.
        weights_initializer=slim.variance_scaling_initializer(),
        activation_fn=leaky_relu,
        # We will do our own padding to match the original Caffe code.
        padding="VALID"):
      weights_regularizer = slim.l2_regularizer(WEIGHT_DECAY)
      with slim.arg_scope([slim.conv2d],
                          weights_regularizer=weights_regularizer):
        with slim.arg_scope([slim.conv2d], stride=2):
          # Siamese streams: the "b" stream reuses the "a" stream's weights.
          conv_a_1 = slim.conv2d(pad(inputs["input_a"], 3), 64, 7,
                                 scope="conv1")
          conv_a_2 = slim.conv2d(pad(conv_a_1, 2), 128, 5, scope="conv2")
          conv_a_3 = slim.conv2d(pad(conv_a_2, 2), 256, 5, scope="conv3")

          conv_b_1 = slim.conv2d(pad(inputs["input_b"], 3), 64, 7,
                                 scope="conv1", reuse=True)
          conv_b_2 = slim.conv2d(pad(conv_b_1, 2), 128, 5, scope="conv2",
                                 reuse=True)
          conv_b_3 = slim.conv2d(pad(conv_b_2, 2), 256, 5, scope="conv3",
                                 reuse=True)

        # Compute cross correlation with leaky relu activation.
        cc = correlation(conv_a_3, conv_b_3, 1, 20, 1, 2, 20)
        cc_relu = leaky_relu(cc)

        # Combine cross correlation results with convolution of feature map A.
        net_a_conv = slim.conv2d(conv_a_3, 32, 1, scope="conv_redir")
        # Concatenate along the channels axis.
        net = tf.concat([net_a_conv, cc_relu], axis=3)

        conv3_1 = slim.conv2d(pad(net), 256, 3, scope="conv3_1")
        with slim.arg_scope([slim.conv2d], num_outputs=512, kernel_size=3):
          conv4 = slim.conv2d(pad(conv3_1), stride=2, scope="conv4")
          conv4_1 = slim.conv2d(pad(conv4), scope="conv4_1")
          conv5 = slim.conv2d(pad(conv4_1), stride=2, scope="conv5")
          conv5_1 = slim.conv2d(pad(conv5), scope="conv5_1")
        conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope="conv6")
        conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope="conv6_1")

        # START: Refinement Network.
        with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
          predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
                                      scope="predict_flow6",
                                      activation_fn=None)
          deconv5 = antipad(
              slim.conv2d_transpose(conv6_1, 512, 4, stride=2,
                                    scope="deconv5"))
          upsample_flow6to5 = antipad(
              slim.conv2d_transpose(predict_flow6, 2, 4, stride=2,
                                    scope="upsample_flow6to5",
                                    activation_fn=None))
          concat5 = tf.concat(
              [conv5_1, deconv5, upsample_flow6to5], axis=3)

          predict_flow5 = slim.conv2d(pad(concat5), 2, 3,
                                      scope="predict_flow5",
                                      activation_fn=None)
          deconv4 = antipad(
              slim.conv2d_transpose(concat5, 256, 4, stride=2,
                                    scope="deconv4"))
          upsample_flow5to4 = antipad(
              slim.conv2d_transpose(predict_flow5, 2, 4, stride=2,
                                    scope="upsample_flow5to4",
                                    activation_fn=None))
          concat4 = tf.concat(
              [conv4_1, deconv4, upsample_flow5to4], axis=3)

          predict_flow4 = slim.conv2d(pad(concat4), 2, 3,
                                      scope="predict_flow4",
                                      activation_fn=None)
          deconv3 = antipad(
              slim.conv2d_transpose(concat4, 128, 4, stride=2,
                                    scope="deconv3"))
          upsample_flow4to3 = antipad(
              slim.conv2d_transpose(predict_flow4, 2, 4, stride=2,
                                    scope="upsample_flow4to3",
                                    activation_fn=None))
          concat3 = tf.concat(
              [conv3_1, deconv3, upsample_flow4to3], axis=3)

          predict_flow3 = slim.conv2d(pad(concat3), 2, 3,
                                      scope="predict_flow3",
                                      activation_fn=None)
          deconv2 = antipad(
              slim.conv2d_transpose(concat3, 64, 4, stride=2,
                                    scope="deconv2"))
          upsample_flow3to2 = antipad(
              slim.conv2d_transpose(predict_flow3, 2, 4, stride=2,
                                    scope="upsample_flow3to2",
                                    activation_fn=None))
          concat2 = tf.concat(
              [conv_a_2, deconv2, upsample_flow3to2], axis=3)

          predict_flow2 = slim.conv2d(pad(concat2), 2, 3,
                                      scope="predict_flow2",
                                      activation_fn=None)
        # END: Refinement Network.

        # Scale factor 20.0 — presumably undoes the flow normalization used
        # during training (TODO confirm against the training pipeline).
        flow = predict_flow2 * 20.0
        flow = tf.image.resize_bilinear(flow,
                                        tf.stack([height, width]),
                                        align_corners=True)

        return {
            "predict_flow6": predict_flow6,
            "predict_flow5": predict_flow5,
            "predict_flow4": predict_flow4,
            "predict_flow3": predict_flow3,
            "predict_flow2": predict_flow2,
            "flow": flow,
        }
def run(self, inputs, trainable=True):
  """Runs model.

  Builds the FlowNet2 fusion graph: runs the frozen FlowNetCSS and
  FlowNetSD sub-networks, derives warp/brightness-error features from
  their flows, and fuses everything through a small encoder/decoder to
  produce the final flow.
  """
  _, height, width, _ = inputs["input_a"].shape.as_list()
  with tf.variable_scope("FlowNet2"):
    # Forward pass through FlowNetCSS and FlowNetSD with weights frozen.
    net_css_predictions = self.net_css.run(inputs, trainable=False)
    net_sd_predictions = self.net_sd.run(inputs, trainable=False)

    sd_flow_norm = channel_norm(net_sd_predictions["flow"])
    css_flow_norm = channel_norm(net_css_predictions["flow"])

    # Brightness error: difference between input_a and input_b warped by
    # each candidate flow.
    flow_warp_sd = flow_warp(inputs["input_b"], net_sd_predictions["flow"])
    img_diff_sd = inputs["input_a"] - flow_warp_sd
    img_diff_sd_norm = channel_norm(img_diff_sd)

    flow_warp_css = flow_warp(inputs["input_b"], net_css_predictions["flow"])
    img_diff_css = inputs["input_a"] - flow_warp_css
    img_diff_css_norm = channel_norm(img_diff_css)

    input_to_fusion = tf.concat([
        inputs["input_a"], net_sd_predictions["flow"],
        net_css_predictions["flow"], sd_flow_norm, css_flow_norm,
        img_diff_sd_norm, img_diff_css_norm
    ], axis=3)

    # Fusion Network.
    with slim.arg_scope(
        [slim.conv2d, slim.conv2d_transpose],
        # Only backprop this network if trainable.
        trainable=trainable,
        # He (aka MSRA) weight initialization.
        weights_initializer=slim.variance_scaling_initializer(),
        activation_fn=leaky_relu,
        # We will do our own padding to match the original Caffe code.
        padding="VALID"):
      weights_regularizer = slim.l2_regularizer(WEIGHT_DECAY)
      with slim.arg_scope([slim.conv2d],
                          weights_regularizer=weights_regularizer):
        # Contracting part.
        fuse_conv0 = slim.conv2d(pad(input_to_fusion), 64, 3,
                                 scope="fuse_conv0")
        fuse_conv1 = slim.conv2d(pad(fuse_conv0), 64, 3, stride=2,
                                 scope="fuse_conv1")
        fuse_conv1_1 = slim.conv2d(pad(fuse_conv1), 128, 3,
                                   scope="fuse_conv1_1")
        fuse_conv2 = slim.conv2d(pad(fuse_conv1_1), 128, 3, stride=2,
                                 scope="fuse_conv2")
        fuse_conv2_1 = slim.conv2d(pad(fuse_conv2), 128, 3,
                                   scope="fuse_conv2_1")

        predict_flow2 = slim.conv2d(pad(fuse_conv2_1), 2, 3,
                                    scope="predict_flow2",
                                    activation_fn=None)
        # Expanding part: deconv features + upsampled flow at each level.
        fuse_deconv1 = antipad(
            slim.conv2d_transpose(fuse_conv2_1, 32, 4, stride=2,
                                  scope="fuse_deconv1"))
        fuse_upsample_flow2to1 = antipad(
            slim.conv2d_transpose(predict_flow2, 2, 4, stride=2,
                                  scope="fuse_upsample_flow2to1",
                                  activation_fn=None))
        concat1 = tf.concat(
            [fuse_conv1_1, fuse_deconv1, fuse_upsample_flow2to1], axis=3)
        fuse_interconv1 = slim.conv2d(pad(concat1), 32, 3,
                                      activation_fn=None,
                                      scope="fuse_interconv1")

        predict_flow1 = slim.conv2d(pad(fuse_interconv1), 2, 3,
                                    scope="predict_flow1",
                                    activation_fn=None)
        fuse_deconv0 = antipad(
            slim.conv2d_transpose(concat1, 16, 4, stride=2,
                                  scope="fuse_deconv0"))
        fuse_upsample_flow1to0 = antipad(
            slim.conv2d_transpose(predict_flow1, 2, 4, stride=2,
                                  scope="fuse_upsample_flow1to0",
                                  activation_fn=None))
        concat0 = tf.concat(
            [fuse_conv0, fuse_deconv0, fuse_upsample_flow1to0], axis=3)
        fuse_interconv0 = slim.conv2d(pad(concat0), 16, 3,
                                      activation_fn=None,
                                      scope="fuse_interconv0")

        predict_flow0 = slim.conv2d(pad(fuse_interconv0), 2, 3,
                                    activation_fn=None,
                                    scope="predict_flow0")

        flow = tf.image.resize_bilinear(predict_flow0,
                                        tf.stack([height, width]),
                                        align_corners=True)

        return {
            "predict_flow0": predict_flow0,
            "flow": flow,
        }
def run(self, inputs, trainable=True):
  """Runs model.

  Builds the FlowNetSD graph: a contracting conv encoder over the
  concatenated image pair, followed by a refinement (deconv +
  flow-prediction) decoder. Returns intermediate flow predictions and
  the final upsampled flow.
  """
  _, height, width, _ = inputs["input_a"].shape.as_list()
  with tf.variable_scope("FlowNetSD"):
    concat_inputs = tf.concat([inputs["input_a"], inputs["input_b"]], axis=3)
    with slim.arg_scope(
        [slim.conv2d, slim.conv2d_transpose],
        # Only backprop this network if trainable.
        trainable=trainable,
        # He (aka MSRA) weight initialization.
        weights_initializer=slim.variance_scaling_initializer(),
        activation_fn=leaky_relu,
        # We will do our own padding to match the original Caffe code.
        padding="VALID"):
      weights_regularizer = slim.l2_regularizer(WEIGHT_DECAY)
      with slim.arg_scope([slim.conv2d],
                          weights_regularizer=weights_regularizer):
        # Contracting encoder.
        conv0 = slim.conv2d(pad(concat_inputs), 64, 3, scope="conv0")
        conv1 = slim.conv2d(pad(conv0), 64, 3, stride=2, scope="conv1")
        conv1_1 = slim.conv2d(pad(conv1), 128, 3, scope="conv1_1")
        conv2 = slim.conv2d(pad(conv1_1), 128, 3, stride=2, scope="conv2")
        conv2_1 = slim.conv2d(pad(conv2), 128, 3, scope="conv2_1")
        conv3 = slim.conv2d(pad(conv2_1), 256, 3, stride=2, scope="conv3")
        conv3_1 = slim.conv2d(pad(conv3), 256, 3, scope="conv3_1")
        conv4 = slim.conv2d(pad(conv3_1), 512, 3, stride=2, scope="conv4")
        conv4_1 = slim.conv2d(pad(conv4), 512, 3, scope="conv4_1")
        conv5 = slim.conv2d(pad(conv4_1), 512, 3, stride=2, scope="conv5")
        conv5_1 = slim.conv2d(pad(conv5), 512, 3, scope="conv5_1")
        conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2, scope="conv6")
        conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope="conv6_1")

        # START: Refinement Network.
        with slim.arg_scope([slim.conv2d_transpose], biases_initializer=None):
          predict_flow6 = slim.conv2d(pad(conv6_1), 2, 3,
                                      scope="predict_flow6",
                                      activation_fn=None)
          deconv5 = antipad(
              slim.conv2d_transpose(conv6_1, 512, 4, stride=2,
                                    scope="deconv5"))
          upsample_flow6to5 = antipad(
              slim.conv2d_transpose(predict_flow6, 2, 4, stride=2,
                                    scope="upsample_flow6to5",
                                    activation_fn=None))
          concat5 = tf.concat(
              [conv5_1, deconv5, upsample_flow6to5], axis=3)
          interconv5 = slim.conv2d(pad(concat5), 512, 3,
                                   activation_fn=None, scope="interconv5")

          predict_flow5 = slim.conv2d(pad(interconv5), 2, 3,
                                      scope="predict_flow5",
                                      activation_fn=None)
          deconv4 = antipad(
              slim.conv2d_transpose(concat5, 256, 4, stride=2,
                                    scope="deconv4"))
          upsample_flow5to4 = antipad(
              slim.conv2d_transpose(predict_flow5, 2, 4, stride=2,
                                    scope="upsample_flow5to4",
                                    activation_fn=None))
          concat4 = tf.concat(
              [conv4_1, deconv4, upsample_flow5to4], axis=3)
          interconv4 = slim.conv2d(pad(concat4), 256, 3,
                                   activation_fn=None, scope="interconv4")

          predict_flow4 = slim.conv2d(pad(interconv4), 2, 3,
                                      scope="predict_flow4",
                                      activation_fn=None)
          deconv3 = antipad(
              slim.conv2d_transpose(concat4, 128, 4, stride=2,
                                    scope="deconv3"))
          upsample_flow4to3 = antipad(
              slim.conv2d_transpose(predict_flow4, 2, 4, stride=2,
                                    scope="upsample_flow4to3",
                                    activation_fn=None))
          concat3 = tf.concat(
              [conv3_1, deconv3, upsample_flow4to3], axis=3)
          interconv3 = slim.conv2d(pad(concat3), 128, 3,
                                   activation_fn=None, scope="interconv3")

          predict_flow3 = slim.conv2d(pad(interconv3), 2, 3,
                                      scope="predict_flow3",
                                      activation_fn=None)
          deconv2 = antipad(
              slim.conv2d_transpose(concat3, 64, 4, stride=2,
                                    scope="deconv2"))
          upsample_flow3to2 = antipad(
              slim.conv2d_transpose(predict_flow3, 2, 4, stride=2,
                                    scope="upsample_flow3to2",
                                    activation_fn=None))
          concat2 = tf.concat(
              [conv2, deconv2, upsample_flow3to2], axis=3)
          interconv2 = slim.conv2d(pad(concat2), 64, 3,
                                   activation_fn=None, scope="interconv2")

          predict_flow2 = slim.conv2d(pad(interconv2), 2, 3,
                                      scope="predict_flow2",
                                      activation_fn=None)
        # END: Refinement Network.

        # Scale factor 0.05 — presumably undoes the flow normalization used
        # during training (TODO confirm against the training pipeline).
        flow = predict_flow2 * 0.05
        flow = tf.image.resize_bilinear(flow,
                                        tf.stack([height, width]),
                                        align_corners=True)

        return {
            "predict_flow6": predict_flow6,
            "predict_flow5": predict_flow5,
            "predict_flow4": predict_flow4,
            "predict_flow3": predict_flow3,
            "predict_flow2": predict_flow2,
            "flow": flow,
        }