def flownet2_fusion(self, x):
    """
    Architecture in Table 4 of FlowNet 2.0.

    Args:
        x: NCHW tensor, where C=11 is the concatenation of 7 items of
            [3, 2, 2, 1, 1, 1, 1] channels.

    Returns:
        Output of the 'predict_flow0' layer, passed through an identity op
        named 'flow2'.
    """
    def lrelu(t):
        return tf.nn.leaky_relu(t, 0.1)

    def conv3x3(inputs, filters, name, **overrides):
        # Every convolution in this network sees its input padded by 1.
        return tf.layers.conv2d(pad(inputs, 1), filters, name=name, **overrides)

    def linear_conv(inputs, filters, name):
        # Stride-1 convolution without non-linearity.
        return conv3x3(inputs, filters, name, strides=1, activation=tf.identity)

    conv_scope = argscope(
        [tf.layers.conv2d], activation=lrelu, padding='valid', strides=2,
        kernel_size=3, data_format='channels_first')
    deconv_scope = argscope(
        [tf.layers.conv2d_transpose], padding='same', activation=tf.identity,
        data_format='channels_first', strides=2, kernel_size=4)

    with conv_scope, deconv_scope:
        # Encoder.
        conv0 = conv3x3(x, 64, 'conv0', strides=1)
        down1 = conv3x3(conv0, 64, 'conv1')
        conv1 = conv3x3(down1, 128, 'conv1_1', strides=1)
        down2 = conv3x3(conv1, 128, 'conv2')
        conv2 = conv3x3(down2, 128, 'conv2_1', strides=1)

        # Decoder: each level upsamples the previous flow and features,
        # concatenates them with the encoder skip and predicts a new flow.
        features = conv2
        flow = linear_conv(conv2, 2, 'predict_flow2')
        for level, skip, width in ((1, conv1, 32), (0, conv0, 16)):
            flow_up = tf.layers.conv2d_transpose(
                flow, 2, name='upsampled_flow%d_to_%d' % (level + 1, level))
            up = tf.layers.conv2d_transpose(
                features, width, name='deconv%d' % level, activation=lrelu)
            features = tf.concat([skip, up, flow_up], axis=1, name='concat%d' % level)
            inter = linear_conv(features, width, 'inter_conv%d' % level)
            flow = linear_conv(inter, 2, 'predict_flow%d' % level)

        return tf.identity(flow, name='flow2')
def flownet2_sd(self, x):
    """
    Architecture in Table 3 of FlowNet 2.0.

    Args:
        x: concatenation of two inputs, of shape [1, 2xC, H, W]

    Returns:
        The 'predict_flow2' output divided by DISP_SCALE and passed through
        `resize(..., mode='nearest')`.
    """
    def lrelu(t):
        return tf.nn.leaky_relu(t, 0.1)

    def conv3x3(inputs, filters, name, **overrides):
        # Every convolution in this network sees its input padded by 1.
        return tf.layers.conv2d(pad(inputs, 1), filters, name=name, **overrides)

    def linear_conv(inputs, filters, name):
        # Stride-1 convolution without non-linearity.
        return conv3x3(inputs, filters, name, strides=1, activation=tf.identity)

    conv_scope = argscope(
        [tf.layers.conv2d], activation=lrelu, padding='valid', strides=2,
        kernel_size=3, data_format='channels_first')
    deconv_scope = argscope(
        [tf.layers.conv2d_transpose], padding='same', activation=tf.identity,
        data_format='channels_first', strides=2, kernel_size=4)

    # (index, filters of 'conv<i>', filters of 'conv<i>_1')
    encoder_plan = (
        (1, 64, 128),
        (2, 128, 128),
        (3, 256, 256),
        (4, 512, 512),
        (5, 512, 512),
        (6, 1024, 1024),
    )
    # (level, filters of 'deconv<l>' and 'inter_conv<l>')
    decoder_plan = ((5, 512), (4, 256), (3, 128), (2, 64))

    with conv_scope, deconv_scope:
        x = conv3x3(x, 64, 'conv0', strides=1)

        # Encoder: a strided conv followed by a stride-1 conv per index; the
        # stride-1 output is kept as the skip connection for that index.
        skips = {}
        for idx, down_width, width in encoder_plan:
            x = conv3x3(x, down_width, 'conv%d' % idx)
            x = conv3x3(x, width, 'conv%d_1' % idx, strides=1)
            skips[idx] = x

        # Decoder with an extra 'inter_conv' before each flow prediction.
        features = skips[6]
        flow = linear_conv(features, 2, 'predict_flow6')
        for level, width in decoder_plan:
            flow_up = tf.layers.conv2d_transpose(
                flow, 2, name='upsampled_flow%d_to_%d' % (level + 1, level))
            up = tf.layers.conv2d_transpose(
                features, width, name='deconv%d' % level, activation=lrelu)
            features = tf.concat(
                [skips[level], up, flow_up], axis=1, name='concat%d' % level)
            inter = linear_conv(features, width, 'inter_conv%d' % level)
            flow = linear_conv(inter, 2, 'predict_flow%d' % level)

        return resize(flow / DISP_SCALE, mode='nearest')
def graph_structure(self, x, standalone=True):
    """
    Architecture of FlowNetSimple in Figure 2 of FlowNet 1.0.

    Args:
        x: 2CHW if standalone==True, else NCHW where C=12 is a concatenation
            of 5 tensors of [3, 3, 3, 2, 1] channels.
        standalone: If True, this model is used to predict flow from two inputs.
            If False, this model is used as part of the FlowNet2.

    Returns:
        Output of the 'predict_flow2' layer, passed through an identity op
        named 'flow2'.
    """
    if standalone:
        # Move the two inputs from the batch axis onto the channel axis.
        halves = tf.split(x, 2, axis=0)
        x = tf.concat(halves, axis=1)

    def lrelu(t):
        return tf.nn.leaky_relu(t, 0.1)

    def padded_conv(inputs, margin, filters, name, **overrides):
        return tf.layers.conv2d(pad(inputs, margin), filters, name=name, **overrides)

    def predict(inputs, name):
        # 2-channel stride-1 prediction layer without non-linearity.
        return padded_conv(inputs, 1, 2, name, strides=1, activation=tf.identity)

    conv_scope = argscope(
        [tf.layers.conv2d], activation=lrelu, padding='valid', strides=2,
        kernel_size=3, data_format='channels_first')
    deconv_scope = argscope(
        [tf.layers.conv2d_transpose], padding='same', activation=tf.identity,
        data_format='channels_first', strides=2, kernel_size=4)

    with conv_scope, deconv_scope:
        # Encoder; `skips[i]` is the activation reused by decoder level i.
        x = padded_conv(x, 3, 64, 'conv1', kernel_size=7)
        skips = {2: padded_conv(x, 2, 128, 'conv2', kernel_size=5)}
        x = padded_conv(skips[2], 2, 256, 'conv3', kernel_size=5)
        skips[3] = padded_conv(x, 1, 256, 'conv3_1', strides=1)
        for idx, width in ((4, 512), (5, 512), (6, 1024)):
            x = padded_conv(skips[idx - 1], 1, width, 'conv%d' % idx)
            skips[idx] = padded_conv(x, 1, width, 'conv%d_1' % idx, strides=1)

        # Decoder.
        features = skips[6]
        flow = predict(features, 'predict_flow6')
        for level, width in ((5, 512), (4, 256), (3, 128), (2, 64)):
            flow_up = tf.layers.conv2d_transpose(
                flow, 2, name='upsampled_flow%d_to_%d' % (level + 1, level),
                use_bias=False)
            up = tf.layers.conv2d_transpose(
                features, width, name='deconv%d' % level, activation=lrelu)
            features = tf.concat(
                [skips[level], up, flow_up], axis=1, name='concat%d' % level)
            flow = predict(features, 'predict_flow%d' % level)

        return tf.identity(flow, name='flow2')
def __init__(self, input, model, d_period=1, g_period=1):
    """
    Set up the input, build the graph once and create the two minimize ops.

    Args:
        input: input source; its `setup()` result is registered as callback(s)
            and its tensors are fed to the tower function.
        model: provides `get_inputs_desc`, `build_graph`, `get_optimizer`,
            `d_loss`/`g_loss` and `d_vars`/`g_vars`.
        d_period(int): period of each d_opt run
        g_period(int): period of each g_opt run
    """
    super(SeparateGANTrainer, self).__init__()
    self._d_period = int(d_period)
    self._g_period = int(g_period)
    # At least one of the two ops has to run every step.
    assert min(d_period, g_period) == 1

    # Setup input
    input_callbacks = input.setup(model.get_inputs_desc())
    self.register_callback(input_callbacks)

    # Build the graph
    self.tower_func = TowerFuncWrapper(model.build_graph, model.get_inputs_desc())
    with TowerContext('', is_training=True), argscope(BatchNorm, internal_update=True):
        # should not hook the updates to both train_op, it will hurt training speed.
        self.tower_func(*input.get_input_tensors())

    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if pending_updates:
        logger.warn("Found {} ops in UPDATE_OPS collection!".format(len(pending_updates)))
        logger.warn("Using SeparateGANTrainer with UPDATE_OPS may hurt your training speed a lot!")

    optimizer = model.get_optimizer()
    with tf.name_scope('optimize'):
        self.d_min = optimizer.minimize(
            model.d_loss, var_list=model.d_vars, name='d_min')
        self.g_min = optimizer.minimize(
            model.g_loss, var_list=model.g_vars, name='g_min')
def _get_NN_prediction(self, state):
    """
    Build the policy/value network.

    Args:
        state: rank-5 tensor laid out as (Batch, H, W, Channel, History).

    Returns:
        (logits, value): outputs of the 'fc-pi' layer (unnormalized policy,
        `self.num_actions` units) and of the 'fc-v' layer (1 unit).
    """
    assert state.shape.rank == 5  # Batch, H, W, Channel, History
    # swap channel & history, to be compatible with old models
    state = tf.transpose(state, [0, 1, 2, 4, 3])
    spatial_dims = list(self.state_shape[:2])
    merged_channels = self.state_shape[2] * self.frame_history
    image = tf.reshape(state, [-1] + spatial_dims + [merged_channels])
    image = tf.cast(image, tf.float32) / 255.0

    # (filters, kernel size, followed by 2x max-pooling?)
    conv_plan = ((32, 5, True), (32, 5, True), (64, 4, True), (64, 3, False))
    net = image
    with argscope(Conv2D, activation=tf.nn.relu):
        for idx, (filters, ksize, pooled) in enumerate(conv_plan):
            net = Conv2D('conv%d' % idx, net, filters, ksize)
            if pooled:
                net = MaxPooling('pool%d' % idx, net, 2)

    net = FullyConnected('fc0', net, 512)
    net = PReLU('prelu', net)
    logits = FullyConnected('fc-pi', net, self.num_actions)  # unnormalized policy
    value = FullyConnected('fc-v', net, 1)
    return logits, value
def get_logits(self, image, nfeatures):
    """
    From tensorpack_examples/imagenet-resnet.py

    Runs `resnet_backbone` on `image` with NHWC layers, using the pre-activation
    group function when `self.mode == 'preact'` and the regular one otherwise.
    """
    if self.mode == 'preact':
        group_func = preresnet_group
    else:
        group_func = resnet_group

    nhwc_layers = [tp.Conv2D, tp.MaxPooling, tp.GlobalAvgPooling, tp.BatchNorm]
    with tp.argscope(nhwc_layers, data_format="NHWC"):
        return resnet_backbone(
            image, self.num_blocks, group_func, self.block_func, nfeatures)
def _get_DQN_prediction(self, image):
    """
    Build the Q-value network.

    Args:
        image: input with values in [0, 255].

    Returns:
        Predicted Q values, as a tensor named 'Qvalue'.
    """
    # FIXME norm not needed
    # normalize image values to [0, 1]
    image = image / 255.0

    # (filters, cubic kernel size, followed by 2x max-pooling?)
    # the last conv has no pooling after it
    conv_plan = ((32, 5, True), (32, 5, True), (64, 4, True), (64, 3, False))

    with argscope(Conv3D, nl=PReLU.symbolic_function, use_bias=True):
        # core layers of the network
        # TODO: use observation dimensions?
        conv = LinearWrap(image)
        for idx, (filters, ksize, pooled) in enumerate(conv_plan):
            conv = conv.Conv3D('conv%d' % idx, out_channel=filters,
                               kernel_shape=[ksize] * 3, stride=[1, 1, 1])
            if pooled:
                conv = conv.MaxPooling3D('pool%d' % idx, 2)

    def fc_stack(suffix):
        # 512 -> 256 -> 128 fully-connected layers, each followed by leaky ReLU,
        # named 'fc<i><suffix>'.
        head = conv
        for idx, units in enumerate((512, 256, 128)):
            head = head.FullyConnected('fc%d%s' % (idx, suffix), units)
            head = head.tf.nn.leaky_relu(alpha=0.01)
        return head()

    if 'Dueling' in self.method:
        # Dueling DQN or Double Dueling
        # state value function
        state_feat = fc_stack('V')
        V = FullyConnected('fctV', state_feat, 1, nl=tf.identity)
        # advantage value function
        adv_feat = fc_stack('A')
        As = FullyConnected('fctA', adv_feat, self.num_actions, nl=tf.identity)
        Q = tf.add(As, V - tf.reduce_mean(As, 1, keepdims=True))
    else:
        q_feat = fc_stack('')
        Q = FullyConnected('fct', q_feat, self.num_actions, nl=tf.identity)

    return tf.identity(Q, name='Qvalue')
def build_graph(self, image: Any, label: Any) -> Any:
    """
    Build the model from the input variables and return the total cost.

    Besides the returned 'loss' tensor, this creates the named tensors
    'cross_entropy_loss', 'correct', 'accuracy', 'train_error' and
    'regularize_loss', and registers moving summaries for the training
    error and the accuracy.
    """
    # In tensorflow, inputs to convolution function are assumed to be NHWC.
    # Add a single channel here.
    image = tf.expand_dims(image, 3)

    # Center the pixel values at zero.
    image = image * 2 - 1

    # `argscope` sets the default options for all Conv2D layers created under
    # it: 3x3 kernels, ReLU, and the filter count taken from the hyperparameters.
    conv_defaults = dict(
        kernel_size=3,
        activation=tf.nn.relu,
        filters=self.hparams["n_filters"],
    )
    with tp.argscope(tp.Conv2D, **conv_defaults):
        net = tp.Conv2D("conv0", image)
        net = tp.MaxPooling("pool0", net, 2)
        net = tp.Conv2D("conv1", net)
        net = tp.MaxPooling("pool1", net, 2)
        net = tp.FullyConnected("fc0", net, 512, activation=tf.nn.relu)
        net = tp.Dropout("dropout", net, rate=0.5)
        logits = tp.FullyConnected("fc1", net, 10, activation=tf.identity)

    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    # the average cross-entropy loss
    cost = tf.reduce_mean(per_example_loss, name="cross_entropy_loss")

    hits = tf.nn.in_top_k(predictions=logits, targets=label, k=1)
    correct = tf.cast(hits, tf.float32, name="correct")
    accuracy = tf.reduce_mean(correct, name="accuracy")
    train_error = tf.reduce_mean(1 - correct, name="train_error")
    tp.summary.add_moving_summary(train_error, accuracy)

    # Use a regex to find parameters to apply weight decay.
    # Here we apply a weight decay on all W (weight matrix) of all fc layers.
    wd_cost = tf.multiply(
        self.hparams["weight_cost"],
        tp.regularize_cost("fc.*/W", tf.nn.l2_loss),
        name="regularize_loss",
    )
    return tf.add_n([wd_cost, cost], name="loss")
def encode(self, x):
    """
    Encode `x` into the two halves of a '2 * latent_dim'-wide projection.

    Two stride-2 3x3 ReLU convolutions are followed by a flatten and a
    fully-connected layer of `2 * self._latent_dim` units, which is split in
    two along axis 1.

    Returns:
        (mean, logvar): the first and second half of the projection.
    """
    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE):
        with argscope(Conv2D, activation=tf.nn.relu):
            feat = x
            for layer_name, filters in (('conv3x3_1', 32), ('conv3x3_2', 64)):
                feat = Conv2D(layer_name, feat, filters, 3,
                              strides=(2, 2), padding='valid')
            flat = tf.layers.Flatten()(feat)
            stats = FullyConnected('fc', flat, 2 * self._latent_dim)
            mean, logvar = tf.split(stats, num_or_size_splits=2, axis=1)
    return mean, logvar
def decode(self, z, apply_sigmoid=False):
    """
    Decode a latent code `z` through an FC layer and three transposed convs.

    The FC output is reshaped to [-1, H/4, W/4, 32] (H, W taken from
    `self._image_shape`), then passed through two stride-2 and one stride-1
    transposed convolution; the last one has `self._image_shape[2]` filters.

    Args:
        z: latent code fed to the 'fc' layer.
        apply_sigmoid: if True, a sigmoid is applied to the final output.

    Returns:
        The output of the last transposed convolution, optionally sigmoided.
    """
    seed_h = int(self._image_shape[0] / 4)
    seed_w = int(self._image_shape[1] / 4)
    seed_shape = [-1, seed_h, seed_w, 32]
    seed_units = seed_h * seed_w * 32

    # (layer name, filters, stride); all kernels are 3x3
    deconv_plan = (
        ('convT3x3_1', 64, 2),
        ('convT3x3_2', 32, 2),
        ('convT1x1_1', self._image_shape[2], 1),
    )

    with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
        # NOTE(review): the argscope lists Conv2D and FullyConnected only;
        # Conv2DTranspose is not listed, so the ReLU default presumably does
        # not reach the transposed convolutions below. Confirm this is intended.
        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
            out = FullyConnected('fc', z, seed_units)
            out = tf.reshape(out, seed_shape)
            for layer_name, filters, stride in deconv_plan:
                out = Conv2DTranspose(layer_name, out, filters, 3,
                                      strides=(stride, stride))
    if apply_sigmoid:
        out = tf.sigmoid(out)
    return out
def build_graph(self, x, image_target):
    """
    Build the multi-stage synthesis graph and its losses.

    Args:
        x: input of the first stage; it is passed through `self._act_input`
            inside each stage and once more at the end.
        image_target: target image; divided by 255 before use.

    Sets `self.image_outputs`, `self.loss_per_stage`,
    `self.loss_per_layer_output` and `self.loss`.
    """
    with tf.name_scope("preprocess"):
        image_target = image_target / 255.

    def viz(name, images):
        # Concatenate the images along axis 2 and convert them to uint8.
        with tf.name_scope(name):
            canvas = tf.concat(images, axis=2)
            if self._act_input == tf.tanh:
                canvas = (canvas + 1.0) * 127.5
            else:
                canvas = canvas * 255
            canvas = tf.round(tf.clip_by_value(canvas, 0, 255))
            canvas = tf.cast(canvas, tf.uint8, name="viz")
        return canvas

    # calculate gram_target
    _, gram_target = self._build_extractor(image_target, name="ext_target")

    # run the stages, feeding each stage's output into the next one
    self.image_outputs = []
    self.loss_per_stage = []
    x_output = x
    coef_source = SynTexModelDesc.DEFAULT_COEFS
    with tf.variable_scope("syn"):
        # use data stats in both train and test phases
        with argscope(BatchNorm, training=True):
            for s in range(self._n_stage):
                # stage s uses the first (s+1) coefs
                stage_keys = list(coef_source.keys())[:s + 1]
                coefs = OrderedDict((key, coef_source[key]) for key in stage_keys)
                x_image, loss_input, _, x_output = self.build_stage(
                    x_output, gram_target, coefs, name="stage%d" % s)
                self.image_outputs.append(x_image)
                self.loss_per_stage.append(
                    tf.reduce_mean(loss_input, name="loss%d" % s))
    self.collect_variables("syn")

    # loss of the final output
    image_output = self._act_input(x_output, name="output")
    loss_output, loss_per_layer_output, _ = self._build_loss(
        image_output, gram_target, calc_grad=False)
    self.image_outputs.append(image_output)
    self.loss_per_stage.append(tf.reduce_mean(loss_output, name="loss_output"))

    self.loss_per_layer_output = OrderedDict()
    with tf.name_scope("loss_per_layer_output"):
        for layer, layer_loss in loss_per_layer_output.items():
            self.loss_per_layer_output[layer] = tf.reduce_mean(layer_loss, name=layer)

    # average losses from all stages: the latest loss gets weight 1 and each
    # earlier one is scaled by a further factor of `self._loss_scale`
    weights = [1.]
    while len(weights) < len(self.loss_per_stage):
        weights.append(weights[-1] * self._loss_scale)
    # skip the first loss as it is computed from noise
    later_losses = reversed(self.loss_per_stage[1:])
    self.loss = tf.add_n(
        [weight * loss for weight, loss in zip(weights, later_losses)],
        name="loss")

    # summary
    stages_target = viz("stages-target", self.image_outputs + [image_target])
    ctx = get_current_tower_context()
    if ctx is not None and ctx.is_main_training_tower:
        tf.summary.image("stages-target", stages_target, max_outputs=10,
                         collections=["image_summaries"])
    add_moving_summary(self.loss, *self.loss_per_stage,
                       *self.loss_per_layer_output.values())
def build_stage(self, x, gram_target, coefs, name="stage"):
    """
    Build one synthesis stage that adds a learned update to `x`.

    The per-layer gradients returned by `build_stage_preparation` are visited
    from the last feature layer to the first; each one is convolved, merged
    with the running `delta` from the previously visited layers, upsampled
    (except at 'conv1_1'), optionally gated, and passed through residual
    blocks. A final 1x1 convolution turns `delta` into a 3-channel update.

    Args:
        x: stage input, before `self._act_input` is applied.
        gram_target: forwarded to `build_stage_preparation`.
        coefs: forwarded to `build_stage_preparation`.
        name: variable scope of the stage.

    Returns:
        (x_image, loss_input, loss_per_layer, x) where the last item is the
        input plus the predicted update.
    """
    acti = ActFactory(self._norm_type, self._alpha)
    norm = ActFactory(self._norm_type, None)
    nonlinear = ActFactory("none", self._alpha)
    if self._deconv:
        upsample = upsampling_deconv
    else:
        upsample = upsampling_nnconv
    gain = 1 / np.sqrt(2)
    is_deepest = True

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # extract features and gradients
        x_image = self._act_input(x, name="input_" + name)
        feat, loss_input, loss_per_layer, grad_per_layer = \
            self.build_stage_preparation(x_image, gram_target, coefs)

        #            none +
        # grad[4] conv[4] -> res[4] -> up[4] +
        # grad[3] conv[3] -> res[3] -> up[3] +
        # grad[2] conv[2] -> res[2] -> up[2] +
        #                ...                ...
        #                             up[1] +
        # grad[0] conv[0] -> res[0] -> output
        with argscope([Conv2D, Conv2DTranspose], activation=acti, use_bias=False):
            for layer in reversed(feat):
                if layer not in grad_per_layer:
                    continue
                layer_grad = grad_per_layer[layer]
                chan = layer_grad.get_shape().as_list()[-1]
                with tf.variable_scope(layer):
                    # compute pseudo grad of current layer
                    layer_grad = pad_conv2d("grad_conv", layer_grad, chan,
                                            self._grad_ksize, self._pad_type,
                                            activation=tf.identity)
                    # merge with grad from deeper layers
                    if is_deepest:
                        delta = tf.identity(layer_grad, name="grad_merged")
                        is_deepest = False
                    else:
                        # change chan of delta
                        delta = pad_conv2d("conv_chan", delta, chan, 3,
                                           self._pad_type, activation=tf.identity)
                        delta = tf.add(layer_grad, delta, name="grad_merged") * gain
                    # upsample
                    if layer != "conv1_1":
                        # this norm is needed because conv or conv_transposed
                        # is applied in upsample
                        delta = norm(delta, "norm_before_up")
                        delta = upsample("up", delta, self._pad_type, chan=chan)
                    # delta is not activated at this point
                    if not self._pre_act:
                        delta = norm(delta, "norm_merged")
                    # add relu gate here
                    if self._gate:
                        gate = get_relu_gate(
                            "gate", feat[StylePO.GATE_SOURCE[layer]], 0.)
                        assert gate.get_shape().as_list() == delta.get_shape().as_list()
                        delta = delta * gate
                    # simulate the backpropagation to next level
                    n_block = self._n_block
                    if not self._same_block:
                        n_block = self._n_block * StylePO.N_BLOCK_BASE[layer]
                    for k in range(n_block):
                        delta = res_block("res{}".format(k), delta, chan,
                                          self._pad_type, self._norm_type,
                                          self._alpha, self._bottleneck,
                                          self._pre_act)
            final_act = acti if self._pre_act else nonlinear
            delta = final_act(delta, "acti_output")
            # output
            delta_x = pad_conv2d("conv_last", delta, 3, 1, self._pad_type,
                                 activation=tf.identity, use_bias=True)
        base = tf.stop_gradient(x) if self._stop_grad else x
        x = tf.add(base, delta_x, name="output")
    return x_image, loss_input, loss_per_layer, x
def graph_structure(self, x1x2):
    """
    Architecture of FlowNetCorr in Figure 2 of FlowNet 1.0.

    Args:
        x1x2: 2CHW. The two inputs are stacked on the batch axis; the
            activations are split in two along axis 0 before the correlation.

    Returns:
        Output of the 'predict_flow2' layer, passed through an identity op
        named 'flow2'.
    """
    def lrelu(t):
        return tf.nn.leaky_relu(t, 0.1)

    def padded_conv(inputs, margin, filters, name, **overrides):
        return tf.layers.conv2d(pad(inputs, margin), filters, name=name, **overrides)

    def predict(inputs, name):
        # 2-channel stride-1 prediction layer without non-linearity.
        return padded_conv(inputs, 1, 2, name, strides=1, activation=tf.identity)

    conv_scope = argscope(
        [tf.layers.conv2d], activation=lrelu, padding='valid', strides=2,
        kernel_size=3, data_format='channels_first')
    deconv_scope = argscope(
        [tf.layers.conv2d_transpose], padding='same', activation=tf.identity,
        data_format='channels_first', strides=2, kernel_size=4)

    with conv_scope, deconv_scope:
        # extract features
        x = padded_conv(x1x2, 3, 64, 'conv1', kernel_size=7)
        conv2 = padded_conv(x, 2, 128, 'conv2', kernel_size=5)
        conv3 = padded_conv(conv2, 2, 256, 'conv3', kernel_size=5)

        conv2a, _ = tf.split(conv2, 2, axis=0)
        conv3a, conv3b = tf.split(conv3, 2, axis=0)

        corr = correlation(conv3a, conv3b,
                           kernel_size=1,
                           max_displacement=20,
                           stride_1=1,
                           stride_2=2,
                           pad=20, data_format='NCHW')
        corr = tf.nn.leaky_relu(corr, 0.1)

        conv_redir = tf.layers.conv2d(conv3a, 32, kernel_size=1, strides=1,
                                      name='conv_redir')

        # The original built this concatenation twice (a second copy named
        # 'concat_redir' was assigned to `x` and overwritten before use);
        # only the one that feeds 'conv3_1' is kept.
        in_conv3_1 = tf.concat([conv_redir, corr], axis=1, name='in_conv3_1')

        # `skips[i]` is the encoder activation reused by decoder level i.
        skips = {
            2: conv2a,
            3: padded_conv(in_conv3_1, 1, 256, 'conv3_1', strides=1),
        }
        for idx, width in ((4, 512), (5, 512), (6, 1024)):
            x = padded_conv(skips[idx - 1], 1, width, 'conv%d' % idx)
            skips[idx] = padded_conv(x, 1, width, 'conv%d_1' % idx, strides=1)

        # Decoder.
        features = skips[6]
        flow = predict(features, 'predict_flow6')
        for level, width in ((5, 512), (4, 256), (3, 128), (2, 64)):
            flow_up = tf.layers.conv2d_transpose(
                flow, 2, name='upsampled_flow%d_to_%d' % (level + 1, level))
            up = tf.layers.conv2d_transpose(
                features, width, name='deconv%d' % level, activation=lrelu)
            features = tf.concat(
                [skips[level], up, flow_up], axis=1, name='concat%d' % level)
            flow = predict(features, 'predict_flow%d' % level)

        return tf.identity(flow, name='flow2')
def build_graph(self, pre_image_input, image_target):
    """
    Build the multi-stage synthesis graph and its losses.

    Parameters
    ----------
    pre_image_input : tf.Tensor
        The values are considered as the linear values before activation.
        The activation function is defined by self._act .
    image_target : tf.Tensor
        The values are considered as the actual pixel values in [0, 255]

    Sets `self.image_outputs`, `self.losses`, `self.loss_layer_output`
    and `self.loss`.
    """
    with tf.name_scope("preprocess"):
        image_target = image_target / 255.

    def viz(name, images):
        # Concatenate the images along axis 2, convert them to uint8 and
        # register an image summary.
        with tf.name_scope(name):
            im = tf.concat(images, axis=2)
            if self._act == tf.tanh:
                im = (im + 1.0) * 127.5
            else:
                im = im * 255
            im = tf.clip_by_value(im, 0, 255)
            im = tf.round(im)
            im = tf.cast(im, tf.uint8, name="viz")
        tf.summary.image(name, im, max_outputs=10, collections=["image_summaries"])

    # calculate gram_target
    _, gram_target = self._build_extractor(image_target, name="ext_target")

    # inference pre_image_output from pre_image_input and gram_target
    self.image_outputs = []
    self.losses = []
    pre_image_output = pre_image_input
    with tf.variable_scope("syn"):
        # TODO Due to the mistake of design, the batch size is always 1.
        # Thus, batchnorm has no difference with instancenorm. We need
        # to set training=True for all phases.
        # NOTE fixed the problem of unchangeable batch size. Add sync option.
        # NOTE (2020-03-02) The old models use batchnorm; the names are not
        # found if we simply set norm-type=instance. We disable batchnorm
        # temporarily.
        with argscope(BatchNorm, training=True):
            for s in range(self._n_stage):
                image_input, loss_overall_input, _, pre_image_output = \
                    self.build_stage(pre_image_output, gram_target, s + 1,
                                     name="stage%d" % s)
                self.image_outputs.append(image_input)
                self.losses.append(
                    tf.reduce_mean(loss_overall_input, name="loss%d" % s))
    self.collect_variables("syn")

    # loss of the final output
    image_output = self._act(pre_image_output, name="output")
    loss_overall_output, loss_layer_output, _ = \
        self._build_loss(image_output, gram_target, calc_grad=False)
    self.image_outputs.append(image_output)
    self.losses.append(tf.reduce_mean(loss_overall_output, name="loss_output"))

    # The original first stored the raw `loss_layer_output` on `self` and
    # overwrote it on the very next statement; only the reduced per-layer
    # losses are kept.
    self.loss_layer_output = OrderedDict()
    with tf.name_scope("loss_layer_output"):
        for layer in loss_layer_output:
            self.loss_layer_output[layer] = tf.reduce_mean(
                loss_layer_output[layer], name=layer)

    # average losses from all stages
    weights = [1.]
    for _ in range(len(self.losses) - 1):
        weights.append(weights[-1] * self._loss_scale)
    # skip the first loss as it is computed from noise
    self.loss = tf.add_n(
        [weight * loss
         for weight, loss in zip(weights, reversed(self.losses[1:]))],
        name="loss")

    # summary
    with tf.device("/cpu:0"):
        viz("stages-target", self.image_outputs + [image_target])
    add_moving_summary(self.loss, *self.losses, *self.loss_layer_output.values())
def build_stage(self, pre_image_input, gram_target, name="stage"):
    """
    Build one synthesis stage that adds a learned update to `pre_image_input`.

    Features of the activated input are visited from the last layer to the
    first; each is convolved twice, merged with the upsampled running `delta`
    of the previously visited layers and passed through `self._n_block`
    residual blocks. A final 3x3 convolution yields a 3-channel update.

    Args:
        pre_image_input: stage input, before `self._act` is applied.
        gram_target: forwarded to `build_texture_loss`.
        name: variable scope of the stage.

    Returns:
        (image_input, loss_overall_input, loss_layer_input, pre_image_output)
    """
    if self._pre_act:
        res_block = SingleSynTex.build_pre_res_block
    else:
        res_block = SingleSynTex.build_res_block
    if self._nn_upsample:
        upsample = SingleSynTex.build_upsampling_nn
    else:
        upsample = SingleSynTex.build_upsampling_deconv

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # extract features and gradients
        image_input = self._act(pre_image_input, name="input_" + name)
        feat_input, _ = self._build_extractor(image_input, calc_gram=False)
        loss_overall_input, loss_layer_input, _ = build_texture_loss(
            feat_input, gram_target, SynTexModelDesc.DEFAULT_COEFS,
            calc_grad=False, name="grad")

        # For a single texture synthesizer, we don't provide gradients to the
        # synthesizer. That information is implicitly provided by final loss.
        #         none +
        # f[4] -> conv[4] -> res[4] -> up[4] +
        # f[3] -> conv[3] -> res[3] -> up[3] +
        # f[2] -> conv[2] -> res[2] -> up[2] +
        #                ...                ...
        #                             up[1] +
        # f[0] -> conv[0] -> res[0] -> output
        with argscope([Conv2D, Conv2DTranspose], activation=INReLU, use_bias=False):
            is_deepest = True
            for layer in reversed(feat_input):
                layer_feat = feat_input[layer]
                chan = layer_feat.get_shape().as_list()[-1]
                with tf.variable_scope(layer):
                    # compute pseudo grad of current layer
                    grad = Conv2D("grad_conv1", layer_feat, chan, 3)
                    grad = Conv2D("grad_conv2", grad, chan, 3, activation=tf.identity)
                    # merge with grad from deeper layers
                    if is_deepest:
                        delta = tf.identity(grad, name="grad_merged")
                        is_deepest = False
                    else:
                        # upsample deeper grad
                        if self._pre_act:
                            delta = INReLU(delta, "pre_inrelu")
                        else:
                            delta = tf.nn.relu(delta, "pre_relu")
                        delta = upsample(delta, "up", chan=chan)
                        # add two grads
                        delta = tf.add(grad, delta, name="grad_merged")
                        if not self._pre_act:
                            delta = InstanceNorm("post_inorm", delta)
                    # simulate the backpropagation procedure to next level
                    for k in range(self._n_block):
                        delta = res_block(delta, "res{}".format(k), chan,
                                          first=(k == 0))
            # output
            if self._pre_act:
                delta = INReLU(delta, "actlast")
            else:
                delta = tf.nn.relu(delta, "actlast")
            # symmetric 1-pixel padding on the two middle axes, so that the
            # VALID 3x3 convolution below keeps the spatial size
            border = [[0, 0], [1, 1], [1, 1], [0, 0]]
            delta = tf.pad(delta, border, mode="SYMMETRIC")
            delta_input = Conv2D("convlast", delta, 3, 3, padding="VALID",
                                 activation=tf.identity, use_bias=True)
        pre_image_output = tf.add(pre_image_input, delta_input,
                                  name="pre_image_output")
    return image_input, loss_overall_input, loss_layer_input, pre_image_output
def graph_structure(self, x1x2):
    """
    Architecture of FlowNetCorr in Figure 2 of FlowNet 1.0.

    Args:
        x1x2: 2CHW. The two inputs are stacked on the batch axis; the
            activations are split in two along axis 0 before the correlation.

    Returns:
        Output of the 'predict_flow2' layer, passed through an identity op
        named 'flow2'.
    """
    def lrelu(t):
        return tf.nn.leaky_relu(t, 0.1)

    def padded_conv(inputs, margin, filters, name, **overrides):
        return tf.layers.conv2d(pad(inputs, margin), filters, name=name, **overrides)

    def predict(inputs, name):
        # 2-channel stride-1 prediction layer without non-linearity.
        return padded_conv(inputs, 1, 2, name, strides=1, activation=tf.identity)

    conv_scope = argscope(
        [tf.layers.conv2d], activation=lrelu, padding='valid', strides=2,
        kernel_size=3, data_format='channels_first')
    deconv_scope = argscope(
        [tf.layers.conv2d_transpose], padding='same', activation=tf.identity,
        data_format='channels_first', strides=2, kernel_size=4)

    with conv_scope, deconv_scope:
        # extract features
        x = padded_conv(x1x2, 3, 64, 'conv1', kernel_size=7)
        conv2 = padded_conv(x, 2, 128, 'conv2', kernel_size=5)
        conv3 = padded_conv(conv2, 2, 256, 'conv3', kernel_size=5)

        conv2a, _ = tf.split(conv2, 2, axis=0)
        conv3a, conv3b = tf.split(conv3, 2, axis=0)

        corr = correlation(conv3a, conv3b,
                           kernel_size=1,
                           max_displacement=20,
                           stride_1=1,
                           stride_2=2,
                           pad=20, data_format='NCHW')
        corr = tf.nn.leaky_relu(corr, 0.1)

        conv_redir = tf.layers.conv2d(conv3a, 32, kernel_size=1, strides=1,
                                      name='conv_redir')

        # The original built this concatenation twice (a second copy named
        # 'concat_redir' was assigned to `x` and overwritten before use);
        # only the one that feeds 'conv3_1' is kept.
        in_conv3_1 = tf.concat([conv_redir, corr], axis=1, name='in_conv3_1')

        # `skips[i]` is the encoder activation reused by decoder level i.
        skips = {
            2: conv2a,
            3: padded_conv(in_conv3_1, 1, 256, 'conv3_1', strides=1),
        }
        for idx, width in ((4, 512), (5, 512), (6, 1024)):
            x = padded_conv(skips[idx - 1], 1, width, 'conv%d' % idx)
            skips[idx] = padded_conv(x, 1, width, 'conv%d_1' % idx, strides=1)

        # Decoder.
        features = skips[6]
        flow = predict(features, 'predict_flow6')
        for level, width in ((5, 512), (4, 256), (3, 128), (2, 64)):
            flow_up = tf.layers.conv2d_transpose(
                flow, 2, name='upsampled_flow%d_to_%d' % (level + 1, level))
            up = tf.layers.conv2d_transpose(
                features, width, name='deconv%d' % level, activation=lrelu)
            features = tf.concat(
                [skips[level], up, flow_up], axis=1, name='concat%d' % level)
            flow = predict(features, 'predict_flow%d' % level)

        return tf.identity(flow, name='flow2')
def build_stage(self, x, gram_target, coefs, gain=1 / np.sqrt(2), name="stage"):
    """
    Build one synthesis stage that adds a learned update to `x`.

    The per-layer gradients returned by `build_stage_preparation` are visited
    from the last feature layer to the first; each one is convolved, merged
    with the running `delta` through `SphericalAdd`, upsampled (except at
    'conv1_1'), optionally gated, and passed through residual blocks.

    Args:
        x: stage input, before `self._act_input` is applied.
        gram_target: forwarded to `build_stage_preparation`.
        coefs: forwarded to `build_stage_preparation`.
        gain: not read by this body; the additive merge that used it is
            commented out in favour of `SphericalAdd`.
        name: variable scope of the stage.

    Returns:
        (x_image, loss_input, loss_per_layer, x) where the last item is the
        input plus the predicted update.
    """
    acti = ActFactory(self._norm_type, self._alpha)
    if self._deconv:
        upsample = upsampling_deconv
    else:
        upsample = upsampling_nnconv
    is_deepest = True
    summary_kwargs = dict(types=["rms", "histogram"],
                          collections=["acti_summaries"])

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # extract features and gradients
        x_image = self._act_input(x, name="input_" + name)
        feat, loss_input, loss_per_layer, grad_per_layer = \
            self.build_stage_preparation(x_image, gram_target, coefs)

        #            none +
        # grad[4] conv[4] -> res[4] -> up[4] +
        # grad[3] conv[3] -> res[3] -> up[3] +
        # grad[2] conv[2] -> res[2] -> up[2] +
        #                ...                ...
        #                             up[1] +
        # grad[0] conv[0] -> res[0] -> output
        with argscope([Conv2D, Conv2DTranspose], activation=acti, use_bias=False):
            for layer in reversed(feat):
                if layer not in grad_per_layer:
                    continue
                with tf.variable_scope(layer):
                    # compute pseudo grad of current layer
                    layer_grad = tf.identity(grad_per_layer[layer], name="input")
                    add_activation_summary(layer_grad, **summary_kwargs)
                    chan = layer_grad.get_shape().as_list()[-1]
                    layer_grad = pad_conv2d("grad_conv", layer_grad, chan,
                                            self._grad_ksize, self._pad_type,
                                            activation=tf.identity)
                    add_activation_summary(layer_grad, **summary_kwargs)
                    # merge with grad from deeper layers
                    if is_deepest:
                        delta = tf.identity(layer_grad, name="grad_merged")
                        is_deepest = False
                    else:
                        # change chan of delta
                        delta = pad_conv2d("conv_chan", delta, chan, 3,
                                           self._pad_type, activation=tf.identity)
                        delta = SphericalAdd("grad_merged", delta, layer_grad,
                                             self._theta_mean,
                                             lrmul=self._theta_lrmul)
                    # upsample
                    if layer != "conv1_1":
                        delta = upsample("up", delta, self._pad_type, chan=chan)
                    # delta is not activated at this point
                    # add relu gate here
                    if self._gate:
                        gate = get_relu_gate(
                            "gate", feat[Style2PO.GATE_SOURCE[layer]], 0.)
                        gate_shape = gate.get_shape().as_list()
                        delta_shape = delta.get_shape().as_list()
                        assert gate_shape == delta_shape, \
                            "{} vs {}".format(gate_shape, delta_shape)
                        delta = delta * gate
                    # simulate the backpropagation to next level
                    n_block = self._n_block
                    if not self._same_block:
                        n_block = self._n_block * Style2PO.N_BLOCK_BASE[layer]
                    for k in range(n_block):
                        delta = res_block("res{}".format(k), delta, chan,
                                          self._pad_type, self._norm_type,
                                          self._alpha, self._bottleneck,
                                          self._pre_act)
            delta = acti(delta, "acti_output")
            # output
            delta_x = pad_conv2d("conv_last", delta, 3, 1, self._pad_type,
                                 demodulate=False, activation=tf.identity,
                                 use_bias=True)
        base = tf.stop_gradient(x) if self._stop_grad else x
        x = tf.add(base, delta_x, name="output")
    return x_image, loss_input, loss_per_layer, x
def build_graph(self, image: Any, label: Any) -> Any:
    """
    Build the model from the input variables and return the total cost.

    Besides the returned 'total_cost' tensor, this creates the named tensors
    'cross_entropy_loss', 'correct', 'accuracy', 'train_error' and
    'regularize_loss', and registers moving summaries for the training
    error and the accuracy.
    """
    # In tensorflow, inputs to convolution function are assumed to be NHWC.
    # Add a single channel here.
    side = self.image_size
    image = tf.reshape(image, [-1, side, side, 1])

    # Center the pixel values at zero.
    image = image * 2 - 1

    # `argscope` sets the default options for all Conv2D layers created under
    # it: 3x3 kernels, ReLU, and the filter count taken from the hyperparameters.
    conv_defaults = dict(
        kernel_size=3,
        activation=tf.nn.relu,
        filters=self.hparams["n_filters"],
    )
    with tensorpack.argscope(tensorpack.Conv2D, **conv_defaults):
        logits = (
            tensorpack.LinearWrap(image)
            .Conv2D("conv0")
            .MaxPooling("pool0", 2)
            .Conv2D("conv1")
            .Conv2D("conv2")
            .MaxPooling("pool1", 2)
            .Conv2D("conv3")
            .FullyConnected("fc0", 512, nl=tf.nn.relu)
            .Dropout("dropout", 0.5)
            .FullyConnected("fc1", out_dim=10, nl=tf.identity)()
        )

    # This line will cause Tensorflow to detect GPU usage. If session is not properly
    # configured it causes multi-GPU runs to crash.
    _preprocess_conv2d_input(image, "channels_first")

    label = tf.reshape(label, [-1])
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label)
    # the average cross-entropy loss
    cost = tf.reduce_mean(per_example_loss, name="cross_entropy_loss")

    hits = tf.nn.in_top_k(predictions=logits, targets=label, k=1)
    correct = tf.cast(hits, tf.float32, name="correct")
    accuracy = tf.reduce_mean(correct, name="accuracy")
    train_error = tf.reduce_mean(1 - correct, name="train_error")
    tensorpack.summary.add_moving_summary(train_error, accuracy)

    # Use a regex to find parameters to apply weight decay.
    # Here we apply a weight decay on all W (weight matrix) of all fc layers.
    wd_cost = tf.multiply(
        self.hparams["weight_cost"],
        tensorpack.regularize_cost("fc.*/W", tf.nn.l2_loss),
        name="regularize_loss",
    )
    return tf.add_n([wd_cost, cost], name="total_cost")