def iter_func(self, state):
    """Build one core step: a conv trunk plus value/reward/gamma/lambda heads.

    The value head reads the flattened input ``state`` directly; the reward,
    gamma and lambda heads read the flattened output of the first conv block.

    Args:
        state: Input tensor fed to both the value head and the conv trunk.

    Returns:
        Tuple ``(net, reward_net, gamma_net, lambda_net, value_net)`` where
        ``net`` is the output of the third conv block.
    """
    trunk_scope = predictron_arg_scope()

    def _head(features, final_activation):
        # Every head is fc0 (32 units) -> batch-norm + ReLU -> fc1 (maze_size units).
        hidden = slim.fully_connected(features, 32, scope='fc0')
        hidden = layers.batch_norm(hidden, activation_fn=tf.nn.relu, scope='fc0/preact')
        return slim.fully_connected(
            hidden, self.maze_size, activation_fn=final_activation, scope='fc1')

    # The value head is built before the predictron arg scope is entered.
    with tf.variable_scope('value'):
        value_net = _head(slim.flatten(state), None)

    with slim.arg_scope(trunk_scope):
        net = slim.conv2d(state, 32, [3, 3], scope='conv1')
        net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope='conv1/preact')
        net_flatten = slim.flatten(net, scope='conv1/flatten')

        with tf.variable_scope('reward'):
            reward_net = _head(net_flatten, None)

        # gamma and lambda are squashed through a sigmoid.
        with tf.variable_scope('gamma'):
            gamma_net = _head(net_flatten, tf.nn.sigmoid)

        with tf.variable_scope('lambda'):
            lambda_net = _head(net_flatten, tf.nn.sigmoid)

        for block in ('conv2', 'conv3'):
            net = slim.conv2d(net, 32, [3, 3], scope=block)
            net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope=block + '/preact')

    return net, reward_net, gamma_net, lambda_net, value_net
def _build_network(self, name, training=False):
    """Build a dueling Q-network on top of ``self.input_X``.

    Eight dense ReLU layers feed a final hidden layer of ``h_size`` units,
    which is split in half into an advantage stream and a value stream. The
    streams are combined as ``Q = V + (A - mean(A))`` and stored in
    ``self.Qout``.

    Args:
        name: Variable scope under which the network is created.
        training: Python bool or scalar bool tensor forwarded to
            ``tf.layers.dropout``. The original code never passed this
            argument, and ``tf.layers.dropout`` defaults to inference mode,
            so the dropout layers were always the identity. The default
            ``False`` keeps that behaviour; pass ``True`` (or a placeholder)
            to actually enable dropout.

    Returns:
        Tuple ``(Q, predict)``: the Q-value tensor (also stored in
        ``self.Qout``) and the argmax action index along axis 1.
    """
    with tf.variable_scope(name):
        # Weight initializer. Author's note: the 'FAN_AVG' variant works
        # better than 'FAN'.
        he_init = tf.contrib.layers.variance_scaling_initializer(
            factor=2.0, mode='FAN_AVG', uniform=False)

        # Size of the final hidden layer before it is split into the
        # advantage and value streams.
        h_size = 500

        # Author's notes (translated from Korean):
        #   BNN, the Bayesian neural network approach (= dropout): randomly
        #   setting active nodes to zero during training acts as a kind of
        #   regularization. Drawing one sample from a network with dropout
        #   is similar to sampling from a BNN. The dropout probability is
        #   reduced over time to reduce noise in the estimates. The author
        #   reports a clearly visible improvement in learning performance.
        #   "Honestly speaking, I'm not sure just adding dropout is right."
        # NOTE(review): dropout only takes effect when `training` is truthy.
        model = self.input_X
        for _ in range(6):
            model = tf.layers.dense(model, units=250, activation=tf.nn.relu,
                                    kernel_initializer=he_init)
            # E.g. rate=0.1 would drop out 10% of the input units.
            model = tf.layers.dropout(model, rate=0.5, training=training)

        # The last two hidden layers deliberately have no dropout. The layer
        # with `h_size` units must be the last one before the split.
        model = tf.layers.dense(model, units=250, activation=tf.nn.relu,
                                kernel_initializer=he_init)
        model = tf.layers.dense(model, units=h_size, activation=tf.nn.relu,
                                kernel_initializer=he_init)

        # Dueling DQN: do not output Q at once; split into advantage (A) and
        # value (V) streams and recombine them.
        streamAC, streamVC = tf.split(model, num_or_size_splits=2, axis=1)

        # Flatten is kept from a conv-based variant; the inputs here are
        # already the output of dense layers.
        streamA = slim.flatten(streamAC)
        streamV = slim.flatten(streamVC)

        # Stream weights, initialized with the same initializer as above
        # (ref. http://hwangpy.tistory.com/153).
        AW = tf.Variable(he_init([h_size // 2, self.n_action]))
        VW = tf.Variable(he_init([h_size // 2, 1]))

        Advantage = tf.matmul(streamA, AW)
        Value = tf.matmul(streamV, VW)

        # Combine the streams into the final Q-values.
        self.Qout = Value + tf.subtract(
            Advantage, tf.reduce_mean(Advantage, axis=1, keep_dims=True))
        Q = self.Qout

        # Double DQN: the greedy action is chosen from this network's Q.
        # Be careful when switching to a Boltzmann approach.
        predict = tf.argmax(self.Qout, axis=1)

        return Q, predict
def LResnet50E_IR(images, keep_probability, phase_train=True, bottleneck_layer_size=512, weight_decay=0.0, reuse=None):
    """Build the LResnet50E-IR trunk and its bottleneck embedding.

    Scope naming follows conv[conv_layer]_[block_index]_[block_layer_index].
    For resnet50 the unit counts are [3, 4, 14, 3]; one unit per stage is the
    dimension-changing block, so the repeated identity blocks number
    [2, 3, 13, 2].

    `weight_decay` and `reuse` are accepted but not used in this function.
    The first conv passes no kernel size here.
    NOTE(review): presumably an enclosing arg_scope supplies it - confirm.

    Returns:
        Tuple ``(net, '')``: the bottleneck tensor and an empty string.
    """
    # (variable scope, depth, number of repeated dim-matching blocks)
    stage_plan = (
        ('Conv2', 64, 2),
        ('Conv3', 128, 3),
        ('Conv4', 256, 13),
        ('Conv5', 512, 2),
    )

    with tf.variable_scope('Conv1'):
        net = slim.conv2d(images, 64, scope='Conv1_pre')
        net = slim.batch_norm(net, scope='Conv1_bn')

    for stage, depth, n_repeat in stage_plan:
        with tf.variable_scope(stage):
            # Stride-2 block whose input/output dims do not match.
            net = resface_block(net, depth, stride=2, dim_match=False,
                                scope=stage + '_pre')
            # Followed by stride-1, dim-matching blocks.
            net = slim.repeat(net, n_repeat, resface_block, depth, 1, True,
                              scope=stage + '_main')

    with tf.variable_scope('Logits'):
        net = slim.batch_norm(net, activation_fn=None, scope='bn1')
        net = slim.dropout(net, keep_probability, is_training=phase_train,
                           scope='Dropout')
        net = slim.flatten(net)
        net = slim.fully_connected(
            net, bottleneck_layer_size,
            biases_initializer=tf.contrib.layers.xavier_initializer(),
            scope='fc1')
        net = slim.batch_norm(net, activation_fn=None, scope='Bottleneck')

    return net, ''
def encoder(self, images, is_training):
    """Encode `images` into two latent-sized linear outputs.

    Four stages each apply a stride-2 4x4 conv followed by three
    `conv2d_block` repeats. The flattened result feeds two separate linear
    layers of width ``self.latent_variable_dim``.

    Args:
        images: Input image tensor.
        is_training: Forwarded to every `slim.batch_norm` in this scope.

    Returns:
        Tuple ``(fc1, fc2)``: outputs of 'Fc_1' and 'Fc_2'.
    """
    activation_fn = leaky_relu  # alternative noted by the author: tf.nn.relu
    weight_decay = 0.0
    stage_depths = (32, 64, 128, 256)

    with tf.variable_scope('encoder'), \
            slim.arg_scope([slim.batch_norm], is_training=is_training), \
            slim.arg_scope([slim.conv2d, slim.fully_connected],
                           weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                           weights_regularizer=slim.l2_regularizer(weight_decay),
                           normalizer_fn=slim.batch_norm,
                           normalizer_params=self.batch_norm_params):
        net = images
        for stage, depth in enumerate(stage_depths, 1):
            # Strided conv ("a") followed by three repeated blocks ("b").
            net = slim.conv2d(net, depth, [4, 4], 2, activation_fn=activation_fn,
                              scope='Conv2d_%da' % stage)
            net = slim.repeat(net, 3, conv2d_block, 0.1, depth, [4, 4], 1,
                              activation_fn=activation_fn,
                              scope='Conv2d_%db' % stage)

        net = slim.flatten(net)
        # Both output layers are linear and skip the scope's batch norm.
        fc1 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None,
                                   normalizer_fn=None, scope='Fc_1')
        fc2 = slim.fully_connected(net, self.latent_variable_dim, activation_fn=None,
                                   normalizer_fn=None, scope='Fc_2')

    return fc1, fc2
def content_extractor(self, images, reuse=False):
    """Extract a content feature map from `images`.

    Single-channel inputs are first replicated to three channels. In
    'pretrain' mode an extra 1x1 conv with 10 outputs is appended and the
    result is flattened.

    Args:
        images: Image batch; the author's notes give (batch, 32, 32, 3) or
            (batch, 32, 32, 1).
        reuse: Passed to the 'content_extractor' variable scope.

    Returns:
        The 'bn4' output, or the flattened 10-way output in 'pretrain' mode.
    """
    # For the MNIST dataset, replicate the gray-scale image three times.
    if images.get_shape()[3] == 1:
        images = tf.image.grayscale_to_rgb(images)

    # (conv scope, batch-norm scope, depth) for the three stride-2 3x3 convs.
    # Author's shape notes: 16x16x64, 8x8x128, 4x4x256.
    strided_layers = (
        ('conv1', 'bn1', 64),
        ('conv2', 'bn2', 128),
        ('conv3', 'bn3', 256),
    )

    with tf.variable_scope('content_extractor', reuse=reuse):
        with slim.arg_scope([slim.conv2d], padding='SAME', activation_fn=None,
                            stride=2,
                            weights_initializer=tf.contrib.layers.xavier_initializer()):
            with slim.arg_scope([slim.batch_norm], decay=0.95, center=True,
                                scale=True, activation_fn=tf.nn.relu,
                                is_training=(self.mode == 'train' or self.mode == 'pretrain')):
                net = images
                for conv_scope, bn_scope, depth in strided_layers:
                    net = slim.conv2d(net, depth, [3, 3], scope=conv_scope)
                    net = slim.batch_norm(net, scope=bn_scope)

                # Author's shape note: (batch_size, 1, 1, 128).
                net = slim.conv2d(net, 128, [4, 4], padding='VALID', scope='conv4')
                net = slim.batch_norm(net, activation_fn=tf.nn.tanh, scope='bn4')

                if self.mode == 'pretrain':
                    net = slim.conv2d(net, 10, [1, 1], padding='VALID', scope='out')
                    net = slim.flatten(net)
                return net
def _build_graph(self):
    """Build the shared conv trunk and the value / policy output layers.

    Reads ``self._input`` and ``self.output_size``. Sets
    ``self.value_func_prediction`` (one linear unit) and
    ``self.policy_predictions`` (softmax over ``self.output_size`` entries).
    No layer has a bias term, and every layer uses slim's default weight
    initializer.
    """
    scaled = tf.div(self._input, 255.0)

    features = slim.conv2d(scaled, 16, [8, 8],
                           activation_fn=tf.nn.relu,
                           padding='VALID',
                           stride=4,
                           biases_initializer=None)
    features = slim.conv2d(features, 32, [4, 4],
                           activation_fn=tf.nn.relu,
                           padding='VALID',
                           stride=2,
                           biases_initializer=None)

    hidden = slim.fully_connected(slim.flatten(features), 256,
                                  activation_fn=tf.nn.relu,
                                  biases_initializer=None)

    # Estimate of the value function.
    self.value_func_prediction = slim.fully_connected(
        hidden, 1, activation_fn=None, biases_initializer=None)

    # Softmax output: one entry per action, the probability of taking it.
    self.policy_predictions = slim.fully_connected(
        hidden, self.output_size, activation_fn=tf.nn.softmax,
        biases_initializer=None)
def build_arch_baseline(input, is_train: bool, num_classes: int):
    """Build a two-conv, two-FC baseline classifier.

    Args:
        input: Image batch tensor.
        is_train: Whether the graph is built for training. Controls both the
            `trainable` flag of the conv/FC variables and whether dropout is
            active.
        num_classes: Number of output logits.

    Returns:
        Unnormalized logits from the 'final_layer' fully connected layer.
    """
    bias_initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
    # The paper did not mention any regularization; a common L2 regularizer
    # on the weights is added here.
    weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    tf.logging.info('input shape: {}'.format(input.get_shape()))

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        trainable=is_train,
                        biases_initializer=bias_initializer,
                        weights_regularizer=weights_regularizer):
        with tf.variable_scope('relu_conv1') as scope:
            output = slim.conv2d(input, num_outputs=32, kernel_size=[5, 5],
                                 stride=1, padding='SAME', scope=scope,
                                 activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer1')
            tf.logging.info('output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('relu_conv2') as scope:
            output = slim.conv2d(output, num_outputs=64, kernel_size=[5, 5],
                                 stride=1, padding='SAME', scope=scope,
                                 activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer2')
            tf.logging.info('output shape: {}'.format(output.get_shape()))

        output = slim.flatten(output)
        output = slim.fully_connected(output, 1024, scope='relu_fc3',
                                      activation_fn=tf.nn.relu)
        tf.logging.info('output shape: {}'.format(output.get_shape()))

        # slim.dropout defaults to is_training=True, so without this argument
        # units were still dropped when the graph was built for evaluation.
        output = slim.dropout(output, 0.5, is_training=is_train, scope='dp')

        output = slim.fully_connected(output, num_classes, scope='final_layer',
                                      activation_fn=None)
        tf.logging.info('output shape: {}'.format(output.get_shape()))

    return output
def resface36(images, keep_probability, phase_train=True, bottleneck_layer_size=512, weight_decay=0.0, reuse=None):
    """Build the resface36 trunk and its linear bottleneck.

    Scope naming follows conv[conv_layer]_[block_index]_[block_layer_index].
    `weight_decay` and `reuse` are accepted but not used in this function.

    Returns:
        Tuple ``(net, '')``: the bottleneck tensor and an empty string.
    """
    # (variable scope, depth, number of residual blocks, scope of the repeat)
    stage_plan = (
        ('Conv1', 64, 2, 'Conv_1'),
        ('Conv2', 128, 4, 'Conv_2'),
        ('Conv3', 256, 8, 'Conv_3'),
        ('Conv4', 512, 1, 'Conv4'),
    )

    net = images
    for stage, depth, n_blocks, repeat_scope in stage_plan:
        with tf.variable_scope(stage):
            net = resface_pre(net, depth, scope=stage + '_pre')
            net = slim.repeat(net, n_blocks, resface_block, depth, scope=repeat_scope)

    with tf.variable_scope('Logits'):
        # The feature map is flattened directly; no average pooling is applied.
        net = slim.flatten(net)
        net = slim.dropout(net, keep_probability, is_training=phase_train,
                           scope='Dropout')

    net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
                               scope='Bottleneck', reuse=False)
    return net, ''
def flatten_fully_connected(inputs, num_outputs, activation_fn=tf.nn.relu, normalizer_fn=None, normalizer_params=None, weights_initializer=slim.xavier_initializer(), weights_regularizer=None, biases_initializer=tf.zeros_initializer(), biases_regularizer=None, reuse=None, variables_collections=None, outputs_collections=None, trainable=True, scope=None):
    """Fully connected layer that first flattens inputs of rank > 2.

    Takes the same arguments as `slim.fully_connected` and forwards them
    unchanged. Inputs of rank 2 or lower are passed through without
    flattening.

    Returns:
        The output tensor of `slim.fully_connected`.
    """
    with tf.variable_scope(scope, 'flatten_fully_connected', [inputs]):
        if inputs.shape.ndims > 2:
            inputs = slim.flatten(inputs)
        # Forward by keyword. Positional forwarding raises "got multiple
        # values for argument" whenever an enclosing slim.arg_scope sets any
        # of these arguments for slim.fully_connected, because arg_scope
        # injects its defaults as keyword arguments.
        return slim.fully_connected(
            inputs,
            num_outputs,
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn,
            normalizer_params=normalizer_params,
            weights_initializer=weights_initializer,
            weights_regularizer=weights_regularizer,
            biases_initializer=biases_initializer,
            biases_regularizer=biases_regularizer,
            reuse=reuse,
            variables_collections=variables_collections,
            outputs_collections=outputs_collections,
            trainable=trainable,
            scope=scope)
def build_single_inceptionv3(train_tfdata, is_train, dropout_keep_prob, reduce_dim = False):
    """Run Inception-v3 on `train_tfdata` and expose a flat feature vector.

    Args:
        train_tfdata: Image batch; resized to 299x299 before the network.
        is_train: Forwarded as `is_training` to `inception.inception_v3`.
        dropout_keep_prob: Forwarded to `inception.inception_v3`.
        reduce_dim: When true, project the feature to 256 units with a fully
            connected layer scoped 'feat'.

    Returns:
        Tuple ``(identity, feature)``: the first output of
        `inception.inception_v3` and the flattened 'Mixed_7c' end point
        (optionally reduced).
    """
    resized = tf.image.resize_images(train_tfdata, (299, 299))

    with slim.arg_scope(inception.inception_v3_arg_scope()):
        identity, end_points = inception.inception_v3(
            resized,
            dropout_keep_prob = dropout_keep_prob,
            is_training=is_train)

        feature = slim.flatten(end_points['Mixed_7c'])
        if not reduce_dim:
            return identity, feature

        feature = slim.fully_connected(feature, 256, scope='feat')
        return identity, feature
def loss(self, x, y):
    """Build the loss terms and register their summaries.

    Encodes `x`, samples `z`, generates `xh` from `(z, y)`, then computes a
    KL term between the encoded Gaussian and an all-zero mean/log-variance
    pair, and a log-density term of `x` under `xh` with zero log-variance.

    Returns:
        dict with keys 'G' (``-logPx + D_KL``), 'D_KL' and 'logP'.
    """
    flat = slim.flatten

    with tf.name_scope('loss'):
        z_mu, z_lv = self._encode(x)
        z = GaussianSampleLayer(z_mu, z_lv)
        xh = self._generate(z, y)

        # KL divergence against a zero-mean, zero-log-variance Gaussian.
        post_mu = flat(z_mu)
        post_lv = flat(z_lv)
        prior_mu = flat(tf.zeros_like(z_mu))
        prior_lv = flat(tf.zeros_like(z_lv))
        D_KL = tf.reduce_mean(GaussianKLD(post_mu, post_lv, prior_mu, prior_lv))

        # Log-density of x with mean xh and zero log-variance.
        x_flat = flat(x)
        xh_flat = flat(xh)
        xh_lv = tf.zeros_like(flat(xh))
        logPx = tf.reduce_mean(GaussianLogDensity(x_flat, xh_flat, xh_lv))

    loss = {
        'G': - logPx + D_KL,
        'D_KL': D_KL,
        'logP': logPx,
    }

    for tag, scalar in (('KL-div', D_KL), ('logPx', logPx)):
        tf.summary.scalar(tag, scalar)
    for tag, tensor in (('xh', xh), ('x', x)):
        tf.summary.histogram(tag, tensor)

    return loss
def build_graph(top_k):
    """Build the training / evaluation graph for the character classifier.

    Creates placeholders for a 64x64x1 image batch, int64 labels, the dropout
    keep probability and the batch-norm training flag. Stacks five 3x3 convs
    with four 2x2 max-pools and two fully connected layers, then adds the
    loss, accuracy, Adam train op, summaries and top-k outputs.

    Args:
        top_k: `k` used for the top-k prediction and top-k accuracy ops.

    Returns:
        dict mapping names to the placeholders and ops of the graph.
    """
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')

    with tf.device('/gpu:0'):
        # Every conv and fully connected layer is followed by batch norm.
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            net = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv3_1')
            net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool1')
            net = slim.conv2d(net, 128, [3, 3], padding='SAME', scope='conv3_2')
            net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool2')
            net = slim.conv2d(net, 256, [3, 3], padding='SAME', scope='conv3_3')
            net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool3')
            net = slim.conv2d(net, 512, [3, 3], padding='SAME', scope='conv3_4')
            net = slim.conv2d(net, 512, [3, 3], padding='SAME', scope='conv3_5')
            net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool4')

            flat = slim.flatten(net)
            hidden = slim.fully_connected(slim.dropout(flat, keep_prob), 1024,
                                          activation_fn=tf.nn.relu, scope='fc1')
            logits = slim.fully_connected(slim.dropout(hidden, keep_prob),
                                          FLAGS.charset_size,
                                          activation_fn=None, scope='fc2')

        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
        loss = tf.reduce_mean(xent)
        hits = tf.equal(tf.argmax(logits, 1), labels)
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

        # Make the loss depend on the collected update ops (e.g. the
        # batch-norm moving averages) so they run with every step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)

        global_step = tf.get_variable("step", [],
                                      initializer=tf.constant_initializer(0.0),
                                      trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)

        probabilities = tf.nn.softmax(logits)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()

        predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
        in_top_k = tf.nn.in_top_k(probabilities, labels, top_k)
        accuracy_in_top_k = tf.reduce_mean(tf.cast(in_top_k, tf.float32))

    graph = dict()
    graph['images'] = images
    graph['labels'] = labels
    graph['keep_prob'] = keep_prob
    graph['top_k'] = top_k
    graph['global_step'] = global_step
    graph['train_op'] = train_op
    graph['loss'] = loss
    graph['is_training'] = is_training
    graph['accuracy'] = accuracy
    graph['accuracy_top_k'] = accuracy_in_top_k
    graph['merged_summary_op'] = merged_summary_op
    graph['predicted_distribution'] = probabilities
    graph['predicted_index_top_k'] = predicted_index_top_k
    graph['predicted_val_top_k'] = predicted_val_top_k
    return graph
def make_tower(net):
    """Build one conv tower ending in a 2-unit linear layer.

    Two VALID 5x5 conv + 2x2 max-pool pairs (20 then 50 filters), a
    500-unit fully connected layer, and a final 2-unit layer without
    activation.

    Args:
        net: Input tensor.

    Returns:
        Output tensor of the 'fc2' layer.
    """
    # (conv scope, pool scope, number of filters)
    conv_stages = (('conv1', 'pool1', 20), ('conv2', 'pool2', 50))

    for conv_scope, pool_scope, depth in conv_stages:
        net = slim.conv2d(net, depth, [5, 5], padding='VALID', scope=conv_scope)
        net = slim.max_pool2d(net, [2, 2], padding='VALID', scope=pool_scope)

    flat = slim.flatten(net)
    hidden = slim.fully_connected(flat, 500, scope='fc1')
    return slim.fully_connected(hidden, 2, activation_fn=None, scope='fc2')
def generative_network(z):
    """Map latent variables to likelihood parameters (logits).

    A single linear layer with 28 * 28 outputs is applied to `z`, and the
    result is flattened: ``logits = neural_network(z)``.

    Args:
        z: Latent variable tensor.

    Returns:
        Flattened logits tensor.
    """
    logits = slim.fully_connected(z, 28 * 28, activation_fn=None)
    return slim.flatten(logits)
def _encoder(self, x, is_training=None):
    """Encode `x` into the mean and log-variance of the latent code.

    Layer shapes come from ``self.arch['encoder']`` ('output', 'kernel' and
    'stride' lists, consumed in lockstep). `is_training` is accepted but not
    used here.

    Returns:
        Tuple ``(z_mu, z_lv)``, each a dense projection of width
        ``self.arch['z_dim']``.
    """
    spec = self.arch['encoder']
    layer_args = zip(spec['output'], spec['kernel'], spec['stride'])

    hidden = x
    for idx, layer in enumerate(layer_args):
        out_channels, kernel, stride = layer
        hidden = conv2d_nchw_layernorm(
            hidden, out_channels, kernel, stride, lrelu,
            name='Conv2d-{}'.format(idx)
        )

    hidden = slim.flatten(hidden)
    latent_dim = self.arch['z_dim']
    z_mu = tf.layers.dense(hidden, latent_dim)
    z_lv = tf.layers.dense(hidden, latent_dim)
    return z_mu, z_lv
def _add_single_ssd_head(self, blob, num_classes, num_anchors, prefix, suffix=''):
    """Attach SSD localization and confidence predictors to `blob`.

    Rank-4 blobs get 3x3 convolutions whose outputs are moved to
    channel-last layout and flattened; both flattened tensors are also
    appended to ``self.flattens_for_tfmo``. Rank-2 blobs get fully connected
    layers instead.

    Args:
        blob: Feature tensor of rank 4 or 2.
        num_classes: Number of classes per anchor.
        num_anchors: Number of anchors per location.
        prefix: Scope prefix for the created layers.
        suffix: Optional scope suffix.

    Returns:
        Tuple ``(conf, locs)``.

    Raises:
        Exception: If `blob` has a rank other than 4 or 2.
    """
    loc_scope = '{}_mbox_loc{}'.format(prefix, suffix)
    conf_scope = '{}_mbox_conf{}'.format(prefix, suffix)
    rank = len(blob.shape)

    with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None,
                        padding='SAME', normalizer_params=None):
        if rank not in (4, 2):
            raise Exception('Unsupported input blob shape for SSD.')

        if rank == 2:
            locs = slim.fully_connected(blob, num_anchors * 4,
                                        activation_fn=None, scope=loc_scope)
            conf = slim.fully_connected(blob, num_anchors * num_classes,
                                        activation_fn=None, scope=conf_scope)
            return conf, locs

        # Rank 4: convolutional predictors.
        locs = slim.conv2d(blob, num_anchors * 4, (3, 3), scope=loc_scope,
                           data_format=self.data_format)
        locs = channel_to_last(locs, data_format=self.data_format)
        locs = slim.flatten(locs)

        conf = slim.conv2d(blob, num_anchors * num_classes, (3, 3),
                           biases_initializer=tf.constant_initializer(0.0),
                           scope=conf_scope, data_format=self.data_format)
        conf = channel_to_last(conf, data_format=self.data_format)
        conf = slim.flatten(conf)

        self.flattens_for_tfmo.extend([locs, conf])
        return conf, locs
def _contruct_network(self, inputs):
    """Build the shared conv + LSTM trunk and the policy / value heads.

    Sets ``self.state_init``, ``self.state_in``, ``self.state_out``,
    ``self.policy`` and ``self.value``. The whole input batch is fed to the
    LSTM as one sequence (batch size 1, length = number of input rows).

    Args:
        inputs: Observation tensor fed to the first conv layer.
    """
    # The actor and the critic share all shallow layers.
    features = slim.conv2d(inputs=inputs, num_outputs=16,
                           activation_fn=tf.nn.relu, kernel_size=[8, 8],
                           stride=[4, 4], padding='VALID')
    features = slim.conv2d(inputs=features, num_outputs=32,
                           activation_fn=tf.nn.relu, kernel_size=[4, 4],
                           stride=[2, 2], padding='VALID')
    hidden = slim.fully_connected(inputs=slim.flatten(features),
                                  num_outputs=256, activation_fn=tf.nn.relu)

    # Recurrent network for temporal dependencies.
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=256)
    c_size = cell.state_size.c
    h_size = cell.state_size.h

    # Zero initial state, as numpy arrays for feeding.
    self.state_init = [np.zeros((1, c_size), np.float32),
                       np.zeros((1, h_size), np.float32)]

    c_in = tf.placeholder(tf.float32, [1, c_size])
    h_in = tf.placeholder(tf.float32, [1, h_size])
    self.state_in = (c_in, h_in)

    # Add a leading batch axis of size 1; the sequence length is the number
    # of rows in `inputs`.
    sequence = tf.expand_dims(hidden, [0])
    seq_len = tf.shape(inputs)[:1]
    initial_state = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
    lstm_out, final_state = tf.nn.dynamic_rnn(cell=cell,
                                              inputs=sequence,
                                              initial_state=initial_state,
                                              sequence_length=seq_len,
                                              time_major=False)
    final_c, final_h = final_state
    self.state_out = (final_c[:1, :], final_h[:1, :])
    rnn_out = tf.reshape(lstm_out, [-1, 256])

    # Outputs for the policy and value estimations.
    self.policy = slim.fully_connected(
        inputs=rnn_out, num_outputs=self.a_dim, activation_fn=tf.nn.softmax,
        weights_initializer=normalized_columns_initializer(0.01),
        biases_initializer=None)
    self.value = slim.fully_connected(
        inputs=rnn_out, num_outputs=1, activation_fn=None,
        weights_initializer=normalized_columns_initializer(1.0),
        biases_initializer=None)
def inference(inputs):
    """Build a small residual classifier for 28x28 single-channel images.

    `inputs` is reshaped to [-1, 28, 28, 1], passed through a conv + pool
    stem and four residual blocks, then two fully connected layers with
    dropout (keep probability 0.8) in front of each.

    Returns:
        10-way logits from the 'output_layer' layer.
    """
    images = tf.reshape(inputs, [-1, 28, 28, 1])

    # Author's shape notes: 28 * 28 * 32 after the conv, 14 * 14 * 32 after the pool.
    stem = slim.conv2d(images, 32, [3, 3])
    stem = tf.nn.relu(stem)
    net = slim.max_pool2d(stem, [2, 2])

    # Alternate identity and channel-changing residual blocks.
    net = res_identity(net, 32, [3, 3], 'layer_2')
    net = res_change(net, 64, [3, 3], 'layer_3')
    net = res_identity(net, 64, [3, 3], 'layer_4')
    net = res_change(net, 32, [3, 3], 'layer_5')

    net_flatten = slim.flatten(net, scope='flatten')

    dropped = slim.dropout(net_flatten, 0.8)
    fc_1 = slim.fully_connected(dropped, 200, activation_fn=tf.nn.tanh, scope='fc_1')

    dropped = slim.dropout(fc_1, 0.8)
    return slim.fully_connected(dropped, 10, activation_fn=None, scope='output_layer')
def _build_network(self, sess, is_training=True):
    """Build the VGG16 backbone, the RPN, RoI pooling and the detection head.

    The first two conv groups are never trainable; groups 3-5 are trainable
    only when `is_training` is true. `sess` is not used in this method.

    Returns:
        Tuple ``(rois, cls_prob, bbox_pred)``.

    Raises:
        NotImplementedError: If ``cfg.POOLING_MODE`` is not 'crop'.
    """
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # Select initializers.
        if cfg.TRAIN.TRUNCATED:
            make_initializer = tf.truncated_normal_initializer
        else:
            make_initializer = tf.random_normal_initializer
        initializer = make_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

        # (repeats, depth, trainable, conv scope, pool scope or None)
        backbone = (
            (2, 64, False, 'conv1', 'pool1'),
            (2, 128, False, 'conv2', 'pool2'),
            (3, 256, is_training, 'conv3', 'pool3'),
            (3, 512, is_training, 'conv4', 'pool4'),
            (3, 512, is_training, 'conv5', None),
        )
        net = self._image
        for repeats, depth, trainable, conv_scope, pool_scope in backbone:
            net = slim.repeat(net, repeats, slim.conv2d, depth, [3, 3],
                              trainable=trainable, scope=conv_scope)
            if pool_scope is not None:
                net = slim.max_pool2d(net, [2, 2], padding='SAME', scope=pool_scope)

        self._act_summaries.append(net)
        self._layers['head'] = net

        # Build the anchors for the image.
        self._anchor_component()

        # Region proposal network.
        rois = self._region_proposal(net, is_training, initializer)

        # Region of interest pooling.
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net, rois, "pool5")

        fc = slim.flatten(pool5, scope='flatten')
        for idx in (6, 7):
            fc = slim.fully_connected(fc, 4096, scope='fc%d' % idx)
            if is_training:
                fc = slim.dropout(fc, keep_prob=0.5, is_training=True,
                                  scope='dropout%d' % idx)

        # Region classification.
        cls_prob, bbox_pred = self._region_classification(
            fc, is_training, initializer, initializer_bbox)

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
def _head_to_tail(self, pool5, is_training, reuse=False):
    """Apply the fc6/fc7 head to pooled RoI features.

    Args:
        pool5: Pooled feature tensor; flattened before the first layer.
        is_training: When true, dropout (keep probability 0.5) follows each
            fully connected layer.
        reuse: Passed to the variable scope named by ``self._scope``.

    Returns:
        Output of 'fc7' (after dropout when training).
    """
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
        net = slim.flatten(pool5, scope='flatten')
        for idx in (6, 7):
            net = slim.fully_connected(net, 4096, scope='fc%d' % idx)
            if is_training:
                net = slim.dropout(net, keep_prob=0.5, is_training=True,
                                   scope='dropout%d' % idx)
    return net
def construct_net(self,is_trained = True):
    """Build a LeNet-style classifier on ``self.input_images``.

    Three 5x5 convs (6, 16 and 120 filters) with two 2x2 max-pools, an
    84-unit fully connected layer, dropout with ``self.dropout`` as keep
    probability, and a 10-unit output layer.

    Args:
        is_trained: Forwarded as `is_training` to the dropout layer.

    Returns:
        Output tensor of the 'fc9' layer.
    """
    conv_defaults = slim.arg_scope(
        [slim.conv2d],
        padding='VALID',
        weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
        weights_regularizer=slim.l2_regularizer(0.0005))

    with conv_defaults:
        # Only the first conv overrides the scope's VALID padding.
        feats = slim.conv2d(self.input_images, 6, [5, 5], 1, padding='SAME', scope='conv1')
        feats = slim.max_pool2d(feats, [2, 2], scope='pool2')
        feats = slim.conv2d(feats, 16, [5, 5], 1, scope='conv3')
        feats = slim.max_pool2d(feats, [2, 2], scope='pool4')
        feats = slim.conv2d(feats, 120, [5, 5], 1, scope='conv5')
        feats = slim.flatten(feats, scope='flat6')
        feats = slim.fully_connected(feats, 84, scope='fc7')
        feats = slim.dropout(feats, self.dropout, is_training=is_trained, scope='dropout8')
        # NOTE(review): no activation_fn is given, so slim's default
        # activation applies to this output layer - confirm that is intended
        # if the result is consumed as logits.
        digits = slim.fully_connected(feats, 10, scope='fc9')

    return digits
def __init__(self,s_size,a_size,scope,trainer):
    """Build a quantile-regression DQN network under `scope`.

    Relies on the module-level names `N` and `k`.

    Args:
        s_size: Width of the flat input placeholder (reshaped to 84x84x1).
        a_size: Number of actions.
        scope: Variable scope name; every scope other than 'global' also
            gets loss and gradient ops.
        trainer: Optimizer used to apply local gradients to the 'global'
            variables.
    """
    def _conv(inputs, depth, kernel, stride):
        # All conv layers share ReLU activation and VALID padding.
        return slim.conv2d(activation_fn=tf.nn.relu,
                           inputs=inputs, num_outputs=depth,
                           kernel_size=[kernel, kernel],
                           stride=[stride, stride], padding='VALID')

    with tf.variable_scope(scope):
        # Quantile regression DQN: N equally weighted quantiles and their
        # midpoint cumulative probabilities.
        self.quantile = 1.0 / N
        self.cumulative_probabilities = (2.0 * np.arange(N) + 1) / (2.0 * N)

        # Network.
        self.inputs = tf.placeholder(shape=[None,s_size],dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs,shape=[-1,84,84,1])
        self.conv1 = _conv(self.imageIn, 32, 8, 4)
        self.conv2 = _conv(self.conv1, 64, 4, 2)
        self.conv3 = _conv(self.conv2, 64, 3, 1)
        features = slim.fully_connected(slim.flatten(self.conv3),512,activation_fn=tf.nn.relu)

        # N quantile values per action; Q is their quantile-weighted sum.
        self.out = slim.fully_connected(
            features, a_size * N,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(0.1),
            biases_initializer=None)
        self.out = tf.reshape(self.out, [-1, a_size, N])
        self.Q = tf.reduce_sum(self.out * self.quantile, axis=2)

        # Only the worker networks need ops for the loss and gradient updates.
        if scope != 'global':
            self.actions_q = tf.placeholder(shape=[None, a_size, N], dtype=tf.float32)
            self.q_target = tf.placeholder(shape=[None, N], dtype=tf.float32)

            # Mask the quantiles with `actions_q` and sum over the action axis.
            self.q_actiona = tf.multiply(self.out, self.actions_q)
            self.q_action = tf.reduce_sum(self.q_actiona, axis=1)

            self.u = self.q_target - self.q_action
            squared_error = tf.reduce_sum(tf.square(self.u),axis=1)
            self.loss = tf.reduce_mean(squared_error)

            # Quantile-Huber pieces. They are built, but `self.loss` above
            # is the plain squared error and does not use them.
            self.delta = tf.to_float(self.u < 0.0)
            self.loss1 = tf.abs(self.cumulative_probabilities - self.delta)
            self.loss2 = self.huber(self.u, k)

            # Get gradients from the local network using the local loss.
            worker_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss,worker_vars)
            self.var_norms = tf.global_norm(worker_vars)
            clipped,self.grad_norms = tf.clip_by_global_norm(self.gradients,40.0)

            # Apply the local gradients to the global network.
            shared_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(zip(clipped,shared_vars))
def __init__(self,h_size,rnn_cell,myScope):
    """Build a recurrent dueling Q-network with four actions.

    Args:
        h_size: Depth of the last conv layer and size of the recurrent
            output; split in half for the advantage and value streams.
        rnn_cell: Recurrent cell passed to `tf.nn.dynamic_rnn`.
        myScope: Prefix for the conv and rnn scope names.
    """
    def _conv(inputs, depth, kernel, stride, suffix):
        # All conv layers use VALID padding and no bias.
        return slim.convolution2d(
            inputs=inputs, num_outputs=depth,
            kernel_size=[kernel, kernel], stride=[stride, stride],
            padding='VALID', biases_initializer=None,
            scope=myScope + suffix)

    # A flat frame of 21168 values is reshaped to an 84x84x3 image.
    self.scalarInput = tf.placeholder(shape=[None,21168],dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput,shape=[-1,84,84,3])

    # Author's shape notes follow each layer.
    self.conv1 = _conv(self.imageIn, 32, 8, 4, "_conv1")     # [-1,20,20,32]
    self.conv2 = _conv(self.conv1, 64, 4, 2, "_conv2")       # [-1,9,9,64]
    self.conv3 = _conv(self.conv2, 64, 3, 1, "_conv3")       # [-1,7,7,64]
    self.conv4 = _conv(self.conv3, h_size, 7, 1, "_conv4")   # [-1,1,1,h_size]

    self.trainLength = tf.placeholder(tf.int32)
    self.batch_size = tf.placeholder(tf.int32,[])

    # Regroup the flattened conv output as [batch, trace length, h_size]
    # before feeding it to the recurrent layer.
    flat_conv = slim.flatten(self.conv4)
    self.convFlat = tf.reshape(flat_conv,[self.batch_size,self.trainLength,h_size])
    self.state_in = rnn_cell.zero_state(self.batch_size,tf.float32)
    self.rnn,self.rnn_state = tf.nn.dynamic_rnn(
        inputs=self.convFlat,cell=rnn_cell,dtype=tf.float32,
        initial_state=self.state_in,scope=myScope+"rnn")
    self.rnn = tf.reshape(self.rnn,shape=[-1,h_size])

    # Dueling streams: split the recurrent output into advantage and value.
    self.streamA,self.streamV = tf.split(self.rnn,2,1)
    self.AW = tf.Variable(tf.random_normal([h_size//2,4]))
    self.VW = tf.Variable(tf.random_normal([h_size//2,1]))
    self.Advantage = tf.matmul(self.streamA,self.AW)
    self.Value = tf.matmul(self.streamV,self.VW)

    # Gradient of the advantage with respect to the input image.
    self.salience = tf.gradients(self.Advantage,self.imageIn)

    # Q = V + (A - mean(A)); the prediction is the argmax action.
    centred_advantage = tf.subtract(
        self.Advantage,
        tf.reduce_mean(self.Advantage,axis=1,keep_dims=True))
    self.Qout = self.Value + centred_advantage
    self.predict =tf.argmax(self.Qout,1)

    # Squared TD error between the target and the Q of the taken action.
    self.targetQ = tf.placeholder(shape=[None],dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None],dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions,4,dtype=tf.float32)
    self.Q = tf.reduce_sum(tf.multiply(self.Qout,self.actions_onehot),axis=1)
    self.td_error = tf.square(self.targetQ - self.Q)

    # Mask out the first half of every trace so that only the second half
    # contributes to the loss.
    half_trace = [self.batch_size,self.trainLength//2]
    self.maskA = tf.zeros(half_trace)
    self.maskB = tf.ones(half_trace)
    self.mask = tf.concat([self.maskA,self.maskB],1)
    self.mask = tf.reshape(self.mask,[-1])
    self.loss = tf.reduce_mean(self.td_error * self.mask)

    self.trainer = tf.train.AdamOptimizer(learning_rate=0.001)
    self.updateModel = self.trainer.minimize(self.loss)
def network(inputs):
    """Define the network: two conv + max-pool stages and two FC layers.

    `inputs` is reshaped to [-1, FLAGS.im_size, FLAGS.im_size, 3]. Relies on
    the module-level names `FLAGS` and `n_classes`.

    Returns:
        Logits from the 'output' layer (no activation).
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = tf.reshape(inputs, [-1, FLAGS.im_size, FLAGS.im_size, 3])
        net = slim.conv2d(net, 32, [3, 3], scope='conv1')
        # This pool was scoped 'conv1', a copy of the conv scope above; it is
        # named 'pool1' here to match 'pool2'. Pooling has no variables, so
        # only op names change.
        net = slim.max_pool2d(net, [4, 4], scope='pool1')
        net = slim.conv2d(net, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [4, 4], scope='pool2')
        net = slim.flatten(net)
        net = slim.fully_connected(net, 64, scope='fc')
        net = slim.fully_connected(net, n_classes, activation_fn=None, scope='output')
    return net
def inference(self):
    """Build a LeNet-style classifier on ``self.x``.

    The input is reshaped to ``self.input_shape`` (three dims) and scaled by
    0.0125. Two VALID 5x5 conv + 2x2 max-pool stages are followed by a
    500-unit fully connected layer, dropout controlled by
    ``self.is_training``, and a linear layer with ``self.nclasses`` outputs.

    Returns:
        Logits from the 'fc2' layer.
    """
    height, width, channels = (self.input_shape[0],
                               self.input_shape[1],
                               self.input_shape[2])
    images = tf.reshape(self.x, shape=[-1, height, width, channels])

    # Scale (divide by the MNIST std).
    images = images * 0.0125

    # (conv scope, pool scope, number of filters)
    conv_stages = (('conv1', 'pool1', 20), ('conv2', 'pool2', 50))

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = images
        for conv_scope, pool_scope, depth in conv_stages:
            net = slim.conv2d(net, depth, [5, 5], padding='VALID', scope=conv_scope)
            net = slim.max_pool2d(net, [2, 2], padding='VALID', scope=pool_scope)

        net = slim.flatten(net)
        net = slim.fully_connected(net, 500, scope='fc1')
        net = slim.dropout(net, 0.5, is_training=self.is_training, scope='do1')
        net = slim.fully_connected(net, self.nclasses, activation_fn=None, scope='fc2')
        return net
def neural_network(X):
    """Map ``X`` to mixture parameters: locs, scales, logits = NN(x; theta).

    The input is flattened and passed through two batch-normalised
    fully-connected layers of 400 units; three linear heads then produce the
    outputs. ``K`` is a module-level name giving the number of components.

    Args:
        X: input tensor; everything except the first dimension is flattened.

    Returns:
        Tuple ``(locs, scales, logits, hidden1)``:
            locs: tensor reshaped to ``[-1, K, 2]``.
            scales: tensor reshaped to ``[-1, K, 2]``, clamped to [2, 3].
            logits: tensor with ``K`` outputs per example.
            hidden1: activations of the first hidden layer.
    """
    X_flat = slim.flatten(X)

    # 2 hidden layers with 400 hidden units each.
    hidden1 = slim.fully_connected(X_flat, 400, normalizer_fn=slim.batch_norm)
    hidden2 = slim.fully_connected(hidden1, 400, normalizer_fn=slim.batch_norm)

    locs = slim.fully_connected(hidden2, 2 * K, activation_fn=None)

    # exp keeps the raw scale positive; it is then clamped to [2, 3].
    o_scales = slim.fully_connected(hidden2, 2 * K, activation_fn=tf.exp)
    scales = tf.minimum(3., tf.maximum(2., o_scales))

    logits = slim.fully_connected(hidden2, K, activation_fn=None)

    locs = tf.reshape(locs, [-1, K, 2])
    scales = tf.reshape(scales, [-1, K, 2])
    return locs, scales, logits, hidden1
def discriminator(self, images, reuse=False):
    """Build the strided-conv discriminator and return one value per image.

    Args:
        images: input batch; the original annotation gives (batch, 32, 32, 1).
        reuse: forwarded to the 'discriminator' variable scope.

    Returns:
        Flattened output of the final convolution (no activation).
    """
    training = (self.mode == 'train')
    conv_defaults = slim.arg_scope(
        [slim.conv2d],
        padding='SAME',
        activation_fn=None,
        stride=2,
        weights_initializer=tf.contrib.layers.xavier_initializer())
    bn_defaults = slim.arg_scope(
        [slim.batch_norm],
        decay=0.95,
        center=True,
        scale=True,
        activation_fn=tf.nn.relu,
        is_training=training)

    with tf.variable_scope('discriminator', reuse=reuse):
        with conv_defaults, bn_defaults:
            # Spatial sizes below are the original author's annotations.
            h = slim.conv2d(images, 128, [3, 3],
                            activation_fn=tf.nn.relu, scope='conv1')  # (batch_size, 16, 16, 128)
            h = slim.batch_norm(h, scope='bn1')
            h = slim.conv2d(h, 256, [3, 3], scope='conv2')  # (batch_size, 8, 8, 256)
            h = slim.batch_norm(h, scope='bn2')
            h = slim.conv2d(h, 512, [3, 3], scope='conv3')  # (batch_size, 4, 4, 512)
            h = slim.batch_norm(h, scope='bn3')
            h = slim.conv2d(h, 1, [4, 4],
                            padding='VALID', scope='conv4')  # (batch_size, 1, 1, 1)
            h = slim.flatten(h)
            return h
def __init__(self, s_size, a_size, scope, trainer):
    """Build a categorical (distributional) Q-network under ``scope``.

    Args:
        s_size: width of the flat input placeholder (reshaped to 84x84x1).
        a_size: number of actions.
        scope: variable scope name; any scope other than 'global' also gets
            loss and gradient ops.
        trainer: optimizer whose ``apply_gradients`` pushes the local
            gradients onto the 'global' variables.
    """
    with tf.variable_scope(scope):
        # Support of the value distribution: `atoms` evenly spaced points
        # between v_min and v_max.
        self.atoms = 21
        self.v_max = 10.
        self.v_min = -10.
        self.delta_z = (self.v_max - self.v_min) / (self.atoms - 1)
        support = []
        for i in range(self.atoms):
            support.append(self.v_min + i * self.delta_z)
        self.z = support

        # Convolutional torso.
        self.inputs = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
        self.conv1 = slim.conv2d(inputs=self.imageIn, num_outputs=32,
                                 kernel_size=[8, 8], stride=[4, 4],
                                 padding='VALID', activation_fn=tf.nn.relu)
        self.conv2 = slim.conv2d(inputs=self.conv1, num_outputs=64,
                                 kernel_size=[4, 4], stride=[2, 2],
                                 padding='VALID', activation_fn=tf.nn.relu)
        self.conv3 = slim.conv2d(inputs=self.conv2, num_outputs=64,
                                 kernel_size=[3, 3], stride=[1, 1],
                                 padding='VALID', activation_fn=tf.nn.relu)
        flat = slim.flatten(self.conv3)
        hidden = slim.fully_connected(flat, 512, activation_fn=tf.nn.relu)

        # One block of `atoms` logits per action, softmaxed over the atom axis.
        flat_logits = slim.fully_connected(
            hidden, a_size * self.atoms,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(0.1),
            biases_initializer=None)
        self.out = tf.reshape(flat_logits, [-1, a_size, self.atoms])
        self.p = tf.nn.softmax(self.out, dim=2)
        # Expected value per action under the predicted distribution.
        self.Q = tf.reduce_sum(self.z * self.p, axis=2)

        # Only the worker network need ops for loss functions and gradient updating.
        if scope != 'global':
            self.m_input = tf.placeholder(shape=[None, self.atoms], dtype=tf.float32)
            self.actions_p = tf.placeholder(shape=[None, a_size, self.actions_p_atoms()] if False else [None, a_size, self.atoms],
                                            dtype=tf.float32)
            # actions_p masks the distribution down to the chosen action.
            self.p_actiona = tf.multiply(self.p, self.actions_p)
            self.p_action = tf.reduce_sum(self.p_actiona, axis=1)
            # log(m) - log(p), with a small offset inside each log.
            self.p_alog = - tf.log(self.p_action + 1e-20) + tf.log(self.m_input + 1e-20)
            per_example = tf.reduce_sum(self.m_input * self.p_alog, axis=1)
            self.loss = tf.reduce_mean(per_example)

            local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(self.gradients, 40.0)

            # Apply local gradients to global network
            global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))
def generative_network(z):
    """Decode latent variables into flattened likelihood logits.

    The latent batch is reshaped to ``[M, 1, 1, d]`` (``M`` and ``d`` are
    module-level names) and upsampled by four transposed convolutions.

    Args:
        z: latent tensor reshapeable to ``[M, 1, 1, d]``.

    Returns:
        The final transposed-convolution output, flattened per example.
    """
    deconv_defaults = slim.arg_scope(
        [slim.conv2d_transpose],
        activation_fn=tf.nn.elu,
        normalizer_fn=slim.batch_norm,
        normalizer_params={'scale': True})
    with deconv_defaults:
        h = tf.reshape(z, [M, 1, 1, d])
        h = slim.conv2d_transpose(h, 128, 3, padding='VALID')
        h = slim.conv2d_transpose(h, 64, 5, padding='VALID')
        h = slim.conv2d_transpose(h, 32, 5, stride=2)
        # Last layer emits logits, so no activation.
        h = slim.conv2d_transpose(h, 1, 5, stride=2, activation_fn=None)
        logits = slim.flatten(h)
        return logits
def discriminator(tensor, num_category=10, batch_size=32, num_cont=2):
    """Build the shared discriminator / recognition network.

    Variables are reused automatically when any global variable whose name
    starts with 'discriminator' already exists.

    Args:
        tensor: input image batch.
        num_category: number of outputs of the categorical recognition head.
        batch_size: unused; kept for interface compatibility.
        num_cont: number of outputs of the continuous recognition head.

    Returns:
        Tuple ``(disc, recog_cat, recog_cont)``:
            disc: discriminator logit with the trailing size-1 axis squeezed.
            recog_cat: categorical head logits.
            recog_cont: continuous head outputs after a sigmoid.
    """
    reuse = any(t.name.startswith('discriminator') for t in tf.global_variables())
    # Function-call form prints the same single value on Python 2 and is
    # valid syntax on Python 3 (the old print statements were not).
    print(reuse)
    print(tensor.get_shape())
    with variable_scope.variable_scope('discriminator', reuse=reuse):
        tensor = slim.conv2d(tensor, num_outputs=64, kernel_size=[4, 4],
                             stride=2, activation_fn=leaky_relu)
        tensor = slim.conv2d(tensor, num_outputs=128, kernel_size=[4, 4],
                             stride=2, activation_fn=leaky_relu)
        tensor = slim.flatten(tensor)
        shared_tensor = slim.fully_connected(tensor, num_outputs=1024,
                                             activation_fn=leaky_relu)
        recog_shared = slim.fully_connected(shared_tensor, num_outputs=128,
                                            activation_fn=leaky_relu)
        disc = slim.fully_connected(shared_tensor, num_outputs=1, activation_fn=None)
        disc = tf.squeeze(disc, -1)
        recog_cat = slim.fully_connected(recog_shared, num_outputs=num_category,
                                         activation_fn=None)
        recog_cont = slim.fully_connected(recog_shared, num_outputs=num_cont,
                                          activation_fn=tf.nn.sigmoid)
    return disc, recog_cat, recog_cont
def teacher(input_images, keep_prob, is_training=True, weight_decay=5e-5,
            batch_norm_decay=0.99, batch_norm_epsilon=0.001):
    """Build the teacher network: backbone plus object / part heads.

    ``M`` and ``k`` are module-level names; the heads emit ``M`` outputs and
    the part branch uses ``M * k`` channels.

    Args:
        input_images: image batch fed to the backbone.
        keep_prob: keep probability for the dropout on both heads.
        is_training: forwarded to the backbone and to the part-branch batch
            norm. It used to be ignored (the backbone was always built with
            ``is_training=True``); the default keeps that behaviour.
        weight_decay: L2 regularisation strength for the head convolutions.
        batch_norm_decay: decay for the part-branch batch norm.
        batch_norm_epsilon: epsilon for the part-branch batch norm.

    Returns:
        Tuple ``(t_co_list, t_g, t_at, fc_obj, fc_part, fc_ccp,
        base_var_list, t_var_list)``:
            t_co_list: model variables under the backbone's 'Conv2d_0'.
            t_g: tuple of four intermediate feature maps.
            t_at: tuple of five L2-normalised attention maps.
            fc_obj: flattened object-head output.
            fc_part: flattened part-head output.
            fc_ccp: cross-channel pooled part features.
            base_var_list: model variables of the backbone.
            t_var_list: all model variables known to slim at this point.
    """
    with tf.variable_scope("Teacher_model"):
        net, endpoints = resnet_v2(inputs=input_images, num_classes=M,
                                   is_training=is_training, scope='resnet_v2')

        # co_trained layers
        var_scope = 'Teacher_model/resnet_v2/'
        co_list_0 = slim.get_model_variables(var_scope + 'Conv2d_0')
        t_co_list = co_list_0
        base_var_list = slim.get_model_variables('Teacher_model/resnet_v2')

        # feature & attention: each attention map is the channel-wise sum of
        # squared activations, L2-normalised along axis 0.
        t_g0 = endpoints["InvertedResidual_{}_{}".format(256, 2)]
        t_at0 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(t_g0), -1),
                                   axis=0, name='t_at0')
        t_g1 = endpoints["InvertedResidual_{}_{}".format(512, 3)]
        t_at1 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(t_g1), -1),
                                   axis=0, name='t_at1')
        part_feature = endpoints["InvertedResidual_{}_{}".format(1024, 3)]
        t_at2 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(part_feature), -1),
                                   axis=0, name='t_at2')
        t_g3 = endpoints["InvertedResidual_{}_{}".format(1024, 4)]
        t_at3 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(t_g3), -1),
                                   axis=0, name='t_at3')
        object_feature = endpoints["InvertedResidual_{}_{}".format(1024, 5)]
        t_at4 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(object_feature), -1),
                                   axis=0, name='t_at4')

        # NOTE(review): t_g3 is left out of t_g although t_at3 is in t_at;
        # kept as in the original -- confirm this is intended.
        t_g = (t_g0, t_g1, part_feature, object_feature)
        t_at = (t_at0, t_at1, t_at2, t_at3, t_at4)

        # Object head: global max pool, 1x1 conv to M outputs, dropout.
        object_feature_h = object_feature.get_shape().as_list()[1]
        object_feature_w = object_feature.get_shape().as_list()[2]
        fc_obj = slim.max_pool2d(object_feature,
                                 (object_feature_h, object_feature_w),
                                 scope="GMP1")
        batch_norm_params = {
            'center': True,
            'scale': True,
            'decay': batch_norm_decay,
            'epsilon': batch_norm_epsilon,
            'is_training': is_training,
        }
        fc_obj = slim.conv2d(
            fc_obj,
            M,
            [1, 1],
            activation_fn=None,
            weights_regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
            biases_regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
            scope='fc_obj')
        fc_obj = tf.nn.dropout(fc_obj, keep_prob=keep_prob)
        fc_obj = slim.flatten(fc_obj)

        # Part head: 1x1 conv to M * k channels with BN, then global max pool.
        fc_part = slim.conv2d(
            part_feature,
            M * k,  # number of convolution kernels
            [1, 1],  # kernel height and width
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,  # use batch norm as the normalizer
            normalizer_params=batch_norm_params,
            weights_regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
            biases_regularizer=tf.contrib.layers.l2_regularizer(weight_decay))
        fc_part_h = fc_part.get_shape().as_list()[1]
        fc_part_w = fc_part.get_shape().as_list()[2]
        fc_part = slim.max_pool2d(fc_part, (fc_part_h, fc_part_w), scope="GMP2")

        # Cross-channel pooling: split the last (channel) dimension into M
        # groups and average-pool each group with a [1, k] window.
        ft_list = tf.split(fc_part, num_or_size_splits=M, axis=-1)
        cls_list = []
        for group in ft_list:
            ft = tf.transpose(group, [0, 1, 3, 2])
            cls = layers_lib.pool(ft, [1, k], "AVG")
            cls = layers.flatten(cls)
            cls_list.append(cls)
        fc_ccp = tf.concat(cls_list, axis=-1)  # cross_channel_pooling (N, M)

        fc_part = slim.conv2d(
            fc_part,
            M,
            [1, 1],
            activation_fn=None,
            weights_regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
            biases_regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
            scope="fc_part")
        fc_part = tf.nn.dropout(fc_part, keep_prob=keep_prob)
        fc_part = slim.flatten(fc_part)

        t_var_list = slim.get_model_variables()
        return (t_co_list, t_g, t_at, fc_obj, fc_part, fc_ccp,
                base_var_list, t_var_list)
def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8,
                        bottleneck_layer_size=128, reuse=None,
                        scope='InceptionResnetV1'):
    """Build the reduced Inception-ResNet-v1 embedding network.

    Args:
        inputs: a 4-D image tensor (the original docstring states
            [batch_size, 32, 32, 3]).
        is_training: whether batch norm and dropout run in training mode.
        dropout_keep_prob: float, the fraction to keep before the final layer.
        bottleneck_layer_size: number of outputs of the final 'Bottleneck'
            fully-connected layer.
        reuse: whether the network and its variables should be reused.
        scope: optional variable_scope name.

    Returns:
        net: output of the 'Bottleneck' layer (no activation).
        end_points: dict holding the 'PreLogitsFlatten' tensor.
    """
    end_points = {}

    mode_scope = slim.arg_scope([slim.batch_norm, slim.dropout],
                                is_training=is_training)
    layer_scope = slim.arg_scope(
        [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
        stride=1, padding='SAME')

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with mode_scope, layer_scope:
            # Stem: three 3x3 VALID convolutions.
            net = slim.conv2d(inputs, 32, 3, stride=1, padding='VALID',
                              scope='conv_1_3x3')
            net = slim.conv2d(net, 64, 3, stride=2, padding='VALID',
                              scope='conv_2_3x3')
            net = slim.conv2d(net, 96, 3, stride=2, padding='VALID',
                              scope='conv_3_3x3')

            net = slim.repeat(net, 4, block35, scale=0.17)

            with tf.variable_scope('Mixed_6a'):
                net = reduction_a(net, 32, 32, 64, 96)

            net = slim.repeat(net, 4, block8, scale=0.20)

            with tf.variable_scope('Mixed_7a'):
                net = reduction_b(net)

            with tf.variable_scope('Logits'):
                # Average over the full remaining spatial extent.
                #pylint: disable=no-member
                spatial = net.get_shape()[1:3]
                net = slim.avg_pool2d(net, spatial, padding='VALID',
                                      scope='AvgPool_1a_8x8')
                net = slim.flatten(net)
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training, scope='Dropout')
                end_points['PreLogitsFlatten'] = net

            net = slim.fully_connected(net, bottleneck_layer_size,
                                       activation_fn=None,
                                       scope='Bottleneck', reuse=False)
    return net, end_points
def __init__(self, h_size, env, name, LEARNING_RATE, n_step):
    """Build a dueling Q-network over a flattened game frame.

    The frame is reshaped to ``[2 * (env.win_size + 2), 8, 3]``, passed
    through three separable convolutions, a max pool, an optional fourth
    convolution + pool, and a final convolution with ``h_size`` filters whose
    output is split into advantage and value streams.

    Args:
        h_size: number of filters of the final convolution (split in two).
        env: environment exposing ``win_size`` and ``actions``.
        name: prefix for every variable and ReLU op name.
        LEARNING_RATE: Adam learning rate.
        n_step: not used by this constructor.
    """
    window = env.win_size
    kernel_x = [3, 3, 3, 3]
    kernel_y = [3, 3, 3, 3]
    stride_x = [3, 1, 1, 3]
    stride_y = [3, 1, 1, 3]
    filters = [8, 32, 32, 64]
    rows, cols, depth = 2 * (window + 2), 8, 3

    self.scalarInput = tf.placeholder(shape=[None, rows * cols * depth],
                                      dtype=tf.float32)
    self.imageIn = tf.reshape(self.scalarInput, shape=[-1, rows, cols, depth])

    # --- separable conv 1 ---
    depthwise_filter1 = tf.get_variable(
        name=name + "_depthwise_filter1",
        shape=(kernel_x[0], kernel_y[0], 3, 1))
    pointwise_filter1 = tf.get_variable(
        name=name + "_pointwise_filter1",
        shape=[1, 1, 3, filters[0]])
    self.conv1 = nn.separable_conv2d(
        self.imageIn, depthwise_filter1, pointwise_filter1,
        strides=[1, stride_x[0], stride_y[0], 1], padding='SAME')
    print(np.shape(self.conv1))
    self.relu1 = nn.relu(self.conv1, name=name + "_relu1")
    print(np.shape(self.relu1))

    # --- separable conv 2 ---
    depthwise_filter2 = tf.get_variable(
        name=name + "_depthwise_filter2",
        shape=(kernel_x[1], kernel_y[1], filters[0], 1))
    pointwise_filter2 = tf.get_variable(
        name=name + "_pointwise_filter2",
        shape=[1, 1, filters[0], filters[1]])
    self.conv2 = nn.separable_conv2d(
        self.relu1, depthwise_filter2, pointwise_filter2,
        strides=[1, stride_x[1], stride_y[1], 1], padding='SAME')
    print(np.shape(self.conv2))
    self.relu2 = nn.relu(self.conv2, name=name + "_relu2")
    print(np.shape(self.relu2))

    # --- separable conv 3 ---
    depthwise_filter3 = tf.get_variable(
        name=name + "_depthwise_filter3",
        shape=(kernel_x[2], kernel_y[2], filters[1], 1))
    pointwise_filter3 = tf.get_variable(
        name=name + "_pointwise_filter3",
        shape=[1, 1, filters[1], filters[2]])
    self.conv3 = nn.separable_conv2d(
        self.relu2, depthwise_filter3, pointwise_filter3,
        strides=[1, stride_x[2], stride_y[2], 1], padding='SAME')
    print(np.shape(self.conv3))
    self.relu3 = nn.relu(self.conv3, name=name + "_relu3")
    print(np.shape(self.relu3))

    self.maxpool1 = nn.max_pool(self.relu3, ksize=[1, 3, 1, 1],
                                strides=[1, 3, 1, 1], padding='VALID')
    print(np.shape(self.maxpool1))

    # Row-count bookkeeping used to size the last filter; presumably these
    # track the row dimension after conv1, maxpool1 and conv4 -- TODO confirm.
    rows_a = np.ceil(2 * (window + 2) / 3)
    rows_b = np.floor(rows_a / 3)
    rows_c = np.ceil(rows_b / 3)

    final_strides = [1, stride_x[3], stride_y[3], 1]
    if rows_c >= 2:
        # Enough rows left for a fourth convolution and a second pool.
        conv_filter4 = tf.get_variable(
            name=name + "_conv_filter4",
            shape=(kernel_x[3], kernel_y[3], filters[2], filters[3]))
        self.conv4 = nn.conv2d(self.maxpool1, conv_filter4,
                               strides=final_strides, padding='SAME')
        print(np.shape(self.conv4))
        self.relu4 = nn.relu(self.conv4, name=name + "_relu4")
        print(np.shape(self.relu4))
        self.maxpool2 = nn.max_pool(self.relu4, ksize=[1, 2, 1, 1],
                                    strides=[1, 2, 1, 1], padding='VALID')
        last_filter = [np.floor(rows_c / 2), 1]
        conv_filter5 = tf.get_variable(
            name=name + "_conv_filter5",
            shape=(last_filter[0], last_filter[1], filters[3], h_size))
        self.conv5 = nn.conv2d(self.maxpool2, conv_filter5,
                               strides=final_strides, padding='VALID')
        print(np.shape(self.maxpool2))
    else:
        last_filter = [rows_b, 3]
        conv_filter5 = tf.get_variable(
            name=name + "_conv_filter5",
            shape=(last_filter[0], last_filter[1], filters[2], h_size))
        self.conv5 = nn.conv2d(self.maxpool1, conv_filter5,
                               strides=final_strides, padding='VALID')
    print(np.shape(self.conv5))
    self.relu5 = nn.relu(self.conv5, name=name + "_relu5")
    print(np.shape(self.relu5))

    # Split the final feature map along the channel axis into the advantage
    # and value streams.
    self.streamAC, self.streamVC = tf.split(self.relu5, 2, 3)
    self.streamA = slim.flatten(self.streamAC)
    self.streamV = slim.flatten(self.streamVC)
    xavier_init = tf.contrib.layers.xavier_initializer()
    half = h_size // 2
    self.AW = tf.Variable(xavier_init([half, env.actions]))
    self.VW = tf.Variable(xavier_init([half, 1]))
    print(self.conv5)
    print(self.streamA)
    print(self.AW)
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)

    # Q = V + (A - mean(A)).
    mean_advantage = tf.reduce_mean(self.Advantage, axis=1, keep_dims=True)
    self.Qout = self.Value + tf.subtract(self.Advantage, mean_advantage)
    self.predict = tf.argmax(self.Qout, 1)

    # Loss: mean squared difference between target and predicted Q of the
    # taken action.
    self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, env.actions, dtype=tf.float32)

    selected = tf.multiply(self.Qout, self.actions_onehot)
    self.Q = tf.reduce_sum(selected, axis=1)

    self.td_error = tf.square(self.targetQ - self.Q)
    self.loss = tf.reduce_mean(self.td_error)
    self.trainer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    self.updateModel = self.trainer.minimize(self.loss)
def _create_network(incoming, reuse=None, weight_decay=1e-8):
    """Build the residual feature extractor and return unit-length features.

    Args:
        incoming: input image batch.
        reuse: forwarded to the final 'ball' batch-norm layer.
        weight_decay: L2 regularisation strength for conv and fc weights.

    Returns:
        Tuple ``(features, None)`` where each feature row is divided by its
        (epsilon-stabilised) L2 norm.
    """
    nonlinearity = tf.nn.elu

    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)

    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        # Name the BN scope after whichever layer scope is currently active.
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    # Settings shared by the two stem convolutions.
    stem_kwargs = dict(
        stride=1,
        activation_fn=nonlinearity,
        padding="SAME",
        normalizer_fn=batch_norm_fn,
        weights_initializer=conv_weight_init,
        biases_initializer=conv_bias_init,
        weights_regularizer=conv_regularizer)

    network = incoming
    network = slim.conv2d(network, 32, [3, 3], scope="conv1_1", **stem_kwargs)
    network = slim.conv2d(network, 32, [3, 3], scope="conv1_2", **stem_kwargs)

    # NOTE(nwojke): This is missing a padding="SAME" to match the CNN
    # architecture in Table 1 of the paper. Information on how this affects
    # performance on MOT 16 training sequences can be found in
    # issue 10 https://github.com/nwojke/deep_sort/issues/10
    network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")

    network = residual_block(
        network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False, is_first=True)
    remaining_blocks = (
        ("conv2_3", False),
        ("conv3_1", True),
        ("conv3_3", False),
        ("conv4_1", True),
        ("conv4_3", False),
    )
    for block_scope, grow in remaining_blocks:
        network = residual_block(
            network, block_scope, nonlinearity, conv_weight_init,
            conv_bias_init, conv_regularizer, increase_dim=grow)

    # The fc layer keeps the channel count of the last residual block.
    feature_dim = network.get_shape().as_list()[-1]
    network = slim.flatten(network)

    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(
        network, feature_dim, activation_fn=nonlinearity,
        normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer,
        scope="fc1", weights_initializer=fc_weight_init,
        biases_initializer=fc_bias_init)

    features = network

    # Features in rows, normalize axis 1.
    features = slim.batch_norm(features, scope="ball", reuse=reuse)
    squared_norm = tf.reduce_sum(tf.square(features), [1], keepdims=True)
    feature_norm = tf.sqrt(tf.constant(1e-8, tf.float32) + squared_norm)
    features = features / feature_norm
    return features, None
def discriminator(self, x, reuse=False):
    """Build the five-stage conv discriminator.

    Args:
        x: image batch; resized to ``self.D_input_size`` square before use.
        reuse: when truthy, the current variable scope is switched to reuse
            mode before the network is built.

    Returns:
        Tuple ``(D_logit, D_prob)``: the raw score and its sigmoid.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    # (filters, conv scope, pool scope) per stage; every stage is two 3x3
    # convolutions followed by a 2x2 max pool.
    stages = (
        (128, 'conv1', 'pool1'),
        (256, 'conv2', 'pool2'),
        (256, 'conv3', 'pool3'),
        (256, 'conv4', 'pool4'),
        (256, 'conv5', 'pool5'),
    )

    with tf.variable_scope('Discriminator_scope'):
        conv_defaults = slim.arg_scope(
            [slim.conv2d],
            padding='SAME',
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            normalizer_fn=slim.batch_norm)
        with conv_defaults:
            net = tf.image.resize_images(
                images=x, size=[self.D_input_size, self.D_input_size])
            for width, conv_scope, pool_scope in stages:
                net = slim.repeat(net, 2, slim.conv2d, width, [3, 3],
                                  scope=conv_scope)
                net = slim.max_pool2d(net, [2, 2], scope=pool_scope)

            net = slim.flatten(net)
            net = slim.fully_connected(net, 512)
            D_logit = slim.fully_connected(net, 1, activation_fn=None)
            D_prob = tf.nn.sigmoid(D_logit)
            return D_logit, D_prob
def encoder(self, images, is_training):
    """Encode images into two latent-sized vectors.

    Four stages of a stride-2 convolution followed by three ``conv2d_block``
    repeats, then two parallel linear heads of width
    ``self.latent_variable_dim``.

    Args:
        images: input image batch.
        is_training: batch-norm mode.

    Returns:
        Tuple ``(fc1, fc2)``: outputs of the 'Fc_1' and 'Fc_2' heads.
    """
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0

    # (channels, downsampling conv scope, repeated block scope)
    stages = (
        (32, 'Conv2d_1a', 'Conv2d_1b'),
        (64, 'Conv2d_2a', 'Conv2d_2b'),
        (128, 'Conv2d_3a', 'Conv2d_3b'),
        (256, 'Conv2d_4a', 'Conv2d_4b'),
    )

    bn_mode = slim.arg_scope([slim.batch_norm], is_training=is_training)
    layer_defaults = slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
        weights_regularizer=slim.l2_regularizer(weight_decay),
        normalizer_fn=slim.batch_norm,
        normalizer_params=self.batch_norm_params)

    with tf.variable_scope('encoder'):
        with bn_mode, layer_defaults:
            net = images
            for channels, down_scope, block_scope in stages:
                net = slim.conv2d(net, channels, [4, 4], 2,
                                  activation_fn=activation_fn,
                                  scope=down_scope)
                net = slim.repeat(net, 3, conv2d_block, 0.1, channels,
                                  [4, 4], 1, activation_fn=activation_fn,
                                  scope=block_scope)

            net = slim.flatten(net)
            # Both heads are plain linear layers: no activation, no BN.
            fc1 = slim.fully_connected(net, self.latent_variable_dim,
                                       activation_fn=None,
                                       normalizer_fn=None, scope='Fc_1')
            fc2 = slim.fully_connected(net, self.latent_variable_dim,
                                       activation_fn=None,
                                       normalizer_fn=None, scope='Fc_2')
    return fc1, fc2
def __init__(self, s_size, a_size, scope, trainer):
    """Build a conv + LSTM actor-critic network under ``scope``.

    Args:
        s_size: width of the flat input placeholder (reshaped to 84x84x1).
        a_size: number of actions.
        scope: variable scope name; any scope other than 'global' also gets
            loss and gradient ops.
        trainer: optimizer whose ``apply_gradients`` pushes the local
            gradients onto the 'global' variables.
    """
    with tf.variable_scope(scope):
        # Input and visual encoding layers
        self.inputs = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
        self.conv1 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.imageIn,
                                 num_outputs=16,
                                 kernel_size=[8, 8],
                                 stride=[4, 4],
                                 padding='VALID')
        self.conv2 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.conv1,
                                 num_outputs=32,
                                 kernel_size=[4, 4],
                                 stride=[2, 2],
                                 padding='VALID')
        hidden = slim.fully_connected(slim.flatten(self.conv2), 256,
                                      activation_fn=tf.nn.elu)

        # Recurrent network for temporal dependencies
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(256, state_is_tuple=True)
        c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
        h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
        self.state_init = [c_init, h_init]
        c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
        self.state_in = (c_in, h_in)
        # The whole input batch is fed as ONE sequence: add a leading axis of
        # size 1 and use the batch length as the sequence length.
        rnn_in = tf.expand_dims(hidden, [0])
        step_size = tf.shape(self.imageIn)[:1]
        state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
        lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
            lstm_cell,
            rnn_in,
            initial_state=state_in,
            sequence_length=step_size,
            time_major=False)
        lstm_c, lstm_h = lstm_state
        self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
        rnn_out = tf.reshape(lstm_outputs, [-1, 256])

        # Output layers for policy and value estimations
        self.policy = slim.fully_connected(
            rnn_out,
            a_size,
            activation_fn=tf.nn.softmax,
            weights_initializer=normalized_columns_initializer(0.01),
            biases_initializer=None)
        self.value = slim.fully_connected(
            rnn_out,
            1,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(1.0),
            biases_initializer=None)

        # Only the worker network need ops for loss functions and gradient
        # updating.
        if scope != 'global':
            self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
            self.actions_onehot = tf.one_hot(self.actions, a_size,
                                             dtype=tf.float32)
            self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

            # Probability the policy assigned to the action actually taken.
            self.responsible_outputs = tf.reduce_sum(
                self.policy * self.actions_onehot, [1])

            # A softmax output can underflow to exactly 0, and log(0) turns
            # the whole loss into inf/NaN. The tiny offset keeps the logs
            # finite and is absorbed by rounding for ordinary probabilities.
            log_eps = 1e-20

            # Loss functions
            self.value_loss = 0.5 * tf.reduce_sum(
                tf.square(self.target_v - tf.reshape(self.value, [-1])))
            self.entropy = -tf.reduce_sum(
                self.policy * tf.log(self.policy + log_eps))
            self.policy_loss = -tf.reduce_sum(
                tf.log(self.responsible_outputs + log_eps) * self.advantages)
            self.loss = (0.5 * self.value_loss + self.policy_loss
                         - self.entropy * 0.01)

            # Get gradients from local network using local losses
            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, 40.0)

            # Apply local gradients to global network
            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(
                zip(grads, global_vars))
def loss(self, x_u, x_l, y_l):
    """Build the semi-supervised loss terms and their summaries.

    Runs ``self.circuit_loop`` once on the unlabeled batch and once on the
    labeled batch, then assembles the individual terms.

    Args:
        x_u: unlabeled inputs.
        x_l: labeled inputs.
        y_l: labels for ``x_l``.

    Returns:
        dict with the keys 'KL(z_l)', 'log p(x_l)', 'Labeled', 'KL(z_u)',
        'log p(x_u)', 'H(y)', 'log p(y)', 'KL(z)' and 'Dis'.
    """
    unlabel = self.circuit_loop(x_u)
    labeled = self.circuit_loop(x_l, y_l)

    with tf.name_scope('loss'):
        loss = dict()

        # Note:
        #   `log p(y)` should be a constant term if we assume that y
        #   is equally distributed. That's why it is omitted.
        #   However, since y is now an approximation, it is not clear
        #   whether omitting it is correct.
        # [TODO] What PDF should be used to compute H(y|x)?
        #   1. Categorical? But y is continuous here.
        #   2. Gumbel-Softmax? But the PDF is clumsy.

        with tf.name_scope('Labeled'):
            z_mu = labeled['z_mu']
            z_lv = labeled['z_lv']
            loss['KL(z_l)'] = tf.reduce_mean(
                GaussianKLD(
                    z_mu, z_lv,
                    tf.zeros_like(z_mu), tf.zeros_like(z_lv)))

            # `labels=` replaces the pre-1.0 `targets=` keyword.
            loss['log p(x_l)'] = tf.reduce_mean(
                tf.reduce_sum(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=slim.flatten(labeled['xh_sig_logit']),
                        labels=slim.flatten(x_l)),
                    1))

            loss['Labeled'] = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=labeled['y_logit_pred'],
                    labels=y_l))

        with tf.name_scope('Unlabeled'):
            z_mu = unlabel['z_mu']
            z_lv = unlabel['z_lv']
            loss['KL(z_u)'] = tf.reduce_mean(
                GaussianKLD(
                    z_mu, z_lv,
                    tf.zeros_like(z_mu), tf.zeros_like(z_lv)))

            loss['log p(x_u)'] = tf.reduce_mean(
                tf.reduce_sum(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=slim.flatten(unlabel['xh_sig_logit']),
                        labels=slim.flatten(x_u)),
                    1))

            # Uniform prior over the y_dim classes.
            y_prior = tf.ones_like(
                unlabel['y_sample']) / self.arch['y_dim']

            # Reference (Eric Jang's Categorical VAE notebook):
            #   kl_tmp = tf.reshape(q_y*(log_q_y-tf.log(1.0/K)),[-1,N,K])
            #   KL = tf.reduce_sum(kl_tmp,[1,2])
            #   elbo = tf.reduce_sum(p_x.log_prob(x),1) - KL
            # https://github.com/ericjang/gumbel-softmax/blob/master/Categorical%20VAE.ipynb
            #
            # 'tf.nn.softmax_cross_entropy' is not used here because it
            # takes logits, while `log p` has to be subtracted before the
            # multiplication by p.
            # tf.multiply replaces tf.mul, which was removed in TF 1.0.
            loss['H(y)'] = tf.reduce_mean(
                tf.reduce_sum(
                    tf.multiply(
                        unlabel['y_sample'],
                        tf.log(unlabel['y_sample'] + EPS)
                        - tf.log(y_prior)),
                    -1))

            # Using the Gumbel-Softmax density here was rejected:
            #   1. Incorrect because p(y..y) is a scalar -- unless the
            #      parametric form of H(Y) is available.
            #   2. The numerical value can be VERY LARGE, causing trouble.
            #   3. 'Gumbel-Softmax' should be regarded as a sampling step.

            # [TODO] How to define this term? log p(y)
            loss['log p(y)'] = 0.0

        loss['KL(z)'] = loss['KL(z_l)'] + loss['KL(z_u)']
        loss['Dis'] = loss['log p(x_l)'] + loss['log p(x_u)']
        loss['H(y)'] = loss['H(y)'] + loss['log p(y)']

        # For summaries
        with tf.name_scope('Summary'):
            tf.summary.scalar('DKL_z', loss['KL(z)'])
            tf.summary.scalar('MMSE', loss['Dis'])

            tf.summary.histogram('z_s', unlabel['z'])

            tf.summary.histogram('z_mu_s', unlabel['z_mu'])

            tf.summary.histogram('z_lv_s', unlabel['z_lv'])
    return loss
def forward(self):
    """Flatten the input layer's output after moving axis 3 to position 1.

    Reads ``self.inp.out`` and ``self.scope``; stores the result in
    ``self.out``.
    """
    # Permute (0, 1, 2, 3) -> (0, 3, 1, 2) before flattening.
    permuted = tf.transpose(self.inp.out, [0, 3, 1, 2])
    self.out = slim.flatten(permuted, scope=self.scope)
def _head_to_tail(self, pool5, is_training, reuse=None):
    """Run the final 3-D inception stages and two fully-connected layers.

    Args:
        pool5: 5-D feature tensor entering the head.
        is_training: passed to every ``Unit3D``; also gates the dropout
            layers through a plain ``if``.
        reuse: forwarded to the enclosing variable scope.

    Returns:
        ``fc7`` on the normal path. If ``self._final_endpoint`` names
        'MaxPool3d_5a_2x2' or 'Mixed_5b', returns ``(net, end_points)`` at
        that point instead.
    """
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
        end_points = {}
        net = pool5

        end_point = 'MaxPool3d_5a_2x2'
        net = tf.nn.max_pool3d(net,
                               ksize=[1, 2, 2, 2, 1],
                               strides=[1, 2, 2, 2, 1],
                               padding=snt.SAME,
                               name=end_point)
        end_points[end_point] = net
        if self._final_endpoint == end_point:
            return net, end_points

        end_point = 'Mixed_5b'
        with tf.variable_scope(end_point):
            with tf.variable_scope('Branch_0'):
                branch_0 = Unit3D(
                    output_channels=256, kernel_shape=[1, 1, 1],
                    name='Conv3d_0a_1x1')(net, is_training=is_training)
            with tf.variable_scope('Branch_1'):
                branch_1 = Unit3D(
                    output_channels=160, kernel_shape=[1, 1, 1],
                    name='Conv3d_0a_1x1')(net, is_training=is_training)
                branch_1 = Unit3D(
                    output_channels=320, kernel_shape=[3, 3, 3],
                    name='Conv3d_0b_3x3')(branch_1, is_training=is_training)
            with tf.variable_scope('Branch_2'):
                branch_2 = Unit3D(
                    output_channels=32, kernel_shape=[1, 1, 1],
                    name='Conv3d_0a_1x1')(net, is_training=is_training)
                # The '0a' in this name (where Mixed_5c uses '0b') is kept
                # exactly as written; variable names depend on it.
                branch_2 = Unit3D(
                    output_channels=128, kernel_shape=[3, 3, 3],
                    name='Conv3d_0a_3x3')(branch_2, is_training=is_training)
            with tf.variable_scope('Branch_3'):
                branch_3 = tf.nn.max_pool3d(net,
                                            ksize=[1, 3, 3, 3, 1],
                                            strides=[1, 1, 1, 1, 1],
                                            padding=snt.SAME,
                                            name='MaxPool3d_0a_3x3')
                branch_3 = Unit3D(
                    output_channels=128, kernel_shape=[1, 1, 1],
                    name='Conv3d_0b_1x1')(branch_3, is_training=is_training)
            # Join the four branches along the last axis.
            net = tf.concat([branch_0, branch_1, branch_2, branch_3], 4)
        end_points[end_point] = net
        if self._final_endpoint == end_point:
            return net, end_points

        end_point = 'Mixed_5c'
        with tf.variable_scope(end_point):
            with tf.variable_scope('Branch_0'):
                branch_0 = Unit3D(
                    output_channels=384, kernel_shape=[1, 1, 1],
                    name='Conv3d_0a_1x1')(net, is_training=is_training)
            with tf.variable_scope('Branch_1'):
                branch_1 = Unit3D(
                    output_channels=192, kernel_shape=[1, 1, 1],
                    name='Conv3d_0a_1x1')(net, is_training=is_training)
                branch_1 = Unit3D(
                    output_channels=384, kernel_shape=[3, 3, 3],
                    name='Conv3d_0b_3x3')(branch_1, is_training=is_training)
            with tf.variable_scope('Branch_2'):
                branch_2 = Unit3D(
                    output_channels=48, kernel_shape=[1, 1, 1],
                    name='Conv3d_0a_1x1')(net, is_training=is_training)
                branch_2 = Unit3D(
                    output_channels=128, kernel_shape=[3, 3, 3],
                    name='Conv3d_0b_3x3')(branch_2, is_training=is_training)
            with tf.variable_scope('Branch_3'):
                branch_3 = tf.nn.max_pool3d(net,
                                            ksize=[1, 3, 3, 3, 1],
                                            strides=[1, 1, 1, 1, 1],
                                            padding=snt.SAME,
                                            name='MaxPool3d_0a_3x3')
                branch_3 = Unit3D(
                    output_channels=128, kernel_shape=[1, 1, 1],
                    name='Conv3d_0b_1x1')(branch_3, is_training=is_training)
            net = tf.concat([branch_0, branch_1, branch_2, branch_3], 4)

        pool5_flat = slim.flatten(net, scope='flatten')
        self.print_tensor_infomation(pool5_flat, 'pool5_flat')

        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True,
                               scope='dropout6')
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True,
                               scope='dropout7')

    return fc7
def loss(self, net_out):
    """Build the weighted squared-error detection loss for ``net_out``.

    Reads the grid/box/class counts and the four scale factors from
    ``self.meta``, prints them, creates the ground-truth placeholders, and
    compares ``net_out`` against the concatenation
    ``[class probs | box confidences | box coordinates]``.

    Side effects:
        * ``self.placeholders`` is set to the dict of created placeholders
          (keys: probs, confs, coord, proid, areas, upleft, botright).
        * ``self.fetch`` is extended with the flattened target/weight
          tensors.
        * ``self.loss`` is set to half the batch mean of the per-sample
          weighted squared error, and a scalar summary is registered.

    Args:
        net_out: 2-D network output; columns from ``SS * (C + B)`` onwards
            are interpreted as ``B`` boxes of 4 coordinates per grid cell.
    """
    meta = self.meta
    class_scale = float(meta['class_scale'])
    object_scale = float(meta['object_scale'])
    noobject_scale = float(meta['noobject_scale'])
    coord_scale = float(meta['coord_scale'])
    side, boxes, classes = meta['side'], meta['num'], meta['classes']
    cells = side * side  # number of grid cells

    print('{} loss hyper-parameters:'.format(meta['model']))
    print('\tside = {}'.format(meta['side']))
    print('\tbox = {}'.format(meta['num']))
    print('\tclasses = {}'.format(meta['classes']))
    print('\tscales = {}'.format(
        [class_scale, object_scale, noobject_scale, coord_scale]))

    per_class = [None, cells, classes]
    per_box = [None, cells, boxes]

    def float_input(shape):
        return tf.placeholder(tf.float32, shape)

    # Ground-truth targets.
    true_probs = float_input(per_class)
    true_confs = float_input(per_box)
    true_coord = float_input(per_box + [4])
    # Per-class weight mask for the squared-error term.
    class_mask = float_input(per_class)
    # Ground-truth box geometry used for the IOU computation.
    true_areas = float_input(per_box)
    true_upleft = float_input(per_box + [2])
    true_botright = float_input(per_box + [2])

    self.placeholders = {
        'probs': true_probs,
        'confs': true_confs,
        'coord': true_coord,
        'proid': class_mask,
        'areas': true_areas,
        'upleft': true_upleft,
        'botright': true_botright
    }

    # Predicted boxes: the trailing columns of the network output.
    pred_boxes = net_out[:, cells * (classes + boxes):]
    pred_boxes = tf.reshape(pred_boxes, [-1, cells, boxes, 4])
    pred_wh = tf.pow(pred_boxes[:, :, :, 2:4], 2) * side  # unit: grid cell
    pred_area = pred_wh[:, :, :, 0] * pred_wh[:, :, :, 1]  # grid cell^2
    pred_centers = pred_boxes[:, :, :, 0:2]  # [batch, SS, B, 2]
    pred_upleft = pred_centers - (pred_wh * .5)  # [batch, SS, B, 2]
    pred_botright = pred_centers + (pred_wh * .5)  # [batch, SS, B, 2]

    # Intersection between predicted and ground-truth boxes.
    overlap_upleft = tf.maximum(pred_upleft, true_upleft)
    overlap_botright = tf.minimum(pred_botright, true_botright)
    overlap_wh = overlap_botright - overlap_upleft
    overlap_wh = tf.maximum(overlap_wh, 0.0)
    overlap = tf.multiply(overlap_wh[:, :, :, 0], overlap_wh[:, :, :, 1])

    # Keep confidence only for the box with the best IOU in each cell.
    iou = tf.truediv(overlap, true_areas + pred_area - overlap)
    is_best = tf.equal(iou, tf.reduce_max(iou, [2], True))
    is_best = tf.to_float(is_best)
    responsible = tf.multiply(is_best, true_confs)

    # Per-element weights for each part of the target vector.
    conf_weight = noobject_scale * (1. - responsible) + object_scale * responsible
    responsible_4 = tf.concat(4 * [tf.expand_dims(responsible, -1)], 3)
    coord_weight = coord_scale * responsible_4
    prob_weight = class_scale * class_mask

    # Flatten everything to [batch, features].
    flat_probs = slim.flatten(true_probs)
    flat_prob_weight = slim.flatten(prob_weight)
    flat_confs = slim.flatten(responsible)
    flat_conf_weight = slim.flatten(conf_weight)
    flat_coord = slim.flatten(true_coord)
    flat_coord_weight = slim.flatten(coord_weight)

    self.fetch += [flat_probs, flat_confs, flat_conf_weight,
                   flat_coord_weight, flat_prob_weight]
    target = tf.concat([flat_probs, flat_confs, flat_coord], 1)
    weights = tf.concat(
        [flat_prob_weight, flat_conf_weight, flat_coord_weight], 1)

    squared_error = tf.pow(net_out - target, 2)
    weighted = tf.multiply(squared_error, weights)
    per_sample = tf.reduce_sum(weighted, 1)
    self.loss = .5 * tf.reduce_mean(per_sample)
    tf.summary.scalar('{} loss'.format(meta['model']), self.loss)
def mobilenet_v2_base(inputs,
                      min_depth=8,
                      depth_multiplier=1.0,
                      conv_defs=None,
                      scope=None):
    """Build the MobileNetV2 feature stack described by ``conv_defs``.

    Each entry of ``conv_defs`` must be a ``Conv``, ``InvertedBottleneck``
    or ``AvgPool`` definition; the layers are created in order and every
    intermediate tensor is recorded in the returned end-point dict.

    Args:
        inputs: Input tensor fed to the first layer.
        min_depth: Lower bound applied to every scaled channel count.
        depth_multiplier: Positive factor applied to each definition's
            ``channel`` value before truncating to int.
        conv_defs: Sequence of layer definitions; ``_CONV_DEFS`` when None.
        scope: Optional variable scope (default name ``'MobilenetV2'``).

    Returns:
        ``(net, end_points)``: the final tensor and an ``OrderedDict``
        mapping end-point names to tensors.

    Raises:
        ValueError: if ``depth_multiplier`` is not positive, if an inverted
            bottleneck has ``repeat <= 0``, or if a definition has an
            unsupported type.
    """
    if depth_multiplier <= 0:
        raise ValueError('depth_multiplier is not greater than zero.')

    def scaled_depth(channels):
        return max(int(channels * depth_multiplier), min_depth)

    def channels_of(tensor):
        return tensor.get_shape().as_list()[-1]

    end_points = OrderedDict()
    layer_defs = _CONV_DEFS if conv_defs is None else conv_defs

    net = inputs
    with tf.variable_scope(scope, 'MobilenetV2', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            padding='SAME'):
            for index, layer_def in enumerate(layer_defs):
                if isinstance(layer_def, Conv):
                    name = 'Conv2d_%d' % index
                    depth = scaled_depth(layer_def.channel)
                    net = slim.conv2d(net,
                                      depth,
                                      layer_def.kernel,
                                      activation_fn=tf.nn.relu6,
                                      stride=layer_def.stride,
                                      scope=name)
                    end_points[name] = net
                    continue

                if isinstance(layer_def, InvertedBottleneck):
                    block_stride = layer_def.stride
                    if layer_def.repeat <= 0:
                        raise ValueError(
                            'repeat value of inverted bottleneck should be greater than zero.'
                        )
                    for rep in range(layer_def.repeat):
                        name = 'InvertedBottleneck_%d_%d' % (index, rep)
                        shortcut = net

                        # 1x1 expansion.
                        net = slim.conv2d(
                            net,
                            layer_def.up_sample * channels_of(net),
                            [1, 1],
                            activation_fn=tf.nn.relu6,
                            scope=name + '_inverted_bottleneck')
                        end_points[name + '_inverted_bottleneck'] = net

                        # 3x3 depthwise convolution.
                        net = slim.separable_conv2d(
                            net,
                            None,
                            [3, 3],
                            depth_multiplier=1,
                            stride=block_stride,
                            activation_fn=tf.nn.relu6,
                            scope=name + '_dwise')
                        end_points[name + '_dwise'] = net

                        # 1x1 linear projection.
                        depth = scaled_depth(layer_def.channel)
                        net = slim.conv2d(net,
                                          depth,
                                          [1, 1],
                                          activation_fn=None,
                                          scope=name + '_linear')
                        end_points[name + '_linear'] = net

                        if block_stride == 1:
                            if channels_of(shortcut) != channels_of(net):
                                # Channel counts differ: project the
                                # shortcut with a bias-free 1x1 conv so the
                                # residual add is well defined.
                                shortcut = slim.conv2d(
                                    shortcut,
                                    depth,
                                    [1, 1],
                                    activation_fn=None,
                                    biases_initializer=None,
                                    scope=name + '_residual_match')
                            net = tf.add(shortcut, net,
                                         name=name + '_residual_add')
                            end_points[name + '_residual_add'] = net

                        # Only the first repeat uses the configured stride.
                        block_stride = 1
                    continue

                if isinstance(layer_def, AvgPool):
                    name = 'AvgPool'
                    net = slim.avg_pool2d(net, layer_def.kernel, scope=name)
                    net = slim.flatten(net, scope='Flatten')
                    end_points[name] = net
                    continue

                raise ValueError('CONV_DEF is not valid.')

    return net, end_points
net8 = slim.conv2d( net7, num_outputs = 256, kernel_size = [3,3], stride = [1,1], padding = 'SAME' ) ''' x_image = tf.reshape(x,[-1,28,28,1]) hidden_1 = slim.conv2d(x_image,5,[5,5]) shape_h1 = tf.shape( hidden_1 ) pool_1 = slim.max_pool2d(hidden_1,[2,2]) hidden_2 = slim.conv2d(pool_1,5,[5,5]) pool_2 = slim.max_pool2d(hidden_2,[2,2]) hidden_3 = slim.conv2d(pool_2,20,[5,5]) hidden_3 = slim.dropout(hidden_3,keep_prob) out_y = slim.fully_connected(slim.flatten(hidden_3),10,activation_fn=tf.nn.softmax) ''' #reshape net8 & full connected layer logits = slim.fully_connected( slim.flatten( net8 ), 1470, activation_fn=tf.nn.softmax ) return logits #相交的时候长宽都是四条线,选中间两条 def calc_iou( self, label_boxes, logits_boxes ): label_x = label_boxes[...,0] label_y = label_boxes[...,1] label_w = label_boxes[...,2] label_h = label_boxes[...,3] logits_x = logits_boxes[...,0] logits_y = logits_boxes[...,1] logits_w = logits_boxes[...,2] logits_h = logits_boxes[...,3] #没有交集
def inception_resnet_v1(inputs,
                        is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Build a reduced Inception-ResNet-v1 style embedding network.

    The stack is: a strided stem of small convolutions and max-pools, one
    ``block35``, ``reduction_a``, one ``block8`` followed by a linear
    ``block8``, global average pooling, dropout, and a linear ``Bottleneck``
    fully connected layer.

    Args:
        inputs: 4-D image tensor (the pooling step reads spatial dims 1:3).
        is_training: Whether batch-norm and dropout run in training mode.
        dropout_keep_prob: Keep probability for the dropout before the
            bottleneck layer.
        bottleneck_layer_size: Number of units of the final linear layer.
        reuse: Whether the network's variables should be reused.
        scope: Variable scope for the network.

    Returns:
        ``(net, end_points)``: the bottleneck-layer output and a dict of
        intermediate tensors keyed by end-point name.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1, padding='SAME'):
                # Stem: strided 3x3 conv.
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net

                # Factorised 3x3 as a 1x3 followed by a 3x1 convolution.
                net = slim.conv2d(net, 16, [1, 3], scope='Conv2d_2b_3x3')
                net = slim.conv2d(net, 16, [3, 1], scope='Conv2d_2c_3x3')
                end_points['Conv2d_2_3x3'] = net

                net = slim.max_pool2d(net, 3, stride=3, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net

                # 1x1 conv followed by a second stride-3 pool; the pooled
                # tensor is what gets recorded under 'Conv2d_3b_1x1'.
                net = slim.conv2d(net, 64, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                net = slim.max_pool2d(net, 3, stride=3, padding='VALID',
                                      scope='MaxPool_3b_3x3')
                end_points['Conv2d_3b_1x1'] = net

                net = slim.conv2d(net, 16, 3, stride=2, padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 1 x Inception-ResNet-A
                net = slim.repeat(net, 1, block35, scale=0.27)
                end_points['Mixed_5a'] = net

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 96, 48, 96, 64)
                end_points['Mixed_6a'] = net

                # 1 x Inception-ResNet-C, then one more without activation.
                net = slim.repeat(net, 1, block8, scale=0.20)
                end_points['Mixed_8a'] = net

                net = block8(net, activation_fn=None)
                end_points['Mixed_8b'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # Global average pool over the full spatial extent.
                    # (Original author's note: consider switching to max pool.)
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points
def _network_slim(self, x):
    """Build the grouped-convolution feature network and its image summaries.

    Five convolution stages (conv2, conv4 and conv5 are split into two
    channel groups), two 1x1 "fully connected" convolutions with dropout in
    between, and a final flatten. Along the way, image summaries are emitted
    for batch entries 0 and 64 of both the input and the conv5 output, so
    the batch must contain at least 65 entries.

    Args:
        x: 4-D input batch with statically known dims 1 and 2.

    Returns:
        The flattened ``fc2`` output.
    """
    nearest = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    single = (1, -1, -1, -1)

    def grouped_conv(inputs, depth, kernel):
        # Split channels in two, convolve each half, and re-join.
        left, right = tf.split(inputs, 2, 3)
        left = slim.conv2d(left, depth, kernel, 1, scope='b1')
        right = slim.conv2d(right, depth, kernel, 1, scope='b2')
        return tf.concat([left, right], 3)

    with tf.variable_scope('input_images', 'slim_net', reuse=True):
        in_shape = x.shape
        dim1 = in_shape[1].value
        dim2 = in_shape[2].value
        # NOTE(review): the target size is (10 * dim2, 10 * dim1), i.e. the
        # two spatial dims are swapped; harmless for square inputs.
        preview_size = (10 * dim2, 10 * dim1)
        input_d = tf.slice(x, (0, 0, 0, 0), single)
        input_s = tf.slice(x, (64, 0, 0, 0), single)
        input_d = tf.image.resize_images(images=input_d, size=preview_size,
                                         method=nearest)
        input_s = tf.image.resize_images(images=input_s, size=preview_size,
                                         method=nearest)
        slim.summary.image("batch_d_input", input_d)
        slim.summary.image("batch_s_input", input_s)

    conv1 = slim.conv2d(inputs=x, num_outputs=96, kernel_size=[3, 3],
                        scope='conv1', reuse=tf.AUTO_REUSE)
    pool1 = slim.max_pool2d(inputs=conv1, kernel_size=[3, 3], stride=2,
                            scope='pool1')

    with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE):
        conv2 = grouped_conv(pool1, 128, [5, 5])
    pool2 = slim.max_pool2d(inputs=conv2, kernel_size=[3, 3], stride=2,
                            scope='pool2')

    conv3 = slim.conv2d(inputs=pool2, num_outputs=384, kernel_size=[3, 3],
                        stride=1, scope='conv3', reuse=tf.AUTO_REUSE)

    with tf.variable_scope('conv4', reuse=tf.AUTO_REUSE):
        conv4 = grouped_conv(conv3, 192, [3, 3])

    # conv5 is a plain convolution: no activation and no normalizer.
    with tf.variable_scope('conv5', reuse=tf.AUTO_REUSE):
        with slim.arg_scope([slim.conv2d],
                            activation_fn=None, normalizer_fn=None):
            conv5 = grouped_conv(conv4, 128, [3, 3])

    with tf.variable_scope('out_image', 'slim_net', reuse=tf.AUTO_REUSE):
        out_shape = conv5.shape
        dim1 = out_shape[1].value
        dim2 = out_shape[2].value
        channels = out_shape[3].value
        feat_d = tf.slice(conv5, (0, 0, 0, 0), single)
        feat_s = tf.slice(conv5, (64, 0, 0, 0), single)
        feat_d = tf.reshape(feat_d, (dim2, dim1, channels))
        feat_s = tf.reshape(feat_s, (dim2, dim1, channels))
        # Pad each map by one pixel per side so tiles are visually separated.
        dim1 += 2
        dim2 += 2
        feat_d = tf.image.resize_image_with_crop_or_pad(feat_d, dim2, dim1)
        feat_s = tf.image.resize_image_with_crop_or_pad(feat_s, dim2, dim1)
        # Arrange the channels as a 16x16 mosaic (requires 256 channels).
        feat_d = tf.reshape(feat_d, (dim2, dim1, 16, 16))
        feat_s = tf.reshape(feat_s, (dim2, dim1, 16, 16))
        feat_d = tf.transpose(feat_d, (2, 0, 3, 1))
        feat_s = tf.transpose(feat_s, (2, 0, 3, 1))
        feat_d = tf.reshape(feat_d, (1, 16 * dim2, 16 * dim1, 1))
        feat_s = tf.reshape(feat_s, (1, 16 * dim2, 16 * dim1, 1))
        mosaic_size = (5 * 16 * dim2, 5 * 16 * dim1)
        feat_d = tf.image.resize_images(images=feat_d, size=mosaic_size,
                                        method=nearest)
        feat_s = tf.image.resize_images(images=feat_s, size=mosaic_size,
                                        method=nearest)
        tf.summary.image("out_d", feat_d)
        tf.summary.image("out_s", feat_s)

    fc1 = slim.conv2d(inputs=conv5, num_outputs=1024, kernel_size=[1, 1],
                      stride=1, scope='fc1', reuse=tf.AUTO_REUSE)
    # Dropout uses slim's default keep probability.
    dropped = slim.dropout(fc1, scope='dropout', is_training=self.training)
    fc2 = slim.conv2d(inputs=dropped, num_outputs=128, kernel_size=[1, 1],
                      stride=1, scope='fc2', reuse=tf.AUTO_REUSE)
    return slim.flatten(fc2, scope='flatten')
def create_tensor_graph(self, model_input_params, class_count, algorithm_params):
    """Build the classification graph on ``model_input_params.device_id``.

    The network starts with three parallel convolutions (1x1, 3x3, 5x5)
    whose outputs are concatenated and locally response-normalised, followed
    by three groups of 1x1 convolutions (the first two with additive skip
    connections, the last with dropout) and a linear classifier.

    Args:
        model_input_params: Provides ``device_id``, the input tensor ``x``
            and the ``is_training`` flag.
        class_count: Number of output units of the final layer.
        algorithm_params: Mapping with ``"filter_count"`` and
            ``"drop_out_ratio"`` entries.

    Returns:
        ``ModelOutputTensors`` whose ``y_conv`` is the classifier output; the
        image fields are None and ``histogram_tensors`` is empty.
    """
    with tf.device(model_input_params.device_id):
        with slim.arg_scope([slim.conv2d, slim.fully_connected]):
            base_filters = algorithm_params["filter_count"]

            layout = None  # 'NHWC'
            if layout == 'NCHW':
                # Convert input to NCHW.
                features = tf.transpose(model_input_params.x, [0, 3, 1, 2])
            else:
                features = model_input_params.x

            def pointwise(inputs, depth, name):
                return slim.conv2d(inputs, depth, [1, 1], scope=name,
                                   data_format=layout)

            # Level 0: multi-scale branches.
            branches = [
                slim.conv2d(features, base_filters, [size, size],
                            scope='conv0_%dx%d' % (size, size),
                            data_format=layout)
                for size in (1, 3, 5)
            ]
            level0 = tf.concat(axis=3, values=branches)
            level0 = tf.nn.local_response_normalization(level0)

            # Level 1: residual group.
            wide_filters = base_filters * 3
            res1_in = pointwise(level0, wide_filters, 'conv11')
            res1_in = tf.nn.local_response_normalization(res1_in)
            res1 = pointwise(res1_in, wide_filters, 'conv12')
            res1 = pointwise(res1, wide_filters, 'conv13')
            res1 = res1 + res1_in

            # Level 2: residual group.
            res2 = pointwise(res1, wide_filters, 'conv21')
            res2 = pointwise(res2, wide_filters, 'conv22')
            res2 = res2 + res1

            # Level 3: 1x1 convolutions with dropout.
            head = pointwise(res2, wide_filters, 'conv31')
            head = slim.dropout(head, algorithm_params["drop_out_ratio"],
                                is_training=model_input_params.is_training)
            head = pointwise(head, wide_filters, 'conv32')
            head = slim.dropout(head, algorithm_params["drop_out_ratio"],
                                is_training=model_input_params.is_training)
            head = pointwise(head, wide_filters, 'conv33')

            flat = slim.flatten(head)
            logits = slim.fully_connected(flat, class_count,
                                          activation_fn=None, scope='fc')

    return ModelOutputTensors(y_conv=logits, image_output=None,
                              image_original=None, histogram_tensors=[])
def _basic_fc_layers(cls_layer_sizes, off_layer_sizes, ang_layer_sizes,
                     input_rois, input_weights, fusion_method,
                     l2_weight_decay, keep_prob, num_final_classes, box_rep,
                     is_training):
    """Build three independent FC heads: classification, offsets, angles.

    Each head flattens the (optionally fused) ROI features, applies its own
    stack of fully connected + dropout layers named ``<head>_fc6``,
    ``<head>_fc7``, ..., and ends with ``build_output_layers``.

    Args:
        cls_layer_sizes: Layer widths for the ``cls`` head.
        off_layer_sizes: Layer widths for the ``off`` head.
        ang_layer_sizes: Layer widths for the ``ang`` head.
        input_rois: List of ROI feature tensors; two entries are fused,
            otherwise the first entry is used as is.
        input_weights: Weights forwarded to the fusion helper.
        fusion_method: Fusion method forwarded to the fusion helper.
        l2_weight_decay: L2 regularisation strength; no regulariser when
            not positive.
        keep_prob: Dropout keep probability.
        num_final_classes: Forwarded to ``build_output_layers``.
        box_rep: Forwarded to ``build_output_layers``.
        is_training: Whether dropout is active.

    Returns:
        ``(cls_logits, offsets, angles)``.
    """
    regularizer = (slim.l2_regularizer(l2_weight_decay)
                   if l2_weight_decay > 0 else None)

    if len(input_rois) == 2:
        # Feature fusion
        features = avod_fc_layer_utils.feature_fusion(fusion_method,
                                                      input_rois,
                                                      input_weights)
    else:
        features = input_rois[0]

    # Heads are built in this fixed order.
    head_specs = (
        ('cls', cls_layer_sizes),
        ('off', off_layer_sizes),
        ('ang', ang_layer_sizes),
    )
    head_outputs = {'cls': None, 'off': None, 'ang': None}

    with slim.arg_scope([slim.fully_connected],
                        weights_regularizer=regularizer):
        for head, layer_sizes in head_specs:
            hidden = slim.flatten(features, scope=head + '_flatten')

            # FC layer numbering starts at 6.
            for fc_index, width in enumerate(layer_sizes, start=6):
                hidden = slim.fully_connected(
                    hidden, width,
                    scope=head + '_fc{}'.format(fc_index))
                hidden = slim.dropout(
                    hidden,
                    keep_prob=keep_prob,
                    is_training=is_training,
                    scope=head + '_fc{}_drop'.format(fc_index))

            head_outputs[head] = build_output_layers(
                hidden, num_final_classes, box_rep, head)

    return head_outputs['cls'], head_outputs['off'], head_outputs['ang']
def construct_segellreg_v8(images, is_training):
    """Build the segmentation + ellipse-fit + angle-correction graph.

    Stages:
        1. ``SegmentEncoder``: seven batch-normalised convolutions with
           max-pooling in between.
        2. ``SegmentDecoder``: transposed convolutions with additive skip
           connections back to the encoder features, then a 2-channel
           ``seg`` output.
        3. ``Ellfit``: difference of the two softmax channels, cleaned with
           a morphological opening and closing, then ``fitEllFromSeg`` per
           image.
        4. ``AngleFix``: a 4-way angle-bin classifier on the deepest encoder
           feature, used to flip the sign of the fitted angle terms.

    Args:
        images: Batch of input images.
        is_training: Passed to batch normalisation.

    Returns:
        ``(seg, ellfit, angle_bins)``.
    """
    batch_norm_params = {
        'is_training': is_training,
        'decay': 0.999,
        'updates_collections': None,
        'center': True,
        'scale': True,
        'trainable': True
    }

    def bn_conv_args():
        # Fresh initializer/regularizer objects for every call site.
        return dict(
            activation_fn=tf.nn.relu,
            padding='SAME',
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
            weights_regularizer=slim.l2_regularizer(0.0005),
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params)

    # Standardise every image independently.
    norm_imgs = tf.map_fn(lambda img: tf.image.per_image_standardization(img),
                          images)

    kern_size = [5, 5]
    filter_size = 8

    with tf.variable_scope('SegmentEncoder'):
        with slim.arg_scope([slim.conv2d], **bn_conv_args()):
            # The convolutions are unnamed, so their variable names depend
            # on creation order; keep conv/pool strictly alternating.
            skips = []
            net = norm_imgs
            for mult, pool_scope in ((1, 'pool1'), (2, 'pool2'),
                                     (4, 'pool3'), (8, 'pool4'),
                                     (16, 'pool5')):
                feature = slim.conv2d(net, filter_size * mult, kern_size)
                skips.append(feature)
                net = slim.max_pool2d(feature, [2, 2], scope=pool_scope)
            c6 = slim.conv2d(net, filter_size * 32, kern_size)
            p6 = slim.max_pool2d(c6, [3, 3], stride=3, scope='pool6')
            c7 = slim.conv2d(p6, filter_size * 64, kern_size)

    with tf.variable_scope('SegmentDecoder'):
        upscale = 2
        # Undo the pools one at a time, adding the matching encoder feature.
        decoded = slim.conv2d_transpose(c7, filter_size * 32, kern_size,
                                        stride=[3, 3], activation_fn=None)
        decoded = tf.add(decoded, c6)
        for mult, skip in zip((16, 8, 4, 2, 1), reversed(skips)):
            decoded = slim.conv2d_transpose(decoded, filter_size * mult,
                                            kern_size,
                                            stride=[upscale, upscale],
                                            activation_fn=None)
            decoded = tf.add(decoded, skip)
        seg = slim.conv2d(decoded, 2, [1, 1], scope='seg')

    with tf.variable_scope('Ellfit'):
        channel_0 = tf.slice(tf.nn.softmax(seg, -1),
                             [0, 0, 0, 0], [-1, -1, -1, 1])
        channel_1 = tf.slice(tf.nn.softmax(seg, -1),
                             [0, 0, 0, 1], [-1, -1, -1, 1])
        seg_morph = channel_0 - channel_1

        def structuring_element(iterations):
            base = morph.generate_binary_structure(2, 1)
            grown = morph.iterate_structure(base, iterations)
            return tf.expand_dims(tf.constant(grown, dtype=tf.float32), -1)

        ones4 = [1, 1, 1, 1]

        # Erode then dilate, to suppress unwanted noise before the fit.
        filter1 = structuring_element(4)
        seg_morph = tf.nn.dilation2d(
            tf.nn.erosion2d(seg_morph, filter1, ones4, ones4, "SAME"),
            filter1, ones4, ones4, "SAME")
        # Dilate then erode with a slightly larger element.
        filter2 = structuring_element(5)
        seg_morph = tf.nn.erosion2d(
            tf.nn.dilation2d(seg_morph, filter2, ones4, ones4, "SAME"),
            filter2, ones4, ones4, "SAME")

        node_act = tf.constant(0.0, dtype=tf.float32)
        # Fit the ellipse from the segmentation mask algorithmically.
        ellfit = tf.map_fn(lambda mask: fitEllFromSeg(mask, node_act),
                           seg_morph)

    with tf.variable_scope('AngleFix'):
        angle_net = slim.conv2d(c7, 128, kern_size, **bn_conv_args())
        angle_net = slim.conv2d(angle_net, 64, kern_size, **bn_conv_args())
        angle_net = slim.flatten(angle_net)
        angle_bins = slim.fully_connected(angle_net, 4,
                                          activation_fn=None,
                                          normalizer_fn=None,
                                          normalizer_params=None,
                                          scope='angle_bin')

        # De-normalise the angle columns of the fit.
        angles = tf.add(
            tf.multiply(tf.slice(ellfit, [0, 4], [-1, 2]), scale[4:5]),
            means[4:5])
        sin_angles = tf.slice(angles, [0, 0], [-1, 1])
        predicted_bin = tf.argmax(angle_bins, 1)  # 0-3, not 1-4

        # Bin 3 always wrong.
        angles = tf.where(tf.equal(predicted_bin, 2), -angles, angles)
        # Bin 2 is wrong when sin(ang) < np.sin(np.pi/4.); some bleedover,
        # so the threshold used is < 0.0.
        angles = tf.where(
            tf.logical_and(tf.equal(predicted_bin, 1),
                           tf.squeeze(tf.less(sin_angles, 0.0))),
            -angles, angles)
        # Bin 4 is wrong when sin(ang) > -np.sin(np.pi/4.); some bleedover,
        # so the threshold used is > 0.0.
        angles = tf.where(
            tf.logical_and(tf.equal(predicted_bin, 3),
                           tf.squeeze(tf.greater(sin_angles, 0.0))),
            -angles, angles)

        # Re-normalise and splice back after the first four fit columns.
        angles = tf.divide(tf.subtract(angles, means[4:5]), scale[4:5])
        leading = tf.slice(ellfit, [0, 0], [-1, 4])
        ellfit = tf.concat([leading, angles], 1)

    return seg, ellfit, angle_bins
def __init__(self, s_size, a_size, scope, trainer):
    """Build an actor-critic network and, for workers, its training ops.

    The network maps a flat observation to a softmax policy over
    ``a_size`` actions and a scalar value estimate. When ``scope`` is not
    ``'global'``, loss and gradient ops are also created; the gradients are
    taken w.r.t. this scope's trainable variables and applied to the
    variables of the ``'global'`` scope.

    Args:
        s_size: Length of the flat observation vector. The input is reshaped
            to ``[-1, 84, 84, 1]``, so this must be compatible with 84*84.
        a_size: Number of discrete actions.
        scope: Variable scope name; ``'global'`` builds only the forward
            pass.
        trainer: Optimizer whose ``apply_gradients`` is used for updates.
    """
    # Lower bound for probabilities before taking the log. A softmax output
    # can underflow to exactly 0, and log(0) = -inf turns the entropy
    # (0 * -inf) and the policy loss into NaN.
    min_prob = 1e-10

    with tf.variable_scope(scope):
        # Forward pass.
        self.inputs = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
        self.conv1 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.imageIn,
                                 num_outputs=16,
                                 kernel_size=[8, 8],
                                 stride=[4, 4],
                                 padding='VALID')
        self.conv2 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.conv1,
                                 num_outputs=32,
                                 kernel_size=[4, 4],
                                 stride=[2, 2],
                                 padding='VALID')
        hidden = slim.fully_connected(slim.flatten(self.conv2), 256,
                                      activation_fn=tf.nn.elu)

        # Policy and value heads.
        self.policy = slim.fully_connected(
            hidden, a_size,
            activation_fn=tf.nn.softmax,
            weights_initializer=normalized_columns_initializer(0.01),
            biases_initializer=None)
        self.value = slim.fully_connected(
            hidden, 1,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(1.0),
            biases_initializer=None)

        # Only worker networks need loss and gradient ops.
        if scope != 'global':
            self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
            self.actions_onehot = tf.one_hot(self.actions, a_size,
                                             dtype=tf.float32)
            self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

            # Probability assigned to the action that was actually taken.
            self.responsible_outputs = tf.reduce_sum(
                self.policy * self.actions_onehot, [1])

            # Loss terms. Log arguments are clipped to stay finite.
            self.value_loss = 0.5 * tf.reduce_sum(
                tf.square(self.target_v - tf.reshape(self.value, [-1])))
            self.entropy = -tf.reduce_sum(
                self.policy
                * tf.log(tf.clip_by_value(self.policy, min_prob, 1.0)))
            self.policy_loss = -tf.reduce_sum(
                tf.log(tf.clip_by_value(self.responsible_outputs,
                                        min_prob, 1.0))
                * self.advantages)
            self.loss = (0.5 * self.value_loss + self.policy_loss
                         - self.entropy * 0.01)

            # Gradients of the local loss w.r.t. the local variables,
            # clipped by global norm.
            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, 40.0)

            # Apply the local gradients to the global network.
            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(
                zip(grads, global_vars))
def bulid_tiny_yolo_networks(image, output_size, alpha, keep_prob, is_training):
    """Define the forward pass of the tiny-YOLO network.

    :param image: input image batch
    :param output_size: size of the network's final output vector
    :param alpha: parameter of the leaky-ReLU activation
    :param keep_prob: keep probability of the dropout layer
    :param is_training: whether the graph is built for training
    :return: the final output of the network
    """
    # (layer kind, conv depth, stride, scope) in construction order.
    # Convolutions use a 3x3 kernel, pools a 2x2 window; all use SAME padding.
    # The last pool has stride 1, so it keeps the spatial size.
    backbone = (
        ('conv', 16, 2, 'conv_1'),
        ('pool', None, 2, 'pooling_2'),
        ('conv', 32, 1, 'conv_3'),
        ('pool', None, 2, 'pooling_4'),
        ('conv', 64, 1, 'conv_5'),
        ('pool', None, 2, 'pooling_6'),
        ('conv', 128, 1, 'conv_7'),
        ('pool', None, 2, 'pooling_8'),
        ('conv', 256, 1, 'conv_9'),
        ('pool', None, 2, 'pooling_10'),
        ('conv', 512, 1, 'conv_11'),
        ('pool', None, 1, 'pooling_12'),
        ('conv', 1024, 1, 'conv_13'),
    )

    with tf.variable_scope('tiny_yolo'):
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=leaky_relu(alpha),
                weights_regularizer=slim.l2_regularizer(0.0005),
                weights_initializer=tf.truncated_normal_initializer(
                    mean=0.0, stddev=0.01)):
            net = image
            for kind, depth, stride, layer_scope in backbone:
                if kind == 'conv':
                    net = slim.conv2d(net, depth, 3, stride,
                                      padding='SAME', scope=layer_scope)
                else:
                    net = slim.max_pool2d(net, 2, stride,
                                          padding='SAME', scope=layer_scope)

            # Flatten the feature map into a vector per image.
            net = slim.flatten(net, scope='flat_14')
            net = slim.fully_connected(net, 4096, scope='fc_15')
            # Dropout against over-fitting.
            net = slim.dropout(net, keep_prob=keep_prob,
                               is_training=is_training, scope='dropout_16')
            # Final linear layer producing
            # [cell_size * cell_size * (5 * box_per_cell + class_num)] values.
            net = slim.fully_connected(net, output_size,
                                       activation_fn=None, scope='fc_17')
    return net
def network(inputs, num_classes=13, is_training=True, dropout_keep_prob=0.5):
    """Build an AlexNet-v2 style network with a small fully connected head.

    The convolutional trunk ends in the 1x1 ``fc8`` convolution; its output
    is flattened and passed through a 64-unit ``fc`` layer and a linear
    ``output`` layer.

    Args:
        inputs: Input image batch.
        num_classes: Number of channels of ``fc8`` and number of units of the
            final ``output`` layer. Defaults to 13.
        is_training: Whether dropout is active. Defaults to True, so pass
            False explicitly when building an inference graph.
        dropout_keep_prob: Keep probability for ``dropout6``/``dropout7``.

    Returns:
        The output of the final ``output`` layer.
    """
    scope = 'alexnet_v2'
    with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=[end_points_collection]):
            net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                              scope='conv1')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = slim.conv2d(net, 192, [5, 5], scope='conv2')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = slim.conv2d(net, 384, [3, 3], scope='conv3')
            net = slim.conv2d(net, 384, [3, 3], scope='conv4')
            net = slim.conv2d(net, 256, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')

            # Use conv2d instead of fully_connected layers.
            with slim.arg_scope(
                    [slim.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=tf.constant_initializer(0.1)):
                net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
                                  scope='fc6')
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training,
                                   scope='dropout6')
                net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
                net = slim.dropout(net, dropout_keep_prob,
                                   is_training=is_training,
                                   scope='dropout7')
                net = slim.conv2d(net, num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  biases_initializer=tf.zeros_initializer,
                                  scope='fc8')

            # Dense head on top of the flattened fc8 output.
            net = slim.flatten(net)
            net = slim.fully_connected(net, 64, scope='fc')
            net = slim.fully_connected(net, num_classes,
                                       activation_fn=None, scope='output')
    return net
def dfb(input_images,
        keep_prob,
        is_training=True,
        weight_decay=5e-5,
        batch_norm_decay=0.99,
        batch_norm_epsilon=0.001):
    """Build the teacher model: an object stream and a part stream on ResNet.

    Two ``resnet_v2`` end points are used:

    * object stream: global max-pool -> 1x1 conv to ``M`` channels ->
      dropout -> flatten (``fc_obj``);
    * part stream: 1x1 conv to ``M * k`` channels with batch norm -> global
      max-pool, which then feeds both a cross-channel average over each
      group of ``k`` channels (``fc_ccp``) and a 1x1 conv to ``M`` channels
      with dropout (``fc_part``).

    ``M`` and ``k`` are module-level constants.

    Args:
        input_images: Input image batch.
        keep_prob: Keep probability for both ``tf.nn.dropout`` calls.
        is_training: Forwarded to ``resnet_v2`` and to the batch-norm layer
            of the part stream. (Previously ignored: the backbone was always
            built with ``is_training=True``.)
        weight_decay: L2 regularisation strength for the added conv layers.
        batch_norm_decay: Decay of the part-stream batch norm.
        batch_norm_epsilon: Epsilon of the part-stream batch norm.

    Returns:
        ``(fc_obj, fc_part, fc_ccp, base_var_list, t_var_list)`` where
        ``base_var_list`` holds the backbone's model variables and
        ``t_var_list`` all model variables known to slim at that point.
    """
    def l2():
        return tf.contrib.layers.l2_regularizer(weight_decay)

    with tf.variable_scope("Teacher_model"):
        _, endpoints = resnet_v2(inputs=input_images,
                                 num_classes=M,
                                 is_training=is_training,
                                 scope='resnet_v2')
        base_var_list = slim.get_model_variables('Teacher_model/resnet_v2')

        part_feature = endpoints["InvertedResidual_{}_{}".format(1024, 3)]
        object_feature = endpoints["InvertedResidual_{}_{}".format(1024, 5)]

        # Object stream: global max-pool over the full spatial extent.
        object_feature_h = object_feature.get_shape().as_list()[1]
        object_feature_w = object_feature.get_shape().as_list()[2]
        fc_obj = slim.max_pool2d(object_feature,
                                 (object_feature_h, object_feature_w),
                                 scope="GMP1")

        batch_norm_params = {
            'center': True,
            'scale': True,
            'decay': batch_norm_decay,
            'epsilon': batch_norm_epsilon,
            'is_training': is_training,
        }

        fc_obj = slim.conv2d(fc_obj,
                             M,
                             [1, 1],
                             activation_fn=None,
                             weights_regularizer=l2(),
                             biases_regularizer=l2(),
                             scope='fc_obj')
        fc_obj = tf.nn.dropout(fc_obj, keep_prob=keep_prob)
        fc_obj = slim.flatten(fc_obj)

        # Part stream: k filters per class, normalised with batch norm.
        fc_part = slim.conv2d(part_feature,
                              M * k,   # number of filters
                              [1, 1],  # kernel height and width
                              activation_fn=tf.nn.relu,
                              normalizer_fn=slim.batch_norm,
                              normalizer_params=batch_norm_params,
                              weights_regularizer=l2(),
                              biases_regularizer=l2())
        fc_part_h = fc_part.get_shape().as_list()[1]
        fc_part_w = fc_part.get_shape().as_list()[2]
        fc_part = slim.max_pool2d(fc_part, (fc_part_h, fc_part_w),
                                  scope="GMP2")

        # Cross-channel pooling: average each group of k channels.
        ft_list = tf.split(fc_part, num_or_size_splits=M, axis=-1)
        cls_list = []
        for i in range(M):
            ft = tf.transpose(ft_list[i], [0, 1, 3, 2])
            cls = layers_lib.pool(ft, [1, k], "AVG")
            cls = layers.flatten(cls)
            cls_list.append(cls)
        fc_ccp = tf.concat(cls_list, axis=-1)  # cross-channel pooling (N, M)

        fc_part = slim.conv2d(fc_part,
                              M,
                              [1, 1],
                              activation_fn=None,
                              weights_regularizer=l2(),
                              biases_regularizer=l2(),
                              scope="fc_part")
        fc_part = tf.nn.dropout(fc_part, keep_prob=keep_prob)
        fc_part = slim.flatten(fc_part)

        t_var_list = slim.get_model_variables()

    return fc_obj, fc_part, fc_ccp, base_var_list, t_var_list
def implicit_quantile_network(num_actions, quantile_embedding_dim,
                              network_type, state, num_quantiles):
    """Build the Implicit Quantile ConvNet.

    The state is passed through three convolutions and flattened; uniformly
    sampled quantiles are expanded with a cosine basis and projected to the
    same width; the two are multiplied element-wise and fed to two dense
    layers that output one value per action for every sampled quantile.

    Args:
      num_actions: int, number of actions.
      quantile_embedding_dim: int, embedding dimension for the quantile input.
      network_type: namedtuple, collection of expected values to return.
      state: `tf.Tensor`, contains the agent's current state. Its static
        batch dimension is used to size the quantile sample.
      num_quantiles: int, number of quantile inputs.

    Returns:
      net: `network_type` built from `quantile_values` and `quantiles`.
    """
    init = contrib_slim.variance_scaling_initializer(
        factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)

    # Convolutional torso over inputs rescaled by 1/255.
    features = tf.cast(state, tf.float32)
    features = tf.div(features, 255.)
    for depth, kernel, stride in ((32, 8, 4), (64, 4, 2), (64, 3, 1)):
        features = contrib_slim.conv2d(
            features, depth, [kernel, kernel], stride=stride,
            weights_initializer=init)
    features = contrib_slim.flatten(features)

    static_shape = features.get_shape().as_list()
    feature_dim = static_shape[-1]
    # One copy of the state features per sampled quantile.
    tiled_features = tf.tile(features, [num_quantiles, 1])
    batch_size = static_shape[0]

    # Sample quantiles in [0, 1) and embed them with a cosine basis.
    quantiles = tf.random_uniform(
        [num_quantiles * batch_size, 1],
        minval=0, maxval=1, dtype=tf.float32)
    embedding = tf.tile(quantiles, [1, quantile_embedding_dim])
    pi = tf.constant(math.pi)
    harmonics = tf.cast(
        tf.range(1, quantile_embedding_dim + 1, 1), tf.float32)
    embedding = tf.cos(harmonics * pi * embedding)
    embedding = contrib_slim.fully_connected(
        embedding, feature_dim, weights_initializer=init)

    # Hadamard product.
    hidden = tf.multiply(tiled_features, embedding)
    hidden = contrib_slim.fully_connected(
        hidden, 512, weights_initializer=init)
    quantile_values = contrib_slim.fully_connected(
        hidden, num_actions, activation_fn=None, weights_initializer=init)

    return network_type(quantile_values=quantile_values, quantiles=quantiles)
def inception_resnet_v2(inputs, is_training=True, dropout_keep_prob=0.8,
                        bottleneck_layer_size=128, reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model with a bottleneck output layer.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: number of units of the final fully connected
        'Bottleneck' layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the output of the linear 'Bottleneck' layer.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    def _record(name, tensor):
        # Store an intermediate activation and hand it back for chaining.
        end_points[name] = tensor
        return tensor

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = _record('Conv2d_1a_3x3', slim.conv2d(
                    inputs, 32, 3, stride=2, padding='VALID',
                    scope='Conv2d_1a_3x3'))
                # 147 x 147 x 32
                net = _record('Conv2d_2a_3x3', slim.conv2d(
                    net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3'))
                # 147 x 147 x 64
                net = _record('Conv2d_2b_3x3', slim.conv2d(
                    net, 64, 3, scope='Conv2d_2b_3x3'))
                # 73 x 73 x 64
                net = _record('MaxPool_3a_3x3', slim.max_pool2d(
                    net, 3, stride=2, padding='VALID',
                    scope='MaxPool_3a_3x3'))
                # 73 x 73 x 80
                net = _record('Conv2d_3b_1x1', slim.conv2d(
                    net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1'))
                # 71 x 71 x 192
                net = _record('Conv2d_4a_3x3', slim.conv2d(
                    net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3'))
                # 35 x 35 x 192
                net = _record('MaxPool_5a_3x3', slim.max_pool2d(
                    net, 3, stride=2, padding='VALID',
                    scope='MaxPool_5a_3x3'))

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        b0 = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        b1 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
                        b1 = slim.conv2d(b1, 64, 5, scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        b2 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
                        b2 = slim.conv2d(b2, 96, 3, scope='Conv2d_0b_3x3')
                        b2 = slim.conv2d(b2, 96, 3, scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        b3 = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                             scope='AvgPool_0a_3x3')
                        b3 = slim.conv2d(b3, 64, 1, scope='Conv2d_0b_1x1')
                    net = tf.concat([b0, b1, b2, b3], 3)
                end_points['Mixed_5b'] = net
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1024
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        b0 = slim.conv2d(net, 384, 3, stride=2,
                                         padding='VALID',
                                         scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        b1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        b1 = slim.conv2d(b1, 256, 3, scope='Conv2d_0b_3x3')
                        b1 = slim.conv2d(b1, 384, 3, stride=2,
                                         padding='VALID',
                                         scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        b2 = slim.max_pool2d(net, 3, stride=2,
                                             padding='VALID',
                                             scope='MaxPool_1a_3x3')
                    net = tf.concat([b0, b1, b2], 3)
                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        b0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        b0 = slim.conv2d(b0, 384, 3, stride=2,
                                         padding='VALID',
                                         scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        b1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        b1 = slim.conv2d(b1, 288, 3, stride=2,
                                         padding='VALID',
                                         scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        b2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        b2 = slim.conv2d(b2, 288, 3, scope='Conv2d_0b_3x3')
                        b2 = slim.conv2d(b2, 320, 3, stride=2,
                                         padding='VALID',
                                         scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        b3 = slim.max_pool2d(net, 3, stride=2,
                                             padding='VALID',
                                             scope='MaxPool_1a_3x3')
                    net = tf.concat([b0, b1, b2, b3], 3)
                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = _record('Conv2d_7b_1x1', slim.conv2d(
                    net, 1536, 1, scope='Conv2d_7b_1x1'))

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # Average over the full spatial extent of the feature map.
                    #pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points
def build_network(self, sess, is_training=True):
    """Build the VGG-16 detection graph: conv trunk, RPN and R-CNN head.

    Args:
        sess: not used by this method.
        is_training: selects the proposal path (training targets vs. the
            test mode from ``cfg.TEST.MODE``), enables dropout, and is used
            as the ``trainable`` flag for every layer from ``conv3`` on.

    Returns:
        ``(rois, cls_prob, bbox_pred)``.

    Raises:
        NotImplementedError: for an unknown ``cfg.TEST.MODE`` at test time or
            a ``cfg.POOLING_MODE`` other than ``'crop'``.
    """
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            make_initializer = tf.truncated_normal_initializer
        else:
            make_initializer = tf.random_normal_initializer
        initializer = make_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

        # VGG trunk: (block index, repeats, depth, trainable). The first two
        # blocks are always frozen; every block but the last is max-pooled.
        trunk = (
            (1, 2, 64, False),
            (2, 2, 128, False),
            (3, 3, 256, is_training),
            (4, 3, 512, is_training),
            (5, 3, 512, is_training),
        )
        net = self._image
        for block, repeats, depth, trainable in trunk:
            net = slim.repeat(net, repeats, slim.conv2d, depth, [3, 3],
                              trainable=trainable, scope='conv%d' % block)
            if block < 5:
                net = slim.max_pool2d(net, [2, 2], padding='SAME',
                                      scope='pool%d' % block)
        self._act_summaries.append(net)
        self._layers['head'] = net

        # build the anchors for the image
        self._anchor_component()

        # rpn
        rpn = slim.conv2d(net, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)

        rpn_head_kwargs = dict(trainable=is_training,
                               weights_initializer=initializer,
                               padding='VALID',
                               activation_fn=None)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    scope='rpn_cls_score', **rpn_head_kwargs)
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(
            rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(
            rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(
            rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    scope='rpn_bbox_pred', **rpn_head_kwargs)

        if is_training:
            rois, roi_scores = self._proposal_layer(
                rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph,
            # for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(
                    rois, roi_scores, "rpn_rois")
        elif cfg.TEST.MODE == 'nms':
            rois, _ = self._proposal_layer(
                rpn_cls_prob, rpn_bbox_pred, "rois")
        elif cfg.TEST.MODE == 'top':
            rois, _ = self._proposal_top_layer(
                rpn_cls_prob, rpn_bbox_pred, "rois")
        else:
            raise NotImplementedError

        # rcnn
        if cfg.POOLING_MODE != 'crop':
            raise NotImplementedError
        pool5 = self._crop_pool_layer(net, rois, "pool5")

        hidden = slim.flatten(pool5, scope='flatten')
        for layer_id in ('6', '7'):
            hidden = slim.fully_connected(hidden, 4096,
                                          scope='fc' + layer_id)
            if is_training:
                hidden = slim.dropout(hidden, keep_prob=0.5,
                                      is_training=True,
                                      scope='dropout' + layer_id)
        fc7 = hidden

        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None,
                                         scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None,
                                         scope='bbox_pred')

        named_predictions = (
            ("rpn_cls_score", rpn_cls_score),
            ("rpn_cls_score_reshape", rpn_cls_score_reshape),
            ("rpn_cls_prob", rpn_cls_prob),
            ("rpn_bbox_pred", rpn_bbox_pred),
            ("cls_score", cls_score),
            ("cls_prob", cls_prob),
            ("bbox_pred", bbox_pred),
            ("rois", rois),
        )
        for key, tensor in named_predictions:
            self._predictions[key] = tensor

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
def __init__(self, h_size, env, LEARNING_RATE, n_step):
    """Build a dueling Q-network with its loss and training op.

    Args:
        h_size: number of outputs of the last convolution. It is split in
            two equal halves (advantage / value), so it must be even.
        env: environment object; ``env.win_size`` sizes the input image and
            ``env.actions`` is the number of actions.
        LEARNING_RATE: learning rate of the Adam optimizer.
        n_step: not used by this constructor.
    """
    # The network receives a frame from the game, flattened into an array.
    # It then resizes it and processes it through four convolutional layers.
    WINDOW_SIZE = env.win_size
    CONV_FILTER_SIZE_X = [9, 6, 3]
    CONV_FILTER_SIZE_Y = [9, 6, 3]
    CONV_STRIDE_X = [3, 3, 1]
    CONV_STRIDE_Y = [3, 3, 1]
    CONV_FILTER_NUM = [32, 64, 64]
    IMAGE_SIZE = [3 * (WINDOW_SIZE + 2), 12, 3]
    # The three SAME convolutions (strides 3, 3, 1) shrink the image to
    # ceil((WINDOW_SIZE + 2) / 3) x 2; the last VALID kernel covers exactly
    # that map. Divide as float and cast so the kernel size is a true ceil
    # and a plain int regardless of Python version.
    LAST_CONV_FILTER = [int(np.ceil((WINDOW_SIZE + 2) / 3.0)), 2]

    self.scalarInput = tf.placeholder(
        shape=[None, IMAGE_SIZE[0] * IMAGE_SIZE[1] * IMAGE_SIZE[2]],
        dtype=tf.float32)
    self.imageIn = tf.reshape(
        self.scalarInput,
        shape=[-1, IMAGE_SIZE[0], IMAGE_SIZE[1], IMAGE_SIZE[2]])

    self.conv1 = slim.conv2d(
        inputs=self.imageIn,
        num_outputs=CONV_FILTER_NUM[0],
        kernel_size=[CONV_FILTER_SIZE_X[0], CONV_FILTER_SIZE_Y[0]],
        stride=[CONV_STRIDE_X[0], CONV_STRIDE_Y[0]],
        padding='SAME',
        biases_initializer=None)
    print(np.shape(self.conv1))
    self.conv2 = slim.conv2d(
        inputs=self.conv1,
        num_outputs=CONV_FILTER_NUM[1],
        kernel_size=[CONV_FILTER_SIZE_X[1], CONV_FILTER_SIZE_Y[1]],
        stride=[CONV_STRIDE_X[1], CONV_STRIDE_Y[1]],
        padding='SAME',
        biases_initializer=None)
    print(np.shape(self.conv2))
    self.conv3 = slim.conv2d(
        inputs=self.conv2,
        num_outputs=CONV_FILTER_NUM[2],
        kernel_size=[CONV_FILTER_SIZE_X[2], CONV_FILTER_SIZE_Y[2]],
        stride=[CONV_STRIDE_X[2], CONV_STRIDE_Y[2]],
        padding='SAME',
        biases_initializer=None)
    print(np.shape(self.conv3))
    self.conv4 = slim.conv2d(
        inputs=self.conv3,
        num_outputs=h_size,
        kernel_size=[LAST_CONV_FILTER[0], LAST_CONV_FILTER[1]],
        stride=[1, 1],
        padding='VALID',
        biases_initializer=None)
    print(np.shape(self.conv4))

    # We take the output from the final convolutional layer and split it
    # into separate advantage and value streams.
    self.streamAC, self.streamVC = tf.split(self.conv4, 2, 3)
    self.streamA = slim.flatten(self.streamAC)
    self.streamV = slim.flatten(self.streamVC)
    xavier_init = tf.contrib.layers.xavier_initializer()
    self.AW = tf.Variable(xavier_init([h_size // 2, env.actions]))
    self.VW = tf.Variable(xavier_init([h_size // 2, 1]))
    print(self.conv4)
    print(self.streamA)
    print(self.AW)
    self.Advantage = tf.matmul(self.streamA, self.AW)
    self.Value = tf.matmul(self.streamV, self.VW)

    # Then combine them together to get our final Q-values.
    self.Qout = self.Value + tf.subtract(
        self.Advantage,
        tf.reduce_mean(self.Advantage, axis=1, keep_dims=True))
    self.predict = tf.argmax(self.Qout, 1)

    # Below we obtain the loss by taking the sum of squares difference
    # between the target and prediction Q values.
    self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
    self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
    self.actions_onehot = tf.one_hot(self.actions, env.actions,
                                     dtype=tf.float32)

    # Q-value of the action that was actually taken.
    self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot),
                           axis=1)

    self.td_error = tf.square(self.targetQ - self.Q)
    self.loss = tf.reduce_mean(self.td_error)
    self.trainer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    self.updateModel = self.trainer.minimize(self.loss)
def content_extractor(self, images, reuse=False):
    """Encode images with a stack of conv + batch-norm layers.

    Args:
        images: (batch, 32, 32, 3) or (batch, 32, 32, 1) tensor; a
            single-channel input is expanded to RGB first.
        reuse: passed to the 'content_extractor' variable scope.

    Returns:
        The (batch_size, 1, 1, 512) tanh feature map, or, when
        ``self.mode == 'pretrain'``, the flattened output of an additional
        1x1 'out' convolution with ``self.num_classes`` channels.
    """
    if images.get_shape()[3] == 1:
        # For mnist dataset, replicate the gray scale image 3 times.
        images = tf.image.grayscale_to_rgb(images)

    # (depth, stride, scope suffix) of every 3x3 SAME conv + ReLU batch norm.
    stages = (
        (64, 1, '1_1'),    # (batch_size, 32, 32, 64)
        (64, 2, '1_2'),    # (batch_size, 16, 16, 64)
        (128, 1, '2_1'),   # (batch_size, 16, 16, 128)
        (128, 2, '2_2'),   # (batch_size, 8, 8, 128)
        (256, 1, '3_1'),   # (batch_size, 8, 8, 256)
        (256, 2, '3_2'),   # (batch_size, 4, 4, 256)
        (512, 1, '4_1'),   # (batch_size, 4, 4, 512)
    )

    with tf.variable_scope('content_extractor', reuse=reuse), \
            slim.arg_scope(
                [slim.conv2d],
                padding='SAME',
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer()), \
            slim.arg_scope(
                [slim.batch_norm],
                decay=0.95,
                center=True,
                scale=True,
                activation_fn=tf.nn.relu,
                is_training=(self.mode == 'train'
                             or self.mode == 'pretrain')):
        net = images
        for depth, stride, suffix in stages:
            net = slim.conv2d(net, depth, [3, 3], stride=stride,
                              scope='conv' + suffix)
            net = slim.batch_norm(net, scope='bn' + suffix)

        # Collapse the 4x4 map with a VALID 4x4 conv; tanh instead of ReLU.
        net = slim.conv2d(net, 512, [4, 4], stride=2, padding='VALID',
                          scope='conv4_2')  # (batch_size, 1, 1, 512)
        net = slim.batch_norm(net, activation_fn=tf.nn.tanh, scope='bn4_2')

        if self.mode == 'pretrain':
            net = slim.conv2d(net, self.num_classes, [1, 1],
                              padding='VALID', scope='out')
            net = slim.flatten(net)
        return net
def res_3d_net(self, x, bn_func):
    """Run a 3-D residual network (res3a .. res5b) over a feature volume.

    Args:
        x: tensor reshaped here to
            ``(self.batch_size, self.time_step, 28, 28, 96)``.
        bn_func: normalisation callable; used as the ``normalizer_fn`` of the
            activated convolutions and applied directly after each residual
            addition.

    Returns:
        The flattened, average-pooled features after ``tf.nn.dropout`` with
        ``self.dropout``.
    """
    conv_defaults = {
        'padding': 'SAME',
        'activation_fn': tf.nn.relu,
        'normalizer_fn': bn_func,
    }
    # Overrides for the convolutions that feed a residual addition: they
    # stay linear and un-normalised; BN + ReLU follow the addition.
    linear = {'activation_fn': None, 'normalizer_fn': None}
    k3 = [3, 3, 3]
    keep = [1, 1, 1]
    halve = [2, 2, 2]

    x = tf.reshape(x, (self.batch_size, self.time_step, 28, 28, 96))

    with slim.arg_scope([slim.conv3d], **conv_defaults):
        # ---- stage 3 (128 channels) ----
        with tf.variable_scope('res3a'):
            stage3_in = slim.conv3d(x, 128, kernel_size=k3, stride=keep,
                                    scope='conv3d1')
        with tf.variable_scope('res3b_1'):
            branch = slim.conv3d(stage3_in, 128, kernel_size=k3, stride=keep,
                                 scope='conv3d2')
        with tf.variable_scope('res3b_2'):
            branch = slim.conv3d(branch, 128, kernel_size=k3, stride=keep,
                                 scope='conv3d1', **linear)
        with tf.variable_scope('res3b'):
            stage3_sum = tf.add(stage3_in, branch)
            stage3_out = tf.nn.relu(bn_func(stage3_sum))

        # ---- stage 4 (256 channels, stride-2 entry) ----
        with tf.variable_scope('res4a_down'):
            shortcut = slim.conv3d(stage3_out, 256, kernel_size=k3,
                                   stride=halve, scope='conv3d1', **linear)
        with tf.variable_scope('res4a_1'):
            branch = slim.conv3d(stage3_out, 256, kernel_size=k3,
                                 stride=halve, scope='conv3d1')
        with tf.variable_scope('res4a_2'):
            branch = slim.conv3d(branch, 256, kernel_size=k3, stride=keep,
                                 scope='conv3d2', **linear)
        with tf.variable_scope('res4a'):
            stage4a_sum = tf.add(shortcut, branch)
            stage4a_out = tf.nn.relu(bn_func(stage4a_sum))
        with tf.variable_scope('res4b_1'):
            branch = slim.conv3d(stage4a_out, 256, kernel_size=k3,
                                 stride=keep, scope='conv3d1')
        with tf.variable_scope('res4b_2'):
            branch = slim.conv3d(branch, 256, kernel_size=k3, stride=keep,
                                 scope='conv3d2', **linear)
        with tf.variable_scope('res4b'):
            # The identity path is the pre-normalisation sum of res4a.
            stage4b_sum = tf.add(stage4a_sum, branch)
            stage4b_out = tf.nn.relu(bn_func(stage4b_sum))

        # ---- stage 5 (512 channels, stride-2 entry) ----
        with tf.variable_scope('res5a_down'):
            shortcut = slim.conv3d(stage4b_out, 512, kernel_size=k3,
                                   stride=halve, scope='conv3d1', **linear)
        with tf.variable_scope('res5a_1'):
            branch = slim.conv3d(stage4b_out, 512, kernel_size=k3,
                                 stride=halve, scope='conv3d1')
        with tf.variable_scope('res5a_2'):
            branch = slim.conv3d(branch, 512, kernel_size=k3, stride=keep,
                                 scope='conv3d2', **linear)
        with tf.variable_scope('res5a'):
            stage5a_sum = tf.add(shortcut, branch)
            stage5a_out = tf.nn.relu(bn_func(stage5a_sum))
        with tf.variable_scope('res5b_1'):
            branch = slim.conv3d(stage5a_out, 512, kernel_size=k3,
                                 stride=keep, scope='conv3d1')
        with tf.variable_scope('res5b_2'):
            branch = slim.conv3d(branch, 512, kernel_size=k3, stride=keep,
                                 scope='conv3d2', **linear)
        with tf.variable_scope('res5b'):
            # The identity path is the pre-normalisation sum of res5a.
            stage5b_sum = tf.add(stage5a_sum, branch)
            stage5b_out = tf.nn.relu(bn_func(stage5b_sum))

        with tf.variable_scope('global_avg'):
            pooled = slim.avg_pool3d(stage5b_out, kernel_size=[4, 7, 7],
                                     stride=[1, 1, 1])
        with tf.variable_scope('res_logits'):
            logits = slim.flatten(pooled)
            logits = tf.nn.dropout(logits, self.dropout)
    return logits
def _crop_pyramid_features(pyramid, assigned_rois, assigned_batch_inds,
                           image_height, image_width):
    """Crop 14x14 ROI features from P5..P2 and concatenate them on axis 0."""
    cropped_features = []
    ordered_rois = []
    for i in range(5, 1, -1):
        # The per-level lists are indexed by pyramid level minus two.
        level_rois = assigned_rois[i - 2]
        cropped, _ = ROIAlign(
            pyramid['P%d' % i], level_rois, assigned_batch_inds[i - 2],
            image_height, image_width,
            stride=2**i, pooled_height=14, pooled_width=14)
        cropped_features.append(cropped)
        ordered_rois.append(level_rois)
    return (tf.concat(values=cropped_features, axis=0),
            tf.concat(values=ordered_rois, axis=0))


def build_heads(pyramid,
                py_scope,
                slim_scope,
                image_height,
                image_width,
                num_classes,
                base_anchors,
                is_training=False,
                gt_boxes=None):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid.

    Steps performed:
      1. Build the RPN (anchor) layer on every pyramid level P5..P2.
      2. Decode the RPN outputs into ROIs.
      3. Sample ROIs (against ``gt_boxes`` when training).
      4. Crop ROI features and build the R-CNN layer.
      5. Decode the R-CNN outputs into boxes.
      6. Build the mask layer.
    No losses are built here.

    Args:
        pyramid: mapping with the feature maps under keys 'P2'..'P5'.
        py_scope: name scope to build the heads in.
        slim_scope: slim arg scope applied to every layer built here.
        image_height: image height passed to the decoders and to ROIAlign.
        image_width: image width passed to the decoders and to ROIAlign.
        num_classes: number of outputs of the class and mask layers (the box
            layer has ``num_classes * 4``).
        base_anchors: number of anchors per location.
        is_training: truthy to sample ROIs with ground truth and feed RPN
            proposals to the mask head; falsy to feed decoded R-CNN boxes to
            the mask head.
        gt_boxes: ground-truth boxes, only read when training.

    Returns:
        ``(outputs, py_scope, slim_scope)`` where ``outputs`` is a dict of
        the tensors built here and the scopes are the values yielded by the
        two context managers.
    """
    outputs = {}
    with tf.name_scope(py_scope) as py_scope:
        with slim.arg_scope(slim_scope) as slim_scope:
            # ---- RPN head on every pyramid level ----
            # Strides are [4, 8, 16, 32] for P2..P5. Each level yields
            #   box: (1, height, width, base_anchors * 4) regression values
            #        [shift_x, shift_y, scale_width, scale_height]
            #   cls: (1, height, width, base_anchors * 2) scores (pre-softmax)
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i

                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1,
                                  activation_fn=tf.nn.relu,
                                  scope='pyramid/%s/rpn' % p)
                box = slim.conv2d(
                    rpn, base_anchors * 4, [1, 1], stride=1,
                    scope='pyramid/%s/rpn/box' % p,
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=0.001),
                    activation_fn=None, normalizer_fn=None)
                cls = slim.conv2d(
                    rpn, base_anchors * 2, [1, 1], stride=1,
                    scope='pyramid/%s/rpn/cls' % p,
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=0.01),
                    activation_fn=None, normalizer_fn=None)

                anchor_scales = [8]  # [2 **(i-2), 2 ** (i-1), 2 **(i)]
                print("anchor_scales = ", anchor_scales)
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                outputs['rpn'][p] = {
                    'box': box,
                    'cls': cls,
                    'anchor': all_anchors,
                    'shape': shape,
                }

            ### gather boxes, clses, anchors from all pyramid layers
            rpn_boxes = [
                tf.reshape(outputs['rpn']['P%d' % p]['box'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_clses = [
                tf.reshape(outputs['rpn']['P%d' % p]['cls'], [-1, 1])
                for p in range(5, 1, -1)
            ]
            rpn_anchors = [
                tf.reshape(outputs['rpn']['P%d' % p]['anchor'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

            ### softmax to get probability
            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            ### decode anchors and box regression values into proposed
            ### bounding boxes
            rpn_final_boxes, rpn_final_clses, rpn_final_scores = \
                anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors,
                               image_height, image_width)

            outputs['rpn_boxes'] = rpn_boxes
            outputs['rpn_clses'] = rpn_clses
            outputs['rpn_anchor'] = rpn_anchors
            outputs['rpn_final_boxes'] = rpn_final_boxes
            outputs['rpn_final_clses'] = rpn_final_clses
            outputs['rpn_final_scores'] = rpn_final_scores

            # Plain truthiness is used here AND for the mask branch below so
            # both always agree on which ROI tensors exist.
            if is_training:
                ### for training, rcnn and maskrcnn take rpn proposed
                ### bounding boxes as inputs
                (rpn_rois_to_rcnn, _, rpn_batch_inds_to_rcnn,
                 rpn_rois_to_mask, _, rpn_batch_inds_to_mask) = \
                    sample_rpn_outputs_with_gt(
                        rpn_final_boxes, rpn_final_scores, gt_boxes,
                        is_training=is_training, only_positive=False)
            else:
                ### for testing, only rcnn takes rpn boxes as inputs.
                ### maskrcnn takes rcnn boxes as inputs
                rpn_rois_to_rcnn, _, rpn_batch_inds_to_rcnn = \
                    sample_rpn_outputs(rpn_final_boxes, rpn_final_scores,
                                       only_positive=False)

            ### assign pyramid layer indexs to rcnn network's ROIs.
            rcnn_assigned_rois, rcnn_assigned_batch_inds, _ = assign_boxes(
                rpn_rois_to_rcnn,
                [rpn_rois_to_rcnn, rpn_batch_inds_to_rcnn],
                [2, 3, 4, 5])

            ### crop features from pyramid using ROIs. Note that this will
            ### change order of the ROIs, so ROIs are also reordered.
            rcnn_cropped_features, rcnn_ordered_rois = \
                _crop_pyramid_features(pyramid, rcnn_assigned_rois,
                                       rcnn_assigned_batch_inds,
                                       image_height, image_width)

            # ---- R-CNN head ----
            # Takes the cropped features and generates
            #   rcnn_boxes: (num_ROIs, num_classes x 4) box regression values
            #               [shift_x, shift_y, scale_width, scale_height]
            #   rcnn_clses: (num_ROIs, num_classes) scores (pre-softmax)
            rcnn = slim.max_pool2d(rcnn_cropped_features, [3, 3], stride=2,
                                   padding='SAME')
            rcnn = slim.flatten(rcnn)
            rcnn = slim.fully_connected(
                rcnn, 1024, activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected")
            rcnn = slim.dropout(rcnn, keep_prob=0.75,
                                is_training=is_training)
            rcnn = slim.fully_connected(
                rcnn, 1024, activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_1")
            rcnn = slim.dropout(rcnn, keep_prob=0.75,
                                is_training=is_training)
            rcnn_clses = slim.fully_connected(
                rcnn, num_classes, activation_fn=None, normalizer_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_2")
            rcnn_boxes = slim.fully_connected(
                rcnn, num_classes * 4, activation_fn=None,
                normalizer_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_3")

            ### softmax to get probability
            rcnn_scores = tf.nn.softmax(rcnn_clses)
            ### decode ROIs and box regression values into bounding boxes
            rcnn_final_boxes, rcnn_final_classes, rcnn_final_scores = \
                roi_decoder(rcnn_boxes, rcnn_scores, rcnn_ordered_rois,
                            image_height, image_width)

            outputs['rcnn_ordered_rois'] = rcnn_ordered_rois
            outputs['rcnn_cropped_features'] = rcnn_cropped_features
            tf.add_to_collection('__CROPPED__', rcnn_cropped_features)
            outputs['rcnn_boxes'] = rcnn_boxes
            outputs['rcnn_clses'] = rcnn_clses
            outputs['rcnn_scores'] = rcnn_scores
            outputs['rcnn_final_boxes'] = rcnn_final_boxes
            outputs['rcnn_final_clses'] = rcnn_final_classes
            outputs['rcnn_final_scores'] = rcnn_final_scores

            if is_training:
                ### assign pyramid layer indexs to mask network's ROIs
                mask_assigned_rois, mask_assigned_batch_inds, _ = \
                    assign_boxes(
                        rpn_rois_to_mask,
                        [rpn_rois_to_mask, rpn_batch_inds_to_mask],
                        [2, 3, 4, 5])
                ### crop features from pyramid using ROIs. Again, this will
                ### change order of the ROIs, so ROIs are reordered.
                mask_cropped_features, mask_ordered_rois = \
                    _crop_pyramid_features(pyramid, mask_assigned_rois,
                                           mask_assigned_batch_inds,
                                           image_height, image_width)
            else:
                ### for testing, mask network takes rcnn boxes as inputs
                (rcnn_rois_to_mask, rcnn_clses_to_mask, rcnn_scores_to_mask,
                 rcnn_batch_inds_to_mask) = sample_rcnn_outputs(
                     rcnn_final_boxes, rcnn_final_classes, rcnn_scores,
                     class_agnostic=False)
                (mask_assigned_rois, mask_assigned_clses,
                 mask_assigned_scores, mask_assigned_batch_inds, _) = \
                    assign_boxes(
                        rcnn_rois_to_mask,
                        [rcnn_rois_to_mask, rcnn_clses_to_mask,
                         rcnn_scores_to_mask, rcnn_batch_inds_to_mask],
                        [2, 3, 4, 5])

                mask_cropped_features, mask_ordered_rois = \
                    _crop_pyramid_features(pyramid, mask_assigned_rois,
                                           mask_assigned_batch_inds,
                                           image_height, image_width)
                # Reorder classes and scores the same way (P5 down to P2).
                mask_ordered_clses = tf.concat(
                    values=[mask_assigned_clses[i - 2]
                            for i in range(5, 1, -1)],
                    axis=0)
                mask_ordered_scores = tf.concat(
                    values=[mask_assigned_scores[i - 2]
                            for i in range(5, 1, -1)],
                    axis=0)

                outputs['mask_final_clses'] = mask_ordered_clses
                outputs['mask_final_scores'] = mask_ordered_scores

            # ---- Mask head ----
            # Takes the cropped features and generates one mask per class.
            #   m: (28, 28, num_classes) per ROI, values are pre-sigmoid.
            m = mask_cropped_features
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME',
                            activation_fn=tf.nn.relu, scope="pyramid/Conv")
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME',
                            activation_fn=tf.nn.relu, scope="pyramid/Conv_1")
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME',
                            activation_fn=tf.nn.relu, scope="pyramid/Conv_2")
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME',
                            activation_fn=tf.nn.relu, scope="pyramid/Conv_3")
            m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID',
                                      activation_fn=tf.nn.relu,
                                      scope="pyramid/Conv2d_transpose")
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m, num_classes, [1, 1], stride=1,
                            padding='VALID', activation_fn=None,
                            normalizer_fn=None, scope="pyramid/Conv_4")

            outputs['mask_ordered_rois'] = mask_ordered_rois
            outputs['mask_cropped_features'] = mask_cropped_features
            outputs['mask_mask'] = m
            outputs['mask_final_mask'] = tf.nn.sigmoid(m)

    return outputs, py_scope, slim_scope
def discriminator(bottom, cat_list, conts, reuse=False):
    """Build the discriminator together with the latent-code (Q) heads.

    Args:
        bottom: input tensor of the first convolution.
        cat_list: iterable with the size of every categorical latent
            variable; one softmax head is built per entry.
        conts: number of continuous latent variables; a tanh head is built
            only when it is positive.
        reuse: forwarded to every layer's ``reuse`` argument.

    Returns:
        ``(d_out, q_cat_outs, q_cont_outs)``: the sigmoid discriminator
        output, the list of categorical heads, and the continuous head (or
        ``None`` when ``conts`` is not positive).
    """
    initializer = tf.truncated_normal_initializer(stddev=0.02)
    # Arguments shared by every layer below.
    shared = dict(reuse=reuse, weights_initializer=initializer)

    # Three 3x3 convolutions, each followed by a 2x space-to-depth.
    hidden = slim.convolution2d(bottom, 32, [3, 3], padding="SAME",
                                biases_initializer=None,
                                activation_fn=lrelu,
                                scope='d_conv1', **shared)
    hidden = tf.space_to_depth(hidden, 2)
    for depth, layer_scope in ((64, 'd_conv2'), (128, 'd_conv3')):
        hidden = slim.convolution2d(hidden, depth, [3, 3], padding="SAME",
                                    normalizer_fn=slim.batch_norm,
                                    activation_fn=lrelu,
                                    scope=layer_scope, **shared)
        hidden = tf.space_to_depth(hidden, 2)

    trunk = slim.fully_connected(slim.flatten(hidden), 1024,
                                 activation_fn=lrelu,
                                 scope='d_fc1', **shared)

    d_out = slim.fully_connected(trunk, 1, activation_fn=tf.nn.sigmoid,
                                 scope='d_out', **shared)

    q_hidden = slim.fully_connected(trunk, 128,
                                    normalizer_fn=slim.batch_norm,
                                    scope='q_fc1', **shared)

    # Here we define the unique layers used for the q-network. The number of
    # outputs depends on the number of latent variables we choose to define.
    q_cat_outs = [
        slim.fully_connected(q_hidden, size, activation_fn=tf.nn.softmax,
                             scope='q_out_cat_' + str(idx), **shared)
        for idx, size in enumerate(cat_list)
    ]

    q_cont_outs = None
    if conts > 0:
        q_cont_outs = slim.fully_connected(q_hidden, conts,
                                           activation_fn=tf.nn.tanh,
                                           scope='q_out_cont_' + str(conts),
                                           **shared)

    return d_out, q_cat_outs, q_cont_outs