def _create_transformation(self, input, n_output, reuse, scope_prefix):
    """Create the deterministic transformation between stochastic layers.

    If self.hparams.nonlinear:
        2 x tanh layers
    Else:
        1 x linear layer
    """
    if self.hparams.nonlinear:
        h = slim.fully_connected(input,
                                 self.hparams.n_hidden,
                                 reuse=reuse,
                                 activation_fn=tf.nn.tanh,
                                 scope='%s_nonlinear_1' % scope_prefix)
        h = slim.fully_connected(h,
                                 self.hparams.n_hidden,
                                 reuse=reuse,
                                 activation_fn=tf.nn.tanh,
                                 scope='%s_nonlinear_2' % scope_prefix)
        h = slim.fully_connected(h,
                                 n_output,
                                 reuse=reuse,
                                 activation_fn=None,
                                 scope='%s' % scope_prefix)
    else:
        h = slim.fully_connected(input,
                                 n_output,
                                 reuse=reuse,
                                 activation_fn=None,
                                 scope='%s' % scope_prefix)
    return h
def __init__(self):
    # Policy network. `hidden` and `learning_rate` are module-level
    # hyperparameters defined elsewhere.
    self.observations = tf.placeholder(tf.float32, [None, 4], name='input_x')
    self.input_y = tf.placeholder(tf.float32, [None, 1], name='input_y')
    self.reward = tf.placeholder(tf.float32, name='reward_signal')

    l1 = slim.fully_connected(self.observations, hidden,
                              biases_initializer=None,
                              activation_fn=tf.nn.relu)
    self.score = slim.fully_connected(l1, 1, biases_initializer=None)
    self.probability = tf.nn.sigmoid(self.score)

    # Log-likelihood of the action that was actually taken (input_y encodes
    # the action as 1 - action, so this reduces to log(p) or log(1 - p)).
    loglike = tf.log(self.input_y * (self.input_y - self.probability) +
                     (1 - self.input_y) * (self.input_y + self.probability))
    loss = -tf.reduce_mean(loglike * self.reward)

    self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.w1grad = tf.placeholder(tf.float32, name='batch_grad1')
    self.w2grad = tf.placeholder(tf.float32, name='batch_grad2')
    batch_grad = [self.w1grad, self.w2grad]
    self.tvars = tf.trainable_variables()
    self.newgrads = tf.gradients(loss, self.tvars)
    self.update = self.optimizer.apply_gradients(zip(batch_grad, self.tvars))
def fprop(self, x, **kwargs):
    del kwargs
    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
        net = slim.fully_connected(x, 60)
        logits = slim.fully_connected(net, 10, activation_fn=None)
        return {self.O_LOGITS: logits,
                self.O_PROBS: tf.nn.softmax(logits)}
def encoder(self, images, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('encoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                weights_regularizer=slim.l2_regularizer(weight_decay),
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=self.batch_norm_params):
                net = images
                net = slim.conv2d(net, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_1b')
                net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_2b')
                net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_3b')
                net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1, activation_fn=activation_fn, scope='Conv2d_4b')
                net = slim.flatten(net)
                fc1 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None, normalizer_fn=None, scope='Fc_1')
                fc2 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None, normalizer_fn=None, scope='Fc_2')
    return fc1, fc2
def discriminative_network(x):
    """Outputs probability in logits."""
    h0 = slim.fully_connected(x, H * 2, activation_fn=tf.tanh)
    h1 = slim.fully_connected(h0, H * 2, activation_fn=tf.tanh)
    h2 = slim.fully_connected(h1, H * 2, activation_fn=tf.tanh)
    h3 = slim.fully_connected(h2, 1, activation_fn=None)
    return h3
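# Illustrative sketch (not from the original source): since
# discriminative_network returns unnormalized logits, a typical downstream GAN
# loss would use the numerically stable sigmoid cross-entropy. `x_real` and
# `x_fake` are assumed tensors; in practice the second call needs variable
# reuse (e.g. both calls inside tf.variable_scope('disc', reuse=tf.AUTO_REUSE)).
d_real = discriminative_network(x_real)
d_fake = discriminative_network(x_fake)
d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_real), logits=d_real) +
    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(d_fake), logits=d_fake))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_fake), logits=d_fake))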
def _build_graph(self):
    normalized_input = tf.div(self._input, 255.0)

    # d = tf.divide(1.0, tf.sqrt(8. * 8. * 4.))
    conv1 = slim.conv2d(normalized_input, 16, [8, 8], activation_fn=tf.nn.relu,
                        padding='VALID', stride=4, biases_initializer=None)
    # weights_initializer=tf.random_uniform_initializer(minval=-d, maxval=d)

    # d = tf.divide(1.0, tf.sqrt(4. * 4. * 16.))
    conv2 = slim.conv2d(conv1, 32, [4, 4], activation_fn=tf.nn.relu,
                        padding='VALID', stride=2, biases_initializer=None)
    # weights_initializer=tf.random_uniform_initializer(minval=-d, maxval=d)

    flattened = slim.flatten(conv2)

    # d = tf.divide(1.0, tf.sqrt(2592.))
    fc1 = slim.fully_connected(flattened, 256, activation_fn=tf.nn.relu,
                               biases_initializer=None)
    # weights_initializer=tf.random_uniform_initializer(minval=-d, maxval=d)

    # d = tf.divide(1.0, tf.sqrt(256.))
    # Estimate of the value function.
    self.value_func_prediction = slim.fully_connected(fc1, 1, activation_fn=None,
                                                      biases_initializer=None)
    # weights_initializer=tf.random_uniform_initializer(minval=-d, maxval=d)

    # Softmax output with one entry per action, representing the probability
    # of taking that action.
    self.policy_predictions = slim.fully_connected(fc1, self.output_size,
                                                   activation_fn=tf.nn.softmax,
                                                   biases_initializer=None)
def localization_VGG16(self, inputs):
    with tf.variable_scope('localization_network'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_initializer=tf.constant_initializer(0.0)):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            shape = int(np.prod(net.get_shape()[1:]))
            net = slim.fully_connected(tf.reshape(net, [-1, shape]), 4096, scope='fc6')
            net = slim.fully_connected(net, 1024, scope='fc7')
            # Bias the final layer to the identity transform so the spatial
            # transformer starts from an unwarped image.
            identity = np.array([[1., 0., 0.],
                                 [0., 1., 0.]])
            identity = identity.flatten()
            net = slim.fully_connected(net, 6,
                                       biases_initializer=tf.constant_initializer(identity),
                                       scope='fc8')
    return net
def __init__(self, actions, td_discount_rate=0.99, learningRate=0.0001, epsilonGreedy=0.1):
    self.learningRate = learningRate
    self.td_discount_rate = td_discount_rate
    self.epsilonGreedy = epsilonGreedy

    self.input = tf.placeholder('float', shape=[None, 4])
    x1 = slim.fully_connected(self.input, 32, scope='fc/fc_1')
    x1 = tf.nn.relu(x1)
    self.Qout = slim.fully_connected(x1, actions)
    self.predict = tf.argmax(self.Qout, 1)
    self.logQVal = tf.summary.scalar('QVal', tf.reduce_mean(self.predict))

    # Get the best-action Q-values. These are tensors, so the max must be
    # taken with tf.reduce_max, not np.amax.
    self.newQout = tf.placeholder(shape=[None, 2], dtype=tf.float32)
    self.epsilonInput = tf.placeholder(dtype=tf.float32, name="epsilonInput")
    self.newstateReward = tf.placeholder(shape=[None], dtype=tf.float32)
    self.tdTarget = self.newstateReward + td_discount_rate * tf.reduce_max(self.newQout, axis=1)
    self.td_error = tf.square(self.tdTarget - tf.reduce_max(self.Qout, axis=1))

    # Turn into a single scalar value.
    self.loss = tf.reduce_mean(self.td_error)
    self.tdLogger = tf.summary.scalar('tdLoss', self.loss)
    self.tdTargetLogger = tf.summary.histogram('tdTarget', self.tdTarget)
    self.epsilonLogger = tf.summary.scalar('epsilon', self.epsilonInput)

    # Minimize the loss (mean of TD errors).
    self.trainer = tf.train.AdamOptimizer(learning_rate=self.learningRate)
    self.updateModel = self.trainer.minimize(self.loss)

    self.memory = Memory(memory_capacity)
def create_network(self, name):
    with tf.variable_scope(name) as scope:
        inputs = tf.placeholder(fl32, [None, self.state_dim], 'inputs')
        actions = tf.placeholder(fl32, [None, self.action_dim], 'actions')

        with slim.arg_scope([slim.fully_connected],
                            activation_fn=relu,
                            weights_initializer=uniform,
                            weights_regularizer=None):
            # tf.concat takes the tensor list first in TF >= 1.0.
            net = tf.concat([inputs, actions], axis=1)
            net = slim.fully_connected(net, 400)
            net = slim.fully_connected(net, 300)

            '''net = slim.fully_connected(inputs, 400)
            w1 = tf.get_variable("w1", shape=[400, 300], initializer=uniform)
            w2 = tf.get_variable("w2", shape=[self.action_dim, 300], initializer=uniform)
            b = tf.get_variable("b", shape=[300], initializer=constant)
            net = relu(tf.matmul(net, w1) + tf.matmul(actions, w2) + b)'''

            out = slim.fully_connected(net, 1, activation_fn=None)

    return (inputs, actions, out, scope.name)
def _build_layers(self, inputs, num_outputs, options):
    """Process the flattened inputs.

    Note that dict inputs will be flattened into a vector. To define a model
    that processes the components separately, use _build_layers_v2().
    """
    hiddens = options.get("fcnet_hiddens")
    activation = get_activation_fn(options.get("fcnet_activation"))

    with tf.name_scope("fc_net"):
        i = 1
        last_layer = inputs
        for size in hiddens:
            label = "fc{}".format(i)
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=activation,
                scope=label)
            i += 1
        label = "fc_out"
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope=label)
        return output, last_layer
def build_decoder_rnn(self, first_step):
    with tf.variable_scope("cnn"):
        image_emb = slim.fully_connected(self.fc7, self.input_encoding_size,
                                         reuse=True, activation_fn=None,
                                         scope='encode_image')
    with tf.variable_scope("rnnlm"):
        if first_step:
            # At the first step, the input is the embedded image.
            rnn_input = image_emb
        else:
            # The input of a later time step is the embedding of the previous
            # word. The previous word is a placeholder.
            self.decoder_prev_word = tf.placeholder(tf.int32, [None])
            rnn_input = tf.nn.embedding_lookup(self.Wemb, self.decoder_prev_word)

        batch_size = tf.shape(rnn_input)[0]
        tf.get_variable_scope().reuse_variables()

        if not first_step:
            # If not the first step, the states are also placeholders.
            self.decoder_initial_state = initial_state = utils.get_placeholder_state(self.cell.state_size)
            self.decoder_flattened_state = utils.flatten_state(initial_state)
        else:
            # The states for the first step are zero.
            initial_state = self.cell.zero_state(batch_size, tf.float32)

        outputs, state = tf.contrib.legacy_seq2seq.rnn_decoder([rnn_input], initial_state, self.cell)
        logits = slim.fully_connected(outputs[0], self.vocab_size + 1,
                                      activation_fn=None, scope='logit')
        decoder_probs = tf.reshape(tf.nn.softmax(logits), [batch_size, self.vocab_size + 1])
        decoder_state = utils.flatten_state(state)

    # Output the current word distribution and states.
    return [decoder_probs, decoder_state]
def _init(self, inputs, num_outputs, options):
    hiddens = options.get("fcnet_hiddens", [256, 256])
    fcnet_activation = options.get("fcnet_activation", "tanh")
    if fcnet_activation == "tanh":
        activation = tf.nn.tanh
    elif fcnet_activation == "relu":
        activation = tf.nn.relu
    else:
        # Fail fast instead of hitting a NameError below.
        raise ValueError("Unknown fcnet_activation: {}".format(fcnet_activation))

    with tf.name_scope("fc_net"):
        i = 1
        last_layer = inputs
        for size in hiddens:
            label = "fc{}".format(i)
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=activation,
                scope=label)
            i += 1
        label = "fc_out"
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope=label)
        return output, last_layer
def build_arch_baseline(input, is_train: bool, num_classes: int):
    bias_initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)  # tf.constant_initializer(0.0)
    # The paper did not mention any regularization; a common l2 regularizer
    # on the weights is added here.
    weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    tf.logging.info('input shape: {}'.format(input.get_shape()))

    # weights_initializer=initializer,
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        trainable=is_train,
                        biases_initializer=bias_initializer,
                        weights_regularizer=weights_regularizer):
        with tf.variable_scope('relu_conv1') as scope:
            output = slim.conv2d(input, num_outputs=32, kernel_size=[5, 5],
                                 stride=1, padding='SAME', scope=scope,
                                 activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer1')
            tf.logging.info('output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('relu_conv2') as scope:
            output = slim.conv2d(output, num_outputs=64, kernel_size=[5, 5],
                                 stride=1, padding='SAME', scope=scope,
                                 activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer2')
            tf.logging.info('output shape: {}'.format(output.get_shape()))

        output = slim.flatten(output)
        output = slim.fully_connected(output, 1024, scope='relu_fc3', activation_fn=tf.nn.relu)
        tf.logging.info('output shape: {}'.format(output.get_shape()))
        output = slim.dropout(output, 0.5, scope='dp')
        output = slim.fully_connected(output, num_classes, scope='final_layer', activation_fn=None)
        tf.logging.info('output shape: {}'.format(output.get_shape()))
        return output
def cross_ent_loss(output, x, y):
    loss = tf.losses.sparse_softmax_cross_entropy(labels=y, logits=output)
    loss = tf.reduce_mean(loss)

    num_class = int(output.get_shape()[-1])
    data_size = int(x.get_shape()[1])

    # Reconstruction loss: mask the logits with the one-hot labels before
    # feeding the decoder.
    y = tf.one_hot(y, num_class, dtype=tf.float32)
    y = tf.expand_dims(y, axis=2)
    output = tf.expand_dims(output, axis=2)
    output = tf.reshape(tf.multiply(output, y), shape=[cfg.batch_size, -1])
    tf.logging.info("decoder input value dimension:{}".format(output.get_shape()))

    with tf.variable_scope('decoder'):
        output = slim.fully_connected(output, 512, trainable=True)
        output = slim.fully_connected(output, 1024, trainable=True)
        output = slim.fully_connected(output, data_size * data_size,
                                      trainable=True, activation_fn=tf.sigmoid)
        x = tf.reshape(x, shape=[cfg.batch_size, -1])
        reconstruction_loss = tf.reduce_mean(tf.square(output - x))

    # Regularization loss.
    regularization = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss_all = tf.add_n([loss] + [0.0005 * reconstruction_loss] + regularization)

    return loss_all, reconstruction_loss, output
def network_det(self, inputs, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
        conv1 = slim.conv2d(inputs, 96, [11, 11], 4, padding='VALID', scope='conv1')
        max1 = slim.max_pool2d(conv1, [3, 3], 2, padding='VALID', scope='max1')
        conv2 = slim.conv2d(max1, 256, [5, 5], 1, scope='conv2')
        max2 = slim.max_pool2d(conv2, [3, 3], 2, padding='VALID', scope='max2')
        conv3 = slim.conv2d(max2, 384, [3, 3], 1, scope='conv3')
        conv4 = slim.conv2d(conv3, 384, [3, 3], 1, scope='conv4')
        conv5 = slim.conv2d(conv4, 256, [3, 3], 1, scope='conv5')
        pool5 = slim.max_pool2d(conv5, [3, 3], 2, padding='VALID', scope='pool5')
        shape = int(np.prod(pool5.get_shape()[1:]))
        fc6 = slim.fully_connected(tf.reshape(pool5, [-1, shape]), 4096, scope='fc6')
        fc_detection = slim.fully_connected(fc6, 512, scope='fc_det1')
        out_detection = slim.fully_connected(fc_detection, 2, scope='fc_det2', activation_fn=None)
    return out_detection
def __init__(self, lr, s_size, a_size, h_size):
    # These lines establish the feed-forward part of the network.
    # The agent takes a state and produces an action.
    self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
    hidden = slim.fully_connected(self.state_in, h_size,
                                  biases_initializer=None,
                                  activation_fn=tf.nn.relu)
    self.output = slim.fully_connected(hidden, a_size,
                                       activation_fn=tf.nn.softmax,
                                       biases_initializer=None)
    self.chosen_action = tf.argmax(self.output, 1)

    # The next lines establish the training procedure. We feed the reward
    # and chosen action into the network to compute the loss, and use it to
    # update the network.
    self.reward_holder = tf.placeholder(shape=[None], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32)

    self.indexes = tf.range(0, tf.shape(self.output)[0]) * tf.shape(self.output)[1] + self.action_holder
    self.responsible_outputs = tf.gather(tf.reshape(self.output, [-1]), self.indexes)

    self.loss = -tf.reduce_mean(tf.log(self.responsible_outputs) * self.reward_holder)

    tvars = tf.trainable_variables()
    self.gradient_holders = []
    for idx2, var in enumerate(tvars):
        placeholder = tf.placeholder(tf.float32, name=str(idx2) + '_holder')
        self.gradient_holders.append(placeholder)

    self.gradients = tf.gradients(self.loss, tvars)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    self.update_batch = optimizer.apply_gradients(zip(self.gradient_holders, tvars))
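# Illustrative sketch (not from the original source): the reward_holder fed to
# the loss above is usually the discounted return, not the raw per-step
# reward. The helper name and gamma default are assumptions.
import numpy as np

def discount_rewards(r, gamma=0.99):
    """Compute the discounted return for each time step of a 1-D reward array."""
    discounted = np.zeros_like(r, dtype=np.float64)
    running_add = 0.0
    for t in reversed(range(len(r))):
        running_add = running_add * gamma + r[t]  # G_t = r_t + gamma * G_{t+1}
        discounted[t] = running_add
    return discounted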
def __init__(self, env, hidden_size=8, learning_rate=0.01, gamma=0.99):
    self.state_dim = env.observation_space.shape[0]
    self.action_dim = env.action_space.n
    self.gamma = gamma
    self.history = []

    # Define network.
    self.state_in = tf.placeholder(shape=[None, self.state_dim], dtype=tf.float32)
    hidden = slim.fully_connected(self.state_in, hidden_size,
                                  biases_initializer=None,
                                  activation_fn=tf.nn.relu)
    self.output = slim.fully_connected(hidden, self.action_dim,
                                       biases_initializer=None,
                                       activation_fn=tf.nn.softmax)

    self.reward = tf.placeholder(shape=[None], dtype=tf.float32)
    self.actual_action = tf.placeholder(shape=[None], dtype=tf.int32)

    self.indexes = tf.range(0, tf.shape(self.output)[0]) * self.action_dim + self.actual_action
    self.actual_output = tf.gather(tf.reshape(self.output, [-1]), self.indexes)
    self.loss = -tf.reduce_mean(tf.log(self.actual_output) * self.reward)

    self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.train_op = slim.learning.create_train_op(self.loss, self.optimizer)

    self.session = tf.InteractiveSession()
    # tf.initialize_all_variables() is deprecated.
    self.session.run(tf.global_variables_initializer())
def build_generator(self):
    """Generator for generating captions.

    Supports sample-max or sampling from the distribution.
    No beam search here; beam search is in the decoder.
    """
    # Variables for the sample setting.
    self.sample_max = tf.Variable(True, trainable=False, name="sample_max")
    self.sample_temperature = tf.Variable(1.0, trainable=False, name="temperature")

    self.generator = []
    with tf.variable_scope("rnnlm"):
        flattened_ctx = tf.reshape(self.context, [self.batch_size, 196, 512])
        ctx_mean = tf.reduce_mean(flattened_ctx, 1)

        tf.get_variable_scope().reuse_variables()

        initial_state = utils.get_initial_state(ctx_mean, self.cell.state_size)

        # Projected context, used in the attention module; computed outside
        # the loop to reduce redundant computation.
        # with tf.variable_scope("attention"):
        if self.att_hid_size == 0:
            pctx = slim.fully_connected(flattened_ctx, 1, activation_fn=None,
                                        scope='ctx_att')  # (batch) * 196 * 1
        else:
            pctx = slim.fully_connected(flattened_ctx, self.att_hid_size,
                                        activation_fn=None,
                                        scope='ctx_att')  # (batch) * 196 * att_hid_size

        rnn_input = tf.nn.embedding_lookup(self.Wemb, tf.zeros([self.batch_size], tf.int32))

        prev_h = utils.last_hidden_vec(initial_state)

        self.g_alphas = []
        outputs = []
        state = initial_state
        for ind in range(MAX_STEPS):
            with tf.variable_scope("attention"):
                alpha = self.get_alpha(prev_h, pctx)
                self.g_alphas.append(alpha)
                weighted_context = tf.reduce_sum(flattened_ctx * tf.expand_dims(alpha, 2), 1)

            output, state = self.cell(tf.concat(axis=1, values=[weighted_context, rnn_input]), state)
            outputs.append(output)
            prev_h = output

            # Get the input of the next time step: either the word with the
            # largest probability, or a sample from the distribution.
            prev_logit = slim.fully_connected(prev_h, self.vocab_size + 1,
                                              activation_fn=None, scope='logit')
            prev_symbol = tf.stop_gradient(tf.cond(
                self.sample_max,
                lambda: tf.argmax(prev_logit, 1),
                lambda: tf.squeeze(
                    tf.multinomial(tf.nn.log_softmax(prev_logit) / self.sample_temperature, 1), 1)))
            self.generator.append(prev_symbol)
            rnn_input = tf.nn.embedding_lookup(self.Wemb, prev_symbol)

        self.g_output = output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, self.rnn_size])
        # outputs[1:], because we don't calculate loss on time 0.
        self.g_logits = logits = slim.fully_connected(output, self.vocab_size + 1,
                                                      activation_fn=None, scope='logit')
        self.g_probs = probs = tf.reshape(tf.nn.softmax(logits),
                                          [self.batch_size, MAX_STEPS, self.vocab_size + 1])

    self.generator = tf.transpose(tf.reshape(tf.concat(axis=0, values=self.generator), [MAX_STEPS, -1]))
def neural_network(self, X):
    """pi, mu, sigma = NN(x; theta)"""
    hidden1 = slim.fully_connected(X, 25)
    hidden2 = slim.fully_connected(hidden1, 25)
    self.pi = slim.fully_connected(hidden2, self.K, activation_fn=tf.nn.softmax)
    self.mus = slim.fully_connected(hidden2, self.K, activation_fn=None)
    self.sigmas = slim.fully_connected(hidden2, self.K, activation_fn=tf.nn.softplus)
def neural_network(X):
    """loc, scale, logits = NN(x; theta)"""
    # 2 hidden layers with 15 hidden units.
    hidden1 = slim.fully_connected(X, 15)
    hidden2 = slim.fully_connected(hidden1, 15)
    locs = slim.fully_connected(hidden2, K, activation_fn=None)
    scales = slim.fully_connected(hidden2, K, activation_fn=tf.exp)
    logits = slim.fully_connected(hidden2, K, activation_fn=None)
    return locs, scales, logits
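# Illustrative sketch (not from the original source): the three heads above
# parameterize a K-component Gaussian mixture. One way to turn them into a
# negative log-likelihood loss; the placeholders and the value of K are
# assumptions.
import math

K = 20  # assumed number of mixture components
X = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.float32, [None, 1])
locs, scales, logits = neural_network(X)

# Per-component Gaussian log-density of y, shape [batch, K] (y broadcasts).
comp_log_prob = (-0.5 * tf.square((y - locs) / scales)
                 - tf.log(scales)
                 - 0.5 * math.log(2.0 * math.pi))
# Mixture log-likelihood via log-sum-exp over the K components.
log_mix = tf.reduce_logsumexp(tf.nn.log_softmax(logits) + comp_log_prob, axis=1)
loss = -tf.reduce_mean(log_mix)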
def build_graph(top_k):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')

    with tf.device('/gpu:0'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            conv3_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv3_1')
            max_pool_1 = slim.max_pool2d(conv3_1, [2, 2], [2, 2], padding='SAME', scope='pool1')
            conv3_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv3_2')
            max_pool_2 = slim.max_pool2d(conv3_2, [2, 2], [2, 2], padding='SAME', scope='pool2')
            conv3_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3_3')
            max_pool_3 = slim.max_pool2d(conv3_3, [2, 2], [2, 2], padding='SAME', scope='pool3')
            conv3_4 = slim.conv2d(max_pool_3, 512, [3, 3], padding='SAME', scope='conv3_4')
            conv3_5 = slim.conv2d(conv3_4, 512, [3, 3], padding='SAME', scope='conv3_5')
            max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2], padding='SAME', scope='pool4')

            flatten = slim.flatten(max_pool_4)
            fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024,
                                       activation_fn=tf.nn.relu, scope='fc1')
            logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size,
                                          activation_fn=None, scope='fc2')

        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

        # Make sure the batch-norm moving averages are updated before the loss.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)

        global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)
        probabilities = tf.nn.softmax(logits)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()
        predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
        accuracy_in_top_k = tf.reduce_mean(tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

    return {'images': images,
            'labels': labels,
            'keep_prob': keep_prob,
            'top_k': top_k,
            'global_step': global_step,
            'train_op': train_op,
            'loss': loss,
            'is_training': is_training,
            'accuracy': accuracy,
            'accuracy_top_k': accuracy_in_top_k,
            'merged_summary_op': merged_summary_op,
            'predicted_distribution': probabilities,
            'predicted_index_top_k': predicted_index_top_k,
            'predicted_val_top_k': predicted_val_top_k}
def make_tower(net):
    net = slim.conv2d(net, 20, [5, 5], padding='VALID', scope='conv1')
    net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool1')
    net = slim.conv2d(net, 50, [5, 5], padding='VALID', scope='conv2')
    net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool2')
    net = slim.flatten(net)
    net = slim.fully_connected(net, 500, scope='fc1')
    net = slim.fully_connected(net, 2, activation_fn=None, scope='fc2')
    return net
def build_model(self):
    with tf.name_scope("batch_size"):
        # Get batch_size from the first dimension of self.images.
        self.batch_size = tf.shape(self.images)[0]
    with tf.variable_scope("cnn"):
        image_emb = slim.fully_connected(self.fc7, self.input_encoding_size,
                                         activation_fn=None, scope='encode_image')
    with tf.variable_scope("rnnlm"):
        # Replicate self.seq_per_img times for each image embedding.
        image_emb = tf.reshape(tf.tile(tf.expand_dims(image_emb, 1), [1, self.seq_per_img, 1]),
                               [self.batch_size * self.seq_per_img, self.input_encoding_size])

        # rnn_inputs is a list of inputs, one element per time step;
        # time step 0 is the image embedding.
        rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                              value=tf.nn.embedding_lookup(self.Wemb, self.labels[:, :self.seq_length + 1]))
        rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]
        rnn_inputs = [image_emb] + rnn_inputs

        # The initial state is zero.
        initial_state = self.cell.zero_state(self.batch_size * self.seq_per_img, tf.float32)
        outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(rnn_inputs, initial_state,
                                                                    self.cell, loop_function=None)

        outputs = tf.concat(axis=0, values=outputs[1:])
        self.logits = slim.fully_connected(outputs, self.vocab_size + 1,
                                           activation_fn=None, scope='logit')
        self.logits = tf.split(axis=0, num_or_size_splits=len(rnn_inputs) - 1, value=self.logits)

    with tf.variable_scope("loss"):
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            self.logits,
            # self.labels[:, 1:] is the target.
            [tf.squeeze(label, [1]) for label in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1, value=self.labels[:, 1:])],
            [tf.squeeze(mask, [1]) for mask in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1, value=self.masks[:, 1:])])
        self.cost = tf.reduce_mean(loss)

    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    self.cnn_lr = tf.Variable(0.0, trainable=False)

    # Collect the rnn variables, and create the optimizer of the rnn.
    tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
    grads = utils.clip_by_value(tf.gradients(self.cost, tvars),
                                -self.opt.grad_clip, self.opt.grad_clip)
    # grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
    #                                   self.opt.grad_clip)
    optimizer = utils.get_optimizer(self.opt, self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Collect the cnn variables, and create the optimizer of the cnn.
    cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='cnn')
    cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars),
                                    -self.opt.grad_clip, self.opt.grad_clip)
    # cnn_grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, cnn_tvars),
    #                                       self.opt.grad_clip)
    cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr)
    self.cnn_train_op = cnn_optimizer.apply_gradients(zip(cnn_grads, cnn_tvars))

    tf.summary.scalar('training loss', self.cost)
    tf.summary.scalar('learning rate', self.lr)
    tf.summary.scalar('cnn learning rate', self.cnn_lr)
    self.summaries = tf.summary.merge_all()
def neural_network(X):
    """loc, scale, logits = NN(x; theta)"""
    # 2 hidden layers with 60 hidden units each, batch-normalized.
    hidden1 = slim.fully_connected(X, 60, normalizer_fn=slim.batch_norm)
    hidden2 = slim.fully_connected(hidden1, 60, normalizer_fn=slim.batch_norm)
    locs = slim.fully_connected(hidden2, K, activation_fn=None)
    o_scales = slim.fully_connected(hidden2, K, activation_fn=tf.exp)
    # Clamp the scales to a sane range to avoid numerical issues.
    scales = tf.minimum(20., tf.maximum(0.0001, o_scales))
    logits = slim.fully_connected(hidden2, K, activation_fn=None)
    return locs, scales, logits, hidden1
def _construct_network(self, inputs):
    # Actor network and critic network share all shallow layers.
    conv1 = slim.conv2d(inputs=inputs,
                        num_outputs=16,
                        activation_fn=tf.nn.relu,
                        kernel_size=[8, 8],
                        stride=[4, 4],
                        padding='VALID')
    conv2 = slim.conv2d(inputs=conv1,
                        num_outputs=32,
                        activation_fn=tf.nn.relu,
                        kernel_size=[4, 4],
                        stride=[2, 2],
                        padding='VALID')
    hidden = slim.fully_connected(inputs=slim.flatten(conv2),
                                  num_outputs=256,
                                  activation_fn=tf.nn.relu)

    # Recurrent network for temporal dependencies.
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=256)
    c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
    h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
    self.state_init = [c_init, h_init]
    c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
    h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
    self.state_in = (c_in, h_in)

    rnn_in = tf.expand_dims(hidden, [0])
    step_size = tf.shape(inputs)[:1]
    state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
    lstm_out, lstm_state = tf.nn.dynamic_rnn(cell=lstm_cell,
                                             inputs=rnn_in,
                                             initial_state=state_in,
                                             sequence_length=step_size,
                                             time_major=False)
    lstm_c, lstm_h = lstm_state
    self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
    rnn_out = tf.reshape(lstm_out, [-1, 256])

    # Output for policy and value estimations.
    self.policy = slim.fully_connected(
        inputs=rnn_out,
        num_outputs=self.a_dim,
        activation_fn=tf.nn.softmax,
        weights_initializer=normalized_columns_initializer(0.01),
        biases_initializer=None)
    self.value = slim.fully_connected(
        inputs=rnn_out,
        num_outputs=1,
        activation_fn=None,
        weights_initializer=normalized_columns_initializer(1.0),
        biases_initializer=None)
def build_NN_two_hidden_layers_sguada(x, is_training):
    batch_norm_params = {'is_training': is_training, 'decay': 0.9, 'updates_collections': None}
    with slim.arg_scope([slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        biases_initializer=tf.constant_initializer(0.1),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        net = slim.fully_connected(x, 50, scope='A1')
        net = slim.fully_connected(net, 49, scope='A2')
        y = slim.fully_connected(net, 10, activation_fn=tf.nn.softmax,
                                 normalizer_fn=None, scope='A3')
    return y
def inference(inputs):
    x = tf.reshape(inputs, [-1, 28, 28, 1])
    conv_1 = tf.nn.relu(slim.conv2d(x, 32, [3, 3]))  # 28 * 28 * 32
    pool_1 = slim.max_pool2d(conv_1, [2, 2])         # 14 * 14 * 32
    block_1 = res_identity(pool_1, 32, [3, 3], 'layer_2')
    block_2 = res_change(block_1, 64, [3, 3], 'layer_3')
    block_3 = res_identity(block_2, 64, [3, 3], 'layer_4')
    block_4 = res_change(block_3, 32, [3, 3], 'layer_5')
    net_flatten = slim.flatten(block_4, scope='flatten')
    fc_1 = slim.fully_connected(slim.dropout(net_flatten, 0.8), 200,
                                activation_fn=tf.nn.tanh, scope='fc_1')
    output = slim.fully_connected(slim.dropout(fc_1, 0.8), 10,
                                  activation_fn=None, scope='output_layer')
    return output
def _build_network(self, sess, is_training=True):
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # Select initializers.
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3], trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], trainable=is_training, scope='conv5')
        self._act_summaries.append(net)
        self._layers['head'] = net

        # Build the anchors for the image.
        self._anchor_component()

        # Region proposal network.
        rois = self._region_proposal(net, is_training, initializer)

        # Region-of-interest pooling.
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net, rois, "pool5")
        else:
            raise NotImplementedError

        pool5_flat = slim.flatten(pool5, scope='flatten')
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, scope='dropout6')
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, scope='dropout7')

        # Region classification.
        cls_prob, bbox_pred = self._region_classification(fc7, is_training,
                                                          initializer, initializer_bbox)

        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
def create_model(self, model_input, vocab_size, num_mixtures=None,
                 l2_penalty=1e-8, **unused_params):
    """Creates a Mixture of (Logistic) Experts model.

    The model consists of a per-class softmax distribution over a
    configurable number of logistic classifiers. One of the classifiers in
    the mixture is not trained, and always predicts 0.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.

    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates")
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return {"predictions": final_probabilities}
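# Illustrative sketch (not from the original source): a NumPy trace of the
# reshapes above with toy sizes, to make the gating arithmetic concrete.
import numpy as np

batch, vocab_size, num_mixtures = 4, 10, 3
gate_activations = np.random.randn(batch, vocab_size * (num_mixtures + 1))
expert_activations = np.random.randn(batch, vocab_size * num_mixtures)

gating = np.reshape(gate_activations, (-1, num_mixtures + 1))        # (40, 4)
gating = np.exp(gating) / np.exp(gating).sum(axis=1, keepdims=True)  # softmax
experts = 1.0 / (1.0 + np.exp(-np.reshape(expert_activations, (-1, num_mixtures))))  # (40, 3)

# The extra gate column belongs to the dummy expert that always predicts 0,
# so it is simply dropped from the weighted sum.
probs = (gating[:, :num_mixtures] * experts).sum(axis=1).reshape(batch, vocab_size)
assert probs.shape == (4, 10)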
def __init__(self, is_training):
    self.input_image = tf.placeholder(dtype=tf.float32, shape=[None, 3, 128], name='input_image')
    self.input_label = tf.placeholder(dtype=tf.float32, shape=[None, 100], name='input_label')
    self.input_nlcd = tf.placeholder(dtype=tf.float32, shape=[None, 15], name='input_nlcd')
    self.keep_prob = tf.placeholder(tf.float32)

    weights_regularizer = slim.l2_regularizer(FLAGS.weight_decay)

    flatten_hist = tf.reshape(self.input_image, [-1, 3 * 128])
    flatten_hist = tf.concat([flatten_hist, self.input_nlcd], 1)

    x = slim.fully_connected(flatten_hist, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_1')
    x = slim.fully_connected(x, 256, weights_regularizer=weights_regularizer, scope='decoder/fc_2')
    flatten_hist = slim.fully_connected(x, 256, weights_regularizer=weights_regularizer, scope='decoder/fc_3')

    all_logits = []
    all_output = []
    # Predict the 100 labels one at a time, feeding previous outputs back in.
    for i in range(100):
        if i == 0:
            current_input_x = flatten_hist
        else:
            current_output = tf.concat(all_output, 1)
            current_input_x = tf.concat([flatten_hist, current_output], 1)

        x = slim.fully_connected(current_input_x, 100, weights_regularizer=weights_regularizer)
        # x = slim.fully_connected(x, 100, weights_regularizer=weights_regularizer)
        # x = slim.fully_connected(x, 17, weights_regularizer=weights_regularizer)
        x = slim.dropout(x, keep_prob=self.keep_prob, is_training=is_training)
        all_logits.append(slim.fully_connected(x, 1, activation_fn=None,
                                               weights_regularizer=weights_regularizer))
        all_output.append(tf.sigmoid(all_logits[i]))

    final_logits = tf.concat(all_logits, 1)
    final_output = tf.sigmoid(final_logits)
    self.output = final_output

    self.ce_loss = tf.reduce_mean(tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=self.input_label, logits=final_logits), 1))
    slim.losses.add_loss(self.ce_loss)
    tf.summary.scalar('ce_loss', self.ce_loss)

    # l2 loss
    self.l2_loss = tf.add_n(slim.losses.get_regularization_losses())
    tf.summary.scalar('l2_loss', self.l2_loss)

    # total loss
    self.total_loss = slim.losses.get_total_loss()
    tf.summary.scalar('total_loss', self.total_loss)
def graph():
    x_ = tf.placeholder(tf.float32, [None, 20])
    slim.fully_connected(inputs=x_, num_outputs=1)
data_handler = toyDataSet()
sess = tf.InteractiveSession()
# (config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)))
# config=tf.ConfigProto(gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.4)))

# Model
num_classes = 3
l_rate_min = 0.001
l_rate_max = 0.01

_x = tf.placeholder(tf.float32, shape=[None, 2])
_y = tf.placeholder(tf.int64, shape=[None])

_h1 = slim.fully_connected(_x, 5, activation_fn=tf.nn.relu, scope='hidden1')
_h2 = slim.fully_connected(_h1, 10, activation_fn=tf.nn.relu, scope='hidden2')
_h3 = slim.fully_connected(_h2, 2, activation_fn=tf.nn.relu, scope='hidden3')
_logits = slim.fully_connected(_h3, num_classes, activation_fn=None, scope='logits')

_x_hat = tf.get_variable(
    'x_hat', [data_handler.batch_size, 2])  # 128 is the size of the dataset, should be a variable!
_x_hat_assign_op = _x_hat.assign(_x)  # to assign a value to the variable

# The snippet was truncated here; reuse=True is an assumed completion so the
# _x_hat branch shares the 'hidden1' weights defined above.
_h1_hat = slim.fully_connected(_x_hat, 5, activation_fn=tf.nn.relu,
                               scope='hidden1', reuse=True)
def build_network(self, sess, is_training=True):
    # Select initializers.
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

    bottleneck = resnet_v1.bottleneck
    # Choose different blocks for different numbers of layers.
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer.
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer.
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer.
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # Other depths are not supported.
        raise NotImplementedError

    assert (0 <= cfg.RESNET.FIXED_BLOCKS < 4)
    if cfg.RESNET.FIXED_BLOCKS == 3:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, _ = resnet_v1.resnet_v1(net,
                                         blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                         global_pool=False,
                                         include_root_block=False,
                                         scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, _ = resnet_v1.resnet_v1(net,
                                               blocks[0:-1],
                                               global_pool=False,
                                               include_root_block=False,
                                               scope=self._resnet_scope)

    self._act_summaries.append(net_conv4)
    self._layers['head'] = net_conv4

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Build the anchors for the image.
        self._anchor_component()

        # rpn
        rpn = slim.conv2d(net_conv4, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer, scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score')
        # Change it so that the score has 2 as its channel size.
        rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                           self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a deterministic order for the computing graph,
            # for reproducibility.
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError

    # rcnn
    if cfg.POOLING_MODE == 'crop':
        pool5 = self._crop_pool_layer(net_conv4, rois, "pool5")
    else:
        raise NotImplementedError

    with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        fc7, _ = resnet_v1.resnet_v1(pool5,
                                     blocks[-1:],
                                     global_pool=False,
                                     include_root_block=False,
                                     scope=self._resnet_scope)

    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # Average pooling done by reduce_mean.
        fc7 = tf.reduce_mean(fc7, axis=[1, 2])
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                         weights_initializer=initializer_bbox,
                                         trainable=is_training,
                                         activation_fn=None, scope='bbox_pred')

    self._predictions["rpn_cls_score"] = rpn_cls_score
    self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
    self._predictions["rpn_cls_prob"] = rpn_cls_prob
    self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
    self._predictions["cls_score"] = cls_score
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred
    self._predictions["rois"] = rois

    self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def mobilenet(inputs,
              num_classes=1000,
              is_training=True,
              width_multiplier=3,
              scope='MobileNet'):
    """MobileNet.

    For more detail, please refer to Google's paper
    (https://arxiv.org/abs/1704.04861).

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      scope: Optional scope for the variables.

    Returns:
      logits: the pre-softmax activations, a tensor of size
        [batch_size, `num_classes`]
      end_points: a dictionary from components of the network to the
        corresponding activation.
    """

    def _depthwise_separable_conv(inputs, num_pwc_filters, width_multiplier,
                                  sc, downsample=False):
        """Helper function to build the depthwise separable convolution layer."""
        num_pwc_filters = round(num_pwc_filters * width_multiplier)
        _stride = 2 if downsample else 1

        # Skip pointwise by setting num_outputs=None.
        depthwise_conv = slim.separable_convolution2d(inputs,
                                                      num_outputs=None,
                                                      stride=_stride,
                                                      depth_multiplier=1,
                                                      kernel_size=[3, 3],
                                                      scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        pointwise_conv = slim.convolution2d(bn,
                                            num_pwc_filters,
                                            kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv')
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return bn

    with tf.variable_scope(scope) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.convolution2d, slim.separable_convolution2d],
                            activation_fn=None,
                            outputs_collections=[end_points_collection]):
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training,
                                activation_fn=tf.nn.relu,
                                fused=True):
                net = slim.convolution2d(inputs, round(32 * width_multiplier),
                                         [3, 3], stride=2, padding='SAME',
                                         scope='conv_1')
                net = slim.batch_norm(net, scope='conv_1/batch_norm')
                net = _depthwise_separable_conv(net, 64, width_multiplier, sc='conv_ds_2')
                net = _depthwise_separable_conv(net, 128, width_multiplier, downsample=True, sc='conv_ds_3')
                net = _depthwise_separable_conv(net, 128, width_multiplier, sc='conv_ds_4')
                net = _depthwise_separable_conv(net, 256, width_multiplier, downsample=True, sc='conv_ds_5')
                net = _depthwise_separable_conv(net, 256, width_multiplier, sc='conv_ds_6')
                net = _depthwise_separable_conv(net, 512, width_multiplier, downsample=True, sc='conv_ds_7')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_8')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_9')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_10')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_11')
                net = _depthwise_separable_conv(net, 512, width_multiplier, sc='conv_ds_12')
                net = _depthwise_separable_conv(net, 1024, width_multiplier, downsample=True, sc='conv_ds_13')
                net = _depthwise_separable_conv(net, 1024, width_multiplier, sc='conv_ds_14')
                net = slim.avg_pool2d(net, [7, 7], scope='avg_pool_15')

        def get_tensor_aliases(tensor):
            """Get a list with the aliases of the input tensor.

            If the tensor does not have any alias, it would default to its
            op.name or its name.

            Args:
              tensor: A `Tensor`.

            Returns:
              A list of strings with the aliases of the tensor.
            """
            if hasattr(tensor, 'aliases'):
                aliases = tensor.aliases
            else:
                if tensor.name[-2:] == ':0':
                    # Use op.name for tensors ending in :0.
                    aliases = [tensor.op.name]
                else:
                    aliases = [tensor.name]
            return aliases

        from tensorflow.python.framework import ops
        for tensor in ops.get_collection(end_points_collection):
            for alias in get_tensor_aliases(tensor):
                # print(alias)
                pass

        end_points = slim.utils.convert_collection_to_dict(end_points_collection)

        # Squeeze the spatial dimensions (axes 1 and 2, not the batch axis)
        # left over from the 7x7 average pool.
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        end_points['squeeze'] = net
        logits = slim.fully_connected(net, num_classes,
                                      activation_fn=None,
                                      scope='fc_16')
        predictions = slim.softmax(logits, scope='Predictions')

        end_points['Logits'] = logits
        end_points['Predictions'] = predictions

    return logits, end_points
def _build_planner(self, scaled_beliefs, m={}):
    debug = self._debug
    is_training = self._is_training
    batch_size = tf.shape(scaled_beliefs[0])[0]
    image_scaler = self._upscale_image
    estimate_size = self._estimate_size
    value_map_size = (estimate_size, estimate_size, 1)
    num_actions = self._num_actions
    num_iterations = self._num_iterations

    def _fuse_belief(belief):
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.elu,
                            weights_initializer=tf.truncated_normal_initializer(stddev=1),
                            biases_initializer=tf.constant_initializer(0),
                            stride=1,
                            padding='SAME',
                            reuse=tf.AUTO_REUSE):
            net = slim.conv2d(belief, 1, [1, 1], scope='fuser_combine')
            return net

    class HierarchicalVINCell(tf.nn.rnn_cell.RNNCell):
        @property
        def state_size(self):
            return tf.TensorShape(value_map_size)

        @property
        def output_size(self):
            return self.state_size

        def __call__(self, inputs, state, scope=None):
            # Upscale previous value map.
            state = image_scaler(state)

            estimate, _, values = [tf.expand_dims(layer, axis=3)
                                   for layer in tf.unstack(inputs, axis=3)]
            with slim.arg_scope([slim.conv2d], reuse=tf.AUTO_REUSE):
                rewards_map = _fuse_belief(tf.concat([estimate, values, state], axis=3))
                actions_map = slim.conv2d(
                    rewards_map, num_actions, [3, 3],
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.42),
                    biases_initializer=tf.constant_initializer(0),
                    scope='VIN_actions_initial')
                values_map = tf.reduce_max(actions_map, axis=3, keep_dims=True)

            with slim.arg_scope([slim.conv2d], reuse=tf.AUTO_REUSE):
                # xrange is Python 2 only; range works in both.
                for i in range(num_iterations - 1):
                    rv = tf.concat([rewards_map, values_map], axis=3)
                    actions_map = slim.conv2d(
                        rv, num_actions, [3, 3],
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.42),
                        biases_initializer=tf.constant_initializer(0),
                        scope='VIN_actions')
                    values_map = tf.reduce_max(actions_map, axis=3, keep_dims=True)

            return values_map, values_map

    beliefs = tf.stack([slim.batch_norm(belief, is_training=is_training)
                        for belief in scaled_beliefs], axis=1)

    vin_cell = HierarchicalVINCell()
    interm_values_map, final_values_map = tf.nn.dynamic_rnn(
        vin_cell, beliefs,
        initial_state=vin_cell.zero_state(batch_size, tf.float32),
        swap_memory=True)
    m['value_map'] = interm_values_map

    values_features = slim.flatten(final_values_map)
    actions_logit = slim.fully_connected(
        values_features, num_actions**2,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.03),
        biases_initializer=tf.constant_initializer(0),
        activation_fn=tf.nn.elu,
        scope='logit_output_1')
    actions_logit = slim.fully_connected(
        actions_logit, num_actions,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.5),
        biases_initializer=tf.constant_initializer(1.0 / num_actions),
        scope='logit_output_2')

    return actions_logit
import numpy as np
from sklearn import datasets
from sklearn.datasets import make_circles, make_moons, make_classification

X, y = make_circles(noise=0.1, factor=0.5, random_state=1, n_samples=10000)
y_r = np.zeros([len(X), 2])
print(y_r)

# Create the network.
x_tf = tf.placeholder(tf.float32, [None, 2])
label_tf = tf.placeholder(tf.float32, [None, 2])

# Augment the 2-d input with the cross term and the squared terms.
x2x = tf.concat([x_tf, x_tf[:, :1] * x_tf[:, 1:2], x_tf**2], axis=1)
# Note: y_tf is passed to softmax_cross_entropy_with_logits below, so an
# identity activation would be more conventional than a sigmoid here.
y_tf = slim.fully_connected(x2x, 2, scope='full',
                            activation_fn=tf.nn.sigmoid, reuse=False)
ce = tf.nn.softmax_cross_entropy_with_logits(labels=label_tf, logits=y_tf)
loss = tf.reduce_mean(ce)
train_step = tf.train.AdamOptimizer(0.005).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
for itr in range(6000):
    sess.run(train_step, feed_dict={x_tf: X, label_tf: y_r})

# Import matplotlib for charts.
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
def net(self, spec_batch, name='net'):
    # assert spec_batch.shape.as_list() == [None, None, 229, 3]
    inputs = tf.concat(tf.split(value=spec_batch, num_or_size_splits=2, axis=-1), axis=0)
    assert inputs.shape.as_list() == [None, None, 229, 1]

    x = slim.conv2d(inputs=inputs, num_outputs=32, kernel_size=(3, 7),
                    activation_fn=nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=dict(decay=0.99, is_training=self.is_training),
                    scope='conv_x_1')
    # outputs.shape: (?, ?, 229, 32)
    x = slim.conv2d(inputs=x, num_outputs=32, kernel_size=3,
                    activation_fn=nn.relu,
                    normalizer_fn=slim.batch_norm,
                    normalizer_params=dict(decay=0.99, is_training=self.is_training),
                    scope='conv_x_2')
    x = slim.max_pool2d(inputs=x, kernel_size=[1, 2], stride=[1, 2], scope='maxpool_x_1')
    # outputs.shape: (?, ?, 114, 32)
    x = slim.dropout(inputs=x, keep_prob=0.75, is_training=self.is_training, scope='dropout_x_1')
    # x = self.gaussian_noise(x, std=0.1)

    x_new = self.TCM(x, name='TCM1')
    x_new = slim.conv2d(inputs=x_new, num_outputs=32, kernel_size=3,
                        activation_fn=nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=dict(decay=0.99, is_training=self.is_training),
                        scope='conv_xnew_1')
    x_new = slim.dropout(inputs=x_new, keep_prob=0.75, is_training=self.is_training, scope='dropout_xnew_1')

    outputs = self.SFE(x_new)
    outputs = slim.conv2d(inputs=outputs, num_outputs=64, kernel_size=3,
                          normalizer_fn=slim.batch_norm,
                          normalizer_params=dict(decay=0.99, is_training=self.is_training),
                          scope='conv_2')
    outputs = slim.max_pool2d(inputs=outputs, kernel_size=[1, 2], stride=[1, 2], scope='maxpool_1')
    outputs = slim.dropout(inputs=outputs, keep_prob=0.75, is_training=self.is_training, scope='dropout_2')

    dims = tf.shape(outputs)
    outputs = tf.reshape(tensor=outputs,
                         shape=[dims[0], dims[1],
                                outputs.shape[2].value * outputs.shape[3].value],
                         name='flatten_3')
    # outputs.shape: (?, ?, 57*64)

    outputs = slim.fully_connected(inputs=outputs, num_outputs=512,
                                   normalizer_fn=slim.batch_norm,
                                   normalizer_params=dict(decay=0.99, is_training=self.is_training),
                                   scope='fc_1')
    # assert outputs.shape.as_list() == [None, None, 512]
    outputs = slim.dropout(inputs=outputs, keep_prob=0.5, is_training=self.is_training, scope='dropout_3')
    outputs = slim.fully_connected(inputs=outputs, num_outputs=88,
                                   activation_fn=None, scope='output')
    return outputs
def _build_layers_v2(self, input_dict, num_outputs, options):
    # Parse options.
    inputs = input_dict["obs"]
    convs = [[16, [2, 2], 4], [32, [2, 2], 3], [32, [2, 2], 2], [128, [1, 1], 1]]
    hiddens = [128, 128]
    fcnet_activation = options.get("fcnet_activation", "tanh")
    if fcnet_activation == "tanh":
        activation = tf.nn.tanh
    elif fcnet_activation == "relu":
        activation = tf.nn.relu
    else:
        # Fail fast instead of hitting a NameError below.
        raise ValueError("Unknown fcnet_activation: {}".format(fcnet_activation))

    vision_in = inputs["boards"]
    metrics_in = inputs["states"]

    # Setup vision layers.
    with tf.name_scope("pommer_vision"):
        for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1):
            vision_in = slim.conv2d(vision_in, out_size, kernel, stride,
                                    scope="conv{}".format(i))
        out_size, kernel, stride = convs[-1]
        vision_in = slim.conv2d(vision_in, out_size, kernel, stride,
                                padding="VALID", scope="conv_out")
        vision_in = tf.squeeze(vision_in, [1, 2])

    # Setup metrics layer.
    with tf.name_scope("pommer_metrics"):
        metrics_in = slim.fully_connected(
            metrics_in, 64,
            weights_initializer=xavier_initializer(),
            activation_fn=activation,
            scope="metrics_out")

    with tf.name_scope("pommer_out"):
        i = 1
        last_layer = tf.concat([vision_in, metrics_in], axis=1)
        for size in hiddens:
            last_layer = slim.fully_connected(
                last_layer, size,
                weights_initializer=xavier_initializer(),
                activation_fn=activation,
                scope="fc{}".format(i))
            i += 1
        output = slim.fully_connected(
            last_layer, num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")

    return output, last_layer
def atari_network(num_actions, num_atoms, support, network_type, state,
                  representation_layer=10):
    """The convolutional network used to compute agent's Q-value distributions.

    Args:
      num_actions: int, number of actions.
      num_atoms: int, the number of buckets of the value function distribution.
      support: tf.linspace, the support of the Q-value distribution.
      network_type: namedtuple, collection of expected values to return.
      state: `tf.Tensor`, contains the agent's current state.
      representation_layer: int, the layer which will be used as the
        representation for computing the bisimulation distances. Defaults to
        a high value, so the representation defaults to the penultimate layer.

    Returns:
      net: _network_type object containing the tensors output by the network.
    """
    weights_initializer = contrib_slim.variance_scaling_initializer(
        factor=1.0 / np.sqrt(3.0), mode='FAN_IN', uniform=True)
    curr_layer = 1
    net = tf.cast(state, tf.float32)
    net = tf.div(net, 255.)
    representation = None
    if representation_layer <= curr_layer:
        representation = contrib_slim.flatten(net)
    net = contrib_slim.conv2d(
        net, 32, [8, 8], stride=4,
        weights_initializer=weights_initializer, trainable=False)
    curr_layer += 1
    if representation is None and representation_layer <= curr_layer:
        representation = contrib_slim.flatten(net)
    net = contrib_slim.conv2d(
        net, 64, [4, 4], stride=2,
        weights_initializer=weights_initializer, trainable=False)
    curr_layer += 1
    if representation is None and representation_layer <= curr_layer:
        representation = contrib_slim.flatten(net)
    net = contrib_slim.conv2d(
        net, 64, [3, 3], stride=1,
        weights_initializer=weights_initializer, trainable=False)
    net = contrib_slim.flatten(net)
    curr_layer += 1
    if representation is None and representation_layer <= curr_layer:
        representation = net
    net = contrib_slim.fully_connected(
        net, 512, weights_initializer=weights_initializer, trainable=False)
    curr_layer += 1
    if representation is None:
        representation = net
    net = contrib_slim.fully_connected(
        net, num_actions * num_atoms, activation_fn=None,
        weights_initializer=weights_initializer, trainable=False)
    logits = tf.reshape(net, [-1, num_actions, num_atoms])
    probabilities = contrib_layers.softmax(logits)
    q_values = tf.reduce_sum(support * probabilities, axis=2)
    return network_type(q_values, logits, probabilities, representation)
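# Illustrative sketch (not from the original source): exercising the builder
# with an assumed namedtuple; only the field order matters to the return
# statement above. The [-10, 10] support range is also an assumption.
import collections

DQNNetworkType = collections.namedtuple(
    'dqn_network', ['q_values', 'logits', 'probabilities', 'representation'])

num_actions, num_atoms = 4, 51
support = tf.linspace(-10., 10., num_atoms)
state = tf.placeholder(tf.uint8, [None, 84, 84, 4])
net = atari_network(num_actions, num_atoms, support, DQNNetworkType, state,
                    representation_layer=2)  # use the first conv activations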
def create_ds_cnn_model(fingerprint_input, model_settings, model_size_info,
                        is_training):
    """Builds a model with a depthwise separable convolutional neural network.

    The model definition is based on https://arxiv.org/abs/1704.04861 and the
    TensorFlow implementation at https://github.com/Zehaos/MobileNet.

    model_size_info: defines the number of layers, followed by the DS-Conv
    layer parameters in the order {number of conv features, conv filter
    height, width, and stride in y,x dir.} for each of the layers.
    Note that the first layer is always a regular convolution, but the
    remaining layers are all depthwise separable convolutions.
    """

    def ds_cnn_arg_scope(weight_decay=0):
        """Defines the default ds_cnn argument scope.
        Args:
            weight_decay: The weight decay to use for regularizing the model.
        Returns:
            An `arg_scope` to use for the DS-CNN model.
        """
        with slim.arg_scope(
                [slim.convolution2d, slim.separable_convolution2d],
                weights_initializer=slim.initializers.xavier_initializer(),
                biases_initializer=slim.init_ops.zeros_initializer(),
                weights_regularizer=slim.l2_regularizer(weight_decay)) as sc:
            return sc

    def _depthwise_separable_conv(inputs, num_pwc_filters, sc, kernel_size,
                                  stride):
        """Helper function to build the depthwise separable convolution layer."""
        # Skip the pointwise stage inside the separable conv by setting
        # num_outputs=None; batch norm and an explicit 1x1 conv follow.
        depthwise_conv = slim.separable_convolution2d(inputs,
                                                      num_outputs=None,
                                                      stride=stride,
                                                      depth_multiplier=1,
                                                      kernel_size=kernel_size,
                                                      scope=sc + '/depthwise_conv')
        bn = slim.batch_norm(depthwise_conv, scope=sc + '/dw_batch_norm')
        pointwise_conv = slim.convolution2d(bn,
                                            num_pwc_filters,
                                            kernel_size=[1, 1],
                                            scope=sc + '/pointwise_conv')
        bn = slim.batch_norm(pointwise_conv, scope=sc + '/pw_batch_norm')
        return bn

    if is_training:
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')
    label_count = model_settings['label_count']
    input_frequency_size = model_settings['dct_coefficient_count']
    input_time_size = model_settings['spectrogram_length']
    fingerprint_4d = tf.reshape(fingerprint_input,
                                [-1, input_time_size, input_frequency_size, 1])
    t_dim = input_time_size
    f_dim = input_frequency_size

    # Extract model dimensions from model_size_info
    num_layers = model_size_info[0]
    conv_feat = [None] * num_layers
    conv_kt = [None] * num_layers
    conv_kf = [None] * num_layers
    conv_st = [None] * num_layers
    conv_sf = [None] * num_layers
    i = 1
    for layer_no in range(0, num_layers):
        conv_feat[layer_no] = model_size_info[i]
        i += 1
        conv_kt[layer_no] = model_size_info[i]
        i += 1
        conv_kf[layer_no] = model_size_info[i]
        i += 1
        conv_st[layer_no] = model_size_info[i]
        i += 1
        conv_sf[layer_no] = model_size_info[i]
        i += 1

    scope = 'DS-CNN'
    with tf.variable_scope(scope) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope(
                [slim.convolution2d, slim.separable_convolution2d],
                activation_fn=None,
                weights_initializer=slim.initializers.xavier_initializer(),
                biases_initializer=slim.init_ops.zeros_initializer(),
                outputs_collections=[end_points_collection]):
            with slim.arg_scope([slim.batch_norm],
                                is_training=is_training,
                                decay=0.96,
                                updates_collections=None,
                                activation_fn=tf.nn.relu):
                for layer_no in range(0, num_layers):
                    if layer_no == 0:
                        net = slim.convolution2d(
                            fingerprint_4d, conv_feat[layer_no],
                            [conv_kt[layer_no], conv_kf[layer_no]],
                            stride=[conv_st[layer_no], conv_sf[layer_no]],
                            padding='SAME', scope='conv_1')
                        net = slim.batch_norm(net, scope='conv_1/batch_norm')
                    else:
                        net = _depthwise_separable_conv(
                            net, conv_feat[layer_no],
                            kernel_size=[conv_kt[layer_no], conv_kf[layer_no]],
                            stride=[conv_st[layer_no], conv_sf[layer_no]],
                            sc='conv_ds_' + str(layer_no))
                    t_dim = math.ceil(t_dim / float(conv_st[layer_no]))
                    f_dim = math.ceil(f_dim / float(conv_sf[layer_no]))
                net = slim.avg_pool2d(net, [t_dim, f_dim], scope='avg_pool')
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        logits = slim.fully_connected(net, label_count, activation_fn=None,
                                      scope='fc1')
    if is_training:
        return logits, dropout_prob
    else:
        return logits
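# A minimal usage sketch for create_ds_cnn_model. The concrete numbers below
# are illustrative assumptions, not settings taken from the source:
# model_size_info packs [num_layers, then per layer: features, kernel_t,
# kernel_f, stride_t, stride_f], matching the parsing loop above.
import tensorflow as tf

model_settings = {'label_count': 12,          # assumed number of keywords
                  'dct_coefficient_count': 10,
                  'spectrogram_length': 49}
# 5 layers: one regular conv followed by four depthwise-separable convs.
model_size_info = [5,
                   64, 10, 4, 2, 1,   # conv_1: 64 feats, 10x4 kernel, stride 2x1
                   64, 3, 3, 1, 1,    # conv_ds_1
                   64, 3, 3, 1, 1,    # conv_ds_2
                   64, 3, 3, 1, 1,    # conv_ds_3
                   64, 3, 3, 1, 1]    # conv_ds_4
fingerprint_input = tf.placeholder(tf.float32, [None, 49 * 10],  # time x freq
                                   name='fingerprint')
logits, dropout_prob = create_ds_cnn_model(fingerprint_input, model_settings,
                                           model_size_info, is_training=True)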
def __init__(self, window_size, channel_size, num_goals, num_actions, history_steps, scope='global'): with tf.variable_scope(scope): self.state = tf.placeholder(shape=[ None, history_steps, window_size, window_size, channel_size ], dtype=tf.float32) self.goal = tf.placeholder(shape=[None, num_goals], dtype=tf.float32) result = self.conv3d( scope_name='conv3d', input=self.state, filter_size=[history_steps, 1, 1, channel_size, 1]) result = tf.reshape(result, [-1, window_size, window_size, 1]) conv_layers = [(50, [3, 3]), (100, [3, 3])] pool_layers = [[2, 2], [2, 2]] for i in range(len(conv_layers)): num_filters, kernel_size = conv_layers[i] result = slim.conv2d(inputs=result, num_outputs=num_filters, kernel_size=kernel_size, stride=1, padding='SAME', scope='conv_%d' % i) if pool_layers[i] is not None: result = slim.max_pool2d(inputs=result, kernel_size=pool_layers[i], scope='pool_%d' % i) flatten = slim.flatten(result) # flatten = tf.concat([flatten, self.goal], 1) hidden_embed = slim.fully_connected( inputs=flatten, num_outputs=100, activation_fn=None, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='embed') qvalues = slim.fully_connected( inputs=hidden_embed, num_outputs=num_actions, activation_fn=None, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='qvalue') self.qvalues = qvalues # training if scope != 'global': self.chosen_actions = tf.placeholder(shape=[None], dtype=tf.int32) self.target_q_values = tf.placeholder(shape=[None], dtype=tf.float32) self.lr = tf.placeholder(dtype=tf.float32) actions_onehot = tf.one_hot(self.chosen_actions, num_actions, dtype=tf.float32) qvalues_for_chosen_actions = tf.reduce_sum(self.qvalues * actions_onehot, axis=1) td_error = tf.square(self.target_q_values - qvalues_for_chosen_actions) self.qvalue_loss = 0.5 * tf.reduce_mean(td_error) params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) gradients = tf.gradients(self.qvalue_loss, params) norm_gradients, _ = tf.clip_by_global_norm(gradients, 40.0) trainer = tf.train.RMSPropOptimizer(learning_rate=self.lr) global_params = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, 'global') self.update = trainer.apply_gradients( zip(norm_gradients, global_params))
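# Why the one_hot/reduce_sum pair in the training block works: a NumPy
# rendition of picking Q(s, a) for the chosen actions (toy values, shapes only).
import numpy as np

qvalues = np.array([[1.0, 2.0, 3.0],
                    [4.0, 5.0, 6.0]])          # [batch, num_actions]
chosen = np.array([2, 0])                      # action index per example
onehot = np.eye(qvalues.shape[1])[chosen]      # [batch, num_actions]
q_for_chosen = (qvalues * onehot).sum(axis=1)  # -> [3.0, 4.0]
td_error = (np.array([3.5, 3.0]) - q_for_chosen) ** 2
qvalue_loss = 0.5 * td_error.mean()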
def model_fn(features, labels, mode, params): is_chief = not tf.get_variable_scope().reuse is_training = mode == tf.estimator.ModeKeys.TRAIN batch_size = tf.shape(features)[0] with slim.arg_scope(inception_v4.inception_v4_arg_scope()): net, _ = inception_v4.inception_v4(features, None, is_training=False) net = tf.squeeze(net, [1, 2]) inc_saver = tf.train.Saver(tf.global_variables('InceptionV4')) with tf.variable_scope('Generator'): feat = slim.fully_connected(net, FLAGS.mem_dim, activation_fn=None) feat = tf.nn.l2_normalize(feat, axis=1) sentence, ls = labels['sentence'], labels['len'] targets = sentence[:, 1:] sentence = sentence[:, :-1] ls -= 1 embedding = tf.get_variable(name='embedding', shape=[FLAGS.vocab_size, FLAGS.emb_dim], initializer=tf.random_uniform_initializer( -0.08, 0.08)) softmax_w = tf.matrix_transpose(embedding) softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size]) sentence = tf.nn.embedding_lookup(embedding, sentence) cell = tf.nn.rnn_cell.BasicLSTMCell(params.mem_dim) if is_training: cell = tf.nn.rnn_cell.DropoutWrapper(cell, params.keep_prob, params.keep_prob) zero_state = cell.zero_state(batch_size, tf.float32) _, state = cell(feat, zero_state) tf.get_variable_scope().reuse_variables() out, state = tf.nn.dynamic_rnn(cell, sentence, ls, state) out = tf.reshape(out, [-1, FLAGS.mem_dim]) logits = tf.nn.bias_add(tf.matmul(out, softmax_w), softmax_b) logits = tf.reshape(logits, [batch_size, -1, FLAGS.vocab_size]) predictions = tf.argmax(logits, axis=-1, output_type=tf.int32) mask = tf.sequence_mask(ls, tf.shape(sentence)[1]) targets = tf.boolean_mask(targets, mask) logits = tf.boolean_mask(logits, mask) loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets, logits=logits) loss = tf.reduce_mean(loss) opt = tf.train.AdamOptimizer(params.lr) if params.multi_gpu: opt = tf.contrib.estimator.TowerOptimizer(opt) grads = opt.compute_gradients(loss, tf.trainable_variables('Generator')) grads[2] = (tf.convert_to_tensor(grads[2][0]), grads[2][1]) for i in range(2, len(grads)): grads[i] = (grads[i][0] * 0.1, grads[i][1]) grads = transform_grads_fn(grads) train_op = opt.apply_gradients(grads, global_step=tf.train.get_global_step()) train_hooks = None if is_chief: with open('data/word_counts.txt', 'r') as f: dic = list(f) dic = [i.split()[0] for i in dic] end_id = dic.index('</S>') dic.append('<unk>') dic = tf.convert_to_tensor(dic) sentence = crop_sentence(predictions[0], end_id) sentence = tf.gather(dic, sentence) tf.summary.text('fake', sentence) tf.summary.image('im', features[None, 0]) for variable in tf.trainable_variables(): tf.summary.histogram(variable.op.name, variable) predictions = tf.boolean_mask(predictions, mask) metrics = {'acc': tf.metrics.accuracy(targets, predictions)} gen_var = tf.trainable_variables('Generator')[2:] gen_saver = tf.train.Saver(gen_var) def init_fn(scaffold, session): inc_saver.restore(session, FLAGS.inc_ckpt) if FLAGS.o2s_ckpt: gen_saver.restore(session, FLAGS.o2s_ckpt) scaffold = tf.train.Scaffold(init_fn=init_fn) return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op, scaffold=scaffold, training_hooks=train_hooks, eval_metric_ops=metrics)
def __init__(self, window_size, history_steps, scope): with tf.variable_scope('highlevel'): with tf.variable_scope(scope): self.visions = tf.placeholder( shape=[None, history_steps * window_size * window_size, 1], dtype=tf.float32) self.depths = tf.placeholder( shape=[None, history_steps * window_size * window_size, 1], dtype=tf.float32) visions = slim.flatten(self.visions) depths = slim.flatten(self.depths) hidden_visions = slim.fully_connected( inputs=visions, num_outputs=256, activation_fn=tf.nn.relu, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='vision_hidden') hidden_depths = slim.fully_connected( inputs=depths, num_outputs=256, activation_fn=tf.nn.relu, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='depth_hidden') vision_depth_feature = tf.concat( [hidden_visions, hidden_depths], 1) embed_feature = slim.fully_connected( inputs=vision_depth_feature, num_outputs=256, activation_fn=tf.nn.relu, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='embed') q_values = slim.fully_connected( inputs=embed_feature, num_outputs=1, activation_fn=None, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='qvalue') self.q_values = q_values # highlevel training if not scope.startswith('global'): self.chosen_objects = tf.placeholder(shape=[None], dtype=tf.int32) self.target_q_values = tf.placeholder(shape=[None], dtype=tf.float32) self.highlevel_lr = tf.placeholder(dtype=tf.float32) # objects_onehot = tf.one_hot(self.chosen_objects, num_labels, dtype=tf.float32) # q_values_for_chosen_objects = tf.reduce_sum(self.q_values*objects_onehot, axis=1) # td_error = tf.square(self.target_q_values - q_values_for_chosen_objects) td_error = tf.square(self.target_q_values - self.q_values) self.qvalue_loss = 0.5 * tf.reduce_mean(td_error) highlevel_params = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, 'highlevel/%s' % scope) gradients = tf.gradients(self.qvalue_loss, highlevel_params) norm_gradients, _ = tf.clip_by_global_norm(gradients, 40.0) highlevel_trainer = tf.train.RMSPropOptimizer( learning_rate=self.highlevel_lr) global_highlevel_params = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, 'highlevel/global/main') self.highlevel_update = highlevel_trainer.apply_gradients( zip(norm_gradients, global_highlevel_params))
def __init__(self, window_size, action_size, history_steps, scope): with tf.variable_scope('lowlevel'): with tf.variable_scope(scope): self.visions = tf.placeholder( shape=[None, history_steps * window_size * window_size, 1], dtype=tf.float32) self.depths = tf.placeholder( shape=[None, history_steps * window_size * window_size, 1], dtype=tf.float32) visions = slim.flatten(self.visions) depths = slim.flatten(self.depths) hidden_visions = slim.fully_connected( inputs=visions, num_outputs=256, activation_fn=tf.nn.relu, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='vision_hidden') hidden_depths = slim.fully_connected( inputs=depths, num_outputs=256, activation_fn=tf.nn.relu, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='depth_hidden') vision_depth_feature = tf.concat( [hidden_visions, hidden_depths], 1) embed_feature = slim.fully_connected( inputs=vision_depth_feature, num_outputs=256, activation_fn=tf.nn.relu, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='embed') # policy estimation hidden_policy = slim.fully_connected( inputs=embed_feature, num_outputs=20, activation_fn=tf.nn.relu, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='policy_hidden') self.policy = slim.fully_connected( inputs=hidden_policy, num_outputs=action_size, activation_fn=tf.nn.softmax, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='policy') # value estimation hidden_value = slim.fully_connected( inputs=embed_feature, num_outputs=20, activation_fn=tf.nn.relu, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='value_hidden') self.value = slim.fully_connected( inputs=hidden_value, num_outputs=1, activation_fn=None, weights_initializer=tf.contrib.layers.xavier_initializer(), biases_initializer=tf.zeros_initializer(), scope='value') # Lowlevel training if not scope.startswith('global'): self.chosen_actions = tf.placeholder(shape=[None], dtype=tf.int32) self.advantages = tf.placeholder(shape=[None], dtype=tf.float32) self.target_values = tf.placeholder(shape=[None], dtype=tf.float32) self.lowlevel_lr = tf.placeholder(dtype=tf.float32) actions_onehot = tf.one_hot(self.chosen_actions, action_size, dtype=tf.float32) log_policy = tf.log( tf.clip_by_value(self.policy, 0.000001, 0.999999)) log_pi_for_action = tf.reduce_sum(tf.multiply( log_policy, actions_onehot), axis=1) self.value_loss = 0.5 * tf.reduce_mean( tf.square(self.target_values - self.value)) self.policy_loss = -tf.reduce_mean( log_pi_for_action * self.advantages) self.entropy_loss = -tf.reduce_mean( tf.reduce_sum(self.policy * (-log_policy), axis=1)) self.lowlevel_loss = self.value_loss + self.policy_loss + 0.01 * self.entropy_loss local_lowlevel_params = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, 'lowlevel/%s' % scope) gradients = tf.gradients(self.lowlevel_loss, local_lowlevel_params) norm_gradients, _ = tf.clip_by_global_norm(gradients, 40.0) lowlevel_trainer = tf.train.RMSPropOptimizer( learning_rate=self.lowlevel_lr) global_lowlevel_params = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, 'lowlevel/global') self.lowlevel_update = lowlevel_trainer.apply_gradients( zip(norm_gradients, global_lowlevel_params))
def __init__(self, is_training): z_dim = FLAGS.z_dim self.input_image = tf.placeholder(dtype=tf.float32, shape=[None, 3, 128], name='input_image') self.input_label = tf.placeholder(dtype=tf.float32, shape=[None, 17], name='input_label') self.keep_prob = tf.placeholder(tf.float32) weights_regularizer = slim.l2_regularizer(FLAGS.weight_decay) flatten_hist = tf.reshape(self.input_image, [-1, 3 * 128]) # x = slim.fully_connected(flatten_hist, 256,weights_regularizer=weights_regularizer,scope='encoder/hist/fc_1') # x = slim.fully_connected(x, 256,weights_regularizer=weights_regularizer, scope='encoder/hist/fc_2') # x = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer, scope='encoder/hist/fc_3') # self.image_feature_encoder = x self.image_feature_encoder = flatten_hist #self.image_feature_encoder = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training) ############## Q(z|X) ############### # input_x = tf.concat([self.image_feature_encoder,self.input_label],1) # #input_x = tf.concat([self.input_nlcd,self.input_label],1) # #input_x = slim.dropout(input_x,keep_prob=self.keep_prob,is_training=is_training) # x = slim.fully_connected(input_x, 512,weights_regularizer=weights_regularizer,scope='encoder/fc_1') # x = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer, scope='encoder/fc_2') # # x = slim.fully_connected(x, 499,weights_regularizer=weights_regularizer, scope='encoder/fc_3') # #x = x+input_x # #dropout # #x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training) # self.z_miu = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='encoder/z_miu') # z_logvar = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='encoder/z_logvar') ############## Sample_z ############### # eps = tf.random_normal(shape=tf.shape(z_miu)) # sample_z = z_miu + tf.exp(z_logvar / 2) * eps #condition = tf.concat([self.input_nlcd,self.image_feature_encoder],1) condition = self.image_feature_encoder x = slim.fully_connected(condition, 512, weights_regularizer=weights_regularizer, scope='condition/fc_1') x = slim.fully_connected(x, 100, weights_regularizer=weights_regularizer, scope='condition/fc_2') # x = slim.fully_connected(x, 399,weights_regularizer=weights_regularizer, scope='condition/fc_3') #x = x+condition self.condition_miu = slim.fully_connected( x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer, scope='condition/z_miu') condition_logvar = slim.fully_connected( x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer, scope='condition/z_logvar') ############## Sample_z ############### eps = tf.random_normal(shape=tf.shape(self.condition_miu)) self.sample_z = self.condition_miu + tf.exp(condition_logvar / 2) * eps ############## P(X|z) ############### flatten_hist = tf.reshape(self.input_image, [-1, 3 * 128]) self.image_feature_decoder = flatten_hist input_x = tf.concat([self.image_feature_decoder, self.sample_z], 1) #x = tf.concat([self.input_nlcd,sample_z],1) x = slim.fully_connected(input_x, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_1') x = slim.fully_connected(x, 100, weights_regularizer=weights_regularizer, scope='decoder/fc_2') # x = slim.fully_connected(x, 499,weights_regularizer=weights_regularizer, scope='decoder/fc_3') #x = x+input_x #dropout x = slim.dropout(x, keep_prob=self.keep_prob, is_training=is_training) self.logits = slim.fully_connected( x, 17, activation_fn=None, 
weights_regularizer=weights_regularizer, scope='decoder/logits') self.output = tf.sigmoid(self.logits, name='decoder/output') # E[log P(X|z)] self.recon_loss = tf.reduce_mean( tf.reduce_sum( tf.nn.sigmoid_cross_entropy_with_logits( logits=self.logits, labels=self.input_label), 1)) tf.summary.scalar('recon_loss', self.recon_loss)
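# The sample_z line above is the reparameterization trick: draw eps ~ N(0, I)
# and shift/scale it by the predicted mean and log-variance, so gradients can
# flow through condition_miu and condition_logvar. A NumPy sketch:
import numpy as np

miu = np.array([0.5, -1.0])
logvar = np.array([0.0, 2.0])                 # logvar = log(sigma^2)
eps = np.random.randn(2)
sample_z = miu + np.exp(logvar / 2.0) * eps   # sigma = exp(logvar / 2)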
def get_embd(inputs, config, reuse=tf.AUTO_REUSE, scope='embd_extractor'): is_training_dropout = False is_training_bn = False with tf.variable_scope(scope, reuse=reuse): net = inputs end_points = {} if config['backbone_type'].startswith('resnet_v2_m'): arg_sc = modifiedResNet_v2.resnet_arg_scope( weight_decay=config['weight_decay'], batch_norm_decay=config['bn_decay']) with slim.arg_scope(arg_sc): if config['backbone_type'] == 'resnet_v2_m_50': net, end_points = modifiedResNet_v2.resnet_v2_m_50( net, is_training=is_training_bn, return_raw=True) elif config['backbone_type'] == 'resnet_v2_m_101': net, end_points = modifiedResNet_v2.resnet_v2_m_101( net, is_training=is_training_bn, return_raw=True) elif config['backbone_type'] == 'resnet_v2_m_152': net, end_points = modifiedResNet_v2.resnet_v2_m_152( net, is_training=is_training_bn, return_raw=True) elif config['backbone_type'] == 'resnet_v2_m_200': net, end_points = modifiedResNet_v2.resnet_v2_m_200( net, is_training=is_training_bn, return_raw=True) else: raise ValueError('Invalid backbone type.') elif config['backbone_type'].startswith('resnet_v2'): arg_sc = ResNet_v2.resnet_arg_scope( weight_decay=config['weight_decay'], batch_norm_decay=config['bn_decay']) with slim.arg_scope(arg_sc): if config['backbone_type'] == 'resnet_v2_50': net, end_points = ResNet_v2.resnet_v2_50( net, is_training=is_training_bn, return_raw=True) elif config['backbone_type'] == 'resnet_v2_101': net, end_points = ResNet_v2.resnet_v2_101( net, is_training=is_training_bn, return_raw=True) elif config['backbone_type'] == 'resnet_v2_152': net, end_points = ResNet_v2.resnet_v2_152( net, is_training=is_training_bn, return_raw=True) elif config['backbone_type'] == 'resnet_v2_200': net, end_points = ResNet_v2.resnet_v2_200( net, is_training=is_training_bn, return_raw=True) else: raise ValueError('Invalid backbone type.') if config['out_type'] == 'E': with slim.arg_scope(arg_sc): net = slim.batch_norm(net, activation_fn=None, is_training=is_training_bn) net = slim.dropout(net, keep_prob=config['keep_prob'], is_training=is_training_dropout) net = slim.flatten(net) net = slim.fully_connected(net, config['embd_size'], normalizer_fn=None, activation_fn=None) net = slim.batch_norm(net, scale=False, activation_fn=None, is_training=is_training_bn) end_points['embds'] = net else: raise ValueError('Invalid out type.') return net, end_points
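# A hedged usage sketch for get_embd: the config keys mirror the lookups in
# the function body, but the concrete values and the 112x112 input size are
# assumptions, not settings from the source.
import tensorflow as tf

config = {'backbone_type': 'resnet_v2_50',
          'weight_decay': 5e-4,
          'bn_decay': 0.9,
          'out_type': 'E',
          'keep_prob': 0.4,
          'embd_size': 512}
images = tf.placeholder(tf.float32, [None, 112, 112, 3], name='images')
embds, end_points = get_embd(images, config)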
def init_mobilenet_v1(self, param): resolution_multiplier = param['resolution_multiplier'] width_multiplier = param['width_multiplier'] depth_multiplier = param['depth_multiplier'] if 'input_dim' in param: input_dim = param['input_dim'] else: H_W = int(224 * resolution_multiplier) input_dim = [1, H_W, H_W, 3] if 'output_dim' in param: out_dim = param['output_dim'] else: out_dim = 1000 # Define the resolution based on resolution multiplier # [1, 0.858, 0.715, 0.572 ] = [224, 192, 160, 128] input = tf.placeholder(tf.float32, input_dim, name='input_tensor') layer_1_conv = slim.convolution2d(input, round(32 * width_multiplier), [3, 3], stride=2, padding='SAME', scope='conv_1') #layer_1_bn = slim.batch_norm(layer_1_conv, scope='conv_1/batch_norm') layer_2_dw = self.dw_separable(layer_1_conv, 64, width_multiplier, depth_multiplier, sc='conv_ds_2') layer_3_dw = self.dw_separable(layer_2_dw, 128, width_multiplier, depth_multiplier, downsample=True, sc='conv_ds_3') layer_4_dw = self.dw_separable(layer_3_dw, 128, width_multiplier, depth_multiplier, sc='conv_ds_4') layer_5_dw = self.dw_separable(layer_4_dw, 256, width_multiplier, depth_multiplier, downsample=True, sc='conv_ds_5') layer_6_dw = self.dw_separable(layer_5_dw, 256, width_multiplier, depth_multiplier, sc='conv_ds_6') layer_7_dw = self.dw_separable(layer_6_dw, 512, width_multiplier, depth_multiplier, downsample=True, sc='conv_ds_7') # repeatable layers can be put inside a loop layer_8_12_dw = layer_7_dw for i in range(8, 13): layer_8_12_dw = self.dw_separable(layer_8_12_dw, 512, width_multiplier, depth_multiplier, sc='conv_ds_' + str(i)) layer_13_dw = self.dw_separable(layer_8_12_dw, 1024, width_multiplier, depth_multiplier, downsample=True, sc='conv_ds_13') layer_14_dw = self.dw_separable(layer_13_dw, 1024, width_multiplier, depth_multiplier, sc='conv_ds_14') # Pool and reduce to output dimension global_pool = tf.reduce_mean(layer_14_dw, [1, 2], keep_dims=True, name='global_pool') spatial_reduction = tf.squeeze(global_pool, [1, 2], name='SpatialSqueeze') logits = slim.fully_connected(spatial_reduction, out_dim, activation_fn=None, scope='fc_16') output = slim.softmax(logits, scope='Predictions') output = tf.identity(output, name="output_tensor") return {'input': input, 'output': output, 'logits': logits}
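# How the MobileNet-V1 multipliers interact, as plain arithmetic:
# resolution_multiplier scales the input side length, width_multiplier scales
# channel counts, and depth_multiplier is passed through to the separable convs.
for r in [1.0, 0.858, 0.715, 0.572]:
    print(int(224 * r))        # input side -> 224, 192, 160, 128
for w in [1.0, 0.75, 0.5, 0.25]:
    print(round(32 * w))       # first-layer filters -> 32, 24, 16, 8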
# (These first lines run once per class: `datas_t` and `idx` come from the
# enclosing loop over `c_name`.)
labels_t = np.zeros([len(datas_t), len(c_name)])
labels_t[:, idx] = 1
train_data.append(datas_t[:30])
train_data_label.append(labels_t[:30])
test_data.append(datas_t[30:])
test_data_label.append(labels_t[30:])

train_data = np.concatenate(train_data)
train_data_label = np.concatenate(train_data_label)
test_data = np.concatenate(test_data)
test_data_label = np.concatenate(test_data_label)

x = tf.placeholder(tf.float32, [None, 4], name="input_x")
label = tf.placeholder(tf.float32, [None, 3], name="input_y")

# With the sigmoid activation function, results may not be ideal
net = slim.fully_connected(x, 4, activation_fn=tf.nn.sigmoid, scope='full1',
                           reuse=False)
net = tf.contrib.layers.batch_norm(net)
net = slim.fully_connected(net, 8, activation_fn=tf.nn.sigmoid, scope='full2',
                           reuse=False)
net = tf.contrib.layers.batch_norm(net)
net = slim.fully_connected(net, 8, activation_fn=tf.nn.sigmoid, scope='full3',
                           reuse=False)
net = tf.contrib.layers.batch_norm(net)
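# The snippet above stops after 'full3'. A minimal sketch (an assumption, not
# the source's code) of the classification head and loss that would typically
# follow, matching the 3-way `label` placeholder:
logits = slim.fully_connected(net, 3, activation_fn=None, scope='logits')
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=label, logits=logits))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1)), tf.float32))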
def build_fastrcnn(self, feature_to_cropped, rois, img_shape):
    with tf.variable_scope('Fast-RCNN'):
        # 5. ROI Pooling
        with tf.variable_scope('rois_pooling'):
            pooled_features = self.roi_pooling(feature_maps=feature_to_cropped,
                                               rois=rois,
                                               img_shape=img_shape)

        # 6. inference on rois in Fast-RCNN to obtain fc_flatten features
        if self.base_network_name.startswith('resnet'):
            fc_flatten = resnet.restnet_head(input=pooled_features,
                                             is_training=self.is_training,
                                             scope_name=self.base_network_name)
        elif self.base_network_name.startswith('MobilenetV2'):
            fc_flatten = mobilenet_v2.mobilenetv2_head(inputs=pooled_features,
                                                       is_training=self.is_training)
        else:
            raise NotImplementedError('only support resnet and mobilenet')

        # 7. cls and reg in Fast-RCNN
        with tf.variable_scope('horizen_branch'):
            with slim.arg_scope([slim.fully_connected],
                                weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
                cls_score_h = slim.fully_connected(fc_flatten,
                                                   num_outputs=cfgs.CLASS_NUM + 1,
                                                   weights_initializer=cfgs.INITIALIZER,
                                                   activation_fn=None,
                                                   trainable=self.is_training,
                                                   scope='cls_fc_h')
                bbox_pred_h = slim.fully_connected(fc_flatten,
                                                   num_outputs=(cfgs.CLASS_NUM + 1) * 4,
                                                   weights_initializer=cfgs.BBOX_INITIALIZER,
                                                   activation_fn=None,
                                                   trainable=self.is_training,
                                                   scope='reg_fc_h')
                # for convenience; this also produces (cls_num + 1) bboxes
                cls_score_h = tf.reshape(cls_score_h, [-1, cfgs.CLASS_NUM + 1])
                bbox_pred_h = tf.reshape(bbox_pred_h, [-1, 4 * (cfgs.CLASS_NUM + 1)])

        with tf.variable_scope('rotation_branch'):
            with slim.arg_scope([slim.fully_connected],
                                weights_regularizer=slim.l2_regularizer(cfgs.WEIGHT_DECAY)):
                cls_score_r = slim.fully_connected(fc_flatten,
                                                   num_outputs=cfgs.CLASS_NUM + 1,
                                                   weights_initializer=cfgs.INITIALIZER,
                                                   activation_fn=None,
                                                   trainable=self.is_training,
                                                   scope='cls_fc_r')
                bbox_pred_r = slim.fully_connected(fc_flatten,
                                                   num_outputs=(cfgs.CLASS_NUM + 1) * 5,
                                                   weights_initializer=cfgs.BBOX_INITIALIZER,
                                                   activation_fn=None,
                                                   trainable=self.is_training,
                                                   scope='reg_fc_r')
                # for convenience; this also produces (cls_num + 1) bboxes
                cls_score_r = tf.reshape(cls_score_r, [-1, cfgs.CLASS_NUM + 1])
                bbox_pred_r = tf.reshape(bbox_pred_r, [-1, 5 * (cfgs.CLASS_NUM + 1)])

        return bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r
def netgate(bev_proposal_rois, img_proposal_rois, is_training): ## inputs are bev_rois and img_rois and model config, is_training, and ## it will return the netgate output #size of the roi proposals: #proposal_roi_crop_size = [rpn_config.rpn_proposal_roi_crop_size] * 2 print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!") print("bev_proposal_rois is:", bev_proposal_rois) l2_weight_decay_ng = 0.0005 keep_prob_ng = 0.5 num_of_anchors = bev_proposal_rois.get_shape().as_list()[0] ## not sure whether we need this ## set up the regularizer if l2_weight_decay_ng > 0: weights_regularizer_ng = slim.l2_regularizer(l2_weight_decay_ng) else: weights_regularizer_ng = None fused_data = [] #flatten the bev and img rois to be 1*9 bev_roi_flat = slim.flatten(bev_proposal_rois) img_roi_flat = slim.flatten(img_proposal_rois) ##fc layer for bev ##issue: what is the output size of the fully connected layer? input is 3*3 => 1*9 layer_sizes_1 = 9 layer_sizes_2 = 18 layer_sizes_3 = 2 ##issue: what is the def of scope? with slim.arg_scope([slim.fully_connected], weights_regularizer=weights_regularizer_ng): bev_fc_layer = slim.fully_connected(bev_roi_flat, layer_sizes_1, scope='fc_bev') bev_fc_drop = slim.dropout(bev_fc_layer, keep_prob=keep_prob_ng, is_training=is_training, scope='fc_bev') ##fc layer for img img_fc_layer = slim.fully_connected(img_roi_flat, layer_sizes_1, scope='fc_img') img_fc_drop = slim.dropout(img_fc_layer, keep_prob=keep_prob_ng, is_training=is_training, scope='fc_img') ##concatenate bird view and image data concat_bev_img = tf.concat([bev_fc_drop, img_fc_drop], axis=1) ##TODO: another fc layer to output a reduced dimension concat_fc_layer = slim.fully_connected(concat_bev_img, layer_sizes_2, scope='fc_concat') concat_fc_drop = slim.dropout(concat_fc_layer, keep_prob=keep_prob_ng, is_training=is_training, scope='fc_concat') ##issue: what is Rectified Linear Unit in the Netgate paper? Ans: ReLu ##TODO: another fc layer to output a 2-D scalar vector s_b, s_i scalar_fc_layer = slim.fully_connected(concat_fc_drop, layer_sizes_3, scope='fc_scalar') scalar_fc_drop = slim.dropout(scalar_fc_layer, keep_prob=keep_prob_ng, is_training=is_training, scope='fc_scalar') ##TODO: fused data= s_b * bev_fc_drop + s_i * img_fc_drop scalar_0 = scalar_fc_drop[:, 0] scalar_1 = scalar_fc_drop[:, 1] scalar_0 = tf.expand_dims(scalar_0, axis=-1) scalar_1 = tf.expand_dims(scalar_1, axis=-1) fused_data = tf.multiply(scalar_0, bev_fc_drop) + tf.multiply( scalar_1, img_fc_drop) ##v_1 version, reshape to [80k,1,1,512] # fused_data=tf.expand_dims(fused_data,axis=1) # fused_data=tf.expand_dims(fused_data,axis=2) ##v_2 version, reshape to [80k,3,3,1] fused_data = tf.reshape(fused_data, [-1, 3, 3, 1]) # reshape_axis=fused_data.get_shape() # reshape_axis=list(map(int, reshape_axis)) # fused_data=tf.reshape(fused_data,[?,1,1,512]) # bev_result=tf.expand_dims(tf.multiply(scalar_fc_drop[0,0],bev_fc_drop[0]),0) # img_result=tf.expand_dims(tf.multiply(scalar_fc_drop[0,0],img_fc_drop[0]),0) # fused_result=tf.add(bev_result,img_result) # for i in range(1,num_of_anchors): # bev_res=tf.expand_dims(tf.multiply(scalar_fc_drop[i,0],bev_fc_drop[i]),0) # img_res=tf.expand_dims(tf.multiply(scalar_fc_drop[i,0],img_fc_drop[i]),0) # fused_res=tf.add(bev_res,img_res) # fused_result=packup_tensors(fused_result,fused_res) netgate_out = fused_data return netgate_out
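# The core of the NetGate fusion in isolation: the network predicts two
# scalars (s_b, s_i) per anchor that gate the two modality features before
# they are summed. A NumPy rendition with toy shapes:
import numpy as np

bev_feat = np.ones((4, 9))                 # [num_anchors, 9] bev features
img_feat = 2 * np.ones((4, 9))             # [num_anchors, 9] image features
scalars = np.tile([0.25, 0.75], (4, 1))    # [num_anchors, 2] -> (s_b, s_i)
fused = scalars[:, :1] * bev_feat + scalars[:, 1:] * img_feat
print(fused[0])                            # 0.25*1 + 0.75*2 = 1.75 everywhere
fused = fused.reshape(-1, 3, 3, 1)         # same reshape as the v_2 version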
def __init__(self, scope, sess, feature_size, globalAC=None): self.sess = sess self.actor_optimizer = tf.train.AdamOptimizer( learning_rate=LR_A, name='RMSPropA') # optimizer for the actor self.critic_optimizer = tf.train.AdamOptimizer( learning_rate=LR_C, name='RMSPropC') # optimizer for the critic with tf.variable_scope(scope): self.inputs = tf.placeholder(shape=[None, feature_size], dtype=tf.float32) l_ac = slim.fully_connected(self.inputs, 256, activation_fn=tf.nn.relu6, biases_initializer=None) self.lstm_cell = tf.nn.rnn_cell.LSTMCell(num_units=256, state_is_tuple=True) c_init = np.zeros((1, self.lstm_cell.state_size.c), np.float32) h_init = np.zeros((1, self.lstm_cell.state_size.h), np.float32) self.state_init = [c_init, h_init] c_in = tf.placeholder(tf.float32, [1, self.lstm_cell.state_size.c], name='c_in') h_in = tf.placeholder(tf.float32, [1, self.lstm_cell.state_size.h], name='h_in') self.state_in = (c_in, h_in) rnn_in = tf.expand_dims(l_ac, [0]) state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in) lstm_outputs, lstm_state = tf.nn.dynamic_rnn( self.lstm_cell, rnn_in, initial_state=state_in, time_major=False) lstm_c, lstm_h = lstm_state self.state_out = (lstm_c[:1, :], lstm_h[:1, :]) rnn_out = tf.reshape(lstm_outputs, [-1, 256]) w_init = tf.random_normal_initializer(0., .1) tanh_init = tf.random_normal_initializer(0., 0.001) with tf.variable_scope('actor'): #Tanh activation function should be initialized with lower weight due to tanh function self.mu = tf.layers.dense(rnn_out, N_A, tf.nn.tanh, kernel_initializer=tanh_init, name='mu') # estimated action value self.sigma = tf.layers.dense( rnn_out, N_A, tf.nn.softplus, kernel_initializer=w_init, name='sigma') # estimated variance with tf.variable_scope('critic'): self.v = tf.layers.dense(rnn_out, 1, kernel_initializer=w_init, name='v') # estimated value for state self.a_params = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) + tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor') self.c_params = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) + tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/critic') if scope != GLOBAL_NET_SCOPE: # get global network self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget') self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A') self.td = tf.subtract(self.v_target, self.v, name='TD_error') self.c_loss = tf.reduce_mean(tf.square(self.td)) self.mu, self.sigma = tf.squeeze( self.mu * 1), tf.squeeze(self.sigma + 0.1) normal_dist = tf.contrib.distributions.Normal( self.mu, self.sigma) log_prob = normal_dist.log_prob(self.a_his) exp_v = log_prob * self.td entropy = normal_dist.entropy() # encourage exploration self.exp_v = ENTROPY_BETA * entropy + exp_v self.a_loss = tf.reduce_mean( (-self.exp_v) + (self.a_his * 0.01)**2) #this should penalize big changes #self.a_loss = tf.reduce_mean(-self.exp_v) self.A = tf.clip_by_value( tf.squeeze(normal_dist.sample(1), axis=0), A_BOUND[0], A_BOUND[1]) # sample a action from distribution self.a_grads = tf.gradients( self.a_loss, self.a_params ) # calculate gradients for the network weights self.c_grads = tf.gradients(self.c_loss, self.c_params) self.pull_a_params_op = [ l_p.assign(g_p) for l_p, g_p in zip(self.a_params, globalAC.a_params) ] self.pull_c_params_op = [ l_p.assign(g_p) for l_p, g_p in zip(self.c_params, globalAC.c_params) ] self.update_a_op = self.actor_optimizer.apply_gradients( zip(self.a_grads, globalAC.a_params)) self.update_c_op = 
self.critic_optimizer.apply_gradients( zip(self.c_grads, globalAC.c_params))
def Discriminator_separable_rotations(
        poses,
        shapes,
        weight_decay,
):
    """
    23 Discriminators on each joint + 1 for all joints + 1 for shape.
    To share the params on rotations, this treats the 23 rotation matrices
    as a "vertical image":
    Do 1x1 conv, then send off to 23 independent classifiers.

    Input:
    - poses: N x 23 x 1 x 9, NHWC ALWAYS!!
    - shapes: N x 10
    - weight_decay: float

    Outputs:
    - prediction: N x (1+23) or N x (1+23+1) if do_joint is on.
    - variables: tf variables
    """
    data_format = "NHWC"
    with tf.name_scope(None, "Discriminator_sep_rotations", [poses, shapes]):
        with tf.variable_scope("D") as scope:
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_regularizer=slim.l2_regularizer(weight_decay)):
                with slim.arg_scope([slim.conv2d], data_format=data_format):
                    poses = slim.conv2d(poses, 32, [1, 1], scope='D_conv1')
                    poses = slim.conv2d(poses, 32, [1, 1], scope='D_conv2')
                    theta_out = []
                    for i in range(0, 23):
                        theta_out.append(
                            slim.fully_connected(poses[:, i, :, :], 1,
                                                 activation_fn=None,
                                                 scope="pose_out_j%d" % i))
                    theta_out_all = tf.squeeze(tf.stack(theta_out, axis=1))

                    # Do shape on its own:
                    shapes = slim.stack(shapes, slim.fully_connected, [10, 5],
                                        scope="shape_fc1")
                    shape_out = slim.fully_connected(shapes, 1,
                                                     activation_fn=None,
                                                     scope="shape_final")

                    # Compute joint correlation prior
                    nz_feat = 1024
                    poses_all = slim.flatten(poses, scope='vectorize')
                    poses_all = slim.fully_connected(poses_all, nz_feat,
                                                     scope="D_alljoints_fc1")
                    poses_all = slim.fully_connected(poses_all, nz_feat,
                                                     scope="D_alljoints_fc2")
                    poses_all_out = slim.fully_connected(poses_all, 1,
                                                         activation_fn=None,
                                                         scope="D_alljoints_out")
                    out = tf.concat([theta_out_all, poses_all_out, shape_out], 1)

            variables = tf.contrib.framework.get_variables(scope)
            return out, variables
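# Shape walkthrough for the rotation discriminator above, in NumPy (N = 2):
# the 1x1 convs share weights across the 23 "rows" of the pose image, then
# each joint gets its own 1-logit classifier.
import numpy as np

N = 2
poses = np.zeros((N, 23, 1, 9))         # one flattened 3x3 rotation per joint
after_conv = np.zeros((N, 23, 1, 32))   # after D_conv1/D_conv2 (1x1, shared)
theta_out_all = np.zeros((N, 23))       # one logit per joint after squeeze
poses_all = after_conv.reshape(N, -1)   # (N, 23*1*32) = (N, 736) joint prior input
out = np.zeros((N, 23 + 1 + 1))         # per-joint + all-joints + shape logits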
def _build_network_graph(self):
    with slim.arg_scope(
            [slim.conv2d],
            activation_fn=tf.nn.relu,
            padding='SAME',
            weights_initializer=tf.truncated_normal_initializer(self.mu, self.sigma),
            weights_regularizer=slim.l2_regularizer(0.0005)):
        # 112 * 112 * 64
        net = slim.conv2d(self.x, 64, [7, 7], stride=2, scope='conv1')
        # 56 * 56 * 64
        net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        temp = net

        # First residual block
        net = slim.conv2d(net, 64, [3, 3], scope='conv2_1_1')
        net = slim.conv2d(net, 64, [3, 3], scope='conv2_1_2')
        # Add the residual
        net = tf.nn.relu(tf.add(temp, net))
        temp = net
        # Residual block
        net = slim.conv2d(net, 64, [3, 3], scope='conv2_2_1')
        net = slim.conv2d(net, 64, [3, 3], scope='conv2_2_2')
        # Add the residual
        net = tf.nn.relu(tf.add(temp, net))
        temp = net

        # 28 * 28 * 128
        temp = slim.conv2d(temp, 128, [1, 1], stride=2, scope='r1')
        # Second residual block
        net = slim.conv2d(net, 128, [3, 3], stride=2, scope='conv3_1_1')
        net = slim.conv2d(net, 128, [3, 3], scope='conv3_1_2')
        # Add the residual
        net = tf.nn.relu(tf.add(temp, net))
        temp = net
        # Residual block
        net = slim.conv2d(net, 128, [3, 3], scope='conv3_2_1')
        net = slim.conv2d(net, 128, [3, 3], scope='conv3_2_2')
        # Add the residual
        net = tf.nn.relu(tf.add(temp, net))
        temp = net

        # 14 * 14 * 256
        temp = slim.conv2d(temp, 256, [1, 1], stride=2, scope='r2')
        # Third residual block
        net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv4_1_1')
        net = slim.conv2d(net, 256, [3, 3], scope='conv4_1_2')
        # Add the residual
        net = tf.nn.relu(tf.add(temp, net))
        temp = net
        # Residual block
        net = slim.conv2d(net, 256, [3, 3], scope='conv4_2_1')
        net = slim.conv2d(net, 256, [3, 3], scope='conv4_2_2')
        # Add the residual
        net = tf.nn.relu(tf.add(temp, net))
        temp = net

        # 7 * 7 * 512
        temp = slim.conv2d(temp, 512, [1, 1], stride=2, scope='r3')
        # Fourth residual block
        net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv5_1_1')
        net = slim.conv2d(net, 512, [3, 3], scope='conv5_1_2')
        # Add the residual
        net = tf.nn.relu(tf.add(temp, net))
        temp = net
        # Residual block
        net = slim.conv2d(net, 512, [3, 3], scope='conv5_2_1')
        net = slim.conv2d(net, 512, [3, 3], scope='conv5_2_2')
        # Add the residual
        net = tf.nn.relu(tf.add(temp, net))

        net = slim.avg_pool2d(net, [7, 7], stride=1, scope='pool2')
        net = slim.flatten(net, scope='flatten')
        fc1 = slim.fully_connected(net, 1000, scope='fc1')
        self.logits = slim.fully_connected(fc1, 101, activation_fn=None, scope='fc2')
        self.y_predicted = tf.nn.softmax(self.logits)
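# The repeated pattern above (two 3x3 convs plus a 1x1 projection shortcut
# when downsampling) can be factored into one helper. A sketch under the same
# slim.arg_scope conventions, not code from the source:
def residual_block(net, num_outputs, downsample, scope):
    stride = 2 if downsample else 1
    shortcut = net
    if downsample:
        # projection shortcut matches the new spatial size and channel count
        shortcut = slim.conv2d(net, num_outputs, [1, 1], stride=2,
                               scope=scope + '/proj')
    net = slim.conv2d(net, num_outputs, [3, 3], stride=stride,
                      scope=scope + '/a')
    net = slim.conv2d(net, num_outputs, [3, 3], scope=scope + '/b')
    return tf.nn.relu(tf.add(shortcut, net))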
def __init__(self, scope, trainer): with tf.variable_scope(scope): self.input_image = tf.placeholder( shape=[None, a3c_constants.OBSERVATION_SIZE], dtype=tf.float32) #self.input_goals = tf.placeholder(shape=[None, a3c_constants.GOAL_SIZE], dtype=tf.float32) self.input_vars = tf.placeholder( shape=[None, a3c_constants.VAR_SIZE], dtype=tf.float32) self.imageIn = tf.reshape(self.input_image, shape=[ -1, a3c_constants.FRAME_SIZE[0], a3c_constants.FRAME_SIZE[1], a3c_constants.FRAME_SIZE[2] ]) ''' # Input and visual encoding layers self.conv1 = slim.conv2d(activation_fn=tf.nn.elu, inputs=self.imageIn, num_outputs=16, kernel_size=[8, 8], stride=[4, 4], padding='VALID') self.conv2 = slim.conv2d(activation_fn=tf.nn.elu, inputs=self.conv1, num_outputs=32, kernel_size=[4, 4], stride=[2, 2], padding='VALID') hidden = slim.fully_connected(slim.flatten(self.conv2), 256, activation_fn=tf.nn.elu) ''' self.conv1 = slim.conv2d(activation_fn=tf.nn.elu, inputs=self.imageIn, num_outputs=32, kernel_size=[8, 8], stride=[4, 4], padding='VALID') self.conv2 = slim.conv2d(activation_fn=tf.nn.elu, inputs=self.conv1, num_outputs=64, kernel_size=[4, 4], stride=[2, 2], padding='VALID') self.conv3 = slim.conv2d(activation_fn=tf.nn.elu, inputs=self.conv1, num_outputs=64, kernel_size=[3, 3], stride=[1, 1], padding='VALID') self.conv_flat = slim.flatten(self.conv3) #self.concat = tf.concat([self.conv_flat, self.input_goals, self.input_vars], 1) self.concat = tf.concat([self.conv_flat, self.input_vars], 1) self.hidden = slim.fully_connected(self.concat, 1024, activation_fn=tf.nn.elu) # Recurrent network for temporal dependencies lstm_cell = tf.contrib.rnn.BasicLSTMCell(1024, state_is_tuple=True) c_init = np.zeros((1, lstm_cell.state_size.c), np.float32) h_init = np.zeros((1, lstm_cell.state_size.h), np.float32) self.state_init = [c_init, h_init] c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c]) h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h]) self.state_in = (c_in, h_in) rnn_in = tf.expand_dims(self.hidden, [0]) step_size = tf.shape(self.imageIn)[:1] state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in) lstm_outputs, lstm_state = tf.nn.dynamic_rnn( lstm_cell, rnn_in, initial_state=state_in, sequence_length=step_size, time_major=False) lstm_c, lstm_h = lstm_state self.state_out = (lstm_c[:1, :], lstm_h[:1, :]) rnn_out = tf.reshape(lstm_outputs, [-1, 1024]) # Output layers for policy and value estimations self.policy = slim.fully_connected( rnn_out, a3c_constants.ACTIONS_SIZE, activation_fn=tf.nn.softmax, weights_initializer=a3c_helpers.normalized_columns_initializer( 0.01), biases_initializer=None) self.value = slim.fully_connected( rnn_out, 1, activation_fn=None, weights_initializer=a3c_helpers.normalized_columns_initializer( 1.0), biases_initializer=None) # Only the worker network need ops for loss functions and gradient updating. 
if scope != 'global': self.actions = tf.placeholder(shape=[None], dtype=tf.int32) self.actions_onehot = tf.one_hot(self.actions, a3c_constants.ACTIONS_SIZE, dtype=tf.float32) self.target_v = tf.placeholder(shape=[None], dtype=tf.float32) self.advantages = tf.placeholder(shape=[None], dtype=tf.float32) self.responsible_outputs = tf.reduce_sum( self.policy * self.actions_onehot, [1]) # Loss functions self.value_loss = 0.5 * tf.reduce_sum( tf.square(self.target_v - tf.reshape(self.value, [-1]))) self.entropy = -tf.reduce_sum( self.policy * tf.log(self.policy)) self.policy_loss = -tf.reduce_sum( tf.log(self.responsible_outputs) * self.advantages) self.loss = 0.5 * self.value_loss + self.policy_loss - self.entropy * 0.01 # Get gradients from local network using local losses local_vars = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope) self.gradients = tf.gradients(self.loss, local_vars) self.var_norms = tf.global_norm(local_vars) grads, self.grad_norms = tf.clip_by_global_norm( self.gradients, 40.0) # Apply local gradients to global network global_vars = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, 'global') self.apply_grads = trainer.apply_gradients( zip(grads, global_vars))
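# target_q_values (and, in the actor-critic variants below, target values and
# advantages) are fed from outside the graph. The usual helper that produces
# such targets (an assumption about the training loop, not code from this
# file) is a reversed discounted cumulative sum:
import numpy as np

def discount(rewards, gamma=0.99):
    out = np.zeros_like(rewards, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        out[t] = running
    return out

print(discount([0.0, 0.0, 1.0]))   # -> [0.9801, 0.99, 1.0]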
def inception_resnet(inputs, is_training=True, dropout_keep_prob=0.8, bottleneck_layer_size=128, reuse=None, scope='InceptionResnet'): """Creates the Inception Resnet V1 model. Args: inputs: a 4-D tensor of size [batch_size, height, width, 3]. num_classes: number of predicted classes. is_training: whether is training or not. dropout_keep_prob: float, the fraction to keep before final layer. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: logits: the logits outputs of the model. end_points: the set of end_points from the inception model. """ end_points = {} with tf.variable_scope(scope, 'InceptionResnet', [inputs], reuse=reuse): with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # 149 x 149 x 32 net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') end_points['Conv2d_1a_3x3'] = net # 147 x 147 x 32 net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3') end_points['Conv2d_2a_3x3'] = net # 147 x 147 x 64 net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') end_points['Conv2d_2b_3x3'] = net # 73 x 73 x 64 net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3') end_points['MaxPool_3a_3x3'] = net # 73 x 73 x 80 net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1') end_points['Conv2d_3b_1x1'] = net # 71 x 71 x 192 net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3') end_points['Conv2d_4a_3x3'] = net # 35 x 35 x 256 net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', scope='Conv2d_4b_3x3') end_points['Conv2d_4b_3x3'] = net # 5 x Inception-resnet-A net = slim.repeat(net, 5, block35, scale=0.17) # Reduction-A with tf.variable_scope('Mixed_6a'): net = reduction_a(net, 192, 192, 256, 384) end_points['Mixed_6a'] = net # 10 x Inception-Resnet-B net = slim.repeat(net, 10, block17, scale=0.10) # Reduction-B with tf.variable_scope('Mixed_7a'): net = reduction_b(net) end_points['Mixed_7a'] = net # 5 x Inception-Resnet-C net = slim.repeat(net, 5, block8, scale=0.20) net = block8(net, activation_fn=None) with tf.variable_scope('Logits'): end_points['PrePool'] = net # pylint: disable=no-member net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', scope='AvgPool_1a_8x8') net = slim.flatten(net) net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='Dropout') end_points['PreLogitsFlatten'] = net net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, scope='Bottleneck', reuse=False) return net, end_points
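# Typical FaceNet-style use of the bottleneck this returns: build the graph,
# then L2-normalize the output to get unit-length embeddings. A usage sketch;
# the 160x160 input size is an assumption:
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 160, 160, 3], name='images')
prelogits, _ = inception_resnet(images, is_training=False,
                                bottleneck_layer_size=128)
embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')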
def apply_lstm(train_size, data, captions, weights, dim_hidden, dim_emb, dim_embed2, lstm, n_lstm_steps, n_words, wordtoix, embedding, class_inf, net): data = da.transform_data(data, is_training=True) data = apply_network_img(data, tf.shape(data)[0], is_training_net=False, is_training_drop=False, net=net) mid = data.get_shape()[1] data = tf.reshape(data, [-1, mid * mid, int(data.shape[3])]) state = get_initial_lstm(tf.reduce_mean(data, 1), dim_hidden, True) current_caption_ind = tf.contrib.lookup.string_to_index(captions, wordtoix) embedding_map = tf.get_variable( name="map", shape=[n_words, dim_embed2], initializer=tf.constant_initializer(embedding), trainable=False) word_embedding = tf.zeros([tf.shape(data)[0], dim_embed2]) total_loss = 0.0 with tf.variable_scope("RNN", reuse=tf.AUTO_REUSE): image_embedding, att_weights = att.attention(data, state, int(data.shape[2]), Flags.ratio, 1, mid) current_embedding = tf.concat([image_embedding, word_embedding], axis=-1) for i in range(1, n_lstm_steps - 1): _, state = lstm(current_embedding, state) logits = slim.dropout(state, 0.5, is_training=True, scope='drop') with tf.variable_scope("Caption", reuse=tf.AUTO_REUSE): #perform a softmax classification to generate the next word in the caption if class_inf == 1: logit = slim.fully_connected(logits, n_words, activation_fn=None, normalizer_fn=None, scope='word_encoding') else: logit = slim.fully_connected(logits, dim_embed2, activation_fn=None, normalizer_fn=None, scope='state_encoding') context = slim.fully_connected(image_embedding, dim_embed2, activation_fn=None, normalizer_fn=None, scope='context') logit += context logit = tf.nn.tanh(logit) logit = slim.dropout(logit, 0.5, is_training=True, scope='drop_combo') logit = slim.fully_connected(logit, n_words, activation_fn=None, normalizer_fn=None, scope='word_encoding') ## onehot = tf.one_hot( tf.squeeze( tf.slice(current_caption_ind, [0, i], [tf.shape(data)[0], 1]), 1), n_words) # if i == 1: one_hot_path = onehot probs_path = tf.nn.softmax(logit) else: one_hot_path += onehot probs_path += tf.nn.softmax(logit) weight = tf.matmul(onehot, tf.cast(tf.expand_dims(weights, 1), tf.float32), transpose_b=False) weight = train_size / weight weight = tf.squeeze(weight / tf.reduce_mean(weight)) # xentropy = tf.losses.softmax_cross_entropy( onehot, logits=logit, weights=1.0, loss_collection=None, reduction=tf.losses.Reduction.NONE) total_loss += (xentropy) if i == 2: error = tf.argmax(logit, 1) gt = tf.squeeze( tf.slice(current_caption_ind, [0, i], [tf.shape(data)[0], 1]), 1) word_embedding = tf.nn.embedding_lookup( embedding_map, tf.squeeze( tf.slice(current_caption_ind, [0, i], [tf.shape(data)[0], 1]), 1)) image_embedding, att_weights = att.attention( data, state, int(data.shape[2]), Flags.ratio, True, mid) current_embedding = tf.concat([image_embedding, word_embedding], axis=-1) one_hot_path = tf.nn.l2_normalize(one_hot_path, 1) probs_path = tf.nn.l2_normalize(probs_path, 1) total_loss = tf.reduce_mean(total_loss) / (n_lstm_steps - 2) path_loss = tf.losses.cosine_distance(one_hot_path, probs_path, axis=1, loss_collection=None) total_loss += path_loss return total_loss, error, gt
def main(args): network = importlib.import_module(args.model_def) image_size = (args.image_size, args.image_size) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) stat_file_name = os.path.join(log_dir, 'stat.h5') # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory src_path,_ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) random.seed(args.seed) dataset = facenet.get_dataset(args.data_dir) if args.filter_filename: dataset = filter_dataset(dataset, os.path.expanduser(args.filter_filename), args.filter_percentile, args.filter_min_nrof_images_per_class) if args.validation_set_split_ratio>0.0: train_set, val_set = facenet.split_dataset(dataset, args.validation_set_split_ratio, args.min_nrof_val_images_per_class, 'SPLIT_IMAGES') else: train_set, val_set = dataset, [] nrof_classes = len(train_set) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) assert len(image_list)>0, 'The training set should not be empty' val_image_list, val_label_list = facenet.get_image_paths_and_labels(val_set) # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many(args.batch_size*args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') nrof_preprocess_threads = 4 input_queue = data_flow_ops.FIFOQueue(capacity=2000000, dtypes=[tf.string, tf.int32, tf.int32], shapes=[(1,), (1,), (1,)], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder, control_placeholder], name='enqueue_op') image_batch, label_batch = facenet.create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, 
batch_size_placeholder) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Number of classes in training set: %d' % nrof_classes) print('Number of examples in training set: %d' % len(image_list)) print('Number of classes in validation set: %d' % len(val_set)) print('Number of examples in validation set: %d' % len(val_image_list)) print('Building training graph') # Build the inference graph prelogits, _ = network.inference(image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None, weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Norm for the prelogits eps = 1e-4 prelogits_norm = tf.reduce_mean(tf.norm(tf.abs(prelogits)+eps, ord=args.prelogits_norm_p, axis=1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_norm * args.prelogits_norm_loss_factor) # # Add center loss # prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes) # tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # # Calculate the average cross entropy loss across the batch # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( # labels=tf.reshape(label_batch, label_batch.get_shape()[1], label_batch.get_shape()[2]), logits=logits, name='cross_entropy_per_example') # cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') # tf.add_to_collection('losses', cross_entropy_mean) #TODO calculate cos_loss cos_loss = facenet.cos_loss(prelogits,label_batch, len(train_set), alpha=args.cos_loss_alfa,scale=args.cos_loss_scale) tf.add_to_collection('losses', cos_loss) correct_prediction = tf.cast(tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) # Calculate the total losses regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cos_loss] + regularization_losses, name='total_loss') # lining add start############################## # output trainable_variables # for ele1 in tf.trainable_variables(): # print("trainable_variables_00: " + ele1.name) # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. 
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) saver.restore(sess, pretrained_model) # Training and validation loop print('Running training') nrof_steps = args.max_nrof_epochs*args.epoch_size nrof_val_samples = int(math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs)) # Validate every validate_every_n_epochs as well as in the last epoch stat = { 'loss': np.zeros((nrof_steps,), np.float32), 'center_loss': np.zeros((nrof_steps,), np.float32), 'reg_loss': np.zeros((nrof_steps,), np.float32), 'xent_loss': np.zeros((nrof_steps,), np.float32), 'prelogits_norm': np.zeros((nrof_steps,), np.float32), 'accuracy': np.zeros((nrof_steps,), np.float32), 'val_loss': np.zeros((nrof_val_samples,), np.float32), 'val_xent_loss': np.zeros((nrof_val_samples,), np.float32), 'val_accuracy': np.zeros((nrof_val_samples,), np.float32), 'lfw_accuracy': np.zeros((args.max_nrof_epochs,), np.float32), 'lfw_valrate': np.zeros((args.max_nrof_epochs,), np.float32), 'learning_rate': np.zeros((args.max_nrof_epochs,), np.float32), 'time_train': np.zeros((args.max_nrof_epochs,), np.float32), 'time_validate': np.zeros((args.max_nrof_epochs,), np.float32), 'time_evaluate': np.zeros((args.max_nrof_epochs,), np.float32), 'prelogits_hist': np.zeros((args.max_nrof_epochs, 1000), np.float32), } for epoch in range(1,args.max_nrof_epochs+1): step = sess.run(global_step, feed_dict=None) # Train for one epoch t = time.time() cont = train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, stat, cos_loss, accuracy, learning_rate, prelogits, args.random_rotate, args.random_crop, args.random_flip, prelogits_norm, args.prelogits_hist_max, args.use_fixed_image_standardization) stat['time_train'][epoch-1] = time.time() - t if not cont: break t = time.time() if len(val_image_list)>0 and ((epoch-1) % args.validate_every_n_epochs == args.validate_every_n_epochs-1 or epoch==args.max_nrof_epochs): validate(args, sess, epoch, val_image_list, val_label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, phase_train_placeholder, batch_size_placeholder, stat, total_loss, regularization_losses, cos_loss, accuracy, args.validate_every_n_epochs, args.use_fixed_image_standardization) stat['time_validate'][epoch-1] = time.time() - t # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, epoch) # Evaluate on LFW t = time.time() if args.lfw_dir: evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, stat, epoch, args.lfw_distance_metric, args.lfw_subtract_mean, args.lfw_use_flipped_images, 
args.use_fixed_image_standardization)
                stat['time_evaluate'][epoch-1] = time.time() - t

                print('Saving statistics')
                with h5py.File(stat_file_name, 'w') as f:
                    # .items(): iteritems() is Python 2 only, while the rest of
                    # this script already uses Python 3 print() calls
                    for key, value in stat.items():
                        f.create_dataset(key, data=value)
    return model_dir
def build_model(self): """ :return: """ """ Helper Variables """ #self.global_step_tensor = tf.Variable(0, trainable=False, name='global_step') #self.global_step_inc = self.global_step_tensor.assign(self.global_step_tensor + 1) self.global_epoch_tensor = tf.Variable(0, trainable=False, name='global_epoch') self.global_epoch_inc = self.global_epoch_tensor.assign( self.global_epoch_tensor + 1) """ Inputs to the network """ with tf.variable_scope('inputs'): self.x, self.y = self.data_loader.get_input() self.is_training = tf.placeholder(tf.bool, name='Training_flag') tf.add_to_collection('inputs', self.x) tf.add_to_collection('inputs', self.y) tf.add_to_collection('inputs', self.is_training) """ Network Architecture """ with tf.variable_scope('network'): net = slim.conv2d(self.x, 20, [5, 5], scope='conv1') net = slim.max_pool2d(net, [2, 2], scope='pool1') net = slim.conv2d(net, 50, [5, 5], scope='conv2') net = slim.max_pool2d(net, [2, 2], scope='pool2') net = slim.flatten(net, scope='flatten3') net = slim.fully_connected(net, 500, scope='fc4') with tf.variable_scope('out'): self.out = slim.fully_connected(net, self.num_classes, activation_fn=None) tf.add_to_collection('out', self.out) with tf.variable_scope('out_argmax'): self.out_argmax = tf.argmax(self.out, axis=-1, output_type=tf.int64, name='out_argmax') with tf.variable_scope('loss-acc'): self.loss = tf.losses.sparse_softmax_cross_entropy(labels=self.y, logits=self.out) self.acc = tf.reduce_mean( tf.cast(tf.equal(self.y, self.out_argmax), tf.float32)) with tf.variable_scope('train_step'): update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): self.train_step = self.optimizer.minimize( self.loss, global_step=self.global_step_tensor) tf.add_to_collection('train', self.train_step) tf.add_to_collection('train', self.loss) tf.add_to_collection('train', self.acc)
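# Sketch of how this graph is typically driven. The session loop and the
# names outside build_model ('model', 'num_iterations') are hypothetical;
# only the attributes created above are taken from the source:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(num_iterations):                      # hypothetical count
        _, loss_val, acc_val = sess.run(
            [model.train_step, model.loss, model.acc],
            feed_dict={model.is_training: True})
    sess.run(model.global_epoch_inc)                     # bump the epoch counter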
def _estimate(image):
    # NOTE: `estimate_scale` and `self` come from the enclosing scope; this
    # function is assumed to be nested inside a class method.

    def _xavier_init(num_in, num_out):
        stddev = np.sqrt(4. / (num_in + num_out))
        return tf.truncated_normal_initializer(stddev=stddev)

    def _constrain_confidence(belief):
        # Split the two channels into an estimate and a raw confidence map,
        # squash the confidence into (0, 1), and restack them.
        estimate, confidence = tf.unstack(belief, axis=3)
        return tf.stack([estimate, tf.nn.sigmoid(confidence)], axis=3)

    beliefs = []
    net = image
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected, slim.conv2d_transpose],
            activation_fn=tf.nn.elu,
            biases_initializer=tf.constant_initializer(0),
            reuse=tf.AUTO_REUSE):
        last_output_channels = 3
        with slim.arg_scope([slim.conv2d], stride=1, padding='VALID'):
            for index, output in enumerate([(32, [7, 7]), (48, [7, 7]),
                                            (64, [5, 5]), (64, [5, 5])]):
                channels, filter_size = output
                net = slim.conv2d(net, channels, filter_size,
                                  scope='mapper_conv_{}'.format(index),
                                  weights_initializer=_xavier_init(
                                      np.prod(filter_size) * last_output_channels, channels))
                last_output_channels = channels
        net = slim.fully_connected(
            net, 200, scope='mapper_fc',
            weights_initializer=_xavier_init(last_output_channels, 200))
        last_output_channels = 200
        with slim.arg_scope([slim.conv2d_transpose], stride=1, padding='SAME'):
            for index, output in enumerate((64, 32, 2)):
                net = slim.conv2d_transpose(
                    net, output, [7, 7],
                    scope='mapper_deconv_{}'.format(index),
                    weights_initializer=_xavier_init(7 * 7 * last_output_channels, output))
                last_output_channels = output
            beliefs.append(net)
            # range() instead of the Python-2-only xrange()
            for i in range(estimate_scale - 1):
                net = slim.conv2d_transpose(
                    net, 2, [6, 6],
                    weights_initializer=_xavier_init(6 * 6 * last_output_channels, 2),
                    scope='mapper_upscale_{}'.format(i))
                last_output_channels = 2
                beliefs.append(self._upscale_image(net))
    return [_constrain_confidence(belief) for belief in beliefs]
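# A standalone sketch of the _constrain_confidence step above on a dummy
# belief map (batch of 1, an 8x8 grid, 2 channels: estimate + raw
# confidence); the shapes here are illustrative assumptions.
import numpy as np
import tensorflow as tf

belief = tf.constant(np.random.randn(1, 8, 8, 2).astype(np.float32))
estimate, confidence = tf.unstack(belief, axis=3)
constrained = tf.stack([estimate, tf.nn.sigmoid(confidence)], axis=3)
with tf.Session() as sess:
    out = sess.run(constrained)
assert out.shape == (1, 8, 8, 2)
# The confidence channel is now squashed into (0, 1)
assert (out[..., 1] >= 0.0).all() and (out[..., 1] <= 1.0).all()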
def __init__(self, s_size, a_size, scope, trainer):
    with tf.variable_scope(scope):
        print("Scope", scope)
        # Input and visual encoding layers
        self.inputs = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
        self.conv1 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.imageIn, num_outputs=16,
                                 kernel_size=[8, 8], stride=[4, 4], padding='VALID')
        self.conv2 = slim.conv2d(activation_fn=tf.nn.elu,
                                 inputs=self.conv1, num_outputs=32,
                                 kernel_size=[4, 4], stride=[2, 2], padding='VALID')
        hidden = slim.fully_connected(slim.flatten(self.conv2), 256, activation_fn=tf.nn.elu)

        # Recurrent network for temporal dependencies
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256, state_is_tuple=True)
        c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
        h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
        self.state_init = [c_init, h_init]
        c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
        h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
        self.state_in = (c_in, h_in)
        rnn_in = tf.expand_dims(hidden, [0])
        step_size = tf.shape(self.imageIn)[:1]
        state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
        lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
            lstm_cell, rnn_in, initial_state=state_in,
            sequence_length=step_size, time_major=False)
        lstm_c, lstm_h = lstm_state
        self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
        rnn_out = tf.reshape(lstm_outputs, [-1, 256])

        # Output layers for the policy and value estimates
        self.policy = slim.fully_connected(
            rnn_out, a_size,
            activation_fn=tf.nn.softmax,
            weights_initializer=normalized_columns_initializer(0.01),
            biases_initializer=None)
        self.value = slim.fully_connected(
            rnn_out, 1,
            activation_fn=None,
            weights_initializer=normalized_columns_initializer(1.0),
            biases_initializer=None)

        # Only worker networks need ops for loss functions and gradient updates.
        if scope != 'global':
            self.actions = tf.placeholder(shape=[None], dtype=tf.int32)  # indices of actions taken
            self.actions_onehot = tf.one_hot(self.actions, a_size, dtype=tf.float32)  # one-hot actions taken
            self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)  # target value
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)  # temporal difference (R - V)

            # Clip to avoid NaN when a policy entry reaches zero
            self.log_policy = tf.log(tf.clip_by_value(self.policy, 1e-20, 1.0))
            # Log-probability of the actions that were actually taken
            self.responsible_outputs = tf.reduce_sum(self.log_policy * self.actions_onehot, [1])
            # Difference between target value and predicted value
            self.r_minus_v = self.target_v - tf.reshape(self.value, [-1])

            # Loss functions
            self.value_loss = 0.5 * tf.reduce_sum(tf.square(self.r_minus_v))  # same as tf.nn.l2_loss(r_minus_v)
            self.entropy = -tf.reduce_sum(self.policy * self.log_policy)  # policy entropy
            self.policy_loss = -tf.reduce_sum(self.responsible_outputs * self.advantages)  # policy loss
            # The critic's learning rate is half the actor's, hence value_loss / 2 + policy_loss
            self.loss = 0.5 * self.value_loss + self.policy_loss - self.entropy * 0.01

            # Get gradients from the local network using local losses
            self.local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, self.local_vars)
            self.var_norms = tf.global_norm(self.local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(self.gradients, 40.0)

            # Apply local gradients to the global network
            global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))
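# A minimal sketch of one forward pass through this actor-critic network.
# The class name `AC_Network` is an assumption (only __init__ is shown above),
# and `normalized_columns_initializer` must already be defined elsewhere in
# the file; passing trainer=None is safe for scope='global' because the
# gradient-update ops are skipped for that scope.
tf.reset_default_graph()
net = AC_Network(s_size=84 * 84, a_size=4, scope='global', trainer=None)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    obs = np.random.rand(1, 84 * 84).astype(np.float32)  # dummy observation
    policy, value, state_out = sess.run(
        [net.policy, net.value, net.state_out],
        feed_dict={net.inputs: obs,
                   net.state_in[0]: net.state_init[0],
                   net.state_in[1]: net.state_init[1]})
print('action distribution:', policy[0])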