def sequence_rnn(rnn_cell, word_list, word_model, first_train=True):
    """Unroll ``rnn_cell`` over every word of ``word_list`` that has a vector.

    For each usable word a ``(1, word_model.dim)`` float placeholder is
    created and mapped, in the returned feed dict, to that word's vector
    reshaped to the same shape.  Words for which
    ``word_model.get_vector`` returns ``None`` are skipped.

    Args:
        rnn_cell: Cell exposing ``zero_state(batch_size, dtype)`` and
            returning ``(output, state)`` when called as ``cell(input, state)``.
        word_list: Iterable of words, fed in order.
        word_model: Object exposing ``get_vector(word)`` and ``dim``.
        first_train: When it compares equal to ``False``, variable reuse is
            switched on before the very first cell call; otherwise reuse is
            switched on from the second usable word onwards.

    Returns:
        Tuple ``(final_state, feed_dict)``: the state produced by the last
        cell call and the placeholder -> vector mapping.

    Raises:
        IndexError: If no word in ``word_list`` has a vector (same exception
            type the original ``states[-1]`` lookup produced).
    """
    feed_dict = {}
    state = rnn_cell.zero_state(1, tf.float32)
    steps = 0
    for word in word_list:
        word_vec = word_model.get_vector(word)
        if word_vec is None:
            continue
        steps += 1
        # Equality (not truthiness) is kept on purpose so that non-bool
        # values such as None behave exactly as before.
        if steps > 1 or first_train == False:  # noqa: E712
            tf.get_variable_scope().reuse_variables()
        step_input = tf.placeholder('float', (1, word_model.dim))
        feed_dict[step_input] = word_vec.reshape((1, word_model.dim))
        # Only the state is needed downstream; the per-step output is dropped.
        _, state = rnn_cell(step_input, state)
    if steps == 0:
        raise IndexError('sequence_rnn: no word in word_list has a vector')
    return state, feed_dict
def testAtrousFullyConvolutionalValues(self):
    """Check that atrous (dense) features, once subsampled, match nominal ones."""
    nominal_stride = 32
    for output_stride in [4, 8, 16, 32, None]:
        with slim.arg_scope(resnet_utils.resnet_arg_scope()):
            with tf.Graph().as_default(), self.test_session() as sess:
                tf.set_random_seed(0)
                inputs = create_test_input(2, 81, 81, 3)

                # Dense feature extraction at the requested output stride ...
                dense, _ = self._resnet_small(inputs,
                                              None,
                                              is_training=False,
                                              global_pool=False,
                                              output_stride=output_stride)
                # ... followed by subsampling back to the nominal stride.
                factor = (1 if output_stride is None
                          else nominal_stride // output_stride)
                dense = resnet_utils.subsample(dense, factor)

                # The second network must share the first one's weights.
                tf.get_variable_scope().reuse_variables()

                # Feature extraction at the nominal network rate.
                nominal, _ = self._resnet_small(inputs,
                                                None,
                                                is_training=False,
                                                global_pool=False)

                sess.run(tf.global_variables_initializer())
                self.assertAllClose(dense.eval(), nominal.eval(),
                                    atol=1e-4, rtol=1e-4)
def tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                     loop_function=None, dtype=tf.float32, scope=None):
    """Sequence-to-sequence RNN whose encoder and decoder share parameters.

    The encoder RNN is run over ``encoder_inputs``; its last state seeds the
    decoder, which is run over ``decoder_inputs``.  Both halves use the same
    ``cell`` under the same scope, with variable reuse switched on in
    between, so the parameters are tied.

    Args:
      encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      loop_function: forwarded unchanged to ``rnn_decoder``; if not None it is
        applied to the i-th output to produce the (i+1)-th input (see
        ``rnn_decoder`` for details).
      dtype: dtype of the initial encoder state (default: tf.float32).
      scope: scope used for both encoder and decoder; defaults to
        "tied_rnn_seq2seq".

    Returns:
      Whatever ``rnn_decoder`` returns for the decoder run.
    """
    with tf.variable_scope("combined_tied_rnn_seq2seq"):
        shared_scope = scope or "tied_rnn_seq2seq"
        _, enc_states = rnn.rnn(
            cell, encoder_inputs, dtype=dtype, scope=shared_scope)
        # From here on the decoder picks up the encoder's variables.
        tf.get_variable_scope().reuse_variables()
        final_enc_state = enc_states[-1]
        return rnn_decoder(decoder_inputs, final_enc_state, cell,
                           loop_function=loop_function, scope=shared_scope)
def sequence_rnn_pad(rnn_cell, input_dim, length=50, first_train=True):
    """Unroll ``rnn_cell`` for a fixed number of steps with per-step state masks.

    Every step gets its own ``(1, input_dim)`` input placeholder and a
    ``(1, rnn_cell.state_size)`` "flag" placeholder; the state produced by the
    cell is multiplied element-wise by the flag before being carried forward.

    Args:
        rnn_cell: Cell exposing ``zero_state``, ``state_size`` and the
            ``cell(input, state) -> (output, state)`` call.
        input_dim: Width of each step's input placeholder.
        length: Number of unrolled steps.
        first_train: When it compares equal to ``False``, variable reuse is
            enabled from the first step; otherwise from the second step on.

    Returns:
        Tuple ``(inputs, outputs, states, flags)`` of per-step lists.
    """
    inputs, outputs, states, flags = [], [], [], []
    state = rnn_cell.zero_state(1, tf.float32)

    for step in range(length):
        if step > 0 or first_train == False:  # noqa: E712 (equality kept as-is)
            tf.get_variable_scope().reuse_variables()

        step_input = tf.placeholder('float', (1, input_dim))
        inputs.append(step_input)

        output, raw_state = rnn_cell(step_input, state)

        # The flag multiplies the state that is carried to the next step.
        flag = tf.placeholder('float', (1, rnn_cell.state_size))
        state = flag * raw_state
        flags.append(flag)

        states.append(state)
        outputs.append(output)

    return inputs, outputs, states, flags
def make_net(self, input_images, input_measurements, input_actions, input_objectives, reuse=False):
    """Build the value/advantage prediction network.

    Image, measurement and (optionally) objective inputs are encoded
    separately, concatenated, and fed to two fully connected heads: a value
    head of width ``target_dim`` and an advantage head of width
    ``len(net_discrete_actions) * target_dim``.  The advantage is
    mean-centred over the action axis and added to the value.

    Args:
        input_images: Input to the convolutional encoder.
        input_measurements: Input to the measurement FC net.
        input_actions: Cast to bool and used as a mask over ``pred_all``.
        input_objectives: Input to the objective FC net; only used when
            ``self.fc_obj_params`` is a numpy array.
        reuse: When true, variable reuse is enabled on the current scope.

    Returns:
        Tuple ``(pred_all, pred_relevant)``.

    Raises:
        Exception: If ``self.random_objective_coeffs`` is set but
            ``self.fc_obj_params`` is not a numpy array.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    num_actions = len(self.net_discrete_actions)

    # Both heads start from a copy of the joint parameters and differ only in
    # the size of their last layer.
    self.fc_val_params = np.copy(self.fc_joint_params)
    self.fc_val_params['out_dims'][-1] = self.target_dim
    self.fc_adv_params = np.copy(self.fc_joint_params)
    self.fc_adv_params['out_dims'][-1] = num_actions * self.target_dim

    p_img_conv = my_ops.conv_encoder(input_images, self.conv_params, 'p_img_conv', msra_coeff=0.9)
    p_img_fc = my_ops.fc_net(my_ops.flatten(p_img_conv), self.fc_img_params, 'p_img_fc', msra_coeff=0.9)
    p_meas_fc = my_ops.fc_net(input_measurements, self.fc_meas_params, 'p_meas_fc', msra_coeff=0.9)

    branches = [p_img_fc, p_meas_fc]
    has_objective_net = isinstance(self.fc_obj_params, np.ndarray)
    if has_objective_net:
        branches.append(
            my_ops.fc_net(input_objectives, self.fc_obj_params, 'p_obj_fc', msra_coeff=0.9))
    p_concat_fc = tf.concat(branches, 1)
    if not has_objective_net and self.random_objective_coeffs:
        raise Exception('Need fc_obj_params with randomized objectives')

    p_val_fc = my_ops.fc_net(p_concat_fc, self.fc_val_params, 'p_val_fc', last_linear=True, msra_coeff=0.9)
    p_adv_fc = my_ops.fc_net(p_concat_fc, self.fc_adv_params, 'p_adv_fc', last_linear=True, msra_coeff=0.9)

    adv_reshape = tf.reshape(p_adv_fc, [-1, num_actions, self.target_dim])
    adv_mean = tf.reduce_mean(adv_reshape, reduction_indices=1, keep_dims=True)
    pred_all_nomean = adv_reshape - adv_mean
    value = tf.reshape(p_val_fc, [-1, 1, self.target_dim])
    pred_all = pred_all_nomean + value

    action_mask = tf.cast(input_actions, tf.bool)
    pred_relevant = tf.boolean_mask(pred_all, action_mask)

    return pred_all, pred_relevant
def build(self, FLAGS):
    """Assemble the model graph.

    Builds, in order: the generator output, the discriminator on real input
    and (with variable reuse) on the generator output, the optional sum of
    discriminator regularisation terms, the losses, the two training ops and
    the evaluation op.

    :param FLAGS: configuration object; passed to ``__loss__`` and read for
        ``learning_rate``.
    :return: None
    """
    with tf.name_scope('G_'):
        self.predict_g = self.__G__()

    with tf.name_scope('D_'):
        real_out = self.__D__(self.input_d, input_type="Real")
        self.predict_d, self.predict_d_logits = real_out

        # The second discriminator pass shares the variables of the first.
        tf.get_variable_scope().reuse_variables()

        gen_out = self.__D__(self.predict_g, input_type="Gen")
        self.predict_d_for_g, self.predict_d_logits_for_g = gen_out

        if len(self.regularization_values_d) > 0:
            self.regularization_sum_d = sum(self.regularization_values_d)

    with tf.name_scope('loss'):
        self.__loss__(FLAGS)

    with tf.name_scope('training'):
        train_ops = self.__training__(learning_rate=FLAGS.learning_rate)
        self.train_op_d, self.train_op_g = train_ops

    with tf.name_scope('evaluation'):
        # Evaluation compares the generator prediction against the labels.
        self.evaluation = self.__evaluation__(predict=self.predict_g, labels=self.labels)
def inference(x, n_batch, maxlen=None, n_hidden=None, n_out=None):
    """Run a GRU over ``x`` and apply a linear layer to the last step's output.

    Args:
        x: Input tensor, sliced per step as ``x[:, t, :]``.
        n_batch: Batch size used for the cell's zero state.
        maxlen: Number of time steps to unroll.
        n_hidden: Number of GRU units.
        n_out: Width of the linear output layer.

    Returns:
        ``tf.matmul(last_output, V) + c``.
    """
    def weight_variable(shape):
        return tf.Variable(tf.truncated_normal(shape, stddev=0.01))

    def bias_variable(shape):
        return tf.Variable(tf.zeros(shape, dtype=tf.float32))

    cell = tf.contrib.rnn.GRUCell(n_hidden)
    state = cell.zero_state(n_batch, tf.float32)

    # Collects the output of the hidden layer at every past time step.
    step_outputs = []

    with tf.variable_scope('GRU'):
        for t in range(maxlen):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            cell_output, state = cell(x[:, t, :], state)
            step_outputs.append(cell_output)

    last_output = step_outputs[-1]

    V = weight_variable([n_hidden, n_out])
    c = bias_variable([n_out])
    # Linear activation.
    return tf.matmul(last_output, V) + c
def discriminator(X, reuse=False):
    """Convolutional discriminator over inputs reshaped to 28x28x1.

    Two stride-2 convolutions (each followed by batch norm, leaky ReLU and
    dropout), a dense layer and a single-unit output layer.

    Args:
        X: Input tensor; reshaped to ``[-1, 28, 28, 1]``.
        reuse: When true, reuse the variables of the 'discriminator' scope.

    Returns:
        Tuple ``(prob, logits)`` where ``prob`` is the sigmoid of ``logits``.
    """
    def _weight(name, shape):
        return tf.get_variable(
            name, shape, initializer=tf.random_normal_initializer(stddev=0.1))

    def _bias(name, size):
        return tf.get_variable(
            name, [size], initializer=tf.constant_initializer())

    with tf.variable_scope('discriminator'):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        # Channel counts of conv1 / conv2 and width of the dense layer.
        K, M, N = 64, 128, 256

        W1 = _weight('D_W1', [4, 4, 1, K])
        B1 = _bias('D_B1', K)
        W2 = _weight('D_W2', [4, 4, K, M])
        B2 = _bias('D_B2', M)
        W3 = _weight('D_W3', [7*7*M, N])
        B3 = _bias('D_B3', N)
        W4 = _weight('D_W4', [N, 1])
        B4 = _bias('D_B4', 1)

        X = tf.reshape(X, [-1, 28, 28, 1], 'reshape')

        conv1 = conv(X, W1, B1, stride=2, name='conv1')
        bn1 = tf.contrib.layers.batch_norm(conv1)
        act1 = tf.nn.dropout(lrelu(bn1), 0.4)

        conv2 = conv(act1, W2, B2, stride=2, name='conv2')
        bn2 = tf.contrib.layers.batch_norm(conv2)
        act2 = tf.nn.dropout(lrelu(bn2), 0.4)

        flat = tf.reshape(act2, [-1, 7*7*M], name='flat')

        dense = lrelu(tf.matmul(flat, W3) + B3)
        logits = tf.matmul(dense, W4) + B4
        prob = tf.nn.sigmoid(logits)
        return prob, logits
def testModelWithBucketsScopeAndLoss(self):
    """Check that model_with_buckets leaves the scope's reuse flag untouched."""
    classes = 10
    buckets = [(4, 4), (8, 8)]

    with self.test_session():
        # A sample Seq2Seq model built from GRU cells.
        def SampleGRUSeq2Seq(enc_inp, dec_inp, weights, per_example_loss):
            """Bucketed attention seq2seq model with two GRU layers."""

            def GRUSeq2Seq(enc_inp, dec_inp):
                cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.GRUCell(24)] * 2)
                return tf.nn.seq2seq.embedding_attention_seq2seq(
                    enc_inp, dec_inp, cell,
                    num_encoder_symbols=classes,
                    num_decoder_symbols=classes,
                    embedding_size=24)

            # Targets are the decoder inputs shifted by one, padded with 0.
            targets = [dec_inp[pos + 1] for pos in range(len(dec_inp) - 1)]
            targets.append(0)
            return tf.nn.seq2seq.model_with_buckets(
                enc_inp, dec_inp, targets, weights, buckets, GRUSeq2Seq,
                per_example_loss=per_example_loss)

        # Inputs for the copy model.
        inp = [tf.placeholder(tf.int32, shape=[None]) for _ in range(8)]
        out = [tf.placeholder(tf.int32, shape=[None]) for _ in range(8)]
        weights = [tf.ones_like(inp[0], dtype=tf.float32) for _ in range(8)]

        with tf.variable_scope("root"):
            _, scalar_losses = SampleGRUSeq2Seq(
                inp, out, weights, per_example_loss=False)
            # Building the model must not have switched reuse on.
            self.assertEqual(False, tf.get_variable_scope().reuse)

            # Second model, sharing variables, with per-example loss.
            tf.get_variable_scope().reuse_variables()
            _, example_losses = SampleGRUSeq2Seq(
                inp, out, weights, per_example_loss=True)

            # First loss is a scalar, the second one a 1-dimensional tensor.
            self.assertEqual([], scalar_losses[0].get_shape().as_list())
            self.assertEqual([None], example_losses[0].get_shape().as_list())
def testBasicLSTMCellStateTupleType(self):
    """Check that a tuple-state MultiRNNCell reports and returns LSTMStateTuples."""
    state_tuple_cls = tf.nn.rnn_cell.LSTMStateTuple

    with self.test_session():
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m0 = (tf.zeros([1, 2]),) * 2
            m1 = (tf.zeros([1, 2]),) * 2
            cell = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2,
                state_is_tuple=True)

            self.assertIsInstance(cell.state_size, tuple)
            for layer_size in (cell.state_size[0], cell.state_size[1]):
                self.assertIsInstance(layer_size, state_tuple_cls)

            # Feed plain tuples as state.
            _, (out_m0, out_m1) = cell(x, (m0, m1))
            self.assertIsInstance(out_m0, state_tuple_cls)
            self.assertIsInstance(out_m1, state_tuple_cls)

            # Feed LSTMStateTuples (the cell's own zero state) as state.
            tf.get_variable_scope().reuse_variables()
            zero_state = cell.zero_state(1, tf.float32)
            self.assertIsInstance(zero_state, tuple)
            for layer_state in (zero_state[0], zero_state[1]):
                self.assertIsInstance(layer_state, state_tuple_cls)

            _, (out_m0, out_m1) = cell(x, zero_state)
            self.assertIsInstance(out_m0, state_tuple_cls)
            self.assertIsInstance(out_m1, state_tuple_cls)
def rnn_decoder(decoder_inputs, initial_state, cell, scope=None):
    """RNN decoder that builds both a training and a sampling sub-graph.

    In the training sub-graph the cell is fed ``decoder_inputs`` step by step.
    In the sampling sub-graph the first step copies the first training output
    and the initial state; every later step feeds the cell its own previous
    sampling output.

    Args:
      decoder_inputs: Inputs for the decoder, list of tensors. Their values
        are used only in the training sub-graph; the sampling sub-graph uses
        them only to determine the number of steps.
      initial_state: Initial state for the decoder.
      cell: RNN cell to use for the decoder.
      scope: Scope to use; defaults to "dnn_decoder".

    Returns:
      Tuple ``(outputs, states, sampling_outputs, sampling_states)`` of lists.
      Both state lists start with ``initial_state``.
    """
    with tf.variable_scope(scope or "dnn_decoder"):
        outputs, states = [], [initial_state]
        sampling_outputs, sampling_states = [], [initial_state]

        with tf.op_scope([decoder_inputs, initial_state], "training"):
            for step, inp in enumerate(decoder_inputs):
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                step_output, step_state = cell(inp, states[-1])
                outputs.append(step_output)
                states.append(step_state)

        with tf.op_scope([initial_state], "sampling"):
            for step, _ in enumerate(decoder_inputs):
                if step == 0:
                    # Seed the sampling chain from the training sub-graph.
                    sampling_outputs.append(outputs[step])
                    sampling_states.append(states[step])
                    continue
                prev_output = sampling_outputs[-1]
                prev_state = sampling_states[-1]
                sampled_output, sampled_state = cell(prev_output, prev_state)
                sampling_outputs.append(sampled_output)
                sampling_states.append(sampled_state)

    return outputs, states, sampling_outputs, sampling_states
def gan_discriminator(images1, images2, reuse=False):
    """Discriminator over a pair of images concatenated on axis 3.

    Four conv(3x3) + max-pool(2x2) stages with 32/64/128/256 filters, a final
    single-filter 3x3 convolution without activation, and a mean over axes
    1, 2 and 3.

    Args:
        images1: First image tensor.
        images2: Second image tensor, concatenated with the first on axis 3.
        reuse: When true, reuse the variables of the 'discriminator' scope.

    Returns:
        The reduced tensor (one value per batch element).
    """
    wd = 0
    net = tf.concat(3, [images1, images2])

    with tf.variable_scope('discriminator'):
        with slim.arg_scope([slim.ops.conv2d], stddev=0.1, weight_decay=wd, is_training=True):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            # Stages conv1/pool1 ... conv4/pool4.
            for stage, num_filters in enumerate((32, 64, 128, 256), 1):
                net = slim.ops.repeat_op(1, net, slim.ops.conv2d, num_filters, [3, 3],
                                         batch_norm_params={},
                                         scope='conv%d' % stage)
                net = slim.ops.max_pool(net, [2, 2], scope='pool%d' % stage)

            net = slim.ops.repeat_op(1, net, slim.ops.conv2d, 1, [3, 3],
                                     activation=None, scope='conv5')
            net = tf.reduce_mean(net, reduction_indices=[1, 2, 3], name='reduce')

    return net
def lstm_fn(height):
    """Create the LSTM cell for layer ``height``.

    Every layer is a ``BasicLSTMCell`` with ``FLAGS.lstm_unit`` units that
    inherits the current variable scope's reuse flag.  All layers except the
    last one (``FLAGS.num_lstm_layer - 1``) are wrapped in a dropout wrapper
    with an output keep probability of 0.5.
    """
    base_cell = tf.contrib.rnn.BasicLSTMCell(
        FLAGS.lstm_unit,
        state_is_tuple=True,
        reuse=tf.get_variable_scope().reuse)

    is_top_layer = height == FLAGS.num_lstm_layer - 1
    if is_top_layer:
        return base_cell
    return tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=0.5)
def _conv(self, input, shape, strides, name, alpha=0.1):
    """Convolution followed by a leaky ReLU, with batch norm or a bias.

    args:
        input   : tensor fed to ``tf.nn.conv2d``
        shape   : kernel shape, [3, 3, in, out]
        strides : strides passed to ``tf.nn.conv2d``
        name    : variable scope name; also the name of the output op
        alpha   : slope of the leaky ReLU, computed as ``max(x, alpha * x)``

    When ``self.bn_mode`` is set the convolution output is batch-normalised;
    otherwise a bias initialised to 0.01 is added.  Weight decay and the
    activation summary are only registered when the current variable scope is
    not in reuse mode.
    """
    with tf.variable_scope(name) as scope:
        kernel = self._variable_trunc_normal('weights', shape)
        conv = tf.nn.conv2d(input, kernel, strides, padding='SAME')

        if self.bn_mode:
            pre_activation = self._batch_normalization(conv, shape[-1], [0, 1, 2])
        else:
            biases = self._variable_constant('biases', shape[-1], value=0.01)
            pre_activation = tf.nn.bias_add(conv, biases)

        conv_ = tf.maximum(pre_activation, alpha*pre_activation, name=scope.name)

        if tf.get_variable_scope().reuse is False:
            self._add_weight_decay(kernel)
            self._activation_summary(conv_)

    return conv_
def discriminator_z(self, z, is_training=True, reuse_variables=False, num_hidden_layer_channels=(64, 32, 16), enable_bn=True):
    """Fully connected discriminator applied to ``z``.

    Each hidden layer is ``fc`` -> optional batch norm -> ReLU; a final
    single-unit ``fc`` layer produces the logit.

    Args:
        z: Input tensor.
        is_training: Forwarded to the batch-norm layers.
        reuse_variables: When true, enable reuse on the current variable
            scope; the flag is also forwarded to the batch-norm layers.
        num_hidden_layer_channels: Output width of each hidden layer.
        enable_bn: Whether to batch-normalise after each hidden ``fc``.

    Returns:
        Tuple ``(sigmoid(logit), logit)``.
    """
    if reuse_variables:
        tf.get_variable_scope().reuse_variables()

    current = z
    for i, width in enumerate(num_hidden_layer_channels):
        current = fc(
            input_vector=current,
            num_output_length=width,
            name='D_z_fc' + str(i)
        )
        if enable_bn:
            current = tf.contrib.layers.batch_norm(
                current,
                scale=False,
                is_training=is_training,
                scope='D_z_bn' + str(i),
                reuse=reuse_variables
            )
        current = tf.nn.relu(current)

    # Output layer: numbered one past the last hidden layer.
    logit = fc(
        input_vector=current,
        num_output_length=1,
        name='D_z_fc' + str(i+1)
    )
    return tf.nn.sigmoid(logit), logit
def ndlstm_base_unrolled(inputs, noutput, scope=None, reverse=False):
    """Run an LSTM, either forward or backward.

    This is a 1D LSTM implementation using unrolling and the TensorFlow
    LSTM op.

    Args:
      inputs: input sequence (length, batch_size, ninput)
      noutput: depth of output
      scope: optional scope name
      reverse: run LSTM in reverse

    Returns:
      Output sequence (length, batch_size, noutput)
    """
    with tf.variable_scope(scope, "SeqLstmUnrolled", [inputs]):
        length, batch_size, _ = _shape(inputs)
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False)
        state = tf.zeros([batch_size, lstm_cell.state_size])
        output_u = []
        inputs_u = tf.unstack(inputs)
        if reverse:
            inputs_u = list(reversed(inputs_u))
        # `range` instead of `xrange`: identical iteration on Python 2 and
        # also defined on Python 3.
        for i in range(length):
            if i > 0:
                # All steps after the first share the first step's variables.
                tf.get_variable_scope().reuse_variables()
            output, state = lstm_cell(inputs_u[i], state)
            output_u.append(output)
        if reverse:
            # Restore the original time order of the outputs.
            output_u = list(reversed(output_u))
        outputs = tf.stack(output_u)
        return outputs
def build_decoder_rnn(self, first_step):
    """Build a single attention-decoder step.

    When ``first_step`` is true the input is the embedding of index 0 and the
    state comes from ``utils.get_initial_state`` applied to the mean context.
    Otherwise the input is the embedding of ``self.decoder_prev_word``,
    variable reuse is enabled, and the state is a placeholder state (its
    flattened form is stored in ``self.decoder_flattened_state``) that must
    be fed by the caller.

    Returns:
        ``[decoder_probs, decoder_state]``: the softmax over
        ``vocab_size + 1`` entries and the flattened next state.
    """
    with tf.variable_scope("rnnlm"):
        flattened_ctx = tf.reshape(self.context, [self.batch_size, 196, 512])
        ctx_mean = tf.reduce_mean(flattened_ctx, 1)

        self.decoder_prev_word = tf.placeholder(tf.int32, [None])

        if first_step:
            start_ids = tf.zeros([self.batch_size], tf.int32)
            rnn_input = tf.nn.embedding_lookup(self.Wemb, start_ids)
            initial_state = utils.get_initial_state(ctx_mean, self.cell.state_size)
        else:
            rnn_input = tf.nn.embedding_lookup(self.Wemb, self.decoder_prev_word)
            tf.get_variable_scope().reuse_variables()
            initial_state = utils.get_placeholder_state(self.cell.state_size)
            self.decoder_flattened_state = utils.flatten_state(initial_state)

        outputs, state = tf.contrib.legacy_seq2seq.attention_decoder(
            [rnn_input], initial_state, flattened_ctx, self.cell,
            initial_state_attention = not first_step)

        logits = slim.fully_connected(
            outputs[0], self.vocab_size + 1, activation_fn = None, scope = 'logit')
        decoder_probs = tf.reshape(
            tf.nn.softmax(logits), [self.batch_size, self.vocab_size + 1])
        decoder_state = utils.flatten_state(state)

        # Probabilities and flattened state are handed to the next time step.
        return [decoder_probs, decoder_state]
def sampler(self, z):
    """Build the inference-mode generator graph, reusing existing variables.

    ``z`` is projected linearly, reshaped to ``[-1, 4, 4, GF_DIM * 8]`` and
    passed through four deconvolutions up to ``[BATCH_SIZE, 64, 64, 3]``.
    The batch-norm layers are called with ``train=False``.

    Args:
        z: Input tensor fed to the 'g_h0_lin' projection.

    Returns:
        ``tanh`` of the last deconvolution output.
    """
    tf.get_variable_scope().reuse_variables()

    # project `z` and reshape
    h0 = tf.reshape(linear(z, GF_DIM * 8 * 4 * 4, 'g_h0_lin'),
                    [-1, 4, 4, GF_DIM * 8])
    h0 = tf.nn.relu(self.g_bn0(h0, train=False))

    h1 = deconv2d(h0, [BATCH_SIZE, 8, 8, GF_DIM * 4], name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1, train=False))

    h2 = deconv2d(h1, [BATCH_SIZE, 16, 16, GF_DIM * 2], name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2, train=False))

    h3 = deconv2d(h2, [BATCH_SIZE, 32, 32, GF_DIM * 1], name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3, train=False))

    h4 = deconv2d(h3, [BATCH_SIZE, 64, 64, 3], name='g_h4')

    # Single-argument, parenthesised prints behave the same on Python 2 and
    # are valid syntax on Python 3.
    print("="*100)
    print("h4:")
    print(h4.get_shape())
    print("="*100)

    return tf.nn.tanh(h4)
def sequence_to_final(inputs, noutput, scope=None, name=None, reverse=False):
    """Run an LSTM across all steps and returns only the final state.

    Args:
      inputs: (length, batch_size, depth) tensor
      noutput: size of output vector
      scope: optional scope name
      name: optional name for output tensor
      reverse: run in reverse

    Returns:
      Batch of size (batch_size, noutput).
    """
    with tf.variable_scope(scope, "SequenceToFinal", [inputs]):
        length, batch_size, _ = _shape(inputs)
        lstm = tf.nn.rnn_cell.BasicLSTMCell(noutput, state_is_tuple=False)
        state = tf.zeros([batch_size, lstm.state_size])
        inputs_u = tf.unstack(inputs)
        if reverse:
            inputs_u = list(reversed(inputs_u))
        # `range` instead of `xrange`: identical iteration on Python 2 and
        # also defined on Python 3.
        for i in range(length):
            if i > 0:
                # All steps after the first share the first step's variables.
                tf.get_variable_scope().reuse_variables()
            output, state = lstm(inputs_u[i], state)
        # Only the output of the last processed step is returned.
        outputs = tf.reshape(output, [batch_size, noutput], name=name)
        return outputs
def __init__(self, image_dim=784, cat_dim=11, z_dim=50, hid_dim=200):
    """Build the conditional VAE graph: placeholders, encoder, decoder, loss.

    Args:
        image_dim: Width of the ``x`` placeholder.
        cat_dim: Width of the ``y`` placeholder.
        z_dim: Width of the latent ``z_in`` placeholder and of the noise.
        hid_dim: Stored on the instance as ``self.hid_dim``.
    """
    self.image_dim = image_dim
    self.cat_dim = cat_dim
    self.z_dim = z_dim
    self.hid_dim = hid_dim

    self.x = tf.placeholder(dtype=tf.float32, shape=(None, image_dim), name='x')
    self.y = tf.placeholder(dtype=tf.float32, shape=(None, cat_dim), name='y')
    self.z_in = tf.placeholder(dtype=tf.float32, shape=(None, z_dim), name='z_in')
    self.train = tf.placeholder(dtype=tf.bool, shape=(1), name='train')

    with tf.variable_scope('vae'):
        self.mean, self.var, self.moment_op_q = self.q_z_xy(self.x, self.y, self.train)
        # for debug
        self.log_var = tf.log(self.var)

        # The factor is written as the float literal -0.5: the previous
        # `-1/2` evaluates to -1 under Python 2 integer division, which
        # silently doubled the weight of the KL term.
        kl_per_example = tf.reduce_sum(
            1 + tf.log(tf.clip_by_value(self.var, 1e-10, 1.0))
            - self.mean**2 - self.var,
            reduction_indices=[1])
        self.KL = -0.5 * tf.reduce_mean(kl_per_example)

        # NOTE(review): the noise has shape [z_dim], so the same draw is
        # broadcast over the whole batch -- confirm this is intended.
        epsilon = tf.random_normal(shape=[z_dim])
        # NOTE(review): the KL term above treats `self.var` as a variance,
        # while it is used here as the scale of the noise (a standard
        # deviation would be sqrt(var)) -- confirm against q_z_xy.
        self.z = self.mean+self.var*epsilon

        self.pi, self.moment_op_p = self.p_x_yz(self.y, self.z, self.train)

        # Second decoder, driven by `z_in`, sharing the first one's variables.
        tf.get_variable_scope().reuse_variables()
        self.pi_out, _ = self.p_x_yz(self.y, self.z_in, self.train)

    self.log_likelihood = tf.reduce_mean(self.log_p_x_yz(self.pi, self.x)+self.log_p_y(self.y))
    self.lower_bound = -self.KL+self.log_likelihood
    # Training minimises the negative lower bound.
    self.loss = -self.lower_bound
    self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
    self.moment_op = tf.group(self.moment_op_q, self.moment_op_p)
def discriminator(self, image, reuse=False, y=None):
    """Discriminator network, unconditional or conditioned on ``y``.

    Args:
        image: Input image tensor.
        reuse: When true, enable reuse on the current variable scope.
        y: Conditioning tensor; only read when ``self.y_dim`` is truthy.

    Returns:
        Sigmoid of the final linear layer.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    if self.y_dim:
        # Conditional variant: `y` is concatenated at every stage.
        yb = tf.reshape(y, [None, 1, 1, self.y_dim])
        x = conv_cond_concat(image, yb)

        h0 = lrelu(spatial_conv(x, self.c_dim + self.y_dim))
        h0 = conv_cond_concat(h0, yb)

        h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim + self.y_dim)))
        h1 = tf.reshape(h1, [h1.get_shape()[0], -1])
        h1 = tf.concat(1, [h1, y])

        h2 = lrelu(self.d_bn2(linear(h1, self.dfc_dim, 'd_h2_lin')))
        h2 = tf.concat(1, [h2, y])

        return tf.nn.sigmoid(linear(h2, 1, 'd_h3_lin'))

    # Unconditional variant: four convolutions and a linear layer.
    h0 = lrelu(conv2d(image, self.df_dim, name='d_h0_conv'))
    h1 = lrelu(self.d_bn1(conv2d(h0, self.df_dim*2, name='d_h1_conv')))
    h2 = lrelu(self.d_bn2(conv2d(h1, self.df_dim*4, name='d_h2_conv')))
    h3 = lrelu(self.d_bn3(conv2d(h2, self.df_dim*8, name='d_h3_conv')))
    flat = tf.reshape(h3, [self.batch_size, -1])
    h4 = linear(flat, 1, 'd_h3_lin')
    return tf.nn.sigmoid(h4)
def _build_rnn_graph_lstm(self, inputs, config, is_training):
    """Build the inference graph using canonical LSTM cells.

    Args:
        inputs: Input tensor, sliced per step as ``inputs[:, t, :]``.
        config: Configuration object; ``keep_prob``, ``num_layers``,
            ``batch_size`` and ``hidden_size`` are read here.
        is_training: When true and ``config.keep_prob < 1``, every layer is
            wrapped in an output-dropout wrapper.

    Returns:
        Tuple ``(output, state)``: the per-step outputs concatenated on axis
        1 and reshaped to ``[-1, config.hidden_size]``, and the final state.
        ``self._initial_state`` is set as a side effect.
    """
    # Slightly better results can be obtained with forget gate biases
    # initialized to 1 but the hyperparameters of the model would need to be
    # different than reported in the paper.
    def make_cell():
        cell = self._get_lstm_cell(config, is_training)
        if is_training and config.keep_prob < 1:
            cell = tf.contrib.rnn.DropoutWrapper(
                cell, output_keep_prob=config.keep_prob)
        return cell

    # A fresh cell object is built for every layer.  Repeating one and the
    # same cell object across layers makes the layers share (or, depending on
    # the TensorFlow version, refuse to create) their variables.
    cell = tf.contrib.rnn.MultiRNNCell(
        [make_cell() for _ in range(config.num_layers)], state_is_tuple=True)

    self._initial_state = cell.zero_state(config.batch_size, data_type())
    state = self._initial_state
    # Simplified version of tensorflow_models/tutorials/rnn/rnn.py's rnn().
    # This builds an unrolled LSTM for tutorial purposes only.
    # In general, use the rnn() or state_saving_rnn() from rnn.py.
    #
    # The alternative version of the code below is:
    #
    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
    # outputs, state = tf.contrib.rnn.static_rnn(cell, inputs,
    #                            initial_state=self._initial_state)
    outputs = []
    with tf.variable_scope("RNN"):
        for time_step in range(self.num_steps):
            if time_step > 0:
                # Later steps share the variables created by the first step.
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)
    output = tf.reshape(tf.concat(outputs, 1), [-1, config.hidden_size])
    return output, state
def full_model(data):
    """Build the autoencoder plus latent-space discriminator graph.

    The latent mean is used directly as the latent sample.  The discriminator
    is applied once to that sample (target 0) and once, with variable reuse,
    to a standard-normal sample of the same shape (target 1).

    Args:
        data: Input tensor fed to ``encoder``.

    Returns:
        Tuple ``(output_mean, mean reconstruction term,
        mean discriminator cross entropy, rough_error)``.
    """
    latent_mean, latent_log_std = encoder(data)
    # No reparameterised sampling: the mean itself is the latent sample.
    latent_sample = latent_mean

    output_mean, output_log_std = decoder(latent_sample)
    disc_neg_logit = discriminator(latent_sample)

    tf.get_variable_scope().reuse_variables()

    latent_prior_sample = tf.random_normal(tf.shape(latent_mean))
    latent_prior_sample.set_shape(latent_mean.get_shape().as_list())
    disc_pos_logit = discriminator(latent_prior_sample)

    # Per-element Gaussian log-density of `data`, summed over axis 1.
    log_density = (-0.5 * numpy.log(2 * numpy.pi) - output_log_std
                   - 0.5 * tf.square(output_mean - data) / tf.exp(2.0 * output_log_std))
    reconstruction_error = tf.reduce_sum(log_density, reduction_indices=[1])

    neg_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        disc_neg_logit, tf.zeros(tf.shape(disc_neg_logit)))
    pos_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        disc_pos_logit, tf.ones(tf.shape(disc_pos_logit)))
    disc_cross_entropy = 0.5*neg_entropy + 0.5*pos_entropy

    # Image summary: input, reconstruction and their difference, each row
    # repeated `num_copies` times.
    num_copies = 85
    stacked = tf.transpose(tf.pack([data, output_mean, data - output_mean]), perm=[1, 0, 2])
    tiled = tf.tile(tf.expand_dims(stacked, 2), [1, 1, num_copies, 1])
    image = tf.reshape(tiled, [-1, 3 * num_copies, SIG_LEN])
    lm_ae.summaries.image_summary('posterior_sample', tf.expand_dims(image, -1), 5)

    output_power = tf.reduce_mean(tf.square(output_mean), reduction_indices=[1])
    data_power = tf.reduce_mean(tf.square(data), reduction_indices=[1])
    rough_error = tf.reduce_mean(tf.square(output_power - data_power))

    return (output_mean,
            tf.reduce_mean(reconstruction_error),
            tf.reduce_mean(disc_cross_entropy),
            rough_error)
def build_train_graph(self):
    """Build the training graph and its masked cross-entropy loss.

    The LSTM is first fed the embedded image features; for every later time
    step it is fed the embedding of the previous caption word and its output
    is scored against the current caption word.

    Returns:
        Tuple ``(loss, self.inp_dict)`` where ``loss`` is the summed masked
        cross entropy divided by the sum of the mask over steps 1 onwards.
    """
    captions = self.inp_dict['captions']
    mask = self.inp_dict["mask"]

    init_c = tf.zeros([self.batch_size, self.lstm_cell.state_size[0]])
    init_h = tf.zeros([self.batch_size, self.lstm_cell.state_size[1]])
    initial_state = (init_c, init_h)

    image_emb = (tf.matmul(self.inp_dict["features"], self.image_embedding['weights'])
                 + self.image_embedding['biases'])

    with tf.variable_scope("LSTM"):
        output, state = self.lstm_cell(image_emb, initial_state)
        loss = 0.0
        for t in range(1, self.num_timesteps):
            # Input: embedding of the previous caption word.
            prev_word_emb = tf.nn.embedding_lookup(
                self.word_embedding['weights'], captions[:, t - 1])
            batch_embed = prev_word_emb + self.word_embedding['biases']

            tf.get_variable_scope().reuse_variables()
            output, state = self.lstm_cell(batch_embed, state)

            # Target: one-hot encoding of the current caption word.
            words = tf.reshape(captions[:, t], shape=[self.batch_size, 1])
            onehot_encoded = tf.one_hot(indices=words,
                                        depth=len(self.wtoidx),
                                        on_value=1,
                                        off_value=0,
                                        axis=-1)
            onehot_encoded = tf.reshape(
                onehot_encoded, shape=[self.batch_size, self.max_words])

            target_logit = (tf.matmul(output, self.target_word['weights'])
                            + self.target_word['biases'])
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=target_logit, labels=onehot_encoded)
            cross_entropy = cross_entropy * mask[:, t]

            loss = loss + tf.reduce_sum(cross_entropy)

        loss = loss / tf.reduce_sum(mask[:, 1:])

    return loss, self.inp_dict
def build_decode_graph(self):
    """Build the greedy decoding graph for a single image.

    The LSTM is primed with the embedded image features, then unrolled for
    ``self.num_timesteps`` steps, each time feeding back the embedding of the
    arg-max word.  The names of the per-step output tensors (without the
    ``":0"`` suffix) are written to ``model/Decoder/DecoderOutputs.txt``.

    Returns:
        Tuple ``(image_features, IDs)``: the input placeholder and the list
        of per-step predicted word index tensors.
    """
    image_features = tf.placeholder(
        tf.float32, [1, self.dim_imgft], name='Input_Features')
    image_emb = (tf.matmul(image_features, self.image_embedding['weights'])
                 + self.image_embedding['biases'])

    init_c = tf.zeros([1, self.lstm_cell.state_size[0]])
    init_h = tf.zeros([1, self.lstm_cell.state_size[1]])
    initial_state = (init_c, init_h)

    IDs = []
    with tf.variable_scope("LSTM"):
        output, state = self.lstm_cell(image_emb, initial_state)

        # Decoding starts from the embedding of the "<S>" token.
        start_emb = tf.nn.embedding_lookup(
            self.word_embedding['weights'], [self.wtoidx["<S>"]])
        pred_ID = start_emb + self.word_embedding['biases']

        for step in range(self.num_timesteps):
            tf.get_variable_scope().reuse_variables()
            output, state = self.lstm_cell(pred_ID, state)

            logits = (tf.matmul(output, self.target_word["weights"])
                      + self.target_word["biases"])
            predicted_next_idx = tf.argmax(logits, axis=1)

            # The embedding of the predicted word is the next step's input.
            pred_ID = tf.nn.embedding_lookup(
                self.word_embedding['weights'], predicted_next_idx)
            pred_ID = pred_ID + self.word_embedding['biases']

            predicted_next_idx = tf.cast(
                predicted_next_idx, tf.int32, name="word_"+str(step))
            IDs.append(predicted_next_idx)

    with open("model/Decoder/DecoderOutputs.txt", 'w') as out_file:
        for tensor in IDs:
            out_file.write(tensor.name.split(":0")[0] + "\n")

    return image_features, IDs
def sampler(self,images, y=None):
    """Build the inference-mode generator graph, reusing existing variables.

    Args:
        images: Input tensor for the unconditional (convolutional) variant.
        y: Conditioning tensor; only read when ``self.y_dim`` is truthy.

    Returns:
        ``tanh`` of the last convolution (unconditional variant) or
        ``sigmoid`` of the last deconvolution (conditional variant).
    """
    tf.get_variable_scope().reuse_variables()

    if self.y_dim:
        # NOTE(review): this branch reads `z` before any assignment and
        # refers to `self.bn0` / `self.bn2`, while the other branch uses
        # `self.g_bn*` -- it looks unfinished; confirm before relying on it.
        yb = tf.reshape(y, [None, 1, 1, self.y_dim])
        z = tf.concat(1, [z, y])

        h0 = tf.nn.relu(self.bn0(linear(z, self.gfc_dim, 'g_h0_lin')))
        h0 = tf.concat(1, [h0, y])

        h1 = tf.nn.relu(self.g_bn1(linear(z, self.gf_dim*2*7*7, 'g_h1_lin')))
        h1 = tf.reshape(h1, [None, 7, 7, self.gf_dim * 2])
        h1 = conv_cond_concat(h1, yb)

        h2 = tf.nn.relu(self.bn2(deconv2d(h1, self.gf_dim, name='g_h2')))
        h2 = conv_cond_concat(h2, yb)

        return tf.nn.sigmoid(deconv2d(h2, self.c_dim, name='g_h3'))

    # Unconditional variant: five stride-1 convolutions; batch norm runs in
    # inference mode (train=False).
    h1 = conv2d(images,self.gf_dim*2,d_h=1,d_w=1, name='g_h1')
    h1 = tf.nn.relu(self.g_bn1(h1,train=False))

    h2 = conv2d(h1,self.gf_dim*4,d_h=1,d_w=1, name='g_h2')
    h2 = tf.nn.relu(self.g_bn2(h2,train=False))

    h3 = conv2d(h2,self.gf_dim*4,d_h=1,d_w=1, name='g_h3')
    h3 = tf.nn.relu(self.g_bn3(h3,train=False))

    h4 = conv2d(h3,self.gf_dim*2,d_h=1,d_w=1, name='g_h4')
    h4 = tf.nn.relu(self.g_bn4(h4,train=False))

    h5 = conv2d(h4,3, d_h=1,d_w=1, name='g_h5')
    return tf.nn.tanh(h5)
def generator(self, gen_x_dim = 30, gen_y_dim = 30, reuse = False):
    """Build the per-point generator network.

    The latent vector is repeated for every one of the
    ``gen_x_dim * gen_y_dim`` points and combined with the per-point ``x``,
    ``y`` and ``r`` inputs in a first fully connected layer, followed by
    alternating tanh / relu layers and a sigmoid output layer.

    Args:
        gen_x_dim: Number of points along x.
        gen_y_dim: Number of points along y.
        reuse: When true, enable reuse on the current variable scope.

    Returns:
        Tensor reshaped to ``[batch_size, gen_y_dim, gen_x_dim, c_dim]``.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    n_network = self.net_size_g
    gen_n_points = gen_x_dim * gen_y_dim
    flat_rows = self.batch_size*gen_n_points
    prefix = self.model_name

    # Repeat (and scale) the latent vector once per point.
    z_scaled = tf.reshape(self.z, [self.batch_size, 1, self.z_dim]) * \
        tf.ones([gen_n_points, 1], dtype=tf.float32) * self.scale
    z_unroll = tf.reshape(z_scaled, [flat_rows, self.z_dim])
    x_unroll = tf.reshape(self.x, [flat_rows, 1])
    y_unroll = tf.reshape(self.y, [flat_rows, 1])
    r_unroll = tf.reshape(self.r, [flat_rows, 1])

    # First layer: only the z branch carries a bias.
    U = fully_connected(z_unroll, n_network, prefix+'_g_0_z') + \
        fully_connected(x_unroll, n_network, prefix+'_g_0_x', with_bias = False) + \
        fully_connected(y_unroll, n_network, prefix+'_g_0_y', with_bias = False) + \
        fully_connected(r_unroll, n_network, prefix+'_g_0_r', with_bias = False)

    H = tf.nn.relu(U)
    for layer in range(1, self.net_depth_g):
        layer_id = str(layer)
        H = tf.nn.tanh(fully_connected(H, n_network, prefix+'_g_tanh_'+layer_id))
        H = tf.nn.relu(fully_connected(H, n_network, prefix+'_g_relu_'+layer_id))

    output = tf.nn.sigmoid(
        fully_connected(H, self.c_dim, prefix+'_g_'+str(self.net_depth_g)))

    return tf.reshape(output, [self.batch_size, gen_y_dim, gen_x_dim, self.c_dim])
def update_target_network(sess, network_name_train, network_name_target):
    '''
    Copy every trainable variable of one network into the variable of the
    same relative name in another network, then verify the copy.

    Used to synchronise the train and target Q-networks.

    sess:                 session used to run the assign and check ops
    network_name_train:   scope name of the source network
    network_name_target:  scope name of the destination network

    Raises ValueError when a target variable differs from its source after
    the copy.
    '''
    tf.get_variable_scope().reuse_variables()

    source_vars = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope=network_name_train
    )

    copy_ops, check_ops = [], []
    for var in source_vars:
        full_name = var.name
        # The trailing two characters (output index suffix) are dropped to
        # obtain the name accepted by tf.get_variable.
        v_source = tf.get_variable(full_name[0:-2])

        # Strip the network prefix: keep everything from the first "/".
        relative_name = full_name[full_name.find("/"):]
        v_target = tf.get_variable(network_name_target + relative_name[0:-2])

        copy_ops.append(v_target.assign(v_source))
        check_ops.append(tf.equal(v_target, v_source))

    # Copy first, then evaluate the element-wise equality checks.
    sess.run(copy_ops)
    check_results = sess.run(check_ops)

    if not all(np.all(result) for result in check_results):
        raise ValueError("Verification of tf.equal(var_train, var_target) failed.")
def _build_graph(self, inputs, is_training):
    """Build the Double-DQN training graph and store the cost in ``self.cost``.

    Args:
        inputs: Tuple ``(state, action, reward, next_state, isOver)``.
        is_training: Forwarded to ``_get_DQN_prediction`` for the online
            network; the target network is always built with ``False``.
    """
    state, action, reward, next_state, isOver = inputs

    # Q-value of the action actually taken (shape N,).
    self.predict_value = self._get_DQN_prediction(state, is_training)
    action_onehot = tf.one_hot(action, NUM_ACTIONS, 1.0, 0.0)
    pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)

    max_pred_reward = tf.reduce_mean(
        tf.reduce_max(self.predict_value, 1), name='predict_reward')
    add_moving_summary(max_pred_reward)

    # Target network evaluated on the next state (shape N x A).
    with tf.variable_scope('target'):
        targetQ_predict_value = self._get_DQN_prediction(next_state, False)

    # Double-DQN: the online network (variables reused) picks the action,
    # the target network supplies its value.
    tf.get_variable_scope().reuse_variables()
    next_predict_value = self._get_DQN_prediction(next_state, is_training)
    self.greedy_choice = tf.argmax(next_predict_value, 1)
    predict_onehot = tf.one_hot(self.greedy_choice, NUM_ACTIONS, 1.0, 0.0)
    best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

    not_over = 1.0 - tf.cast(isOver, tf.float32)
    target = reward + not_over * GAMMA * tf.stop_gradient(best_v)

    # Robust error: squared below an absolute error of 1, absolute above.
    td_error = target - pred_action_value
    sqrcost = tf.square(td_error)
    abscost = tf.abs(td_error)
    cost = tf.select(abscost < 1, sqrcost, abscost)

    # Monitor all W.
    summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                               ('fc.*/W', ['histogram', 'rms'])])
    self.cost = tf.reduce_mean(cost, name='cost')
def inference(input, batch_size, num_segments, lstm_keep_prob=0.5, conv_keep_prob=1.0,
              train_conv123=False, train_conv45=False, train_fc67=False):
    """Run VGG16 on every segment, feed the fc6 features to an LSTM, classify.

    Args:
        input: Tensor indexed as ``input[time_step, :, :, :, :]``; per the
            original note its size is
            ``[num_segments, batch_size, 224, 224, num_length*3/2]``.
        batch_size: Batch size used for the LSTM zero state.
        num_segments: Number of segments (time steps) to unroll.
        lstm_keep_prob: Input and output keep probability of the LSTM dropout.
        conv_keep_prob: Keep probability forwarded to ``vgg16.inference``.
        train_conv123: Forwarded to ``vgg16.inference``.
        train_conv45: Forwarded to ``vgg16.inference``.
        train_fc67: Forwarded to ``vgg16.inference``.

    Returns:
        The output of the final ``cls`` fully connected layer (101 units).
    """
    step_features = []
    with tf.variable_scope("conv"):
        for segment in range(num_segments):
            if segment > 0:
                # Share the convolutional weights across segments.
                tf.get_variable_scope().reuse_variables()
            vgg16.inference(input[segment, :, :, :, :], conv_keep_prob,
                            train_conv123, train_conv45, train_fc67, False)
            # NOTE(review): both tensors are fetched by a fixed name on every
            # step -- confirm this yields the current segment's activations
            # and not those of the first segment.
            tf.get_default_graph().get_tensor_by_name("conv/fc7/fc7:0")
            fc6_features = tf.get_default_graph().get_tensor_by_name("conv/fc6/fc6:0")
            step_features.append(fc6_features)

    with tf.variable_scope("lstm"):
        hidden_size = 512
        cell = tf.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0,
                                         state_is_tuple=True),
            input_keep_prob=lstm_keep_prob,
            output_keep_prob=lstm_keep_prob)
        state = cell.zero_state(batch_size, tf.float32)
        outputs = []
        for segment, features in enumerate(step_features):
            if segment > 0:
                tf.get_variable_scope().reuse_variables()
            cell_output, state = cell(features, state)
            outputs.append(cell_output)

    # Expose every variable whose name starts with "lstm" via "params".
    for variable in tf.all_variables():
        if variable.name.startswith("lstm"):
            tf.add_to_collection("params", variable)

    stacked_outputs = tf.concat(0, outputs, 'concat')
    return layers.fc(stacked_outputs, 101, relu=False, name='cls')
def __init__(self, is_training, config, input_):
    """Build a multi-layer LSTM language-model graph.

    In training mode the batch dimensions and token ids come from ``input_``;
    otherwise the dimensions come from ``config`` and the token ids from the
    ``self.question`` placeholder. The training-only part (sampled-softmax
    loss, clipped SGD update, learning-rate assignment) is skipped when
    ``is_training`` is false.

    Args:
        is_training: Whether to build the loss and the training ops.
        config: Object providing ``hidden_size``, ``vocab_size``,
            ``keep_prob``, ``num_layers``, ``max_grad_norm`` and, for
            inference, ``batch_size`` / ``num_steps``.
        input_: Object providing ``batch_size``, ``num_steps``,
            ``input_data`` and ``targets`` (read in training mode).
    """
    self._input = input_

    dims_source = input_ if is_training else config
    batch_size = dims_source.batch_size
    num_steps = dims_source.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    self.question = tf.placeholder(tf.int32, [1, None])

    use_dropout = is_training and config.keep_prob < 1

    def make_lstm():
        return tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0,
                                            state_is_tuple=True)

    def make_layer():
        # Each layer gets its own cell, wrapped in dropout when training.
        base_cell = make_lstm()
        if use_dropout:
            return tf.contrib.rnn.DropoutWrapper(
                base_cell, output_keep_prob=config.keep_prob)
        return base_cell

    layers = [make_layer() for _ in range(config.num_layers)]
    cell = tf.contrib.rnn.MultiRNNCell(layers, state_is_tuple=True)
    self._initial_state = cell.zero_state(batch_size, data_type())

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size],
                                    dtype=data_type())
        token_ids = input_.input_data if is_training else self.question
        inputs = tf.nn.embedding_lookup(embedding, token_ids)

    if use_dropout:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Manually unrolled RNN; the cell variables are shared after step 0.
    step_outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            step_output, state = cell(inputs[:, time_step, :], state)
            step_outputs.append(step_output)

    output = tf.reshape(tf.concat(axis=1, values=step_outputs), [-1, size])
    softmax_w = tf.get_variable("softmax_w", [size, vocab_size],
                                dtype=data_type())
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
    self._logits = tf.nn.softmax(tf.matmul(output, softmax_w) + softmax_b)
    self._final_state = state

    if not is_training:
        return

    # Training only: sampled softmax over the transposed projection matrix.
    num_samples = 10
    labels = tf.reshape(input_.targets, [-1, 1])
    transposed_w = tf.transpose(softmax_w)
    loss = tf.nn.sampled_softmax_loss(transposed_w, softmax_b, labels, output,
                                      num_samples, vocab_size)
    self._cost = cost = tf.reduce_sum(loss) / batch_size

    self._lr = tf.Variable(0.0, trainable=False)
    trainables = tf.trainable_variables()
    clipped_grads, _ = tf.clip_by_global_norm(tf.gradients(cost, trainables),
                                              config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(
        zip(clipped_grads, trainables),
        global_step=tf.contrib.framework.get_or_create_global_step())

    # The learning rate is changed by feeding `_new_lr` into `_lr_update`.
    self._new_lr = tf.placeholder(tf.float32, shape=[],
                                  name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
state] = sess.run([self.model_output, self.final_state], feed_dict=feed_dict) sample = np.argmax(model_output[0]) if sample == 0: break word = words[sample] out_sentence = out_sentence + ' ' + word return (out_sentence) # 定义LSTM模型 lstm_model = LSTM_Model(embedding_size, rnn_size, batch_size, learning_rate, training_seq_len, vocab_size) # 重新使用之前的variable scope, 也就是变量的集合, 用于测试 with tf.variable_scope(tf.get_variable_scope(), reuse=True): test_lstm_model = LSTM_Model(embedding_size, rnn_size, batch_size, learning_rate, training_seq_len, vocab_size, infer_sample=True) # 保存参数 saver = tf.train.Saver(tf.global_variables()) # 制作minibatch num_batches = int(len(s_text_ix) / (batch_size * training_seq_len)) + 1 # 按照batch的数量分割 batches = np.array_split(s_text_ix, num_batches)
def __init__(self, hparams, mode, iterator, source_vocab_table, target_vocab_table,
             reverse_target_vocab_table=None, scope=None, extra_args=None):
    """Create the model.

    Args:
      hparams: Hyperparameter configurations.
      mode: TRAIN | EVAL | INFER
      iterator: Dataset Iterator that feeds data.
      source_vocab_table: Lookup table mapping source words to ids.
      target_vocab_table: Lookup table mapping target words to ids.
      reverse_target_vocab_table: Lookup table mapping ids to target words. Only
        required in INFER mode. Defaults to None.
      scope: scope of the model.
      extra_args: model_helper.ExtraArgs, for passing customizable functions.

    Raises:
      AssertionError: if `iterator` is not an `iterator_utils.BatchedInput` or
        the number of encoder / decoder layers is falsy.
      ValueError: in TRAIN mode, if `hparams.optimizer` is neither "sgd" nor
        "adam".
    """
    assert isinstance(iterator, iterator_utils.BatchedInput)
    self.iterator = iterator
    self.mode = mode
    self.src_vocab_table = source_vocab_table
    self.tgt_vocab_table = target_vocab_table

    # Source / target vocabulary sizes.
    self.src_vocab_size = hparams.src_vocab_size
    self.tgt_vocab_size = hparams.tgt_vocab_size
    self.num_gpus = hparams.num_gpus
    self.time_major = hparams.time_major

    # extra_args: to make it flexible for adding external customizable code
    self.single_cell_fn = None
    if extra_args:
        # Custom factory for a single RNN cell.
        self.single_cell_fn = extra_args.single_cell_fn

    # Set num layers
    self.num_encoder_layers = hparams.num_encoder_layers
    self.num_decoder_layers = hparams.num_decoder_layers
    assert self.num_encoder_layers
    assert self.num_decoder_layers

    # Set num residual layers
    if hasattr(hparams, "num_residual_layers"):  # compatible common_test_utils
        self.num_encoder_residual_layers = hparams.num_residual_layers
        self.num_decoder_residual_layers = hparams.num_residual_layers
    else:
        self.num_encoder_residual_layers = hparams.num_encoder_residual_layers
        self.num_decoder_residual_layers = hparams.num_decoder_residual_layers

    # Initializer (built from init op, random seed and init weight); it becomes
    # the default initializer of the current variable scope.
    initializer = model_helper.get_initializer(
        hparams.init_op, hparams.random_seed, hparams.init_weight)
    tf.get_variable_scope().set_initializer(initializer)

    # Embeddings
    self.init_embeddings(hparams, scope)

    # Batch size, taken from the number of source sequence lengths.
    self.batch_size = tf.size(self.iterator.source_sequence_length)

    # Projection
    with tf.variable_scope(scope or "build_network"):
        with tf.variable_scope("decoder/output_projection"):
            self.output_layer = layers_core.Dense(
                hparams.tgt_vocab_size, use_bias=False, name="output_projection")

    ## Train graph
    res = self.build_graph(hparams, scope=scope)

    if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
        self.train_loss = res[1]
        # Total number of source plus target words in the batch.
        self.word_count = tf.reduce_sum(
            self.iterator.source_sequence_length) + tf.reduce_sum(
                self.iterator.target_sequence_length)
    elif self.mode == tf.contrib.learn.ModeKeys.EVAL:
        self.eval_loss = res[1]
    elif self.mode == tf.contrib.learn.ModeKeys.INFER:
        self.infer_logits, _, self.final_context_state, self.sample_id = res
        self.sample_words = reverse_target_vocab_table.lookup(
            tf.to_int64(self.sample_id))

    if self.mode != tf.contrib.learn.ModeKeys.INFER:
        ## Count the number of predicted words for compute ppl.
        self.predict_count = tf.reduce_sum(
            self.iterator.target_sequence_length)

    self.global_step = tf.Variable(0, trainable=False)
    params = tf.trainable_variables()

    # Gradients and SGD update operation for training the model.
    # Arrange for the embedding vars to appear at the beginning.
    if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
        self.learning_rate = tf.constant(hparams.learning_rate)
        # warm-up
        self.learning_rate = self._get_learning_rate_warmup(hparams)
        # decay
        self.learning_rate = self._get_learning_rate_decay(hparams)

        # Optimizer
        if hparams.optimizer == "sgd":
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            # NOTE(review): this registers an "lr" scalar in addition to the
            # one merged into train_summary below.
            tf.summary.scalar("lr", self.learning_rate)
        elif hparams.optimizer == "adam":
            opt = tf.train.AdamOptimizer(self.learning_rate)
        else:
            # Fail with a clear message instead of leaving `opt` unbound.
            raise ValueError(
                "Unknown optimizer type %s" % hparams.optimizer)

        # Gradients
        gradients = tf.gradients(
            self.train_loss,
            params,
            colocate_gradients_with_ops=hparams.colocate_gradients_with_ops)

        clipped_grads, grad_norm_summary, grad_norm = model_helper.gradient_clip(
            gradients, max_gradient_norm=hparams.max_gradient_norm)
        self.grad_norm = grad_norm

        self.update = opt.apply_gradients(
            zip(clipped_grads, params), global_step=self.global_step)

        # Summary
        self.train_summary = tf.summary.merge([
            tf.summary.scalar("lr", self.learning_rate),
            tf.summary.scalar("train_loss", self.train_loss),
        ] + grad_norm_summary)

    if self.mode == tf.contrib.learn.ModeKeys.INFER:
        self.infer_summary = self._get_infer_summary(hparams)

    # Saver
    self.saver = tf.train.Saver(
        tf.global_variables(), max_to_keep=hparams.num_keep_ckpts)

    # Print trainable variables
    utils.print_out("# Trainable variables")
    for param in params:
        utils.print_out(" %s, %s, %s" % (param.name, str(param.get_shape()),
                                         param.op.device))
def train():
    """Build a multi-tower CIFAR-10 training graph and run the training loop.

    One tower is built per GPU (``FLAGS.num_gpus``); the per-tower gradients
    are combined with ``average_gradients`` and applied by a single momentum
    optimizer. The loop logs every ``FLAGS.log_step`` steps, records the mean
    loss every ``FLAGS.eval_step`` epochs, saves one checkpoint after the
    last step and finally prints the average batch time and the epoch log.

    Returns early, after printing a message, when the number of entries in
    ``FLAGS.device_ids`` does not equal ``FLAGS.num_gpus``.

    Raises:
        AssertionError: If the loss becomes NaN.
    """
    global parameters
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=FLAGS.log_device_placement)
    config.intra_op_parallelism_threads = 1
    config.inter_op_parallelism_threads = 0

    with tf.Graph().as_default(), tf.device("/" + FLAGS.local_ps_device + ":0"):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        device_ids = FLAGS.device_ids
        if not device_ids:
            device_ids = [str(i) for i in range(FLAGS.num_gpus)]
        else:
            device_ids = device_ids.split(',')
        print('device_ids: ', device_ids)
        # One device id is consumed per tower below, so both too many and too
        # few ids are rejected (too few used to fail later with an IndexError).
        if len(device_ids) != FLAGS.num_gpus:
            print('The device_ids should have the same number of GPUs with num_gpus')
            return

        lr = 0.001
        optimizer = tf.train.MomentumOptimizer(lr, 0.9)

        def assign_to_device(device, ps_device=FLAGS.local_ps_device):
            """Return a device function that spreads variables over PS devices.

            Variables go to the parameter device currently holding the fewest
            elements; every other op goes to ``device``.
            """
            worker_device = device
            ps_sizes = [0]
            # `.lower` must be called: comparing the bound method itself to a
            # string is always False.
            if FLAGS.local_ps_device.lower() == 'gpu':
                ps_sizes = [0] * FLAGS.num_gpus

            def _assign(op):
                if op.device:
                    return op.device
                if op.type not in ['Variable', 'VariableV2']:
                    return worker_device
                device_index, _ = min(enumerate(ps_sizes),
                                      key=operator.itemgetter(1))
                device_name = '/' + FLAGS.local_ps_device + ':' + str(device_index)
                var_size = op.outputs[0].get_shape().num_elements()
                ps_sizes[device_index] += var_size
                return device_name
            return _assign

        images = None
        labels = None
        initalizer = None
        if FLAGS.use_dataset:
            with tf.device('/CPU:0'):
                iterator, initalizer = cifar10_input.dataSet(FLAGS.data_dir,
                                                             FLAGS.batch_size)
                images, labels = iterator.get_next()

        tower_grads = []
        average_loss_tensor = []
        reuse_variables = False
        for i in xrange(FLAGS.num_gpus):
            print('what is i: ', i)
            with tf.device('/gpu:%s' % device_ids[i]):
                with tf.name_scope('%s_%s' % ('TOWER', device_ids[i])) as n_scope:
                    _init_global_variables()
                    with tf.device('/cpu:0'):
                        if not FLAGS.use_dataset:
                            images, labels = cifar10_input.inputs(
                                False, FLAGS.data_dir, FLAGS.batch_size)
                    # Towers after the first reuse the model variables.
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=reuse_variables):
                        logits = inference(images)
                    loss = loss_function(
                        logits, tf.contrib.layers.one_hot_encoding(labels, 10))
                    reuse_variables = True

                    average_loss_tensor.append(loss)
                    grads = optimizer.compute_gradients(loss)
                    tower_grads.append(grads)

        grads = average_gradients(tower_grads)
        apply_gradient_op = optimizer.apply_gradients(grads,
                                                      global_step=global_step)
        train_op = apply_gradient_op
        average_op = tf.reduce_mean(average_loss_tensor)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        init = tf.global_variables_initializer()
        sess = tf.Session(config=config)
        sess.run(init)

        coord = None
        threads = None
        if FLAGS.use_dataset:
            sess.run(initalizer)
        else:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        real_batch_size = FLAGS.batch_size * FLAGS.num_gpus
        num_batches_per_epoch = int((EPOCH_SIZE + real_batch_size - 1) / real_batch_size)
        iterations = FLAGS.epochs * num_batches_per_epoch
        average_batch_time = 0.0
        epochs_info = []
        step = 0
        average_loss = 0.0
        for step in xrange(iterations):
            start_time = time.time()
            _, loss_v = sess.run([train_op, average_op])
            duration = time.time() - start_time
            average_batch_time += float(duration)
            assert not np.isnan(loss_v), 'Model diverged with loss = NaN'
            average_loss += loss_v

            if step % FLAGS.log_step == 0:
                examples_per_sec = real_batch_size / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_v,
                                    examples_per_sec, sec_per_batch))

            if step > 0 and step % (FLAGS.eval_step * num_batches_per_epoch) == 0:
                average_loss /= num_batches_per_epoch * FLAGS.eval_step
                print('epoch: %d, loss: %.2f' % (step / num_batches_per_epoch, average_loss))
                epochs_info.append('%d:_:%s' % (step / (FLAGS.eval_step * num_batches_per_epoch), average_loss))
                average_loss = 0.0

        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)

        if not FLAGS.use_dataset:
            coord.request_stop()
            coord.join(threads)

        average_batch_time /= iterations
        # Single-argument call form: same text as the former print statement
        # (including the doubled space) and valid on Python 2 and 3.
        print('average_batch_time:  %s' % average_batch_time)
        print('epoch_info: %s' % ','.join(epochs_info))
# Unrolled LSTM forward pass that accumulates a per-step loss.
lstm_hidden_size = 100
lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_hidden_size)

# Start from an all-zero LSTM state; one batch of samples is used at a time.
state = lstm.zero_state(batch_size=80, dtype=tf.float32)

# Accumulated loss over all time steps.
loss = 0.0

# Maximum sequence length.
num_step = 40

for step_index in range(num_step):
    # The LSTM variables are declared at the first time step; every later
    # step has to reuse those variables.
    if step_index > 0:
        tf.get_variable_scope().reuse_variables()

    # Process one time step: feed the current input and the previous state
    # to the LSTM to obtain its output and the updated state.
    lstm_output, state = lstm(current_input, state)

    # Pass the LSTM output through a fully connected layer.
    final_output = fully_connected(lstm_output)

    # Add the loss of the current time step.
    loss = loss + calc_loss(final_output, expected_output)
def model_fn(features, labels, mode, params):
    """Estimator model function: encode a key sequence, decode a sentence.

    The 'Discriminator' scope embeds and encodes ``features['key']`` with an
    LSTM; the normalised final state seeds the 'Generator' LSTM, which is
    trained to predict each next token of ``labels['sentence']``.

    Args:
        features: Dict with ``'key'`` (token ids) and ``'len'`` (lengths).
        labels: Dict with ``'sentence'`` (token ids) and ``'len'`` (lengths).
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Object providing ``mem_dim``, ``keep_prob``, ``lr`` and
            ``multi_gpu``.

    Returns:
        A ``tf.estimator.EstimatorSpec`` with loss, train op, logging hooks
        and an accuracy metric.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    def _embedding_table():
        return tf.get_variable(
            name='embedding',
            shape=[FLAGS.vocab_size, FLAGS.emb_dim],
            initializer=tf.random_uniform_initializer(-0.08, 0.08))

    def _lstm_cell():
        # Dropout is applied only while training.
        base_cell = tf.nn.rnn_cell.BasicLSTMCell(params.mem_dim)
        if not is_training:
            return base_cell
        return tf.nn.rnn_cell.DropoutWrapper(base_cell, params.keep_prob,
                                             params.keep_prob)

    with tf.variable_scope('Discriminator'):
        embedding = _embedding_table()
        key, lk = features['key'], features['len']
        key = tf.nn.embedding_lookup(embedding, key)
        sentence, ls = labels['sentence'], labels['len']
        # Next-token prediction: inputs drop the last token, targets the first.
        targets = sentence[:, 1:]
        sentence = sentence[:, :-1]
        ls -= 1
        sentence = tf.nn.embedding_lookup(embedding, sentence)

        encoder_cell = _lstm_cell()
        out, initial_state = tf.nn.dynamic_rnn(encoder_cell, key, lk,
                                               dtype=tf.float32)
        feat = tf.nn.l2_normalize(initial_state[1], axis=1)
        batch_size = tf.shape(feat)[0]

    with tf.variable_scope('Generator'):
        embedding = _embedding_table()
        # The output projection is the transposed embedding table.
        softmax_w = tf.matrix_transpose(embedding)
        softmax_b = tf.get_variable('softmax_b', [FLAGS.vocab_size])

        decoder_cell = _lstm_cell()
        zero_state = decoder_cell.zero_state(batch_size, tf.float32)
        # Feed the encoded feature once to obtain the decoder start state.
        _, state = decoder_cell(feat, zero_state)
        tf.get_variable_scope().reuse_variables()
        out, state = tf.nn.dynamic_rnn(decoder_cell, sentence, ls, state)
        out = tf.reshape(out, [-1, FLAGS.mem_dim])
        logits = tf.nn.bias_add(tf.matmul(out, softmax_w), softmax_b)
        logits = tf.reshape(logits, [batch_size, -1, FLAGS.vocab_size])

    predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

    # Only positions inside each sentence contribute to the loss.
    mask = tf.sequence_mask(ls, tf.shape(sentence)[1])
    targets = tf.boolean_mask(targets, mask)
    logits = tf.boolean_mask(logits, mask)
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
                                                          logits=logits)
    loss = tf.reduce_mean(loss)

    opt = tf.train.AdamOptimizer(params.lr)
    if params.multi_gpu:
        opt = tf.contrib.estimator.TowerOptimizer(opt)
    grads = opt.compute_gradients(loss)
    grads = transform_grads_fn(grads)
    train_op = opt.apply_gradients(grads,
                                   global_step=tf.train.get_global_step())

    train_hooks = None
    if not FLAGS.multi_gpu or opt._graph_state().is_the_last_tower:
        # Vocabulary: first field of every line, with '<unk>' appended last.
        with open('data/word_counts.txt', 'r') as f:
            vocab = [line.split()[0] for line in list(f)]
        end_id = vocab.index('</S>')
        vocab.append('<unk>')
        vocab = tf.convert_to_tensor(vocab)

        # Log the first example of the batch as words.
        noise = features['key'][0]
        noise_mask = tf.sequence_mask(features['len'][0], tf.shape(noise)[0])
        noise = tf.gather(vocab, tf.boolean_mask(noise, noise_mask))
        sentence = tf.gather(vocab,
                             crop_sentence(labels['sentence'][0], end_id))
        pred = tf.gather(vocab, crop_sentence(predictions[0], end_id))
        train_hooks = [
            tf.train.LoggingTensorHook(
                {'sentence': sentence, 'noise': noise, 'pred': pred},
                every_n_iter=100)
        ]
        for variable in tf.trainable_variables():
            tf.summary.histogram(variable.op.name, variable)

    predictions = tf.boolean_mask(predictions, mask)
    metrics = {'acc': tf.metrics.accuracy(targets, predictions)}
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op,
                                      training_hooks=train_hooks,
                                      eval_metric_ops=metrics)
def convert_caffemodel_to_ckpt():
    """Load Caffe VGG16 weights into the SSD-VGG16 graph and save a checkpoint.

    Reads the weights from ``args.caffe_weights_path``, builds the network
    under ``args.model_scope``, assigns the weight and bias of every listed
    layer and writes the result to ``args.ckpt_path``.
    """
    caffe_weights = joblib.load(args.caffe_weights_path)

    # Create the network.
    vgg16 = model_cnn.SsdVgg16()
    model_scope = args.model_scope
    vgg16.build_model(tf.placeholder(tf.float32, shape=[32, 300, 300, 3]),
                      scope=model_scope)

    # (caffe layer name, name of the op scope holding its variables)
    layer_table = [
        ('conv1_1', 'conv2d'), ('conv1_2', 'conv2d'),
        ('conv2_1', 'conv2d'), ('conv2_2', 'conv2d'),
        ('conv3_1', 'conv2d'), ('conv3_2', 'conv2d'), ('conv3_3', 'conv2d'),
        ('conv4_1', 'conv2d'), ('conv4_2', 'conv2d'), ('conv4_3', 'conv2d'),
        ('conv5_1', 'conv2d'), ('conv5_2', 'conv2d'), ('conv5_3', 'conv2d'),
        ('fc6', 'atrous_conv2d'),
        ('fc7', 'conv2d'),
    ]

    def assign_weight(target_name, src_name):
        # Caffe stores [n_out, n_in, h, w]; the graph wants [h, w, n_in, n_out].
        kernel = np.transpose(caffe_weights[src_name][0], (2, 3, 1, 0))
        variable = tf.get_variable(model_scope + '/' + target_name)
        return tf.assign(variable, kernel)

    def assign_bias(target_name, src_name):
        bias = caffe_weights[src_name][1]
        variable = tf.get_variable(model_scope + '/' + target_name)
        return tf.assign(variable, bias)

    # Load the caffemodel weights.
    with tf.Session() as session:
        # Reuse mode makes tf.get_variable return the variables built above.
        tf.get_variable_scope().reuse_variables()

        assigns = []
        for layer, op_scope in layer_table:
            prefix = layer + '/' + op_scope
            assigns.append(assign_weight(prefix + '/weight', layer))
            assigns.append(assign_bias(prefix + '/bias', layer))

        # A single no-op that depends on every assignment.
        with tf.control_dependencies(assigns):
            load_op = tf.no_op(name='load_op')
        session.run(load_op)

        # Save the checkpoint.
        saver = tf.train.Saver()
        saver.save(session, args.ckpt_path)
def __init__(self, batch_size, num_unroll_steps, embeddings, embedding_size, rnn_size,
             num_rnn_layers, max_grad_norm, l2_reg_lambda=0.0, adjust_weight=False,
             label_weight=[], is_training=True):
    """Build a shared-weight LSTM that ranks a candidate above a negative.

    Three question streams (original, candidate, negative) are embedded and
    run through the same LSTM. Each stream is max-pooled over time and passed
    through tanh; the loss is a margin loss (margin 0.1) on the cosine
    similarities original/candidate versus original/negative.

    Args:
        batch_size: Number of questions per batch.
        num_unroll_steps: Number of time steps per question.
        embeddings: Initial value of the embedding matrix.
        embedding_size: Stored on the instance.
        rnn_size: Number of LSTM units.
        num_rnn_layers: Number of stacked LSTM layers.
        max_grad_norm: Stored on the instance.
        l2_reg_lambda: Weight of the L2 term (the term itself is constant 0).
        adjust_weight: Stored on the instance.
        label_weight: Stored on the instance.
        is_training: Stored on the instance.
    """
    # Plain configuration attributes.
    self.batch_size = batch_size
    self.embeddings = embeddings
    self.embedding_size = embedding_size
    self.adjust_weight = adjust_weight
    self.label_weight = label_weight
    self.rnn_size = rnn_size
    self.num_rnn_layers = num_rnn_layers
    self.num_unroll_steps = num_unroll_steps
    self.max_grad_norm = max_grad_norm
    self.l2_reg_lambda = l2_reg_lambda
    self.is_training = is_training

    # Placeholders and the feedable learning rate.
    self.keep_prob = tf.placeholder(tf.float32, name="keep_drop")
    self.lr = tf.Variable(0.0, trainable=False)
    self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)

    quest_shape = [None, self.num_unroll_steps]
    self.ori_input_quests = tf.placeholder(tf.int32, shape=quest_shape)
    self.cand_input_quests = tf.placeholder(tf.int32, shape=quest_shape)
    self.neg_input_quests = tf.placeholder(tf.int32, shape=quest_shape)

    # LSTM network: a dropout-wrapped cell repeated for every layer.
    base_cell = tf.nn.rnn_cell.BasicLSTMCell(self.rnn_size, forget_bias=0.0,
                                             state_is_tuple=True)
    dropout_cell = tf.nn.rnn_cell.DropoutWrapper(
        base_cell, output_keep_prob=self.keep_prob)
    self.cell = tf.nn.rnn_cell.MultiRNNCell(
        [dropout_cell] * self.num_rnn_layers, state_is_tuple=True)
    self._initial_state = self.cell.zero_state(self.batch_size, dtype=tf.float32)

    # Embedding layer.
    with tf.device("/cpu:0"), tf.name_scope("embedding_layer"):
        W = tf.Variable(tf.to_float(self.embeddings), trainable=True, name="W")
        self.ori_quests = tf.nn.embedding_lookup(W, self.ori_input_quests)
        self.cand_quests = tf.nn.embedding_lookup(W, self.cand_input_quests)
        self.neg_quests = tf.nn.embedding_lookup(W, self.neg_input_quests)

    # Unroll the three streams through the same cell, in the fixed order
    # original -> candidate -> negative at every time step.
    ori_out_put, cand_out_put, neg_out_put = [], [], []
    streams = [
        (self.ori_quests, ori_out_put),
        (self.cand_quests, cand_out_put),
        (self.neg_quests, neg_out_put),
    ]
    states = [self._initial_state] * len(streams)
    with tf.variable_scope("LSTM_layer_ori"):
        for time_step in range(self.num_unroll_steps):
            for index, (quests, collected) in enumerate(streams):
                # Only the very first cell call may create the variables.
                if time_step > 0 or index > 0:
                    tf.get_variable_scope().reuse_variables()
                cell_output, states[index] = self.cell(
                    quests[:, time_step, :], states[index])
                collected.append(cell_output)

    with tf.name_scope("regulation_layer"):
        # Move the step axis last so every sentence can be pooled over time.
        ori_out_put = tf.transpose(ori_out_put, perm=[1, 2, 0])
        cand_out_put = tf.transpose(cand_out_put, perm=[1, 2, 0])
        neg_out_put = tf.transpose(neg_out_put, perm=[1, 2, 0])

        ori_batch_output, cand_batch_output, neg_batch_output = [], [], []
        for sent_idx in range(self.batch_size):
            ori_batch_output.append(tf.reduce_max(ori_out_put[sent_idx], 1))
            cand_batch_output.append(tf.reduce_max(cand_out_put[sent_idx], 1))
            neg_batch_output.append(tf.reduce_max(neg_out_put[sent_idx], 1))

        self.out_ori = tf.nn.tanh(ori_batch_output, name="tanh_ori")
        self.out_cand = tf.nn.tanh(cand_batch_output, name="tanh_cand")
        self.out_neg = tf.nn.tanh(neg_batch_output, name="tanh_neg")

    # Cosine similarity: vector norms first, then dot products.
    self.ori_seq_len = tf.sqrt(
        tf.reduce_sum(tf.mul(self.out_ori, self.out_ori), 1), name="sqrt_ori")
    self.cand_seq_len = tf.sqrt(
        tf.reduce_sum(tf.mul(self.out_cand, self.out_cand), 1), name="sqrt_cand")
    self.neg_seq_len = tf.sqrt(
        tf.reduce_sum(tf.mul(self.out_neg, self.out_neg), 1), name="sqrt_neg")

    self.ori_cand_dist = tf.reduce_sum(
        tf.mul(self.out_ori, self.out_cand), 1, name="ori_cand")
    self.ori_neg_dist = tf.reduce_sum(
        tf.mul(self.out_ori, self.out_neg), 1, name="ori_neg")

    # Scores.
    with tf.name_scope("score"):
        self.ori_cand_score = tf.div(
            self.ori_cand_dist,
            tf.mul(self.ori_seq_len, self.cand_seq_len),
            name="score_positive")
        self.ori_neg_score = tf.div(
            self.ori_neg_dist,
            tf.mul(self.ori_seq_len, self.neg_seq_len),
            name="score_negative")

    # Target function: max(0, margin - (positive score - negative score)).
    zero = tf.fill(tf.shape(self.ori_cand_score), 0.0)
    margin = tf.fill(tf.shape(self.ori_cand_score), 0.1)
    l2_loss = tf.constant(0.0)
    with tf.name_scope("loss"):
        score_gap = tf.sub(self.ori_cand_score, self.ori_neg_score)
        losses = tf.maximum(zero, tf.sub(margin, score_gap))
        self.loss = tf.reduce_sum(losses) + self.l2_reg_lambda * l2_loss

    # Accuracy: fraction of samples whose margin loss is exactly zero.
    with tf.name_scope("acc"):
        correct = tf.equal(zero, losses)
        self.acc = tf.reduce_mean(tf.cast(correct, "float"), name="acc")
def lstm_cell(i, s):
    """Create an LSTM cell with ``nstates`` units and log its creation.

    Args:
        i: Cell index, used only in the log message.
        s: Label printed after the index in the log message.

    Returns:
        A ``tf.contrib.rnn.LSTMCell`` whose ``reuse`` flag is taken from the
        current variable scope.
    """
    message = 'creating cell %i in %s' % (i, s)
    print(message)
    cell = tf.contrib.rnn.LSTMCell(nstates,
                                   reuse=tf.get_variable_scope().reuse)
    return cell
def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=None,
                gamma=1.0, double_q=True, scope="deepq", reuse=None, param_noise=False,
                param_noise_filter_func=None):
    """
    Creates the train function:

    :param make_obs_ph: (function (str): TensorFlow Tensor) called with a name, returns the
        observation placeholder for that name
    :param q_func: (function (TensorFlow Tensor, int, str, bool): TensorFlow Tensor) the Q model,
        called as q_func(observation_in, num_actions, scope=..., reuse=...); it returns a tensor
        of shape (batch_size, num_actions) with the value of every action
    :param num_actions: (int) number of actions
    :param optimizer: (tf.train.Optimizer) optimizer used for the Q-learning objective
    :param grad_norm_clipping: (float) clip every gradient to this norm; None disables clipping
    :param gamma: (float) discount rate
    :param double_q: (bool) if true, use Double Q Learning (https://arxiv.org/abs/1509.06461):
        the online network selects the next action, the target network evaluates it
    :param scope: (str or VariableScope) scope passed to variable_scope
    :param reuse: (bool) whether or not the variables of that scope should be reused
    :param param_noise: (bool) whether or not to act with parameter space noise
        (https://arxiv.org/abs/1706.01905)
    :param param_noise_filter_func: (function (TensorFlow Tensor): bool) decides whether a
        variable is perturbed; only forwarded when param_noise is True
    :return: (tuple)

        act: the action function built by build_act / build_act_with_param_noise
        train: function taking (obs_t, action, reward, obs_tp1, done, weight); it runs one
            optimisation step and returns the TD error
        update_target: function copying the Q network parameters to the target Q network
        debug: ({str: function}) debugging helpers; contains 'q_values'
    """
    if param_noise:
        act_f = build_act_with_param_noise(make_obs_ph, q_func, num_actions, scope=scope,
                                           reuse=reuse,
                                           param_noise_filter_func=param_noise_filter_func)
    else:
        act_f = build_act(make_obs_ph, q_func, num_actions, scope=scope, reuse=reuse)

    with tf.variable_scope(scope, reuse=reuse):
        # Placeholders.
        obs_t_input = make_obs_ph("obs_t")
        act_t_ph = tf.placeholder(tf.int32, [None], name="action")
        rew_t_ph = tf.placeholder(tf.float32, [None], name="reward")
        obs_tp1_input = make_obs_ph("obs_tp1")
        done_mask_ph = tf.placeholder(tf.float32, [None], name="done")
        importance_weights_ph = tf.placeholder(tf.float32, [None], name="weight")

        # Online Q network; its parameters are reused from the act function.
        q_t = q_func(obs_t_input.get(), num_actions, scope="q_func", reuse=True)
        online_scope = tf.get_variable_scope().name + "/q_func"
        q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=online_scope)

        # Target Q network.
        q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func")
        target_scope = tf.get_variable_scope().name + "/target_q_func"
        target_q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                               scope=target_scope)

        # Q value of the action that was actually selected at time t.
        q_t_selected = tf.reduce_sum(q_t * tf.one_hot(act_t_ph, num_actions), 1)

        # Estimate of the best achievable value from the state at t + 1.
        if double_q:
            online_q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="q_func",
                                  reuse=True)
            greedy_action = tf.argmax(online_q_tp1, 1)
            q_tp1_best = tf.reduce_sum(q_tp1 * tf.one_hot(greedy_action, num_actions), 1)
        else:
            q_tp1_best = tf.reduce_max(q_tp1, 1)
        q_tp1_best_masked = (1.0 - done_mask_ph) * q_tp1_best

        # Right-hand side of the Bellman equation.
        q_t_selected_target = rew_t_ph + gamma * q_tp1_best_masked

        # TD error, Huber loss, importance-weighted mean.
        td_error = q_t_selected - tf.stop_gradient(q_t_selected_target)
        errors = tf_utils.huber_loss(td_error)
        weighted_error = tf.reduce_mean(importance_weights_ph * errors)

        # Optimisation op, with per-gradient norm clipping when requested.
        if grad_norm_clipping is None:
            optimize_expr = optimizer.minimize(weighted_error, var_list=q_func_vars)
        else:
            raw_gradients = optimizer.compute_gradients(weighted_error,
                                                        var_list=q_func_vars)
            gradients = [
                (tf.clip_by_norm(grad, grad_norm_clipping), var)
                if grad is not None else (grad, var)
                for grad, var in raw_gradients
            ]
            optimize_expr = optimizer.apply_gradients(gradients)

        # Copy online -> target; variables are paired after sorting by name.
        ordered_online = sorted(q_func_vars, key=lambda v: v.name)
        ordered_target = sorted(target_q_func_vars, key=lambda v: v.name)
        copy_ops = [target_var.assign(online_var)
                    for online_var, target_var in zip(ordered_online, ordered_target)]
        update_target_expr = tf.group(*copy_ops)

        # Callable functions.
        train_inputs = [
            obs_t_input,
            act_t_ph,
            rew_t_ph,
            obs_tp1_input,
            done_mask_ph,
            importance_weights_ph,
        ]
        train = tf_utils.function(inputs=train_inputs, outputs=td_error,
                                  updates=[optimize_expr])
        update_target = tf_utils.function([], [], updates=[update_target_expr])
        q_values = tf_utils.function([obs_t_input], q_t)

        return act_f, train, update_target, {'q_values': q_values}
def __init__(self, is_training, batch_size, num_steps):
    """Build the unrolled multi-layer LSTM language-model graph.

    Args:
        is_training: when true, dropout is applied and the training op
            (``self.train_op``) is created; otherwise only the forward
            pass and ``self.cost`` are built.
        batch_size: number of sequences per batch.
        num_steps: number of time steps the RNN is unrolled for.
    """
    self.batch_size = batch_size
    self.num_steps = num_steps

    # Input layer: word ids and target ids, both [batch_size, num_steps].
    self.input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self.targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    def make_cell():
        """Return a fresh LSTM cell, wrapped in dropout when training."""
        single_cell = tf.nn.rnn_cell.BasicLSTMCell(HIDDEN_SIZE)
        if is_training:
            # Dropout is only applied while training.
            single_cell = tf.nn.rnn_cell.DropoutWrapper(
                single_cell, output_keep_prob=KEEP_PROB)
        return single_cell

    # Stack NUM_LAYERS cells. Each layer needs its OWN cell object:
    # `[cell] * NUM_LAYERS` repeats one object, which newer TF 1.x releases
    # either reject or treat as a single set of shared weights.
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [make_cell() for _ in range(NUM_LAYERS)])

    # Initial (all-zero) state.
    self.initial_state = cell.zero_state(batch_size, tf.float32)
    # tf.get_variable() returns the existing variable when one has already
    # been created under this name (with reuse enabled) and creates it
    # otherwise; tf.Variable() always creates a new object, so `reuse`
    # has no effect on it.
    embedding = tf.get_variable(name="embedding",
                                shape=[VOCAB_SIZE, HIDDEN_SIZE])

    # Map word ids to word vectors.
    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    if is_training:
        inputs = tf.nn.dropout(inputs, KEEP_PROB)

    # Collect the per-step outputs of the unrolled RNN.
    outputs = []
    state = self.initial_state
    # Variables are shared across time steps through the variable scope.
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            cell_output, state = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)
    output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])
    weight = tf.get_variable("weight", [HIDDEN_SIZE, VOCAB_SIZE])
    bias = tf.get_variable("bias", [VOCAB_SIZE])
    logits = tf.matmul(output, weight) + bias

    # Cross-entropy loss per example, then the cost averaged over the batch.
    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([batch_size * num_steps], dtype=tf.float32)])
    self.cost = tf.reduce_sum(loss) / batch_size
    self.final_state = state

    # Back-propagation ops are only defined when training.
    if not is_training:
        return
    trainable_variables = tf.trainable_variables()

    # Clip gradients by global norm, then apply plain gradient descent.
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, trainable_variables), MAX_GRAD_NORM)
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE)
    self.train_op = optimizer.apply_gradients(zip(grads, trainable_variables))
def new_function(*args, **kwargs):
    """Log the scoped variable name, then delegate to ``original_function``.

    The name is read from the first positional argument, falling back to the
    ``name`` keyword so that calls such as ``get_variable(name="w", ...)`` do
    not fail with IndexError before reaching the wrapped function.
    """
    name = args[0] if args else kwargs.get('name')
    # str() keeps the log line working even when no name was supplied; the
    # wrapped function remains responsible for rejecting a missing name.
    print('get variable:', '/'.join(
        (tf.get_variable_scope().name, str(name))))
    return original_function(*args, **kwargs)
def train_mnist_multitower(data_dir, num_epochs, num_towers,
                           use_fake_data=True, devices=None):
    """Train a ConvNet on MNIST.

    Training data is split equally among the towers. Each tower computes loss
    on its own batch of data and the loss is aggregated on the CPU. The model
    variables are placed on first tower. The covariance and inverse update ops
    and variables are placed on the given devices in a round robin manner.

    Args:
      data_dir: string. Directory to read MNIST examples from.
      num_epochs: int. Number of passes to make over the training set.
      num_towers: int. Number of towers to split inference across.
      use_fake_data: bool. If True, generate a synthetic dataset.
      devices: list of device strings, indexed by tower id. The covariance
        and inverse update ops are run on these devices. When empty or None,
        one "/cpu:<tower_id>" device per tower is used.

    Returns:
      Accuracy of the model on the final minibatch of training data, or None
      if the session stopped before any step was run.
    """
    if devices:
        device_count = {"GPU": num_towers}
    else:
        device_count = {"CPU": num_towers}

    devices = devices or [
        "/cpu:{}".format(tower_id) for tower_id in range(num_towers)
    ]
    # Load a dataset.
    tower_batch_size = 128
    batch_size = tower_batch_size * num_towers
    tf.logging.info(
        ("Loading MNIST into memory. Using batch_size = %d = %d towers * %d "
         "tower batch size."), batch_size, num_towers, tower_batch_size)
    examples, labels = mnist.load_mnist(
        data_dir,
        num_epochs=num_epochs,
        batch_size=batch_size,
        use_fake_data=use_fake_data,
        flatten_images=False)

    # Split minibatch across towers.
    examples = tf.split(examples, num_towers)
    labels = tf.split(labels, num_towers)

    # Build the model. Each tower's layers will be added to the
    # LayerCollection.
    layer_collection = lc.LayerCollection()
    tower_results = []
    for tower_id in range(num_towers):
        with tf.device(devices[tower_id]):
            with tf.name_scope("tower%d" % tower_id):
                # Every tower after the first reuses the first tower's
                # variables.
                with tf.variable_scope(tf.get_variable_scope(),
                                       reuse=(tower_id > 0)):
                    tf.logging.info("Building tower %d.", tower_id)
                    tower_results.append(
                        build_model(examples[tower_id], labels[tower_id], 10,
                                    layer_collection))
    losses, accuracies = zip(*tower_results)

    # Average across towers.
    loss = tf.reduce_mean(losses)
    accuracy = tf.reduce_mean(accuracies)

    # Fit model.
    session_config = tf.ConfigProto(
        allow_soft_placement=False,
        device_count=device_count,
    )

    g_step = tf.train.get_or_create_global_step()
    optimizer = opt.KfacOptimizer(
        learning_rate=0.0001,
        cov_ema_decay=0.95,
        damping=0.001,
        layer_collection=layer_collection,
        placement_strategy="round_robin",
        cov_devices=devices,
        inv_devices=devices,
        momentum=0.9)
    (cov_update_thunks,
     inv_update_thunks) = optimizer.make_vars_and_create_op_thunks()

    def make_update_op(update_thunks):
        """Materialise every thunk and group the resulting ops."""
        update_ops = [thunk() for thunk in update_thunks]
        return tf.group(*update_ops)

    # Ordering: covariance update -> (every _INVERT_EVERY steps) inverse
    # update -> parameter update.
    cov_update_op = make_update_op(cov_update_thunks)
    with tf.control_dependencies([cov_update_op]):
        inverse_op = tf.cond(
            tf.equal(tf.mod(g_step, _INVERT_EVERY), 0),
            lambda: make_update_op(inv_update_thunks), tf.no_op)
        with tf.control_dependencies([inverse_op]):
            train_op = optimizer.minimize(loss, global_step=g_step)

    tf.logging.info("Starting training.")
    accuracy_ = None  # stays None when the loop body never executes
    with tf.train.MonitoredTrainingSession(config=session_config) as sess:
        while not sess.should_stop():
            global_step_, loss_, accuracy_, _ = sess.run(
                [g_step, loss, accuracy, train_op])

            if global_step_ % _INVERT_EVERY == 0:
                tf.logging.info("global_step: %d | loss: %f | accuracy: %s",
                                global_step_, loss_, accuracy_)

    return accuracy_
def main(self):
    """Build the multi-GPU R2CNN training graph and hand it to ``self.log_printer``.

    One tower is built per comma-separated entry of ``cfgs.GPU_GROUP``. Each
    tower computes its losses and gradients; the per-tower gradient lists are
    collected in ``tower_grads`` and passed, together with the network,
    optimizer, global step, last tower's loss dict and the graph, to
    ``self.log_printer``.
    """
    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
        # Number of towers == number of entries in the GPU_GROUP string.
        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = self.warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)
        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        r2cnn = build_whole_network.DetectionNetworkR2CNN(cfgs=self.cfgs, is_training=True)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                # Shuffle the configured short-side lengths and take the first.
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                self.reader.next_batch(dataset_name=cfgs.DATASET_NAME,
                                       batch_size=cfgs.BATCH_SIZE * num_gpu,
                                       shortside_len=shortside_len,
                                       is_training=True)

        # data processing
        # NOTE(review): the batch holds BATCH_SIZE * num_gpu entries but only
        # indices 0..num_gpu-1 are read below — confirm BATCH_SIZE is meant to be 1.
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            pretrain_zoo = PretrainModelZoo()
            # Extra division by PIXEL_STD only for backbones listed in the
            # pth/mxnet zoos.
            if self.cfgs.NET_NAME in pretrain_zoo.pth_zoo or self.cfgs.NET_NAME in pretrain_zoo.mxnet_zoo:
                img = img / tf.constant([cfgs.PIXEL_STD])

            # Rotated ground truth: reshaped to 6 columns per row.
            gtboxes_and_label_r = tf.py_func(
                backward_convert,
                inp=[gtboxes_and_label_batch[i]],
                Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            # Horizontal ground truth: reshaped to 5 columns per row.
            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [
                -1,
            ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # slim variables are pinned to the CPU while the
                        # tower's ops run on GPU i.
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                    [
                                        slim.conv2d, slim.conv2d_in_plane,
                                        slim.conv2d_transpose,
                                        slim.separable_conv2d,
                                        slim.fully_connected
                                    ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.
                                    constant_initializer(0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    self.get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1], inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                # Last two entries of the input record are
                                # (img_h, img_w); crop the image to that size
                                # from the top-left corner.
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = r2cnn.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)
                                # Image summaries of the ground-truth boxes;
                                # the last column is used as the label.
                                gtboxes_in_img_h = self.drawer.draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = self.drawer.draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = self.drawer.draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                # The network returns its loss dict as the
                                # last output.
                                loss_dict = outputs[-1]
                                total_loss_dict, total_losses = self.loss_dict(
                                    loss_dict, num_gpu)

                                # Regularization losses are added once, on
                                # the last tower only.
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        # Later towers reuse the variables created by the
                        # first one.
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)
        # NOTE(review): total_loss_dict here is the one from the last tower.
        self.log_printer(r2cnn, optimizer, global_step, tower_grads,
                         total_loss_dict, num_gpu, graph)
def __build_model_and_summary(self):
    """Build one model tower per GPU, the joined losses, the optimizer and summaries.

    Input tensors held on ``self`` are split into ``self.num_gpu`` pieces, a
    sequence model is built on each piece, the per-tower results are
    concatenated / averaged, and the training op plus TensorBoard summary ops
    are stored on ``self``.
    """
    # split the tensors
    with tf.variable_scope("tower_split"), tf.device("/cpu:0"):
        # splitted tensors
        # (the last argument of each tf.split is the axis being split)
        ts_inputs = tf.split(self.t_inputs, self.num_gpu, 1)
        ts_lstm_initial_state = tf.split(self.t_lstm_initial_state, self.num_gpu, 2)
        ts_initial_poses = tf.split(self.t_initial_poses, self.num_gpu, 0)
        ts_imu_data = tf.split(self.t_imu_data, self.num_gpu, 1)
        ts_ekf_initial_state = tf.split(self.t_ekf_initial_state, self.num_gpu, 0)
        ts_ekf_initial_covar = tf.split(self.t_ekf_initial_covariance, self.num_gpu, 0)
        ts_se3_labels = tf.split(self.t_se3_labels, self.num_gpu, 1)
        ts_fc_labels = tf.split(self.t_fc_labels, self.num_gpu, 1)

    # list to store results
    ts_ekf_states = []
    ts_ekf_covar_states = []
    ts_lstm_states = []

    # Each key must match the name of a local variable assigned in the loss
    # section below: the values are collected through locals()[k].
    losses_keys = [
        "se3_loss", "se3_xyz_loss", "se3_quat_loss", "fc_loss", "fc_xyz_loss",
        "fc_ypr_loss", "x_loss", "y_loss", "z_loss", "total_loss"
    ]
    ts_losses_dict = dict(
        zip(losses_keys, [[] for i in range(len(losses_keys))]))

    for i in range(0, self.num_gpu):
        # Variables go to the CPU device, other ops to GPU i.
        device_setter = tf.train.replica_device_setter(
            ps_tasks=1,
            ps_device='/job:localhost/replica:0/task:0/device:CPU:0',
            worker_device='/job:localhost/replica:0/task:0/device:GPU:%d' % i)

        with tf.name_scope("tower_%d" % i), tf.device(device_setter):
            tools.printf("Building model...")
            fc_outputs, fc_covar, se3_outputs, lstm_states, ekf_states, ekf_covar_states = \
                model.build_seq_model(self.cfg, ts_inputs[i], ts_lstm_initial_state[i], ts_initial_poses[i],
                                      ts_imu_data[i], ts_ekf_initial_state[i], ts_ekf_initial_covar[i],
                                      self.t_is_training, get_activations=True,
                                      use_initializer=self.t_use_initializer)

            # this returns lstm states as a tuple, we need to stack them
            lstm_states = tf.stack(lstm_states, 0)
            ts_lstm_states.append(lstm_states)
            ts_ekf_states.append(ekf_states)
            ts_ekf_covar_states.append(ekf_covar_states)

            with tf.variable_scope("loss"):
                se3_loss, se3_xyz_loss, se3_quat_loss \
                    = losses.se3_losses(se3_outputs, ts_se3_labels[i], self.cfg.k_se3)
                fc_loss, fc_xyz_loss, fc_ypr_loss, x_loss, y_loss, z_loss \
                    = losses.fc_losses(fc_outputs, fc_covar, ts_fc_labels[i], self.cfg.k_fc)
                # Blend the two losses with weight t_alpha on the fc loss.
                total_loss = (
                    1 - self.t_alpha) * se3_loss + self.t_alpha * fc_loss

                # Append this tower's value of every tracked loss; relies on
                # the local variable names matching losses_keys.
                for k, v in ts_losses_dict.items():
                    v.append(locals()[k])

            # Towers built after this one reuse the existing variables.
            tf.get_variable_scope().reuse_variables()

    with tf.variable_scope("tower_join"), tf.device("/cpu:0"):
        # join the lstm states
        self.t_lstm_states = tf.concat(ts_lstm_states, 2)
        # Replace each per-tower list with its mean across towers.
        for k, v in ts_losses_dict.items():
            ts_losses_dict[k] = tf.reduce_mean(v)

        self.t_ekf_states = tf.concat(ts_ekf_states, 1)
        self.t_ekf_covar_states = tf.concat(ts_ekf_covar_states, 1)
        self.t_total_loss = ts_losses_dict["total_loss"]
        self.t_se3_loss = ts_losses_dict["se3_loss"]

    tools.printf("Building optimizer...")
    with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
        # Optionally restrict training to the "initializer_layer" variables.
        if self.cfg.use_init and self.cfg.only_train_init:
            train_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "initializer_layer")
        else:
            train_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES)

        self.op_trainer = tf.train.AdamOptimizer(learning_rate=self.t_lr). \
            minimize(self.t_total_loss, colocate_gradients_with_ops=True, var_list=train_vars)

    # tensorboard summaries
    tools.printf("Building tensorboard summaries...")
    with tf.device("/cpu:0"):
        self.t_sequence_id = tf.placeholder(dtype=tf.uint8, shape=[])
        self.t_epoch = tf.placeholder(dtype=tf.int32, shape=[])
        tf.summary.scalar("total_loss", ts_losses_dict["total_loss"])
        tf.summary.scalar("fc_loss", ts_losses_dict["fc_loss"])
        tf.summary.scalar("se3_loss", ts_losses_dict["se3_loss"])
        tf.summary.scalar("fc_xyz_loss", ts_losses_dict["fc_xyz_loss"])
        tf.summary.scalar("fc_ypr_loss", ts_losses_dict["fc_ypr_loss"])
        tf.summary.scalar("se3_xyz_loss", ts_losses_dict["se3_xyz_loss"])
        tf.summary.scalar("se3_quat_loss", ts_losses_dict["se3_quat_loss"])
        tf.summary.scalar("x_loss", ts_losses_dict["x_loss"])
        tf.summary.scalar("y_loss", ts_losses_dict["y_loss"])
        tf.summary.scalar("z_loss", ts_losses_dict["z_loss"])
        tf.summary.scalar("alpha", self.t_alpha)
        tf.summary.scalar("lr", self.t_lr)
        tf.summary.scalar("sequence_id", self.t_sequence_id)
        tf.summary.scalar("epoch", self.t_epoch)
        # merge_all() is called before the image and validation summaries
        # below are created, so this op covers only the summaries that exist
        # at this point.
        self.op_train_merged_summary = tf.summary.merge_all()

        # NOTE(review): assumes the ACTIVATIONS collection holds at least two
        # entries indexable as [:, 0, :, :] — confirm against build_seq_model.
        activations = tf.get_collection(tf.GraphKeys.ACTIVATIONS)
        initial_layer = tf.summary.image(
            "1st layer activations",
            tf.expand_dims(activations[0][:, 0, :, :], -1))
        final_layer = tf.summary.image(
            "Last layer activations",
            tf.expand_dims(activations[1][:, 0, :, :], -1))
        self.op_train_image_summary = tf.summary.merge(
            [initial_layer, final_layer])

        # Validation summaries reuse the same loss tensors under "val_" tags.
        val_loss_sum = tf.summary.scalar("val_total_loss",
                                         ts_losses_dict["total_loss"])
        val_fc_sum = tf.summary.scalar("val_fc_losses",
                                       ts_losses_dict["fc_loss"])
        val_se3_sum = tf.summary.scalar("val_se3_losses",
                                        ts_losses_dict["se3_loss"])
        val_z_sum = tf.summary.scalar("val_z_loss", ts_losses_dict["z_loss"])
        self.op_val_merged_summary = tf.summary.merge(
            [val_loss_sum, val_fc_sum, val_se3_sum, val_z_sum])
# Validation pipeline: map load_four over the elements, repeat the dataset
# 57 times and batch at twice the training batch size.
val_ds = val_ds.map(lambda x: load_four(x)).repeat(57).batch(2 * BATCH_SIZE)
print(train_ds)
print(len(train_arr))
print(val_ds)
print(len(val_arr))
# One iterator built from the training dataset's structure, with a separate
# initializer op for each of the two datasets.
iterator = tf.data.Iterator.from_structure(train_ds.output_types,
                                           train_ds.output_shapes)
img_no_shadow, img_with_shadow, input_pureflash, shadow_mask = iterator.get_next(
)
training_init_op = iterator.make_initializer(train_ds)
validation_init_op = iterator.make_initializer(val_ds)
with tf.variable_scope(tf.get_variable_scope()):
    # 0.33 times the sum of the first three channels of the pure-flash input.
    gray_pureflash = 0.33 * (input_pureflash[..., 0:1] +
                             input_pureflash[..., 1:2] +
                             input_pureflash[..., 2:3])
    # bad_mask = detect_shadow(img_with_shadow, input_pureflash)
    # First network: 3-channel output from the shadowed image.
    shadow_mask_layer = UNet_SE(img_with_shadow, output_channel=3,
                                ext='Ref_')  # tf.math.sigmoid()
    # Second network: takes the shadowed image concatenated (channel axis 3)
    # with the first network's output.
    no_shadow_layer = UNet_SE(tf.concat([img_with_shadow, shadow_mask_layer],
                                        axis=3),
                              ext='Trans_')
    # The first perceptual-loss call is made with reuse=False, the second
    # with reuse=True.
    lossDict["percep_t"] = 0.1 * compute_percep_loss(
        img_no_shadow, no_shadow_layer, reuse=False)
    # lossDict["percep_r"]=0.1* tf.reduce_mean(tf.square(shadow_mask-shadow_mask_layer))
    lossDict["percep_r"] = 0.1 * compute_percep_loss(
        shadow_mask, shadow_mask_layer, reuse=True)
def run(args, server):
    """Run one distributed training worker until ``args.max_step`` or a stop request.

    Args:
        args: parsed options. Reads ``alg``, ``gpu``, ``task``, ``log``,
            ``max_step`` and ``eval_freq``.
        server: the TensorFlow server whose ``target`` the managed session
            connects to.

    Raises:
        ValueError: if ``args.alg`` is not one of 'A3C', 'Q' or 'VPN'.
    """
    env = new_env(args)
    if args.alg == 'A3C':
        trainer = A3C(env, args)
    elif args.alg == 'Q':
        trainer = Q(env, args)
    elif args.alg == 'VPN':
        # print() with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print("~~~~~~~~~~~~~~~~~~~~~~VPN IS DEPOLEYED~~~~~~~~~~~~~~~~~~~~~~~~~~~")
        env_off = new_env(args)
        env_off.verbose = 0
        env_off.reset()
        trainer = VPN(env, args, env_off=env_off)
    else:
        raise ValueError('Invalid algorithm: ' + args.alg)

    # Checkpoints contain every global variable whose name starts with neither
    # "global" nor "local/target/".
    # NOTE(review): the previous comment here said that names starting with
    # "local" are not saved, which does not match this filter — confirm intent.
    variables_to_save = [v for v in tf.global_variables() if \
        not v.name.startswith("global") and not v.name.startswith("local/target/")]
    global_variables = [v for v in tf.global_variables()
                        if not v.name.startswith("local")]

    init_op = tf.variables_initializer(global_variables)
    init_all_op = tf.global_variables_initializer()
    saver = FastSaver(variables_to_save, max_to_keep=0)

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())
    logger.info("Num parameters: %d", trainer.local_network.num_param)

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    device = 'gpu' if args.gpu > 0 else 'cpu'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.15)
    config = tf.ConfigProto(
        device_filters=["/job:ps",
                        "/job:worker/task:{}/{}:0".format(args.task, device)],
        gpu_options=gpu_options,
        allow_soft_placement=True)
    logdir = os.path.join(args.log, 'train')
    summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task)

    logger.info("Events directory: %s_%s", logdir, args.task)
    sv = tf.train.Supervisor(
        is_chief=(args.task == 0),
        logdir=logdir,
        saver=saver,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        summary_writer=summary_writer,
        ready_op=tf.report_uninitialized_variables(global_variables),
        global_step=trainer.global_step,
        save_model_secs=0,
        save_summaries_secs=30)

    def save_if_new_epoch(sess, step, last_epoch):
        """Checkpoint on task 0 when `step` enters a new eval period.

        Returns the epoch index to remember (unchanged if nothing was saved).
        """
        current_epoch = int(step / args.eval_freq)
        if args.task != 0 or current_epoch <= last_epoch:
            return last_epoch
        filename = os.path.join(args.log, 'e%d' % (current_epoch))
        sv.saver.save(sess, filename)
        sv.saver.save(sess, os.path.join(args.log, 'latest'))
        print("Saved to: %s" % filename)
        return current_epoch

    logger.info(
        "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. " +
        "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified.")
    with sv.managed_session(server.target, config=config) as sess, sess.as_default():
        sess.run(trainer.sync)
        trainer.start(sess, summary_writer)
        global_step = sess.run(trainer.global_step)
        epoch = -1
        logger.info("Starting training at step=%d", global_step)
        while not sv.should_stop() and (not args.max_step or global_step < args.max_step):
            epoch = save_if_new_epoch(sess, global_step, epoch)
            trainer.process(sess)
            global_step = sess.run(trainer.global_step)

        # Final checkpoint in case the last steps crossed an eval boundary.
        epoch = save_if_new_epoch(sess, global_step, epoch)

    # Ask for all the services to stop.
    sv.stop()
    logger.info('reached %s steps. worker stopped.', global_step)
def __init__(self,
             model,
             data,
             trainer_agr,
             optimizer,
             lr_initial,
             batch_size,
             min_num_iter,
             max_num_iter,
             num_iter_after_best_val,
             max_num_iter_cotrain,
             reg_weight_ll,
             reg_weight_lu,
             reg_weight_uu,
             num_pairs_reg,
             iter_cotrain,
             reg_weight_vat=0.0,
             use_ent_min=False,
             enable_summaries=False,
             summary_step=1,
             summary_dir=None,
             warm_start=False,
             gradient_clip=None,
             logging_step=1,
             eval_step=1,
             abs_loss_chg_tol=1e-10,
             rel_loss_chg_tol=1e-7,
             loss_chg_iter_below_tol=30,
             checkpoints_dir=None,
             weight_decay=None,
             weight_decay_schedule=None,
             penalize_neg_agr=False,
             first_iter_original=True,
             use_l2_classif=True,
             seed=None,
             lr_decay_steps=None,
             lr_decay_rate=None,
             use_graph=False):
    """Store the training options and build the GCN classification graph.

    Most arguments are stored unchanged as attributes of the same name. The
    ones that shape the graph built here:

    Args:
        model: provides ``get_encoding_and_params``,
            ``get_predictions_and_params``, ``normalize_predictions`` and
            ``get_loss``.
        data: provides ``support``, ``dataset.features_sparse``,
            ``num_features_nonzero``, ``num_classes`` and ``num_samples``.
        optimizer: callable that takes a learning rate and returns an
            optimizer object.
        lr_initial: initial learning rate; decayed exponentially (staircase)
            when both ``lr_decay_steps`` and ``lr_decay_rate`` are given.
        iter_cotrain: compared against 0 to decide whether the agreement and
            VAT losses are active when ``first_iter_original`` is true.
        reg_weight_vat: multiplier applied to the VAT loss.
        use_ent_min: if true, an entropy term is added to the VAT loss.
        use_l2_classif: if true, use squared error on normalized predictions
            as the supervised loss; otherwise ``model.get_loss``.
        gradient_clip: if truthy, gradients are clipped to this global norm.
        weight_decay, weight_decay_schedule: passed to
            ``self._create_weight_decay_var``.
        checkpoints_dir: if not None, 'classif_best.ckpt' inside it is used as
            ``self.checkpoint_path``.
        warm_start: if true, ``self.variables`` is added to
            ``self.vars_to_save``.
        seed: seed for the numpy RandomState stored in ``self.rng``.
    """
    super(TrainerClassificationGCN,
          self).__init__(model=model,
                         abs_loss_chg_tol=abs_loss_chg_tol,
                         rel_loss_chg_tol=rel_loss_chg_tol,
                         loss_chg_iter_below_tol=loss_chg_iter_below_tol)
    self.data = data
    self.trainer_agr = trainer_agr
    self.batch_size = batch_size
    self.min_num_iter = min_num_iter
    self.max_num_iter = max_num_iter
    self.num_iter_after_best_val = num_iter_after_best_val
    self.max_num_iter_cotrain = max_num_iter_cotrain
    self.enable_summaries = enable_summaries
    self.summary_step = summary_step
    self.summary_dir = summary_dir
    self.warm_start = warm_start
    self.gradient_clip = gradient_clip
    self.logging_step = logging_step
    self.eval_step = eval_step
    self.checkpoint_path = (os.path.join(checkpoints_dir, 'classif_best.ckpt')
                            if checkpoints_dir is not None else None)
    self.weight_decay_initial = weight_decay
    self.weight_decay_schedule = weight_decay_schedule
    self.num_pairs_reg = num_pairs_reg
    self.reg_weight_ll = reg_weight_ll
    self.reg_weight_lu = reg_weight_lu
    self.reg_weight_uu = reg_weight_uu
    self.reg_weight_vat = reg_weight_vat
    self.use_ent_min = use_ent_min
    self.penalize_neg_agr = penalize_neg_agr
    self.use_l2_classif = use_l2_classif
    self.first_iter_original = first_iter_original
    self.iter_cotrain = iter_cotrain
    self.lr_initial = lr_initial
    self.lr_decay_steps = lr_decay_steps
    self.lr_decay_rate = lr_decay_rate
    self.use_graph = use_graph

    # Build TensorFlow graph.
    logging.info('Building classification TensorFlow graph...')

    # Create placeholders.
    input_indices = tf.placeholder(tf.int64,
                                   shape=(None, ),
                                   name='input_indices')
    input_indices_unlabeled = tf.placeholder(
        tf.int32, shape=(None, ), name='input_indices_unlabeled')
    input_labels = tf.placeholder(tf.int64,
                                  shape=(None, ),
                                  name='input_labels')

    # Create a placeholder specifying if this is train time.
    is_train = tf.placeholder_with_default(False, shape=[], name='is_train')

    # Create some placeholders specific to GCN.
    self.support_op = tf.sparse_placeholder(tf.float32, name='support')
    self.features_op = tf.sparse_placeholder(tf.float32, name='features')
    self.num_features_nonzero_op = tf.placeholder(
        tf.int32, name='num_features_nonzero')

    # Save the data required to fill in these placeholders. We don't add them
    # directly in the graph as constants in order to avoid saving large
    # checkpoints.
    self.support = data.support
    self.features = data.dataset.features_sparse
    self.num_features_nonzero = data.num_features_nonzero

    # Create variables and predictions.
    with tf.variable_scope('predictions'):
        encoding, variables_enc, reg_params_enc = (
            self.model.get_encoding_and_params(
                inputs=self.features_op,
                is_train=is_train,
                support=self.support_op,
                num_features_nonzero=self.num_features_nonzero_op))
        self.variables = variables_enc
        self.reg_params = reg_params_enc
        predictions, variables_pred, reg_params_pred = (
            self.model.get_predictions_and_params(
                encoding=encoding,
                is_train=is_train,
                support=self.support_op,
                num_features_nonzero=self.num_features_nonzero_op))
        # Merge the prediction-layer variables into the encoder's containers.
        self.variables.update(variables_pred)
        self.reg_params.update(reg_params_pred)
        normalized_predictions = self.model.normalize_predictions(
            predictions)
        # Captured so the VAT loss can be given the same variable scope.
        predictions_var_scope = tf.get_variable_scope()

        # Predictions are computed for all nodes; select the batch rows.
        predictions_batch = tf.gather(predictions, input_indices, axis=0)
        normalized_predictions_batch = tf.gather(normalized_predictions,
                                                 input_indices,
                                                 axis=0)
        one_hot_labels = tf.one_hot(input_labels,
                                    data.num_classes,
                                    name='targets_one_hot')

    # Create a variable for weight decay that may be updated.
    weight_decay_var, weight_decay_update = self._create_weight_decay_var(
        weight_decay, weight_decay_schedule)

    # Create counter for classification iterations.
    iter_cls_total, iter_cls_total_update = self._create_counter()

    # Create loss.
    with tf.name_scope('loss'):
        if self.use_l2_classif:
            # Mean over the batch of the per-sample sum of squared errors.
            loss_supervised = tf.square(one_hot_labels -
                                        normalized_predictions_batch)
            loss_supervised = tf.reduce_sum(loss_supervised, axis=-1)
            loss_supervised = tf.reduce_mean(loss_supervised)
        else:
            loss_supervised = self.model.get_loss(
                predictions=predictions_batch,
                targets=one_hot_labels,
                weight_decay=None)

        # Agreement regularization loss.
        loss_agr = self._get_agreement_reg_loss(data, is_train)
        # If the first co-train iteration trains the original model (for
        # comparison purposes), then we do not add an agreement loss.
        if self.first_iter_original:
            loss_agr_weight = tf.cast(tf.greater(iter_cotrain, 0), tf.float32)
            loss_agr = loss_agr * loss_agr_weight

        # Weight decay loss.
        loss_reg = 0.0
        if weight_decay_var is not None:
            for var in self.reg_params.values():
                loss_reg += weight_decay_var * tf.nn.l2_loss(var)

        # Adversarial loss, in case we want to add VAT on top of GAM.
        # Mask of length data.num_samples with 1.0 at the unlabeled indices.
        ones = tf.fill(tf.shape(input_indices_unlabeled), 1.0)
        unlabeled_mask = tf.scatter_nd(input_indices_unlabeled[:, None],
                                       updates=ones,
                                       shape=[
                                           data.num_samples,
                                       ],
                                       name='unlabeled_mask')
        placeholders = {
            'support': self.support_op,
            'num_features_nonzero': self.num_features_nonzero_op
        }
        loss_vat = get_loss_vat(
            inputs=self.features_op,
            predictions=predictions,
            mask=unlabeled_mask,
            is_train=is_train,
            model=model,
            placeholders=placeholders,
            predictions_var_scope=predictions_var_scope)
        # Use 0.0 instead of the VAT loss when no unlabeled indices are fed.
        num_unlabeled = tf.shape(input_indices_unlabeled)[0]
        loss_vat = tf.cond(tf.greater(num_unlabeled, 0), lambda: loss_vat,
                           lambda: 0.0)
        if self.use_ent_min:
            # Use entropy minimization with VAT (i.e. VATENT).
            loss_ent = entropy_y_x(predictions, unlabeled_mask)
            loss_vat = loss_vat + tf.cond(tf.greater(num_unlabeled, 0),
                                          lambda: loss_ent, lambda: 0.0)
        loss_vat = loss_vat * self.reg_weight_vat
        if self.first_iter_original:
            # Do not add the adversarial loss in the first iteration if
            # the first iteration trains the plain baseline model.
            weight_loss_vat = tf.cond(tf.greater(iter_cotrain, 0),
                                      lambda: 1.0, lambda: 0.0)
            loss_vat = loss_vat * weight_loss_vat

        # Total loss.
        loss_op = loss_supervised + loss_agr + loss_reg + loss_vat

    # Create accuracy.
    accuracy = tf.equal(tf.argmax(normalized_predictions_batch, 1),
                        input_labels)
    accuracy = tf.reduce_mean(tf.cast(accuracy, tf.float32))

    # Create Tensorboard summaries.
    # (loss_vat has no summary of its own; it is only visible via loss_total.)
    if self.enable_summaries:
        summaries = [
            tf.summary.scalar('loss_supervised', loss_supervised),
            tf.summary.scalar('loss_agr', loss_agr),
            tf.summary.scalar('loss_reg', loss_reg),
            tf.summary.scalar('loss_total', loss_op)
        ]
        self.summary_op = tf.summary.merge(summaries)

    # Create learning rate schedule and optimizer.
    # NOTE(review): self.lr is only assigned in the decay branch below.
    self.global_step = tf.train.get_or_create_global_step()
    if self.lr_decay_steps is not None and self.lr_decay_rate is not None:
        self.lr = tf.train.exponential_decay(self.lr_initial,
                                             self.global_step,
                                             self.lr_decay_steps,
                                             self.lr_decay_rate,
                                             staircase=True)
        self.optimizer = optimizer(self.lr)
    else:
        self.optimizer = optimizer(lr_initial)

    # Get trainable variables and compute gradients.
    # Only variables under the graph's current name scope are trained.
    grads_and_vars = self.optimizer.compute_gradients(
        loss_op,
        tf.trainable_variables(
            scope=tf.get_default_graph().get_name_scope()))
    # Clip gradients.
    if self.gradient_clip:
        variab = [elem[1] for elem in grads_and_vars]
        gradients = [elem[0] for elem in grads_and_vars]
        gradients, _ = tf.clip_by_global_norm(gradients, self.gradient_clip)
        grads_and_vars = tuple(zip(gradients, variab))
    # Run the UPDATE_OPS of this name scope before applying gradients.
    with tf.control_dependencies(
            tf.get_collection(
                tf.GraphKeys.UPDATE_OPS,
                scope=tf.get_default_graph().get_name_scope())):
        train_op = self.optimizer.apply_gradients(
            grads_and_vars, global_step=self.global_step)

    # Create a saver for model variables.
    trainable_vars = [v for _, v in grads_and_vars]

    # Put together the subset of variables to save and restore from the best
    # validation accuracy as we train the agreement model in one cotrain round.
    vars_to_save = trainable_vars + []
    if isinstance(weight_decay_var, tf.Variable):
        vars_to_save.append(weight_decay_var)
    saver = tf.train.Saver(vars_to_save)

    # Put together all variables that need to be saved in case the process is
    # interrupted and needs to be restarted.
    self.vars_to_save = [iter_cls_total, self.global_step]
    if isinstance(weight_decay_var, tf.Variable):
        self.vars_to_save.append(weight_decay_var)
    if self.warm_start:
        # NOTE(review): self.variables supports .update(); if it is a dict,
        # iterating it yields keys rather than variable objects — confirm.
        self.vars_to_save.extend([v for v in self.variables])

    # More variables to be initialized after the session is created.
    self.is_initialized = False
    self.rng = np.random.RandomState(seed)
    self.input_indices = input_indices
    self.input_indices_unlabeled = input_indices_unlabeled
    self.input_labels = input_labels
    self.predictions = predictions
    self.normalized_predictions = normalized_predictions
    self.normalized_predictions_batch = normalized_predictions_batch
    self.weight_decay_var = weight_decay_var
    self.weight_decay_update = weight_decay_update
    self.iter_cls_total = iter_cls_total
    self.iter_cls_total_update = iter_cls_total_update
    self.accuracy = accuracy
    self.train_op = train_op
    self.loss_op = loss_op
    self.saver = saver
    self.batch_size_actual = tf.shape(self.predictions)[0]
    # Op that re-initializes the optimizer's own variables.
    self.reset_optimizer = tf.variables_initializer(
        self.optimizer.variables())
    self.is_train = is_train
def __init__(self, is_training, config):
    """Build the unrolled multi-layer LSTM language-model graph.

    :param is_training: whether the model is built for training. When False,
        no parameter-update ops are created.
    :param config: object providing batch_size, num_steps, hidden_size,
        vocab_size, keep_prob, num_layers and max_grad_norm.
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    # Inputs and expected outputs: both are index sequences of length
    # num_steps, one row per batch element.
    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    def make_cell():
        """Return a fresh LSTM cell, wrapped in dropout when applicable."""
        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to
        # be different than reported in the paper.
        single_cell = tf.nn.rnn_cell.BasicLSTMCell(
            size, forget_bias=0.0, state_is_tuple=True)
        if is_training and config.keep_prob < 1:
            # Wrap the cell in an output-dropout layer.
            single_cell = tf.nn.rnn_cell.DropoutWrapper(
                single_cell, output_keep_prob=config.keep_prob)
        return single_cell

    # Stack the layers. Each layer needs its OWN cell object:
    # `[cell] * num_layers` repeats one object, which newer TF 1.x releases
    # either reject or treat as a single set of shared weights.
    cell = tf.nn.rnn_cell.MultiRNNCell(
        [make_cell() for _ in range(config.num_layers)], state_is_tuple=True)

    # All-zero initial state.
    self._initial_state = cell.zero_state(batch_size, data_type())

    with tf.device("/cpu:0"):
        # Embedding table of shape [vocab_size, hidden_size].
        embedding = tf.get_variable(
            "embedding", [vocab_size, size], dtype=data_type())
        # Embedded input sequence: [batch_size, num_steps, hidden_size].
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
    # This builds an unrolled LSTM for tutorial purposes only.
    # In general, use the rnn() or state_saving_rnn() from rnn.py.
    #
    # The alternative version of the code below is:
    #
    # inputs = [tf.squeeze(input_, [1])
    #           for input_ in tf.split(1, num_steps, inputs)]
    # outputs, state = tf.nn.rnn(cell, inputs, initial_state=self._initial_state)
    outputs = []
    state = self._initial_state  # recurrent state carried across time steps
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            # After the first step, reuse the variables created at step 0.
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)

    # `outputs` is a list of num_steps per-step outputs. Concatenate along
    # axis 1, then reshape to rows of width hidden_size.
    output = tf.reshape(tf.concat(outputs, 1), [-1, size])
    # Projection from the hidden representation to vocabulary logits.
    softmax_w = tf.get_variable(
        "softmax_w", [size, vocab_size], dtype=data_type())
    softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
    logits = tf.matmul(output, softmax_w) + softmax_b

    # Weighted cross-entropy per example; all weights are 1.
    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],  # targets flattened to 1-D
        [tf.ones([batch_size * num_steps], dtype=data_type())])
    # Total loss divided by the batch size.
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state

    # Nothing below is needed when the model is not being trained.
    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    # tf.gradients returns one gradient per variable in tvars.
    # clip_by_global_norm limits exploding gradients by rescaling each one:
    #   t_list[i] * clip_norm / max(global_norm, clip_norm)
    # where global_norm = sqrt(sum([l2norm(t)**2 for t in t_list])).
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)

    # Gradient descent with the (externally adjustable) learning rate.
    optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Placeholder and assign op for feeding a new learning rate into the
    # graph from outside.
    self._new_lr = tf.placeholder(
        tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
def __init__(self, scope, trainer, global_step=None):
    """Build the actor-critic graph: conv encoder -> recurrent core -> heads.

    All ops and variables are created under ``tf.variable_scope(scope)``.
    For every scope other than ``'global'`` the constructor additionally
    builds the loss, globally clipped gradients, summaries and an op that
    applies those gradients to the trainable variables of the ``'global'``
    scope.

    Args:
        scope: Variable-scope name for this network.  ``'global'`` builds
            the forward pass only.
        trainer: Object whose ``apply_gradients`` is called with
            ``(gradient, global variable)`` pairs (non-global scopes only).
        global_step: Accepted for interface compatibility; not read here.
    """
    rnn_units = 48  # width of the recurrent core and of both output heads

    with tf.variable_scope(scope):
        self.inputs = tf.placeholder(
            shape=[None, FLAGS.game_size, FLAGS.game_size,
                   FLAGS.game_channels],
            dtype=tf.float32, name="Inputs")

        self.conv = tf.contrib.layers.conv2d(
            self.inputs, 32, 5, 2, activation_fn=tf.nn.elu, scope="conv1")

        self.image_summaries = []
        with tf.variable_scope('conv1'):
            # Re-enter the conv scope in reuse mode to fetch the kernel
            # created by the layer above.
            tf.get_variable_scope().reuse_variables()
            weights = tf.get_variable('weights')
            grid = self.put_kernels_on_grid(weights)
            self.image_summaries.append(
                tf.summary.image('kernels', grid, max_outputs=1))

        with tf.variable_scope('inputs'):
            tf.get_variable_scope().reuse_variables()
            self.image_summaries.append(
                tf.summary.image('input', self.inputs, max_outputs=1))

        self.fc = tf.contrib.layers.fully_connected(
            tf.contrib.layers.flatten(self.conv), 64)
        self.elu = tf.nn.elu(self.fc)
        summary_conv_act = tf.contrib.layers.summarize_activation(self.elu)

        if FLAGS.meta:
            # Meta-RL inputs: previous reward/action and the timestep.
            self.timestep = tf.placeholder(
                shape=[None, 1], dtype=tf.float32, name="timestep")
            self.prev_rewards = tf.placeholder(
                shape=[None], dtype=tf.int32, name="Prev_Rewards")
            self.prev_rewards_onehot = tf.one_hot(
                self.prev_rewards, 2, dtype=tf.float32,
                name="Prev_Rewards_OneHot")
            self.prev_actions = tf.placeholder(
                shape=[None], dtype=tf.int32, name="Prev_Actions")
            self.prev_actions_onehot = tf.one_hot(
                self.prev_actions, FLAGS.nb_actions, dtype=tf.float32,
                name="Prev_Actions_OneHot")

            if FLAGS.one_hot_reward:
                hidden = tf.concat(
                    [self.elu, self.prev_rewards_onehot,
                     self.prev_actions_onehot],
                    1, name="Concatenated_input")
            else:
                # prev_rewards is an int32 vector, but tf.concat needs all
                # parts to share dtype and rank; feed the raw reward as a
                # float32 column instead of the bare placeholder.
                prev_rewards_col = tf.cast(
                    tf.expand_dims(self.prev_rewards, 1), tf.float32)
                hidden = tf.concat(
                    [self.elu, prev_rewards_col,
                     self.prev_actions_onehot, self.timestep],
                    1, name="Concatenated_input")
        else:
            hidden = self.elu

        summary_hidden_act = tf.contrib.layers.summarize_activation(hidden)

        # The whole fed batch is treated as one sequence: a leading axis of
        # size 1 is added and the sequence length is the batch size.
        rnn_in = tf.expand_dims(hidden, [0], name="RNN_input")
        step_size = tf.shape(self.inputs)[:1]

        if FLAGS.fw:
            rnn_cell = LayerNormFastWeightsBasicRNNCell(rnn_units)
            h_init = np.zeros((1, rnn_units), np.float32)
            fw_init = np.zeros((1, rnn_units, rnn_units), np.float32)
            self.state_init = [h_init, fw_init]
            h_in = tf.placeholder(
                tf.float32, [1, rnn_units], name="hidden_state")
            fw_in = tf.placeholder(
                tf.float32, [1, rnn_units, rnn_units], name="fast_weights")
            self.state_in = (h_in, fw_in)

            rnn_outputs, rnn_state = tf.nn.dynamic_rnn(
                rnn_cell, rnn_in, initial_state=self.state_in,
                sequence_length=step_size, time_major=False)

            rnn_h, rnn_fw = rnn_state
            self.state_out = (rnn_h[:1, :], rnn_fw[:1, :])
            rnn_out = tf.reshape(rnn_outputs, [-1, rnn_units],
                                 name="RNN_out")
        else:
            lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(rnn_units)
            c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
            h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
            self.state_init = [c_init, h_init]
            c_in = tf.placeholder(
                tf.float32, [1, lstm_cell.state_size.c], name="c_in")
            h_in = tf.placeholder(
                tf.float32, [1, lstm_cell.state_size.h], name="h_in")
            self.state_in = (c_in, h_in)
            state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)

            lstm_outputs, lstm_state = tf.nn.dynamic_rnn(
                lstm_cell, rnn_in, initial_state=state_in,
                sequence_length=step_size, time_major=False)

            lstm_c, lstm_h = lstm_state
            self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
            rnn_out = tf.reshape(lstm_outputs, [-1, rnn_units],
                                 name="RNN_out")

        summary_rnn_act = tf.contrib.layers.summarize_activation(rnn_out)

        # Policy head (softmax over actions); no bias term.
        fc_pol_w = tf.get_variable(
            "FC_Pol_W", shape=[rnn_units, FLAGS.nb_actions],
            initializer=normalized_columns_initializer(0.01))
        self.policy = tf.nn.softmax(
            tf.matmul(rnn_out, fc_pol_w, name="Policy"), name="Policy_soft")
        summary_policy_act = tf.contrib.layers.summarize_activation(
            self.policy)

        # Value head (one scalar per step); no bias term.
        fc_value_w = tf.get_variable(
            "FC_Value_W", shape=[rnn_units, 1],
            initializer=normalized_columns_initializer(1.0))
        self.value = tf.matmul(rnn_out, fc_value_w, name="Value")
        summary_value_act = tf.contrib.layers.summarize_activation(
            self.value)

        if scope != 'global':
            self.actions = tf.placeholder(
                shape=[None], dtype=tf.int32, name="Actions")
            self.actions_onehot = tf.one_hot(
                self.actions, FLAGS.nb_actions, dtype=tf.float32,
                name="Actions_Onehot")
            self.target_v = tf.placeholder(shape=[None], dtype=tf.float32)
            self.advantages = tf.placeholder(shape=[None], dtype=tf.float32)

            # Probability the policy assigned to each action actually taken.
            self.responsible_outputs = tf.reduce_sum(
                self.policy * self.actions_onehot, [1])

            # Loss functions.  The 1e-7 keeps log() away from zero.
            self.value_loss = FLAGS.beta_v * tf.reduce_sum(
                tf.square(self.target_v - tf.reshape(self.value, [-1])))
            self.entropy = -tf.reduce_sum(
                self.policy * tf.log(self.policy + 1e-7))
            self.policy_loss = -tf.reduce_sum(
                tf.log(self.responsible_outputs + 1e-7) * self.advantages
            ) - self.entropy * FLAGS.beta_e
            self.loss = self.value_loss + self.policy_loss

            # Gradients are taken w.r.t. this scope's variables ...
            local_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(
                self.gradients, FLAGS.gradient_clip_value)

            self.worker_summaries = [
                summary_conv_act, summary_hidden_act, summary_rnn_act,
                summary_policy_act, summary_value_act
            ]
            for grad, weight in zip(grads, local_vars):
                self.worker_summaries.append(
                    tf.summary.histogram(weight.name + '_grad', grad))
                self.worker_summaries.append(
                    tf.summary.histogram(weight.name, weight))
            self.merged_summary = tf.summary.merge(self.worker_summaries)

            # ... and applied to the variables of the 'global' scope; this
            # relies on both collections listing variables in the same order.
            global_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(
                zip(grads, global_vars))
def create_train_op(model_config, inputs, opt, num_gpus=1, histograms=False):
    """Build a multi-tower training op with averaged gradients.

    One tower is built per GPU via ``tower_loss``; variables are shared
    between towers.  The per-tower gradients are averaged, applied with
    ``opt``, and grouped with an exponential-moving-average update of the
    trainable variables.

    Args:
        model_config: Passed through to ``tower_loss``.
        inputs: Passed through to ``tower_loss`` (same value for each tower).
        opt: Optimizer providing ``compute_gradients``/``apply_gradients``.
        num_gpus: Number of towers to build.
        histograms: When true, add histogram summaries for the averaged
            gradients and for every trainable variable.

    Returns:
        ``(train_op, model, summaries)``.  ``train_op`` yields the total loss
        of the last tower built, and ``model`` is likewise the one returned
        for the last tower.
    """
    with tf.get_default_graph().as_default(), tf.device('/cpu:0'):
        per_tower_grads = []
        model = None
        losses = []
        total_loss = []
        global_step = slim.get_or_create_global_step()

        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_idx in xrange(num_gpus):
                tower_scope_name = '%s_%d' % (TOWER_NAME, gpu_idx)
                with tf.device('/gpu:%d' % gpu_idx), \
                        tf.name_scope(tower_scope_name) as scope:
                    # Build the full model for this tower; each iteration
                    # overwrites the results of the previous tower.
                    losses, total_loss, model = tower_loss(
                        model_config, inputs, scope, is_train=True)

                    # Every tower after this one shares the variables.
                    tf.get_variable_scope().reuse_variables()

                    per_tower_grads.append(
                        opt.compute_gradients(total_loss))

        # Scalar summaries for the (last tower's) individual and total loss.
        summaries = [
            tf.summary.scalar(loss_tensor.op.name, loss_tensor)
            for loss_tensor in losses + [total_loss]
        ]

        # Synchronization point across all towers.
        grads = average_gradients(per_tower_grads)

        if histograms:
            summaries.extend(
                tf.summary.histogram(variable.op.name + '/gradients',
                                     gradient)
                for gradient, variable in grads
                if gradient is not None)

        apply_gradient_op = opt.apply_gradients(grads,
                                                global_step=global_step)

        if histograms:
            summaries.extend(
                tf.summary.histogram(variable.op.name, variable)
                for variable in tf.trainable_variables())

        # Maintain moving averages of all trainable variables.
        ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                global_step)
        ema_update_op = ema.apply(tf.trainable_variables())

        updates = tf.group(apply_gradient_op, ema_update_op)

        with tf.name_scope('train_op'):
            # Evaluating the returned tensor forces the updates to run.
            train_op = with_dependencies([updates], total_loss)

        return train_op, model, summaries
def run_tester(args, server):
    """Evaluate saved checkpoints epoch by epoch and track the best one.

    For each epoch from ``args.eval_epoch`` while
    ``args.eval_freq * epoch <= args.max_step`` the function waits for the
    checkpoint ``<args.log>/e<epoch>`` to appear, restores it, evaluates the
    agent's local network, appends ``epoch, reward`` to
    ``<args.log>/eval.csv`` and saves the model as ``<args.log>/best``
    whenever the reward improves.

    Args:
        args: Parsed options; the fields read here are ``alg``, ``gpu``,
            ``task``, ``log``, ``seed``, ``eval_epoch``, ``eval_freq``,
            ``eval_num``, ``eps_eval`` and ``max_step`` (``new_env`` and the
            agent constructors also receive the whole object).
        server: Object whose ``target`` is passed to the managed session.

    Raises:
        ValueError: If ``args.alg`` is not one of ``'A3C'``, ``'Q'``,
            ``'VPN'``.
    """
    env = new_env(args)
    env.reset()

    if args.alg == 'A3C':
        agent = A3C(env, args)
    elif args.alg == 'Q':
        agent = Q(env, args)
    elif args.alg == 'VPN':
        agent = VPN(env, args)
    else:
        raise ValueError('Invalid algorithm: ' + args.alg)

    device = 'gpu' if args.gpu > 0 else 'cpu'
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.15)
    config = tf.ConfigProto(
        device_filters=["/job:ps",
                        "/job:worker/task:{}/{}:0".format(args.task, device)],
        gpu_options=gpu_options,
        allow_soft_placement=True)

    # Checkpoints hold everything except "global*" and "local/target/*".
    variables_to_save = [
        v for v in tf.global_variables()
        if not v.name.startswith("global")
        and not v.name.startswith("local/target/")
    ]
    global_variables = [v for v in tf.global_variables()
                        if not v.name.startswith("local")]

    init_op = tf.variables_initializer(global_variables)
    init_all_op = tf.global_variables_initializer()

    var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 tf.get_variable_scope().name)
    logger.info('Trainable vars:')
    for v in var_list:
        logger.info('  %s %s', v.name, v.get_shape())
    logger.info("Num parameters: %d", agent.local_network.num_param)

    def init_fn(ses):
        logger.info("Initializing all parameters.")
        ses.run(init_all_op)

    saver = FastSaver(variables_to_save, max_to_keep=0)
    sv = tf.train.Supervisor(
        is_chief=False,
        global_step=agent.global_step,
        summary_op=None,
        init_op=init_op,
        init_fn=init_fn,
        ready_op=tf.report_uninitialized_variables(global_variables),
        saver=saver,
        save_model_secs=0,
        save_summaries_secs=0)

    best_reward = -10000
    with sv.managed_session(server.target, config=config) as sess, \
            sess.as_default():
        epoch = args.eval_epoch
        while args.eval_freq * epoch <= args.max_step:
            path = os.path.join(args.log, "e%d" % epoch)
            if not os.path.exists(path + ".index"):
                # Checkpoint for this epoch not written yet: poll again.
                time.sleep(10)
                continue

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("In terester run, ~~~~~~~~~~~~~~")
            logger.info("Start evaluation (Epoch %d)", epoch)
            saver.restore(sess, path)
            # Re-seed so every epoch is evaluated under the same randomness.
            np.random.seed(args.seed)
            print("Start evaluation~~~~~~~~~~~~")
            reward = evaluate(env, agent.local_network, args.eval_num,
                              eps=args.eps_eval)
            print("finish evaluation~~~~~~~~~~~")

            # The context manager closes the log even if the write fails.
            with open(os.path.join(args.log, "eval.csv"), "a") as logfile:
                print("Epoch: %d, Reward: %.2f" % (epoch, reward))
                logfile.write("%d, %.3f\n" % (epoch, reward))

            if reward > best_reward:
                best_reward = reward
                sv.saver.save(sess, os.path.join(args.log, 'best'))
                print("Saved to: %s" % os.path.join(args.log, 'best'))

            epoch += 1

    logger.info('tester stopped.')
def build_rnn(self, global_maps, init_particle_states, observations,
              odometries, is_first_step):
    """
    Unroll the PF-net RNN cell through time. Input arguments are the inputs
    to PF-net. The time dependent fields are expected to be broken into
    fixed-length segments defined by params.bptt_steps.

    Besides returning the per-step particle states and weights, this sets
    ``self.hidden_states`` (two non-trainable variables holding the state
    carried between segments) and ``self.update_state_op`` (copies the final
    RNN state into those variables).
    """
    batch_size, _ = observations.shape.as_list()[:2]
    num_particles = init_particle_states.shape.as_list()[1]
    map_channels = global_maps.shape.as_list()[-1]

    # Every particle starts with the same log-weight, log(1 / N).
    uniform_log_weight = np.log(1.0 / float(num_particles))
    init_particle_weights = tf.constant(uniform_log_weight,
                                        shape=(batch_size, num_particles),
                                        dtype=tf.float32)

    # no hidden state should be set before
    assert len(self.hidden_states) == 0

    # Variables that persist the particle set between trajectory segments.
    hidden_specs = (
        ("particle_states", init_particle_states),
        ("particle_weights", init_particle_weights),
    )
    self.hidden_states = [
        tf.get_variable(var_name,
                        shape=template.get_shape(),
                        dtype=template.dtype,
                        initializer=tf.constant_initializer(0),
                        trainable=False)
        for var_name, template in hidden_specs
    ]

    # First segment starts from the given particles; later segments resume
    # from the stored hidden state.
    state = tf.cond(
        is_first_step,
        true_fn=lambda: (init_particle_states, init_particle_weights),
        false_fn=lambda: tuple(self.hidden_states))

    with tf.variable_scope("rnn"):
        # hack to create variables on GPU: run a throw-away cell once on
        # minimal all-zero inputs so the cell variables exist before the
        # real unroll.
        dummy_maps = tf.zeros((1, 1, 1, map_channels),
                              dtype=global_maps.dtype)
        dummy_cell = PFCell(global_maps=dummy_maps,
                            params=self.params,
                            batch_size=1,
                            num_particles=1)

        obs_tail_shape = observations.get_shape().as_list()[2:]
        dummy_observation = tf.zeros([1] + obs_tail_shape,
                                     dtype=observations.dtype)
        dummy_odometry = tf.zeros([1, 3], dtype=odometries.dtype)
        dummy_states = tf.zeros([1, 1, 3],
                                dtype=init_particle_states.dtype)
        dummy_weights = tf.zeros([1, 1],
                                 dtype=init_particle_weights.dtype)
        dummy_cell((dummy_observation, dummy_odometry),
                   (dummy_states, dummy_weights))

        # variables are now created. set reuse
        tf.get_variable_scope().reuse_variables()

        # unroll real steps using the variables already created
        real_cell = PFCell(global_maps=global_maps,
                           params=self.params,
                           batch_size=batch_size,
                           num_particles=num_particles)
        outputs, state = tf.nn.dynamic_rnn(
            cell=real_cell,
            inputs=(observations, odometries),
            initial_state=state,
            swap_memory=True,
            time_major=False,
            parallel_iterations=1,
            scope=tf.get_variable_scope())

    particle_states, particle_weights = outputs

    # Op that stores the final state back into the hidden-state variables;
    # it should be evaluated after every input segment.
    with tf.control_dependencies([particle_states, particle_weights]):
        assign_ops = (hidden_var.assign(state[idx])
                      for idx, hidden_var in enumerate(self.hidden_states))
        self.update_state_op = tf.group(*assign_ops)

    return particle_states, particle_weights
def generator(self, z, y, gender, reuse_variables=False,
              enable_tile_label=True, tile_ratio=1.0):
    """Build the generator: latent code + labels -> image in (-1, 1).

    ``y`` and ``gender`` are concatenated onto ``z`` via ``concat_label``
    (optionally tiled), followed by a fully connected layer, a stack of
    ``num_layers`` stride-2 deconvolutions and two stride-1 deconvolutions;
    the output goes through ``tanh``.

    Args:
        z: Latent input passed to ``concat_label``.
        y: First label tensor concatenated onto ``z``.
        gender: Second label tensor concatenated onto ``z``.
        reuse_variables: If true, switch the current variable scope to reuse
            mode before creating any layer.
        enable_tile_label: If true, each label is duplicated so that its
            width scales with ``self.num_z_channels * tile_ratio``.
        tile_ratio: Scaling factor for the label duplication.

    Returns:
        The ``tanh``-activated output of the last deconvolution.
    """
    if reuse_variables:
        tf.get_variable_scope().reuse_variables()

    num_layers = int(np.log2(self.size_image)) - int(self.size_kernel / 2)

    if enable_tile_label:
        duplicate = int(self.num_z_channels * tile_ratio /
                        self.num_categories)
    else:
        duplicate = 1
    z = concat_label(z, y, duplicate=duplicate)

    if enable_tile_label:
        duplicate = int(self.num_z_channels * tile_ratio / 2)
    else:
        duplicate = 1
    z = concat_label(z, gender, duplicate=duplicate)

    size_mini_map = int(self.size_image / 2**num_layers)

    # fc layer
    name = 'G_fc'
    current = fc(input_vector=z,
                 num_output_length=self.num_gen_channels * size_mini_map *
                 size_mini_map,
                 name=name)
    # reshape to cube for deconv
    current = tf.reshape(
        current, [-1, size_mini_map, size_mini_map, self.num_gen_channels])
    current = tf.nn.relu(current)

    # deconv layers with stride 2: each doubles the spatial size and halves
    # the channel count
    for i in range(num_layers):
        name = 'G_deconv' + str(i)
        current = deconv2d(input_map=current,
                           output_shape=[
                               self.size_batch,
                               size_mini_map * 2**(i + 1),
                               size_mini_map * 2**(i + 1),
                               int(self.num_gen_channels / 2**(i + 1))
                           ],
                           size_kernel=self.size_kernel,
                           name=name)
        current = tf.nn.relu(current)

    # The two trailing layers are indexed from num_layers rather than from
    # the leaked loop variable: identical whenever the loop ran, and no
    # NameError when num_layers is 0.
    name = 'G_deconv' + str(num_layers)
    current = deconv2d(input_map=current,
                       output_shape=[
                           self.size_batch, self.size_image,
                           self.size_image,
                           int(self.num_gen_channels / 2**(num_layers + 1))
                       ],
                       size_kernel=self.size_kernel,
                       stride=1,
                       name=name)
    current = tf.nn.relu(current)

    name = 'G_deconv' + str(num_layers + 1)
    current = deconv2d(input_map=current,
                       output_shape=[
                           self.size_batch, self.size_image,
                           self.size_image, self.num_input_channels
                       ],
                       size_kernel=self.size_kernel,
                       stride=1,
                       name=name)

    # output
    return tf.nn.tanh(current)
def __init__(self, is_training, config, input_):
    """Build the LSTM language-model graph.

    Creates the embedding lookup, a multi-layer LSTM unrolled for
    ``input_.num_steps`` steps, the softmax projection and the
    per-batch cost.  When ``is_training`` is true the clipped-gradient SGD
    training op and the learning-rate update op are added as well.

    Args:
        is_training: Whether to add dropout (if ``config.keep_prob < 1``) and
            the training ops.
        config: Object providing ``hidden_size``, ``vocab_size``,
            ``keep_prob``, ``num_layers`` and ``max_grad_norm``.
        input_: Object providing ``batch_size``, ``num_steps``,
            ``input_data`` and ``targets``.
    """
    self._input = input_

    batch_size = input_.batch_size
    num_steps = input_.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    def lstm_cell():
        return tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0)

    attn_cell = lstm_cell
    if is_training and config.keep_prob < 1:

        def attn_cell():
            # DropoutWrapper wraps a cell *instance*, so the factory has to
            # be called here; passing the function itself is a bug.
            return tf.contrib.rnn.DropoutWrapper(
                lstm_cell(), output_keep_prob=config.keep_prob)

    # A fresh cell per layer so that layers do not share one object.
    cell = tf.contrib.rnn.MultiRNNCell(
        [attn_cell() for _ in range(config.num_layers)])

    self._initial_state = cell.zero_state(batch_size, data_type())

    with tf.device('/cpu:0'):
        embedding = tf.get_variable('embedding', [vocab_size, size],
                                    dtype=data_type())
        inputs = tf.nn.embedding_lookup(embedding, input_.input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Pin the static shape once; it does not change between time steps.
    inputs.set_shape((batch_size, num_steps, size))

    # Manually unrolled RNN; variables are created on the first step and
    # reused on every later one.
    outputs = []
    state = self._initial_state
    with tf.variable_scope('RNN'):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)

    # Concatenate the step outputs along axis 1 and flatten to rows of
    # width `size` for the softmax projection.
    output = tf.reshape(tf.concat(outputs, 1), [-1, size])
    softmax_w = tf.get_variable("softmax_w", [size, vocab_size],
                                dtype=data_type())
    softmax_b = tf.get_variable("softmax_b", [vocab_size],
                                dtype=data_type())
    logits = tf.matmul(output, softmax_w) + softmax_b
    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits], [tf.reshape(input_.targets, [-1])],
        [tf.ones([batch_size * num_steps], dtype=data_type())])
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state

    if not is_training:
        # Evaluation graphs need no optimizer.
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars),
        global_step=tf.contrib.framework.get_or_create_global_step())

    # The learning rate is changed from outside by feeding _new_lr and
    # running _lr_update.
    self._new_lr = tf.placeholder(tf.float32, shape=[],
                                  name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
def train():
    """Train CIFAR-10 for a number of steps.

    Builds one model tower per GPU (``FLAGS.num_gpus``) with shared
    variables, averages the tower gradients, and runs ``FLAGS.max_steps``
    training steps.  Progress is printed every 10 steps, summaries are
    written every 100 steps and a checkpoint is saved to ``FLAGS.train_dir``
    every 1000 steps and after the last step.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Create a variable to count the number of train() calls. This
        # equals the number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                                 FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch *
                          cifar10.NUM_EPOCHS_PER_DECAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                        global_step,
                                        decay_steps,
                                        cifar10.LEARNING_RATE_DECAY_FACTOR,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.GradientDescentOptimizer(lr)

        # Calculate the gradients for each model tower.
        tower_grads = []
        for i in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope(
                        '%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
                    # Calculate the loss for one tower of the CIFAR model.
                    # This function constructs the entire CIFAR model but
                    # shares the variables across all towers.
                    loss = tower_loss(scope)

                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)

                    # Calculate the gradients for the batch of data on this
                    # CIFAR tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)

        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))

        # Add histograms for gradients.  A missing gradient is represented
        # by None; a Tensor must not be truth-tested, so compare explicitly.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads,
                                                global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        # Group all updates to into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph. allow_soft_placement must
        # be set to True to build towers on GPU, as some of the ops do not
        # have GPU implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            # `loss` is the loss tensor of the last tower built above.
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = duration / FLAGS.num_gpus

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                              '%.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def network(self, inputs, reuse=False):
    """Build the multi-task convolutional network.

    A shared convolutional trunk feeds three branches (``conv1a``,
    ``conv3a``, ``pool5``) that are concatenated on the channel axis, fused
    by a 1x1 convolution and a fully connected layer, and then split into
    five task heads.

    Args:
        inputs: Input tensor for the first convolution.
        reuse: If true, switch the current variable scope to reuse mode
            before creating any layer.

    Returns:
        ``[detection, landmarks, visibility, pose, gender]`` head outputs
        (no activation) of widths 2, 42, 21, 3 and 2.
    """
    if reuse:
        tf.get_variable_scope().reuse_variables()

    # (task name, width of the final linear layer), in output order.
    task_heads = (
        ('detection', 2),
        ('landmarks', 42),
        ('visibility', 21),
        ('pose', 3),
        ('gender', 2),
    )

    layer_defaults = slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        activation_fn=tf.nn.relu,
        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01))

    with layer_defaults:
        # Trunk, with two side branches tapped after max1 and conv3.
        conv1 = slim.conv2d(inputs, 96, [11, 11], 4, padding='VALID',
                            scope='conv1')
        max1 = slim.max_pool2d(conv1, [3, 3], 2, padding='VALID',
                               scope='max1')
        conv1a = slim.conv2d(max1, 256, [4, 4], 4, padding='VALID',
                             scope='conv1a')

        conv2 = slim.conv2d(max1, 256, [5, 5], 1, scope='conv2')
        max2 = slim.max_pool2d(conv2, [3, 3], 2, padding='VALID',
                               scope='max2')
        conv3 = slim.conv2d(max2, 384, [3, 3], 1, scope='conv3')
        conv3a = slim.conv2d(conv3, 256, [2, 2], 2, padding='VALID',
                             scope='conv3a')

        conv4 = slim.conv2d(conv3, 384, [3, 3], 1, scope='conv4')
        conv5 = slim.conv2d(conv4, 256, [3, 3], 1, scope='conv5')
        pool5 = slim.max_pool2d(conv5, [3, 3], 2, padding='VALID',
                                scope='pool5')

        # Fuse the three branches along the channel axis.
        fused = tf.concat([conv1a, conv3a, pool5], 3)
        conv_all = slim.conv2d(fused, 192, [1, 1], 1, padding='VALID',
                               scope='conv_all')

        flat_width = int(np.prod(conv_all.get_shape()[1:]))
        flattened = tf.reshape(conv_all, [-1, flat_width])
        fc_full = slim.fully_connected(flattened, 3072, scope='fc_full')

        # All 512-unit task layers are created first, then all output
        # layers, keeping the layer creation order detection..gender twice.
        task_hidden = [
            slim.fully_connected(fc_full, 512, scope='fc_%s1' % task)
            for task, _ in task_heads
        ]
        return [
            slim.fully_connected(hidden, width, scope='fc_%s2' % task,
                                 activation_fn=None)
            for hidden, (task, width) in zip(task_hidden, task_heads)
        ]
def compute_cell_dynamics(args):
    """Plot one-step hidden-state dynamics of the cell for x = 0 and x = 1.

    Nine scalar parameters are drawn from a seeded NumPy RNG, installed as
    the variables of the "dynamics/cell" scope, and two cells are built on
    top of them.  For each of the two constant inputs the next hidden state
    is evaluated on 1000 points of h in [-3, 3] and handed to
    ``make_dynamics_plot``.

    Args:
        args: Passed through unchanged to ``make_dynamics_plot``.
    """
    with tf.Graph().as_default():
        # You can change this around, but make sure to reset it to 41 when
        # submitting.
        np.random.seed(41)
        tf.set_random_seed(41)

        with tf.variable_scope("dynamics"):
            x_placeholder = tf.placeholder(tf.float32, shape=(None, 1))
            h_placeholder = tf.placeholder(tf.float32, shape=(None, 1))

            def mat(value):
                return np.atleast_2d(np.array(value, dtype=np.float32))

            def vec(value):
                return np.atleast_1d(np.array(value, dtype=np.float32))

            with tf.variable_scope("cell"):
                # Draw order matters for reproducibility: six weights
                # first, then three biases.
                Ur, Wr, Uz, Wz, Uo, Wo = [
                    mat(3 * sample) for sample in np.random.randn(6)
                ]
                br, bz, bo = [vec(sample) for sample in np.random.randn(3)]
                params = [Ur, Wr, br, Uz, Wz, bz, Uo, Wo, bo]

                initial_values = (
                    ("W_r", Wr), ("U_r", Ur), ("b_r", br),
                    ("W_z", Wz), ("U_z", Uz), ("b_z", bz),
                    ("W_o", Wo), ("U_o", Uo), ("b_o", bo),
                )
                for var_name, value in initial_values:
                    tf.get_variable(var_name, initializer=value)

            # Both cells below pick up the variables created above.
            tf.get_variable_scope().reuse_variables()
            _, h_gru = GRUCell(1, 1)(x_placeholder, h_placeholder,
                                     scope="cell")
            _, h_rnn = GRUCell(1, 1)(x_placeholder, h_placeholder,
                                     scope="cell")

            init = tf.global_variables_initializer()
            with tf.Session() as session:
                session.run(init)

                # One pass with x == 0 (plot 0) and one with x == 1 (plot 1).
                for plot_index, make_input in ((0, np.zeros), (1, np.ones)):
                    x = mat(make_input(1000)).T
                    h = mat(np.linspace(-3, 3, 1000)).T
                    feed = {x_placeholder: x, h_placeholder: h}

                    ht_gru = np.array(
                        session.run([h_gru], feed_dict=feed))[0]
                    ht_rnn = np.array(
                        session.run([h_rnn], feed_dict=feed))[0]
                    make_dynamics_plot(args, plot_index, h, ht_rnn, ht_gru,
                                       params)
def __init__(self, inputs, labels, bptt_steps=64, context_size=16,
             dim=(1024, 1024), q_levels=256, batch_size=64, n_rnn=3,
             n_mlp=3, generator=False):
    """Build the unrolled sample-level RNN graph.

    A stacked LSTM consumes one frame of ``context_size`` samples per step
    for ``bptt_steps`` steps.  Each LSTM output is projected and split into
    ``context_size`` global-context slices; every slice is concatenated with
    the ``context_size`` preceding samples and passed through a small MLP
    that scores the next sample.  A cross-entropy loss is collected for
    every prediction, and in generator mode a sample is drawn and appended
    to ``inputs`` while ``self.generation_phase`` is fed as true.

    Args:
        inputs: Sample tensor, sliced as ``inputs[:, a:b, :]`` and reshaped
            to ``[batch_size, context_size]`` -- presumably
            ``[batch, time, 1]``; confirm against the caller.
        labels: Target tensor indexed as ``labels[:, t, :]`` and used as
            one-hot labels.
        bptt_steps: Number of LSTM steps to unroll.
        context_size: Samples per frame and per local context window.
        dim: ``(lstm/projection width, MLP hidden width)``.
        q_levels: Number of quantization levels (width of the last MLP
            layer).
        batch_size: Static batch size.
        n_rnn: Number of stacked LSTM layers.
        n_mlp: Number of MLP layers to apply (at most 3).
        generator: If true, reuse existing variables and add the sampling
            ops.
    """
    self.bptt_steps = bptt_steps
    self.context_size = context_size
    self.dim = dim
    self.q_levels = q_levels
    self.batch_size = batch_size
    self.n_rnn = n_rnn
    self.n_mlp = n_mlp
    # Used to map a quantization index q to (q - mean) / mean and back.
    self.mean = (q_levels - 1) / 2.

    # function to bound the initialisation weights
    def w_b(n_in, n_out):
        return (2. / (n_in + n_out))**0.5

    def uniform_init(n_in, n_out):
        bound = w_b(n_in, n_out)
        return tf.random_uniform_initializer(-bound, bound)

    # dictionary of weights and biases for fully connected layers
    with tf.variable_scope('sample_rnn'):
        if generator == True:
            tf.get_variable_scope().reuse_variables()
        self.W = {
            'samp': tf.get_variable(
                'samp/W', [dim[0], dim[0]], tf.float32,
                uniform_init(dim[0], dim[0])),
            'mlp0': tf.get_variable(
                'mlp0/W', [2 * context_size, dim[1]], tf.float32,
                uniform_init(2 * context_size, dim[1])),
            'mlp1': tf.get_variable(
                'mlp1/W', [dim[1], dim[1]], tf.float32,
                uniform_init(dim[1], dim[1])),
            'mlp2': tf.get_variable(
                'mlp2/W', [dim[1], q_levels], tf.float32,
                uniform_init(dim[1], q_levels)),
        }
        self.b = {
            'samp': tf.get_variable('samp/b', [dim[0]], tf.float32,
                                    tf.constant_initializer(0.0)),
            'mlp0': tf.get_variable('mlp0/b', [dim[1]], tf.float32,
                                    tf.constant_initializer(0.0)),
            'mlp1': tf.get_variable('mlp1/b', [dim[1]], tf.float32,
                                    tf.constant_initializer(0.0)),
            'mlp2': tf.get_variable('mlp2/b', [q_levels], tf.float32,
                                    tf.constant_initializer(0.0)),
        }

    def stacked_mlps(inputs, n_mlps):
        """Apply the first ``n_mlps`` ReLU layers of the MLP."""
        assert n_mlps <= 3
        mlp0_out = tf.nn.relu(
            tf.matmul(inputs, self.W['mlp0']) + self.b['mlp0'])
        if n_mlps == 1:
            return mlp0_out
        mlp1_out = tf.nn.relu(
            tf.matmul(mlp0_out, self.W['mlp1']) + self.b['mlp1'])
        if n_mlps == 2:
            return mlp1_out
        return tf.nn.relu(
            tf.matmul(mlp1_out, self.W['mlp2']) + self.b['mlp2'])

    # One cell object per layer: repeating a single instance ([cell] * n)
    # makes every layer share that object, which newer TF 1.x releases
    # reject.
    stacked_lstm = tf.contrib.rnn.MultiRNNCell(
        [tf.contrib.rnn.BasicLSTMCell(dim[0]) for _ in range(n_rnn)])
    self.initial_state = self.state = stacked_lstm.zero_state(
        batch_size, tf.float32)

    self.loss = []
    self.generation_phase = tf.placeholder(tf.bool)
    # While generating, start from the first frame only; generated samples
    # are appended below.
    inputs = tf.cond(self.generation_phase,
                     lambda: inputs[:, :context_size, :],
                     lambda: inputs)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Building computation graph..\n')
    with tf.variable_scope('sample_rnn') as scope:
        for i in range(bptt_steps):
            # Variables exist already in generator mode and after step 0.
            if generator == True or i > 0:
                scope.reuse_variables()

            lstm_inp = inputs[:, i * context_size:(i + 1) * context_size, :]
            lstm_inp = tf.reshape(lstm_inp, [batch_size, context_size])
            lstm_out, self.state = stacked_lstm(lstm_inp, self.state)
            emb = tf.matmul(lstm_out, self.W['samp']) + self.b['samp']
            print('\033[FGraph built: {:.2f} %'.format(
                i * 100. / bptt_steps))

            for j in range(context_size):
                global_context = emb[:, j * context_size:
                                     (j + 1) * context_size]
                pred_index = (i + 1) * context_size + j
                local_context = inputs[:, pred_index - context_size:
                                       pred_index, :]
                local_context = tf.reshape(local_context,
                                           [batch_size, context_size])
                context = tf.concat([global_context, local_context], 1)
                mlps_out = stacked_mlps(context, n_mlp)

                # loss: softmax_cross_entropy applies the softmax itself,
                # so it must receive the raw MLP scores, not probabilities.
                label = labels[:, pred_index - context_size, :]
                loss = tf.reduce_mean(
                    tf.losses.softmax_cross_entropy(onehot_labels=label,
                                                    logits=mlps_out))
                self.loss.append(loss)

                # sample: tf.multinomial likewise expects unnormalized
                # log-probabilities.
                if generator == True:
                    sample = tf.multinomial(mlps_out, 1)
                    last_pred = (tf.cast(sample, tf.float32) -
                                 self.mean) / self.mean
                    last_pred = tf.reshape(last_pred, [batch_size, 1, 1])
                    inputs = tf.cond(
                        self.generation_phase,
                        lambda: tf.concat([inputs, last_pred], axis=1),
                        lambda: inputs)
    print('computation graph built..')

    self.final_state = self.state
    self.loss = tf.reduce_mean(self.loss)
    # Map the normalized samples back to quantization-level scale.
    self.outputs = (inputs * self.mean + self.mean)
def train(self):
    """Build the training/sampling graphs and run the training loop.

    Reads ``self.data`` / ``self.val_data`` (keys ``'features'``,
    ``'captions'``, ``'image_idxs'``), builds the loss graph and a caption
    sampler that shares its variables, then trains for ``self.n_epochs``
    epochs.  Along the way it writes TensorBoard summaries to
    ``self.log_path`` every 10 iterations, prints a sample caption every
    ``self.print_every`` iterations, optionally writes validation BLEU
    scores (``self.print_bleu``) and saves a checkpoint to
    ``self.model_path`` every ``self.save_every`` epochs.
    """
    # Caption length used for both the sampler graph and the buffer that
    # collects validation captions; the two must agree.
    max_len = 20

    # train/val dataset
    # Changed this because I keep less features than captions, see prepro
    # n_examples = self.data['captions'].shape[0]
    n_examples = self.data['features'].shape[0]
    n_iters_per_epoch = int(np.ceil(float(n_examples) / self.batch_size))
    features = self.data['features']
    captions = self.data['captions']
    image_idxs = self.data['image_idxs']
    val_features = self.val_data['features']
    n_iters_val = int(np.ceil(float(val_features.shape[0]) / self.batch_size))

    # build graphs for training model and sampling captions
    # The enclosing scope confines reuse_variables() to this block so the
    # optimizer below can still create its own variables.
    with tf.variable_scope(tf.get_variable_scope()):
        loss = self.model.build_model()
        tf.get_variable_scope().reuse_variables()
        _, _, generated_captions = self.model.build_sampler(max_len=max_len)

    # train op
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        optimizer = self.optimizer(learning_rate=self.learning_rate)
        trainable_vars = tf.trainable_variables()
        grads = tf.gradients(loss, trainable_vars)
        grads_and_vars = list(zip(grads, trainable_vars))
        train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

    # summary op
    tf.summary.scalar('batch_loss', loss)
    for var in trainable_vars:
        tf.summary.histogram(var.op.name, var)
    for grad, var in grads_and_vars:
        # tf.gradients yields None for variables the loss does not depend
        # on; there is nothing to plot for those.
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradient', grad)
    summary_op = tf.summary.merge_all()

    print("The number of epoch: %d" % self.n_epochs)
    print("Data size: %d" % n_examples)
    print("Batch size: %d" % self.batch_size)
    print("Iterations per epoch: %d" % n_iters_per_epoch)

    config = tf.ConfigProto(allow_soft_placement=True)
    # config.gpu_options.per_process_gpu_memory_fraction=0.9
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        summary_writer = tf.summary.FileWriter(self.log_path,
                                               graph=tf.get_default_graph())
        saver = tf.train.Saver(max_to_keep=40)

        try:
            if self.pretrained_model is not None:
                print("Start training with pretrained Model..")
                saver.restore(sess, self.pretrained_model)

            prev_loss = -1
            curr_loss = 0
            start_t = time.time()

            for e in range(self.n_epochs):
                # NOTE(review): the permutation covers range(n_examples)
                # (the feature count), so only that many caption rows are
                # ever selected -- confirm this matches the preprocessing.
                rand_idxs = np.random.permutation(n_examples)
                captions = captions[rand_idxs]
                image_idxs = image_idxs[rand_idxs]

                for i in range(n_iters_per_epoch):
                    batch = slice(i * self.batch_size, (i + 1) * self.batch_size)
                    captions_batch = captions[batch]
                    image_idxs_batch = image_idxs[batch]
                    features_batch = features[image_idxs_batch]
                    feed_dict = {self.model.features: features_batch,
                                 self.model.captions: captions_batch}
                    _, batch_loss = sess.run([train_op, loss], feed_dict)
                    curr_loss += batch_loss

                    # write summary for tensorboard visualization
                    if i % 10 == 0:
                        summary = sess.run(summary_op, feed_dict)
                        summary_writer.add_summary(summary, e * n_iters_per_epoch + i)

                    if (i + 1) % self.print_every == 0:
                        print("\nTrain loss at epoch %d & iteration %d (mini-batch): %.5f" % (e + 1, i + 1, batch_loss))
                        # All reference captions of the first image in the batch.
                        ground_truths = captions[image_idxs == image_idxs_batch[0]]
                        decoded = decode_captions(ground_truths, self.model.idx_to_word)
                        for j, gt in enumerate(decoded):
                            print("Ground truth %d: %s" % (j + 1, gt))
                        gen_caps = sess.run(generated_captions, feed_dict)
                        decoded = decode_captions(gen_caps, self.model.idx_to_word)
                        print("Generated caption: %s\n" % decoded[0])

                print("Previous epoch loss: ", prev_loss)
                print("Current epoch loss: ", curr_loss)
                print("Elapsed time: ", time.time() - start_t)
                prev_loss = curr_loss
                curr_loss = 0

                # print out BLEU scores and file write
                if self.print_bleu:
                    # Uninitialised buffer; every row is overwritten below.
                    all_gen_cap = np.ndarray((val_features.shape[0], max_len))
                    for k in range(n_iters_val):
                        val_batch = slice(k * self.batch_size, (k + 1) * self.batch_size)
                        feed_dict = {self.model.features: val_features[val_batch]}
                        gen_cap = sess.run(generated_captions, feed_dict=feed_dict)
                        all_gen_cap[val_batch] = gen_cap

                    all_decoded = decode_captions(all_gen_cap, self.model.idx_to_word)
                    save_pickle(all_decoded, "./data/val/val.candidate.captions.pkl")
                    scores = evaluate(data_path='./data', split='val', get_scores=True)
                    write_bleu(scores=scores, path=self.model_path, epoch=e)

                # save model's parameters
                if (e + 1) % self.save_every == 0:
                    saver.save(sess, os.path.join(self.model_path, 'model'), global_step=e + 1)
                    print("model-%s saved." % (e + 1))
        finally:
            # Flush buffered events and release the event file even when
            # training is interrupted or raises.
            summary_writer.close()