def generate(self):
    """Build the 'GEN' decoder graph and return its L2-normalised outputs.

    The embedding lookup of `self.input_data` is split into
    `self.args.seq_length` pieces along axis 1; each piece is squeezed on
    axis 1 and L2-normalised along axis 1 before being passed to
    `seq2seq.rnn_decoder` inside the 'GEN' variable scope.

    Side effects:
        Sets `self.has_init_seq2seq` to True (the scope is opened with
        `reuse=self.has_init_seq2seq`, so later calls reuse variables) and
        stores the returned list on `self.outputs`.

    Returns:
        list: decoder outputs, each L2-normalised along axis 1.

    Raises:
        Exception: if `self.args.num_layers` is neither 1 nor 2.
    """
    # NOTE(review): the positional order (axis, num_splits, value) looks like
    # the pre-1.0 TensorFlow `tf.split` signature -- confirm the TF version
    # this file targets before changing it.
    embedded = tf.nn.embedding_lookup(self.embedding, self.input_data)
    inputs = tf.split(1, self.args.seq_length, embedded)
    # Concrete lists instead of lazy `map` objects: under Python 3 a map
    # iterator can be consumed only once, and the result below is both stored
    # on `self` and returned to the caller.
    inputs = [tf.nn.l2_normalize(tf.squeeze(input_, [1]), 1)
              for input_ in inputs]

    def loop(prev, i):
        # Identity loop function: hands the previous output back unchanged.
        return prev

    with tf.variable_scope('GEN', reuse=self.has_init_seq2seq) as scope:
        self.has_init_seq2seq = True
        if self.args.num_layers == 1:
            initial_states = [self.initial_state1]
        elif self.args.num_layers == 2:
            initial_states = [self.initial_state1, self.initial_state2]
        else:
            raise Exception(
                'Unsupported number of layers. Use 1 or 2 layers for now..'
            )
        outputs, last_state = seq2seq.rnn_decoder(
            inputs, initial_states, self.cell,
            loop_function=loop, scope=scope)

    outputs = [tf.nn.l2_normalize(o, 1) for o in outputs]
    self.outputs = outputs
    return outputs
def rnn_decode(self, cell, enc_memory):
    """Decode from `enc_memory` with a stack of RNN layers and project the result.

    The first layer runs `seq2seq.rnn_decoder` over all-zero "GO" inputs with
    `enc_memory` as its initial state. One extra `core_rnn.static_rnn` layer
    is added for every `i` in `range(2, self.stack_num)`, each consuming the
    previous layer's outputs. Every layer works on a deep copy of `cell`.

    Args:
        cell: RNN cell used as the template for every layer.
        enc_memory: initial state handed to the first decoder layer.

    Returns:
        Tensor of shape [feat_dim, seq_len * batch_size]: the linear
        projection `W_p . dec_output^T + b_p` of the last layer's outputs.
    """
    dec_inp = tf.unstack(
        tf.zeros([self.seq_len, self.batch_size, self.feat_dim],
                 dtype=tf.float32, name="GO"))

    with tf.variable_scope("stack_rnn_decoder"):
        dec_cell = copy.deepcopy(cell)
        dec_output, dec_state = seq2seq.rnn_decoder(
            dec_inp, enc_memory, dec_cell)

    # NOTE(review): range(2, stack_num) adds stack_num - 2 extra layers, so
    # stack_num == 3 yields two layers in total -- confirm this is intended.
    # NOTE(review): the extra layers are assumed to be siblings of
    # "stack_rnn_decoder" rather than nested inside it -- confirm.
    for i in range(2, self.stack_num):
        with tf.variable_scope("stack_rnn_decoder_" + str(i)):
            dec_cell = copy.deepcopy(cell)
            dec_output, dec_state = core_rnn.static_rnn(
                dec_cell, dec_output, dtype=dtypes.float32)

    memory_dim = self.p_memory_dim + self.s_memory_dim
    # Stack the per-step outputs into one row per (step, sample) and transpose
    # so the samples become columns: [memory_dim, seq_len * batch_size].
    dec_reshape = tf.transpose(
        tf.reshape(dec_output, (self.seq_len * self.batch_size, memory_dim)))

    W_p = tf.get_variable("output_proj_w", [self.feat_dim, memory_dim])
    b_p = tf.get_variable("output_proj_b", shape=(self.feat_dim),
                          initializer=tf.constant_initializer(0.0))

    # Broadcast the bias as a [feat_dim, 1] column across every sample column
    # instead of materialising seq_len * batch_size copies and transposing.
    dec_proj_outputs = tf.matmul(W_p, dec_reshape) + tf.expand_dims(b_p, 1)
    return dec_proj_outputs
def basic_rnn_seq2seq_with_bottle_memory(encoder_inputs, decoder_inputs, cell,
                                         dtype=dtypes.float32, scope=None):
    """Encode with `static_rnn`, then decode from the final encoder state.

    The same `cell` is used for the encoder and for the decoder.

    Args:
      encoder_inputs: A list of 2D Tensors [batch_size x input_size]
      decoder_inputs: A list of 2D Tensors [batch_size x input_size]
      cell: core_rnn_cell.RNNCell defining the cell function and size.
      dtype: The dtype of the initial state of the RNN cell
        (default: tf.float32).
      scope: VariableScope for the created subgraph;
        default: "basic_rnn_seq2seq"

    Returns:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_size] containing the generated outputs.
      enc_state: The encoder state at the final time-step, as returned by
        `core_rnn.static_rnn`.
      dec_state: The decoder state at the final time-step, as returned by
        `seq2seq.rnn_decoder`.
    """
    subgraph_scope = scope or "basic_rnn_seq2seq"
    with variable_scope.variable_scope(subgraph_scope):
        # Only the final encoder state is kept; per-step outputs are dropped.
        _, enc_state = core_rnn.static_rnn(cell, encoder_inputs, dtype=dtype)
        decoded = seq2seq.rnn_decoder(decoder_inputs, enc_state, cell)
        outputs, dec_state = decoded
    return outputs, enc_state, dec_state
def __init__(self, args, infer=False):
    """Build the character-level RNN language-model graph.

    Args:
        args: configuration object; the attributes read here are `model`
            ('rnn', 'gru' or 'lstm'), `rnn_size`, `num_layers`,
            `batch_size`, `seq_length`, `vocab_size` and `grad_clip`.
        infer: when True, `args.batch_size` and `args.seq_length` are
            overwritten with 1 (on the caller's object) and each decoder step
            is fed the embedding of the previous step's argmax symbol.

    Raises:
        Exception: if `args.model` is not one of the supported cell types.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    if args.model == 'rnn':
        cell_fn = core_rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = core_rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = core_rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    # Only the LSTM cell accepts `state_is_tuple`; passing it to
    # BasicRNNCell / GRUCell raises TypeError.
    cell_kwargs = {'state_is_tuple': True} if args.model == 'lstm' else {}
    # One cell object per layer: repeating a single instance
    # (`[cell] * num_layers`) is rejected or silently shares weights between
    # layers on TensorFlow >= 1.1.
    layers = [cell_fn(args.rnn_size, **cell_kwargs)
              for _ in range(args.num_layers)]
    self.cell = cell = core_rnn_cell.MultiRNNCell(layers, state_is_tuple=True)

    self.input_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length], name="input_data")
    self.targets = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length], name="targets")
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            # Looked up once and reused for both the debug print and the split.
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)
            # Debug output. Single-argument print() calls render the same
            # text under Python 2 and Python 3.
            print("seq_length =  %s embedding_lookup =  %s"
                  % (args.seq_length, embedded))
            inputs = tf.split(embedded, args.seq_length, 1)
            print("inputs 1: %s" % (inputs,))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]
            print("inputs 2: %s" % (inputs,))

    def loop(prev, _):
        # Greedy feedback: project the previous output to vocabulary logits,
        # pick the argmax symbol and feed its embedding to the next step.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')

    # Concatenate per-step outputs along axis 1, then flatten to one row per
    # (sample, step) of width rnn_size.
    output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits, name="prob_results")

    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    # The learning rate lives in a non-trainable variable that starts at 0.0.
    self.lr = tf.Variable(0.0, trainable=False, name="LR_")
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def discriminate_wv(self, input_data_wv):
    """Run the 'DISC' decoder over `input_data_wv` and project its last output.

    The scope is opened with `reuse=self.has_init_seq2seq`, and the flag is
    then set to True, so later calls reuse the variables.

    Args:
        input_data_wv: decoder inputs passed to `seq2seq.rnn_decoder`.

    Returns:
        The last decoder output multiplied by `self.fc_layer`.
    """
    already_built = self.has_init_seq2seq
    with tf.variable_scope('DISC', reuse=already_built) as disc_scope:
        self.has_init_seq2seq = True
        # The final decoder state is not needed here.
        step_outputs, _ = seq2seq.rnn_decoder(
            input_data_wv, self.initial_state, self.cell, scope=disc_scope)
        final_step = step_outputs[-1]
        return tf.matmul(final_step, self.fc_layer)
def stack_rnn_seq2seq_with_bottle_memory(encoder_inputs, decoder_inputs, cell,
                                         stack_num=3, dtype=dtypes.float32,
                                         scope=None):
    """Stacking RNN seq2seq model with bottleneck.

    The first encoder layer runs `static_rnn` over `encoder_inputs`; each
    extra encoder layer consumes the previous layer's outputs. The first
    decoder layer is an `rnn_decoder` started from the last encoder state,
    followed by extra `static_rnn` layers. Every layer uses a shallow copy of
    `cell`.

    Args:
      encoder_inputs: A list of 2D Tensors [batch_size x input_size]
      decoder_inputs: A list of 2D Tensors [batch_size x input_size]
      cell: core_rnn_cell.RNNCell defining the cell function and size.
      stack_num: the number to stack in seq2seq model. Extra layers are added
        for i in range(2, stack_num).
      dtype: The dtype of the initial state of the RNN cell
        (default: tf.float32)
      scope: optional VariableScope (or scope name) for the subgraph. When
        omitted, each layer gets its own default-named scope. When given, the
        first encoder and decoder layers are built directly in `scope` and
        every extra layer in a sub-scope of it, so layers never try to create
        the same variable twice.

    Returns:
      outputs: A list of the same length as decoder_inputs of 2D Tensors with
        shape [batch_size x output_size] containing the generated outputs.
      enc_state: The state of each encoder cell in the final time-step.
        This is a 2D Tensor of shape [batch_size x cell.state_size]
      dec_state: The state of each decoder cell in the final time-step.
        This is a 2D Tensor of shape [batch_size x cell.state_size]
    """

    def _build_in_scope(default_name, build, nest_under_scope):
        # Run `build()` inside the variable scope that belongs to one layer.
        if not scope:
            with variable_scope.variable_scope(default_name):
                return build()
        with variable_scope.variable_scope(scope):
            if not nest_under_scope:
                return build()
            # Reopening the caller's scope for every layer would make a second
            # static_rnn try to create the same variable names again.
            with variable_scope.variable_scope(default_name):
                return build()

    enc_output, enc_state = _build_in_scope(
        "stack_rnn_enc_1",
        lambda: core_rnn.static_rnn(copy.copy(cell), encoder_inputs,
                                    dtype=dtype),
        nest_under_scope=False)
    for i in range(2, stack_num):
        # The lambda is called immediately, so it sees the current enc_output.
        enc_output, enc_state = _build_in_scope(
            "stack_rnn_encoder_" + str(i),
            lambda: core_rnn.static_rnn(copy.copy(cell), enc_output,
                                        dtype=dtype),
            nest_under_scope=True)

    dec_output, dec_state = _build_in_scope(
        "stack_rnn_dec_1",
        lambda: seq2seq.rnn_decoder(decoder_inputs, enc_state,
                                    copy.copy(cell)),
        nest_under_scope=False)
    for i in range(2, stack_num):
        dec_output, dec_state = _build_in_scope(
            "stack_rnn_decoder_" + str(i),
            lambda: core_rnn.static_rnn(copy.copy(cell), dec_output,
                                        dtype=dtype),
            nest_under_scope=True)

    return dec_output, enc_state, dec_state
def testRNNDecoder(self):
    """Check output count and shapes of `rnn_decoder` with a projected GRU.

    A 2-unit GRU encodes two constant inputs. Its final state seeds a decoder
    whose GRU is wrapped in a 4-unit OutputProjectionWrapper and fed three
    constant inputs. The test expects three outputs of shape (2, 4) and a
    final state of shape (2, 2).
    """
    with self.test_session() as sess, variable_scope.variable_scope(
            "root", initializer=init_ops.constant_initializer(0.5)):
        enc_inputs = [constant_op.constant(0.5, shape=[2, 2])] * 2
        _, enc_state = rnn.static_rnn(
            rnn_cell.GRUCell(2), enc_inputs, dtype=dtypes.float32)

        dec_inputs = [constant_op.constant(0.4, shape=[2, 2])] * 3
        projected_cell = core_rnn_cell.OutputProjectionWrapper(
            rnn_cell.GRUCell(2), 4)
        dec, mem = seq2seq_lib.rnn_decoder(dec_inputs, enc_state,
                                           projected_cell)

        sess.run([variables.global_variables_initializer()])

        dec_values = sess.run(dec)
        self.assertEqual(3, len(dec_values))
        self.assertEqual((2, 4), dec_values[0].shape)

        mem_values = sess.run([mem])
        self.assertEqual((2, 2), mem_values[0].shape)
def __call__(self, img_ph, location_network, retina_sensor, glimpse_network):
    """Unroll the LSTM core network over a sequence of glimpses.

    The first input is a glimpse taken at a uniformly random location in
    [-1, 1). The remaining `self.num_glimpses` entries of the input list are
    integer 0 placeholders, and `loop_function` is handed to `rnn_decoder` to
    produce a glimpse from the previous hidden state.

    Args:
        img_ph: image tensor handed to `retina_sensor`.
        location_network: callable mapping a hidden state to (location, mean).
        retina_sensor: callable `(img_ph, loc)` returning patches.
        glimpse_network: callable `(patches, loc)` returning a glimpse.

    Returns:
        tuple: (locations, location means, hidden states). The first two are
        lists appended to by `loop_function`; the third is the output list
        returned by `rnn_decoder`.
    """
    lstm_cell = BasicLSTMCell(self.hidden_size)

    # Filled as a side effect each time the decoder calls loop_function.
    loc_ts = []
    mean_ts = []

    def loop_function(h_prev, _):
        # h_prev: hidden state of the previous step; the original author notes
        # it as a 2D tensor of shape (B, hidden_size).
        sampled_loc, loc_mean = location_network(h_prev)
        loc_ts.append(sampled_loc)
        mean_ts.append(loc_mean)
        # location -> patches -> glimpse
        patches = retina_sensor(img_ph, sampled_loc)
        return glimpse_network(patches, sampled_loc)

    zero_state = lstm_cell.zero_state(self.batch_size, tf.float32)

    first_loc = tf.random_uniform((self.batch_size, self.loc_dim),
                                  minval=-1, maxval=1)
    first_patches = retina_sensor(img_ph, first_loc)
    first_glimpse = glimpse_network(first_patches, first_loc)

    # One real input followed by num_glimpses placeholder entries.
    rnn_inputs = [first_glimpse] + [0] * self.num_glimpses

    h_ts, _ = rnn_decoder(rnn_inputs, zero_state, lstm_cell,
                          loop_function=loop_function)
    return loc_ts, mean_ts, h_ts
def __init__(self, args, infer=False):
    """Build the character-level RNN language-model graph (grid cells supported).

    Args:
        args: configuration object; the attributes read here are `model`
            ('rnn', 'gru', 'lstm', 'gridlstm' or 'gridgru'), `rnn_size`,
            `num_layers`, `batch_size`, `seq_length`, `vocab_size` and
            `grad_clip`.
        infer: when True, `args.batch_size` and `args.seq_length` are
            overwritten with 1 (on the caller's object) and each decoder step
            is fed the embedding of the previous step's argmax symbol.

    Raises:
        Exception: if `args.model` is not one of the supported cell types.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    additional_cell_args = {}
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    elif args.model == 'gridlstm':
        cell_fn = grid_rnn.Grid2LSTMCell
        additional_cell_args.update({
            'use_peepholes': True,
            'forget_bias': 1.0
        })
    elif args.model == 'gridgru':
        cell_fn = grid_rnn.Grid2GRUCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    # One cell object per layer: repeating a single instance
    # (`[cell] * num_layers`) is rejected or silently shares weights between
    # layers on TensorFlow >= 1.1.
    layers = [cell_fn(args.rnn_size, **additional_cell_args)
              for _ in range(args.num_layers)]
    self.cell = cell = rnn_cell.MultiRNNCell(layers)

    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            inputs = tf.split(axis=1,
                              num_or_size_splits=args.seq_length,
                              value=tf.nn.embedding_lookup(
                                  embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        # Greedy feedback: project the previous output to vocabulary logits,
        # pick the argmax symbol and feed its embedding to the next step.
        prev = tf.nn.xw_plus_b(prev, softmax_w, softmax_b)
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    outputs, last_state = seq2seq.rnn_decoder(
        inputs, self.initial_state, cell,
        loop_function=loop if infer else None, scope='rnnlm')

    # Concatenate per-step outputs along axis 1, then flatten to one row per
    # (sample, step) of width rnn_size.
    output = tf.reshape(tf.concat(axis=1, values=outputs),
                        [-1, args.rnn_size])
    self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)
    self.probs = tf.nn.softmax(self.logits)

    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    self.final_state = last_state

    # The learning rate lives in a non-trainable variable that starts at 0.0.
    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                      args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def r2rtdecoder(self):
    """Build the encoder/decoder RNN graph, train it, and export test codes.

    Graph: a BasicRNNCell encoder runs over `self.x_embedding` with
    `tf.nn.dynamic_rnn`. A second BasicRNNCell is then unrolled for
    `self._max_length` steps, starting from the encoder's last relevant
    output and final state and feeding each step's output back in as the next
    input. A softmax layer maps every decoder output to `self._class_num`
    classes.

    Mask: `seqlen_mask` has one row per sample with ones in the first
    `seqlen` positions and zeros after, so padded time steps contribute
    neither to the loss nor to the count of correct predictions.

    Training: runs `self._epoch_num` epochs of `self._batch_num` batches,
    lowering the fed learning rate as the summed epoch loss drops. On the
    last epoch, or once the summed loss falls below 0.5, the test batches are
    evaluated, the encoder's final states are written to a CSV file, and
    training stops. `self.plot` is then called with the per-epoch loss and
    accuracy.

    Returns:
        None.
    """
    # Row k of the lower-triangular matrix has k + 1 leading ones, so
    # gathering row (seqlen - 1) gives each sample's validity mask.
    lower_triangular_ones = tf.constant(np.tril(
        np.ones([self._max_length, self._max_length])),
                                        dtype=tf.float32)
    seqlen_mask = tf.slice(
        tf.gather(lower_triangular_ones, self.seqlen - 1), [0, 0],
        [self._batch_size2, self._max_length])

    # RNN
    state_size = self._emb_dim
    num_classes = self._class_num  # only used by the commented-out softmax code
    cell = tf.contrib.rnn.BasicRNNCell(state_size)
    # A single initial-state row, tiled to the batch size that is fed through
    # self._batch_size2 at run time.
    init_state = tf.get_variable('init_state', [1, state_size],
                                 initializer=tf.constant_initializer(0.0))
    init_state = tf.tile(init_state, [self._batch_size2, 1])
    rnn_outputs, final_state = tf.nn.dynamic_rnn(
        cell,
        self.x_embedding,
        sequence_length=self.seqlen,
        initial_state=init_state)
    y_reshaped = tf.reshape(self.y, [-1])
    """ decoder use the last step output of encoder as the input """
    # en_last_output = self.last_relevant(rnn_outputs, self.seqlen)
    # Flat index of each sample's last valid step after rnn_outputs is
    # reshaped to [-1, state_size].
    idx = tf.range(self._batch_size2) * \
        tf.shape(rnn_outputs)[1] + (self.seqlen - 1)
    last_rnn_output = tf.gather(tf.reshape(rnn_outputs, [-1, state_size]),
                                idx)
    with tf.variable_scope('decoder'):
        decoder_cell = tf.contrib.rnn.BasicRNNCell(self._emb_dim)
        dec_input = last_rnn_output
        dec_in_state = final_state
        dec_outputs = []
        with tf.variable_scope('multi_decoder') as scope:
            # NOTE(review): `id` shadows the builtin of the same name.
            for id in range(self._max_length):
                # Every step after the first reuses the first step's variables.
                if id > 0:
                    scope.reuse_variables()
                # One decoder step; its output and state feed the next step.
                dec_output, dec_out_state = seq2seq_lib.rnn_decoder(
                    [dec_input], dec_in_state, decoder_cell)
                # variable_scope.get_variable_scope().reuse_variables()
                dec_input = dec_output[0]
                dec_in_state = dec_out_state
                dec_outputs += dec_output
    # dec_outputs is a Python list with one tensor per decoding step;
    # concatenating on axis 0 stacks them step after step.
    # NOTE(review): this and the softmax variables below are assumed to sit
    # outside the 'decoder' scope -- confirm; the nesting changes their names.
    dec_final_output = tf.concat(dec_outputs, axis=0)

    # Softmax layer
    # with tf.variable_scope('softmax'):
    # W = tf.get_variable('W', [state_size, num_classes])
    # b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
    # weight = tf.Variable([self._emb_dim, self._class_num])
    W = tf.Variable(
        tf.truncated_normal([self._emb_dim, self._class_num], stddev=0.01))
    b = tf.Variable(tf.constant(0.1, shape=[
        self._class_num,
    ]))
    logits = tf.matmul(dec_final_output, W) + b
    # The concatenated rows are ordered step-major while y_reshaped is
    # sample-major, so swap the first two axes before flattening again.
    l1 = tf.reshape(logits, [self._max_length, -1, self._class_num])
    l2 = tf.transpose(l1, [1, 0, 2])
    logits = tf.reshape(l2, [-1, self._class_num])

    preds = tf.nn.softmax(logits)
    final_output = tf.argmax(preds, 1)
    """ Accuracy """
    # Count correct predictions; multiplying by the mask makes padded steps
    # count as incorrect.
    correct = tf.cast(tf.equal(tf.cast(final_output, tf.int32), y_reshaped), tf.int32) * \
        tf.cast(tf.reshape(seqlen_mask, [-1]), tf.int32)
    truevalue = y_reshaped
    # Divide by the number of non-padded time steps rather than taking the
    # mean over all positions.
    accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / tf.reduce_sum(
        tf.cast(self.seqlen, tf.float32))
    """ Loss function """
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y_reshaped, logits=logits)
    loss = loss * tf.reshape(seqlen_mask, [-1])
    # Average over the non-padded time steps only.
    loss = tf.reduce_sum(loss) / tf.reduce_sum(seqlen_mask)

    optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(loss)
    # Only referenced by the commented-out save call at the end of training.
    saver = tf.train.Saver()
    """ Training """
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        e_loss = []
        e_acc = []
        # Plain Python float, fed into the self.learning_rate tensor each run.
        learning_rate = 2 * 1e-3
        for epoch in range(self._epoch_num):
            total_loss = []
            total_acc = []
            for batch in range(self._batch_num):
                batch_X, batch_y, batch_len = self.getNextBatch(
                    self._batch_size, batch)
                # The actual batch size is taken from the returned data.
                batch_size_2 = batch_X.shape[0]
                feed = {
                    self.x_embedding: batch_X,
                    self.y: batch_y,
                    self.seqlen: batch_len,
                    self._batch_size2: batch_size_2,
                    self.learning_rate: learning_rate
                }
                # Only acc and cost are used afterwards; the other fetches
                # are never read.
                cor, dec_out, y_re, log, acc, cost, _ = sess.run(
                    [
                        correct, dec_outputs, y_reshaped, logits, accuracy,
                        loss, optimizer
                    ],
                    feed_dict=feed)
                total_loss.append(cost)
                total_acc.append(acc)
            # Epoch loss is the SUM of batch losses; accuracy is the mean.
            total_loss = np.sum(np.array(total_loss))
            total_acc = np.mean(np.array(total_acc))
            e_loss.append(total_loss)
            e_acc.append(total_acc)
            print("Epoch" + str(epoch) + ":")
            print("Loss: " + str(total_loss) + " " + "Accuracy: " +
                  str(total_acc))
            # Step the learning rate down as the summed epoch loss shrinks.
            if total_loss < 30:
                learning_rate = 1e-3
            if total_loss < 15:
                learning_rate = 1e-4
                # print("Learning rate changed.")
            # Evaluate on the test set on the final epoch or once the summed
            # loss is small enough, then stop training.
            if epoch == self._epoch_num - 1 or total_loss < 0.5:  # or total_acc>0.985:
                hidden_code = []
                rnn_code = []  # never filled in this variant
                total_acc = []
                for test_batch in range(self._batch_num_test):
                    # NOTE(review): `a` is never read; this looks like a
                    # leftover debugger breakpoint anchor.
                    if test_batch == self._batch_num_test - 1:
                        a = 1
                    batch_testX, batch_y, batch_testlen = self.getNextTestBatch(
                        self._batch_size, test_batch)
                    batch_testsize_2 = batch_testX.shape[0]
                    feed = {
                        self.x_embedding: batch_testX,
                        self.y: batch_y,
                        self.seqlen: batch_testlen,
                        self._batch_size2: batch_testsize_2,
                        self.learning_rate: learning_rate
                    }
                    last_rnno, rnno, t, f, code, acc = sess.run(
                        [
                            last_rnn_output, rnn_outputs, truevalue,
                            final_output, final_state, accuracy
                        ],
                        feed_dict=feed)
                    # Collect the encoder's final state, one row per sample.
                    code = code.reshape([-1, self._emb_dim])
                    hidden_code.extend(code)
                    total_acc.append(acc)
                    # print("Batch: "+str(test_batch))
                    # First sample's labels and predictions.
                    # NOTE(review): assumed to print once per test batch --
                    # confirm.
                    print("True" + str(t[0:self._max_length]))
                    print("Pred" + str(f[0:self._max_length]))
                total_acc = np.mean(np.array(total_acc))
                print("Accuracy:" + str(total_acc))
                codes = np.array(hidden_code).reshape(-1, self._emb_dim)
                # Keep only as many rows as there are entries in self.testdata.
                df = pd.DataFrame(codes[0:len(self.testdata), :])
                file_hidden = "toydata/covmat_hiddencode_split" + \
                    str(self._emb_dim) + ".csv"
                df.to_csv(file_hidden, float_format='%.5f')
                break
        # Save the variables to disk.
        # save_path = saver.save(sess, "savemodel/twornn3.ckpt")
        # print("Model saved in file: " + save_path)
    self.plot(np.array(e_loss), np.array(e_acc))
    return
def __init__(self,
             img_channel,
             img_size,
             pth_size,
             g_size,
             l_size,
             glimpse_output_size,
             loc_dim,
             variance,
             cell_size,
             num_glimpses,
             num_classes,
             learning_rate,
             learning_rate_decay_factor,
             min_learning_rate,
             training_steps_per_epoch,
             max_gradient_norm,
             fc1_size,
             base_channels,
             output_dim,
             is_training=False):
    """Build the recurrent glimpse / white-balance agent graph.

    An LSTM core is unrolled with `rnn_decoder`. At every step a location
    network picks where to look, a white-balance network predicts a gain that
    is applied to the (previously retouched) image, and a glimpse network
    encodes the next glimpse. The last LSTM output is projected to
    `num_classes` values and L2-normalised into `self.prediction`.

    Args:
        img_channel, img_size: image layout; `self.img_ph` holds flattened
            images of `img_size * img_size * img_channel` values.
        pth_size, g_size, l_size, glimpse_output_size: forwarded to
            `GlimpseNetwork` (and `pth_size` to `RetinaSensor`).
        loc_dim: dimensionality of a sampled location.
        variance: forwarded to `LocationNetwork` and `log_likelihood`.
        cell_size: number of LSTM units.
        num_glimpses: number of placeholder steps after the initial glimpse.
        num_classes: width of the final projection.
        learning_rate, learning_rate_decay_factor, min_learning_rate,
        training_steps_per_epoch: staircase exponential-decay schedule,
            floored at `min_learning_rate`.
        max_gradient_norm: global-norm clip threshold.
        fc1_size, base_channels: forwarded to `CriticNetwork` when
            `FLAGS.USE_CRITIC` is set.
        output_dim: width of the label placeholder `self.lbl_ph`.
        is_training: when True, the losses, rewards and training ops are
            built as well.
    """
    self.img_ph = tf.placeholder(tf.float32,
                                 [None, img_size * img_size * img_channel])
    self.lbl_ph = tf.placeholder(tf.float32, [None, output_dim])

    self.global_step = tf.Variable(0, trainable=False)

    self.learning_rate = tf.maximum(
        tf.train.exponential_decay(learning_rate,
                                   self.global_step,
                                   training_steps_per_epoch,
                                   learning_rate_decay_factor,
                                   staircase=True), min_learning_rate)

    cell = BasicLSTMCell(cell_size)

    with tf.variable_scope('GlimpseNetwork'):
        glimpse_network = GlimpseNetwork(img_channel, img_size, pth_size,
                                         loc_dim, g_size, l_size,
                                         glimpse_output_size)
    with tf.variable_scope('Agent'):
        # The agent is responsible for selecting a window and estimating a
        # gain.
        with tf.variable_scope('LocationNetwork'):
            location_network = LocationNetwork(
                loc_dim=loc_dim,
                rnn_output_size=cell.output_size,
                variance=variance,
                is_sampling=is_training)
        with tf.variable_scope('WhiteBalanceNetwork'):
            wb_network = WhiteBalanceNetwork(
                rnn_output_size=cell.output_size, output_dim=output_dim)
    # NOTE(review): the 'Critic' scope is assumed to be a sibling of 'Agent'
    # rather than nested inside it -- confirm.
    if FLAGS.USE_CRITIC:
        with tf.variable_scope('Critic'):
            critic_network = CriticNetwork(fc1_size, base_channels)

    # Core Network
    batch_size = tf.shape(self.img_ph)[0]
    init_loc = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
    init_state = cell.zero_state(batch_size, tf.float32)

    init_glimpse = glimpse_network(self.img_ph, init_loc)
    # One real input followed by num_glimpses placeholder entries; the
    # loop_function given to rnn_decoder supplies a glimpse computed from the
    # previous output.
    rnn_inputs = [init_glimpse]
    rnn_inputs.extend([0] * num_glimpses)

    # Filled as a side effect while rnn_decoder unrolls the loop function.
    locs, loc_means = [], []
    gains = []
    img_retouched = []

    def _apply_gain(ill, loc, img, patch_wise=False):
        # Scale each colour channel i by ill[:, i] / ill[:, 1] and return the
        # three scaled channels concatenated along the last axis.
        if patch_wise:
            # NOTE(review): never called with patch_wise=True in this method.
            # The slice assignment below (and round() on a tensor) does not
            # look valid for tf.Tensor objects -- confirm before enabling.
            retina = RetinaSensor(img_channel, img_size, pth_size)
            pth = retina(img, loc, serial=False)
            img = tf.reshape(
                img, [tf.shape(img)[0], img_size, img_size, img_channel])
            retouched_channel = []
            for i in range(3):
                tmp = pth[:, :, :, i]
                tmp = tf.reshape(tmp, [tf.shape(tmp)[0], -1])
                tmp_ill = tf.reshape(ill[:, i] / ill[:, 1],
                                     [tf.shape(img)[0], 1])
                tmp_ill = tf.tile(tmp_ill, [1, pth_size * pth_size])
                tmp *= tmp_ill
                retouched_channel.append(tmp)
            retouched = tf.concat(retouched_channel, -1)
            img[:,
                round(img_size * loc[0]) -
                pth_size:round(img_size * loc[0]) + pth_size,
                round(img_size * loc[1]) -
                pth_size:round(img_size * loc[1]) + pth_size, :] = retouched
        else:
            img = tf.reshape(
                img, [tf.shape(img)[0], img_size, img_size, img_channel])
            retouched_channel = []
            for i in range(3):
                tmp = img[:, :, :, i]
                tmp = tf.reshape(tmp, [tf.shape(tmp)[0], -1])
                tmp_ill = tf.reshape(ill[:, i] / ill[:, 1],
                                     [tf.shape(img)[0], 1])
                tmp_ill = tf.tile(tmp_ill, [1, img_size * img_size])
                tmp *= tmp_ill
                retouched_channel.append(tmp)
            # NOTE(review): this yields channel-planar data, which is later
            # reshaped as [batch, H, W, C] -- confirm the layout is intended.
            img = tf.concat(retouched_channel, -1)
        return img

    def _loop_function(prev, _):
        # Location and gain are both predicted from the previous LSTM output.
        loc, loc_mean = location_network(prev)
        locs.append(loc)
        loc_means.append(loc_mean)
        gain = wb_network(prev)
        gains.append(gain)
        if img_retouched:
            # Later steps retouch the most recently retouched image and
            # glimpse at the new result.
            img_retouched.append(_apply_gain(gain, loc, img_retouched[-1]))
            glimpse = glimpse_network(img_retouched[-1], loc)
        else:
            # First step: retouch the raw input, but glimpse at the raw input.
            img_retouched.append(_apply_gain(gain, loc, self.img_ph))
            glimpse = glimpse_network(self.img_ph, loc)
        return glimpse

    rnn_outputs, _ = rnn_decoder(rnn_inputs,
                                 init_state,
                                 cell,
                                 loop_function=_loop_function)

    assert len(gains) == len(locs)

    # Time independent baselines
    with tf.variable_scope('Baseline'):
        baseline_w = weight_variable((cell.output_size, 1))
        baseline_b = bias_variable((1, ))
    baselines = []
    for output in rnn_outputs[1:]:
        baseline = tf.nn.xw_plus_b(output, baseline_w, baseline_b)
        baseline = tf.squeeze(baseline)
        baselines.append(baseline)

    baselines = tf.stack(baselines)  # [timesteps, batch_sz]
    baselines = tf.transpose(baselines)  # [batch_sz, timesteps]

    # Classification. Take the last step only.
    rnn_last_output = rnn_outputs[-1]
    with tf.variable_scope('Classification'):
        logit_w = weight_variable((cell.output_size, num_classes))
        logit_b = bias_variable((num_classes, ))
    logits = tf.nn.xw_plus_b(rnn_last_output, logit_w, logit_b)
    # batch_size * 3
    self.prediction = tf.nn.l2_normalize(logits, axis=1)
    self.locations = locs

    if is_training:
        # angular loss
        self.xent = get_angular_loss(self.prediction, self.lbl_ph)
        tf.summary.scalar('xent', self.xent)

        # RL reward
        # reward shape [batchsize, 1]
        if FLAGS.USE_CRITIC:
            img_critic = tf.reshape(self.img_ph, [
                tf.shape(self.img_ph)[0], img_size, img_size, img_channel
            ])
            img_real = apply_gain(img_critic, self.lbl_ph)
            img_real = tf.reshape(
                img_real,
                [tf.shape(img_real)[0], img_size, img_size, img_channel])
            img_fake = apply_gain(img_critic, self.prediction)
            img_fake = tf.reshape(
                img_fake,
                [tf.shape(img_fake)[0], img_size, img_size, img_channel])

            real_logit = critic_network(img_real,
                                        is_train=is_training,
                                        reuse=False)
            fake_logit = critic_network(img_fake,
                                        is_train=is_training,
                                        reuse=True)
            # Score every intermediate retouched image with the critic; these
            # scores become the per-step rewards.
            rnn_fake_logits = []
            for index_sequence in range(len(img_retouched)):
                rnn_img_fake = tf.reshape(img_retouched[index_sequence], [
                    tf.shape(img_retouched[index_sequence])[0], img_size,
                    img_size, img_channel
                ])
                rnn_fake_logit = critic_network(rnn_img_fake,
                                                is_train=is_training,
                                                reuse=True)
                rnn_fake_logits.append(rnn_fake_logit)
            rewards = tf.stop_gradient(
                tf.convert_to_tensor(
                    rnn_fake_logits))  # shape (timesteps, batch_sz, 1)
            rewards = tf.transpose(tf.squeeze(
                rewards, 2))  # shape [batch_sz, timesteps]

            self.c_loss = tf.reduce_mean(fake_logit - real_logit)

            # Fetch the critic variables before branching: the gradient-clip
            # branch used to read `theta_c` (and `params`) before either had
            # been assigned. All later critic calls use reuse=True, so the
            # variable set is the same as when fetched further down.
            theta_c = tf.trainable_variables(scope='critic')

            if FLAGS.grad_penalty < 0:
                # use grad clip
                gradients = tf.gradients(self.c_loss, theta_c)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, max_gradient_norm)
                # Pair each clipped gradient with the critic variable it was
                # computed for.
                self.opt_c = tf.train.AdamOptimizer(
                    self.learning_rate).apply_gradients(
                        zip(clipped_gradients, theta_c),
                        global_step=self.global_step)
            else:
                # Critic gradient norm and penalty
                alpha_dist = tf.contrib.distributions.Uniform(low=0.,
                                                              high=1.)
                alpha = alpha_dist.sample((batch_size, 1, 1, 1))
                interpolated = img_real + alpha * (img_fake - img_real)

                inte_logit = critic_network(images=interpolated,
                                            is_train=is_training,
                                            reuse=True)
                gradients = tf.gradients(inte_logit, [
                    interpolated,
                ])[0]
                gradient_norm = tf.sqrt(
                    1e-6 + tf.reduce_sum(gradients**2, axis=[1, 2, 3]))
                gradient_penalty = FLAGS.grad_penalty * tf.reduce_mean(
                    tf.maximum(gradient_norm - 1.0, 0.0)**2)

                self.c_loss += gradient_penalty

                gradients = tf.gradients(self.c_loss, theta_c)
                self.opt_c = tf.train.AdamOptimizer(
                    self.learning_rate).apply_gradients(
                        zip(gradients, theta_c),
                        global_step=self.global_step)
        else:
            # NOTE(review): the reward grows with the prediction error here
            # -- confirm the sign is intended.
            reward = tf.norm(self.prediction - self.lbl_ph, axis=1)
            rewards = tf.expand_dims(reward, 1)
            rewards = tf.tile(rewards,
                              (1, num_glimpses))  # [batch_sz, timesteps]

        advantages = rewards - tf.stop_gradient(baselines)
        self.advantage = tf.reduce_mean(advantages)
        logll = log_likelihood(loc_means, locs, variance)
        logllratio = tf.reduce_mean(logll * advantages)
        self.reward = tf.reduce_mean(rewards)
        tf.summary.scalar('reward', self.reward)

        # baseline loss
        self.baselines_mse = tf.reduce_mean(
            tf.square((rewards - baselines)))

        # hybrid loss
        self.loss = -logllratio + self.xent + self.baselines_mse
        tf.summary.scalar('loss', self.loss)

        # exclude the variables in critic scope
        params_all = tf.trainable_variables()
        params = []
        for var in params_all:
            if not 'critic' in var.op.name:
                params.append(var)

        gradients = tf.gradients(self.loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.train_op = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(
                zip(clipped_gradients, params),
                global_step=self.global_step)

    # NOTE(review): the image summaries and merge_all are assumed to be built
    # regardless of is_training -- confirm against the intended nesting.
    img = tf.reshape(
        self.img_ph,
        [tf.shape(self.img_ph)[0], img_size, img_size, img_channel])
    tf.summary.image('input', img)
    tf.summary.image('gt', apply_gain(img, self.lbl_ph))
    tf.summary.image('est', apply_gain(img, self.prediction))
    self.sum_total = tf.summary.merge_all()

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def r2rtdecoder(self):
    """Rebuild the encoder/decoder RNN graph, restore a checkpoint, export codes.

    Graph: a BasicRNNCell encoder runs over `self.x_one_hot` with
    `tf.nn.dynamic_rnn`. A second BasicRNNCell is then unrolled for
    `self._max_length` steps, starting from the encoder's last relevant
    output and final state and feeding each step's output back in as the next
    input. A softmax layer maps every decoder output to `self._class_num`
    classes.

    Mask: `seqlen_mask` has one row per sample with ones in the first
    `seqlen` positions and zeros after, so padded time steps contribute
    neither to the loss nor to the count of correct predictions.

    Evaluation: variables are restored from "savemodel/twornn.ckpt", the test
    batches are run, labels and predictions are printed, and the encoder's
    final states are written to "code2.csv". No training step is executed.

    Returns:
        None.
    """
    # Row k of the lower-triangular matrix has k + 1 leading ones, so
    # gathering row (seqlen - 1) gives each sample's validity mask.
    lower_triangular_ones = tf.constant(np.tril(
        np.ones([self._max_length, self._max_length])),
                                        dtype=tf.float32)
    seqlen_mask = tf.slice(tf.gather(lower_triangular_ones, self.seqlen - 1), \
                           [0, 0], [self._batch_size, self._max_length])

    # RNN
    state_size = self._emb_dim
    num_classes = self._class_num  # only used by the commented-out softmax code
    cell = tf.contrib.rnn.BasicRNNCell(state_size)
    # A single initial-state row, tiled to the fixed batch size.
    init_state = tf.get_variable('init_state', [1, state_size],
                                 initializer=tf.constant_initializer(0.0))
    init_state = tf.tile(init_state, [self._batch_size, 1])
    rnn_outputs, final_state = tf.nn.dynamic_rnn(
        cell,
        self.x_one_hot,
        sequence_length=self.seqlen,
        initial_state=init_state)
    y_reshaped = tf.reshape(self.y, [-1])
    """ decoder """
    #en_last_output = self.last_relevant(rnn_outputs, self.seqlen)
    # Flat index of each sample's last valid step after rnn_outputs is
    # reshaped to [-1, state_size].
    idx = tf.range(self._batch_size) * tf.shape(rnn_outputs)[1] + (
        self.seqlen - 1)
    last_rnn_output = tf.gather(tf.reshape(rnn_outputs, [-1, state_size]),
                                idx)
    with tf.variable_scope('decoder'):
        decoder_cell = tf.contrib.rnn.BasicRNNCell(self._emb_dim)
        dec_input = last_rnn_output
        dec_in_state = final_state
        dec_outputs = []
        with tf.variable_scope('multi_decoder') as scope:
            # NOTE(review): `id` shadows the builtin of the same name.
            for id in range(self._max_length):
                # Every step after the first reuses the first step's variables.
                if id > 0:
                    scope.reuse_variables()
                # One decoder step; its output and state feed the next step.
                dec_output, dec_out_state = seq2seq_lib.rnn_decoder(
                    [dec_input], dec_in_state, decoder_cell)
                # variable_scope.get_variable_scope().reuse_variables()
                dec_input = dec_output[0]
                dec_in_state = dec_out_state
                dec_outputs += dec_output
    # Concatenating on axis 0 stacks the per-step outputs step after step.
    dec_final_output = tf.concat(dec_outputs, axis=0)

    # Softmax layer
    # NOTE(review): the 'softmax' scope is assumed to sit outside 'decoder'.
    # The nesting determines the variable names that saver.restore must match
    # -- confirm against the checkpoint.
    with tf.variable_scope('softmax'):
        #W = tf.get_variable('W', [state_size, num_classes])
        #b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
        W = tf.Variable(
            tf.truncated_normal([self._emb_dim, self._class_num],
                                stddev=0.01))
        # weight = tf.Variable([self._emb_dim, self._class_num])
        b = tf.Variable(tf.constant(0.1, shape=[
            self._class_num,
        ]))
    logits = tf.matmul(dec_final_output, W) + b
    # The concatenated rows are ordered step-major while y_reshaped is
    # sample-major, so swap the first two axes before flattening again.
    l1 = tf.reshape(logits, [self._max_length, -1, self._class_num])
    l2 = tf.transpose(l1, [1, 0, 2])
    logits = tf.reshape(l2, [-1, self._class_num])

    preds = tf.nn.softmax(logits)

    # Count correct predictions; multiplying by the mask makes padded steps
    # count as incorrect.
    correct = tf.cast(tf.equal(tf.cast(tf.argmax(preds, 1), tf.int32), y_reshaped), tf.int32) * \
              tf.cast(tf.reshape(seqlen_mask, [-1]), tf.int32)
    final_output = tf.argmax(preds, 1)
    truevalue = y_reshaped
    # Divide by the number of non-padded time steps rather than taking the
    # mean over all positions.
    accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / tf.reduce_sum(
        tf.cast(self.seqlen, tf.float32))

    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y_reshaped, logits=logits)
    loss = loss * tf.reshape(seqlen_mask, [-1])
    # Average over the non-padded time steps only.
    loss = tf.reduce_sum(loss) / tf.reduce_sum(seqlen_mask)

    # The train op is built but never run in this method.
    # NOTE(review): presumably kept so the graph's variables match those
    # stored in the checkpoint -- confirm.
    optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(loss)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        #sess.run(tf.global_variables_initializer())
        saver.restore(sess, "savemodel/twornn.ckpt")
        print("Model restored.")
        # Fed into self.learning_rate on every run below.
        learning_rate = 5 * 1e-3
        hidden_code = []
        rnn_code = []
        total_acc = []
        # Runs _batch_num_test + 1 batches.
        for test_batch in range(self._batch_num_test + 1):
            batch_testX, batch_y, batch_testlen = self.getNextTestBatch(
                self._batch_size, test_batch)
            # NOTE(review): the graph reads self.x_one_hot but the feed
            # targets self.x -- presumably x_one_hot is derived from x;
            # confirm.
            feed = {
                self.x: batch_testX,
                self.y: batch_y,
                self.seqlen: batch_testlen,
                self.learning_rate: learning_rate
            }
            t, f, code, lro, acc = sess.run([
                truevalue, final_output, final_state, last_rnn_output,
                accuracy
            ],
                                            feed_dict=feed)
            # Collect the encoder's final state and last relevant output,
            # one row per sample.
            code = code.reshape([-1, self._emb_dim])
            hidden_code.append(code)
            lro = lro.reshape([-1, self._emb_dim])
            rnn_code.append(lro)
            total_acc.append(acc)
            #print("Batch: "+str(test_batch))
            # First sample's labels and predictions.
            # NOTE(review): assumed to print once per test batch -- confirm.
            print("True" + str(t[0:self._max_length]))
            print("Pred" + str(f[0:self._max_length]))
        total_acc = np.mean(np.array(total_acc))
        print("Accuracy:" + str(total_acc))
        codes = np.array(hidden_code).reshape(-1, self._emb_dim)
        # Keep only as many rows as there are entries in self.testdata.
        df = pd.DataFrame(codes[0:len(self.testdata), :])
        #file_hidden="twornn_hidden"+train_filename[4:len(train_filename)-4]+"_"+str(self._emb_dim)+".csv"
        file_hidden = "code2.csv"
        df.to_csv(file_hidden, float_format='%.5f')
        #df = pd.DataFrame(np.array(rnn_code).reshape(-1, self._emb_dim))
        #df.to_csv("twornn_output_airline12.csv", float_format='%.5f')
    return
def __init__(self, img_width, img_height, nb_locations, glimpse_width,
             glimpse_height, g_size, l_size, glimpse_output_size, loc_dim,
             time_dim, variance, cell_size, nb_glimpses, nb_classes,
             learning_rate, learning_rate_decay_factor, min_learning_rate,
             nb_training_batch, max_gradient_norm, is_training=False):
    """Assemble the glimpse-driven recurrent classifier graph.

    The constructor creates the image/label placeholders, a decayed learning
    rate floored at ``min_learning_rate``, a glimpse network, a location
    network and an LSTM core that is unrolled with ``rnn_decoder`` for
    ``nb_glimpses`` steps after the initial glimpse.  The last core output is
    classified; when ``is_training`` is true a hybrid loss (policy-gradient
    term + cross-entropy + baseline MSE) and a clipped Adam update are added.

    Attributes written: ``img_ph``, ``lbl_ph``, ``global_step``,
    ``learning_rate``, ``init_glimpse``, ``softmax``, ``pred``, ``accuracy``,
    ``saver`` and, in training mode, ``cross_entropy``, ``advantage``,
    ``reward``, ``baselines_mse``, ``loss`` and ``train_op``.
    """
    self.img_ph = tf.placeholder(tf.float32, [None, img_height, img_width])
    self.lbl_ph = tf.placeholder(tf.int64, [None])
    self.global_step = tf.Variable(0, trainable=False)

    # lr = learning_rate * decay_factor ^ floor(global_step / nb_training_batch);
    # staircase=True makes the exponent an integer division.
    decayed_lr = tf.train.exponential_decay(
        learning_rate,
        self.global_step,
        nb_training_batch,
        learning_rate_decay_factor,
        staircase=True)
    self.learning_rate = tf.maximum(decayed_lr, min_learning_rate)

    cell = BasicLSTMCell(cell_size)

    with tf.variable_scope('GlimpseNetwork'):
        glimpse_network = GlimpseNetwork(
            img_width, img_height, glimpse_width, glimpse_height,
            loc_dim + time_dim, g_size, l_size, glimpse_output_size,
            nb_locations)
    with tf.variable_scope('LocationNetwork'):
        location_network = LocationNetwork(
            loc_dim=loc_dim * nb_locations + time_dim,
            rnn_output_size=cell.output_size,
            variance=variance,
            is_sampling=is_training)

    # Core network: four random starting coordinates in [-1, 1), created in
    # the order loc_1, loc_2, loc_3, t.
    # NOTE(review): init_t is sized with loc_dim rather than time_dim; this
    # only matters if the two differ -- confirm the intent.
    batch_size = tf.shape(self.img_ph)[0]
    init_loc_1, init_loc_2, init_loc_3, init_t = [
        tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
        for _ in range(4)
    ]
    init_state = cell.zero_state(batch_size, tf.float32)
    self.init_glimpse = glimpse_network(
        self.img_ph, init_loc_1, init_loc_2, init_loc_3, init_t)

    # Only the first decoder input is real; the zeros are placeholders that
    # rnn_decoder replaces with loop_function's output.
    rnn_inputs = [self.init_glimpse] + [0] * nb_glimpses

    locs, loc_means = [], []

    def loop_function(prev, _):
        """Turn the previous core output into the next glimpse."""
        loc, loc_mean = location_network(prev)
        locs.append(loc)
        loc_means.append(loc_mean)
        # Feed each of the four location columns as a separate [batch, 1]
        # tensor.
        columns = [tf.reshape(loc[:, k], [-1, 1]) for k in range(4)]
        return glimpse_network(self.img_ph, *columns)

    rnn_outputs, _ = rnn_decoder(
        rnn_inputs, init_state, cell, loop_function=loop_function)

    # Time-independent baselines: one linear read-out shared by all steps
    # after the initial glimpse.
    with tf.variable_scope('Baseline'):
        baseline_w = _weight_variable((cell.output_size, 1))
        baseline_b = _bias_variable((1,))
    per_step = [
        tf.squeeze(tf.nn.xw_plus_b(step_output, baseline_w, baseline_b))
        for step_output in rnn_outputs[1:]
    ]
    baselines = tf.transpose(tf.stack(per_step))  # [batch_sz, timesteps]

    # Classification uses the final core output only.
    final_output = rnn_outputs[-1]
    with tf.variable_scope('Classification'):
        logit_w = _weight_variable((cell.output_size, nb_classes))
        logit_b = _bias_variable((nb_classes,))
    logits = tf.nn.xw_plus_b(final_output, logit_w, logit_b)
    self.softmax = tf.nn.softmax(logits)
    self.pred = tf.argmax(self.softmax, 1)
    is_correct = tf.equal(self.pred, self.lbl_ph)
    self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

    if is_training:
        # Supervised classification loss.
        self.cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.lbl_ph, logits=logits))

        # Reward is 1 for a correct final prediction, repeated per glimpse.
        reward = tf.cast(tf.equal(self.pred, self.lbl_ph), tf.float32)
        rewards = tf.tile(
            tf.expand_dims(reward, 1),
            (1, nb_glimpses))  # [batch_sz, timesteps]
        advantages = rewards - tf.stop_gradient(baselines)
        self.advantage = tf.reduce_mean(advantages)
        logll = _log_likelihood(loc_means, locs, variance)
        logllratio = tf.reduce_mean(logll * advantages)
        self.reward = tf.reduce_mean(reward)

        # Regress the baselines onto the rewards.
        self.baselines_mse = tf.reduce_mean(tf.square((rewards - baselines)))

        # Hybrid objective.
        self.loss = -logllratio + self.cross_entropy + self.baselines_mse

        trainables = tf.trainable_variables()
        raw_grads = tf.gradients(self.loss, trainables)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                  max_gradient_norm)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(clipped_grads, trainables), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def __init__(self, img_size_width, img_size_height, CNN_patch_width,
             CNN_patch_height, CNN_patch_number, patch_window_width,
             patch_window_height, g_size, l_size, glimpse_output_size,
             loc_dim, variance, cell_size, num_glimpses, num_classes,
             learning_rate, learning_rate_decay_factor, min_learning_rate,
             training_batch_num, max_gradient_norm, last_lstm_size,
             n_time_window, is_training=False):
    """Assemble the CNN + glimpse-RNN + LSTM classification graph.

    A CNN front end transforms the flat image placeholder, a glimpse network
    and a location network drive an LSTM core unrolled with ``rnn_decoder``
    for ``num_glimpses`` steps, and the last core output feeds two heads: a
    direct linear classifier and a second LSTM that runs over groups of
    ``n_time_window`` consecutive rows.  When ``is_training`` is true the
    hybrid loss (policy-gradient term, both cross-entropies and the baseline
    MSE) and a clipped Adam update are added.

    Attributes written: ``img_ph``, ``lbl_ph``, ``global_step``,
    ``learning_rate``, ``locs``, ``prediction``, ``softmax``,
    ``lstm_prediction``, ``lstm_softmax``, ``saver`` and, in training mode,
    ``cross_entropy``, ``lstm_cross_entropy``, ``advantage``, ``reward``,
    ``baselines_mse``, ``loss`` and ``train_op``.
    """
    self.img_ph = tf.placeholder(tf.float32,
                                 [None, img_size_width * img_size_height])
    self.lbl_ph = tf.placeholder(tf.int64, [None])
    self.global_step = tf.Variable(0, trainable=False)

    # lr = learning_rate * decay_factor ^ floor(global_step / training_batch_num);
    # staircase=True makes the exponent an integer division.
    decayed_lr = tf.train.exponential_decay(
        learning_rate,
        self.global_step,
        training_batch_num,
        learning_rate_decay_factor,
        staircase=True)
    self.learning_rate = tf.maximum(decayed_lr, min_learning_rate)

    cell = BasicLSTMCell(cell_size)

    with tf.variable_scope('CNN'):
        cnn_network = CNN(img_size_width, img_size_height, CNN_patch_width,
                          CNN_patch_height, CNN_patch_number)
    with tf.variable_scope('GlimpseNetwork'):
        glimpse_network = GlimpseNetwork(
            img_size_width, img_size_height, patch_window_width,
            patch_window_height, loc_dim, g_size, l_size,
            glimpse_output_size)
    with tf.variable_scope('LocationNetwork'):
        location_network = LocationNetwork(
            loc_dim=loc_dim,
            rnn_output_size=cell.output_size,
            variance=variance,
            is_sampling=is_training)

    # Core network.
    # NOTE(review): this rebinds self.img_ph from the placeholder to the CNN
    # output, so the original placeholder is no longer reachable through the
    # attribute -- confirm callers feed the tensor they expect.
    self.img_ph = cnn_network(self.img_ph)
    batch_size = tf.shape(self.img_ph)[0]
    # Random start location in [-1, 1), shape (batch_size, loc_dim).
    init_loc = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
    init_state = cell.zero_state(batch_size, tf.float32)
    init_glimpse = glimpse_network(self.img_ph, init_loc)

    # Only the first decoder input is real; the zeros are placeholders that
    # rnn_decoder replaces with loop_function's output.
    rnn_inputs = [init_glimpse] + [0] * num_glimpses

    self.locs, loc_means = [], []

    def loop_function(prev, _):
        """Turn the previous core output into the next glimpse."""
        loc, loc_mean = location_network(prev)
        self.locs.append(loc)
        loc_means.append(loc_mean)
        return glimpse_network(self.img_ph, loc)

    rnn_outputs, _ = rnn_decoder(
        rnn_inputs, init_state, cell, loop_function=loop_function)

    # Time-independent baselines: one linear read-out shared by all steps
    # after the initial glimpse.
    with tf.variable_scope('Baseline'):
        baseline_w = _weight_variable((cell.output_size, 1))
        baseline_b = _bias_variable((1, ))
    per_step = [
        tf.squeeze(tf.nn.xw_plus_b(step_output, baseline_w, baseline_b))
        for step_output in rnn_outputs[1:]
    ]
    baselines = tf.transpose(tf.stack(per_step))  # [batch_sz, timesteps]

    # Direct classification from the final core output.
    final_output = rnn_outputs[-1]
    with tf.variable_scope('Classification'):
        logit_w = _weight_variable((cell.output_size, num_classes))
        logit_b = _bias_variable((num_classes, ))
    logits = tf.nn.xw_plus_b(final_output, logit_w, logit_b)
    self.prediction = tf.argmax(logits, 1)
    self.softmax = tf.nn.softmax(logits)

    # The same version test selects both the LSTM cell class and the
    # unpack/unstack spelling, so evaluate it once.
    version_parts = (tf.__version__).split('.')
    use_legacy_api = (int(version_parts[1]) < 12
                      and int(version_parts[0]) < 1)

    with tf.variable_scope('LSTM_Classification'):
        last_lstm_w_in = _weight_variable((cell.output_size, last_lstm_size))
        last_lstm_b_in = _bias_variable((last_lstm_size, ))
        last_lstm_in = tf.matmul(final_output,
                                 last_lstm_w_in) + last_lstm_b_in
        # Regroup rows into sequences of n_time_window steps.
        last_lstm_in = tf.reshape(last_lstm_in,
                                  [-1, n_time_window, last_lstm_size])

        # `cell` is deliberately rebound: from here on it is the second LSTM.
        if use_legacy_api:
            cell = tf.nn.rnn_cell.BasicLSTMCell(
                last_lstm_size, forget_bias=1.0, state_is_tuple=True)
        else:
            cell = tf.contrib.rnn.BasicLSTMCell(last_lstm_size)

        # The LSTM state has two parts (c_state, h_state).
        init_state_last_lstm = cell.zero_state(batch_size // n_time_window,
                                               dtype=tf.float32)
        lstm_outputs, final_state = tf.nn.dynamic_rnn(
            cell,
            last_lstm_in,
            initial_state=init_state_last_lstm,
            time_major=False)

        last_lstm_w_out = _weight_variable((cell.output_size, num_classes))
        last_lstm_b_out = _bias_variable((num_classes, ))

        # Go time-major and split per step so the last step can be indexed.
        time_major_outputs = tf.transpose(lstm_outputs, [1, 0, 2])
        if use_legacy_api:
            lstm_outputs = tf.unpack(time_major_outputs)
        else:
            lstm_outputs = tf.unstack(time_major_outputs)
        lstm_logits = tf.matmul(lstm_outputs[-1],
                                last_lstm_w_out) + last_lstm_b_out

    # Repeat each sequence-level logit row n_time_window times so there is
    # one row per original input row.
    lstm_logits = tf.reshape(
        tf.tile(lstm_logits, (1, n_time_window)), [-1, num_classes])
    self.lstm_prediction = tf.argmax(lstm_logits, 1)
    self.lstm_softmax = tf.nn.softmax(lstm_logits)

    if is_training:
        # Supervised losses for both heads.
        self.cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.lbl_ph, logits=logits))
        self.lstm_cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.lbl_ph, logits=lstm_logits))

        # Reward is 1 for a correct direct prediction, repeated per glimpse.
        reward = tf.cast(
            tf.equal(self.prediction, self.lbl_ph), tf.float32)
        rewards = tf.tile(
            tf.expand_dims(reward, 1),
            (1, num_glimpses))  # [batch_sz, timesteps]
        advantages = rewards - tf.stop_gradient(baselines)
        self.advantage = tf.reduce_mean(advantages)
        logll = _log_likelihood(loc_means, self.locs, variance)
        logllratio = tf.reduce_mean(logll * advantages)
        self.reward = tf.reduce_mean(reward)

        # Regress the baselines onto the rewards.
        self.baselines_mse = tf.reduce_mean(tf.square((rewards - baselines)))

        # Hybrid objective.
        self.loss = (-logllratio + self.cross_entropy + self.baselines_mse +
                     self.lstm_cross_entropy)

        trainables = tf.trainable_variables()
        raw_grads = tf.gradients(self.loss, trainables)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                  max_gradient_norm)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(clipped_grads, trainables), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def __init__(self, args, infer=False):
    """Build the character-level RNN language-model graph.

    Args:
        args: Hyper-parameter namespace.  Reads ``model`` ('rnn', 'gru' or
            'lstm'), ``rnn_size``, ``num_layers``, ``batch_size``,
            ``seq_length``, ``vocab_size``, ``grad_clip`` and
            ``learning_rate``.  When ``infer`` is true, ``batch_size`` and
            ``seq_length`` are overwritten with 1 on the passed object.
        infer: True when the model is built for sampling; one character is
            processed at a time with no batching or BPTT.

    Raises:
        Exception: if ``args.model`` is not one of the supported cell types.

    Attributes written: ``args``, ``cell``, ``input_data``, ``targets``,
    ``initial_state``, ``final_state``, ``logits``, ``probs``, ``cost``,
    ``lr``, ``global_epoch_fraction``, ``global_seconds_elapsed``,
    ``train_op`` and ``summary_op``.
    """
    self.args = args
    if infer:
        # Worry about one character at a time during sampling.
        args.batch_size = 1
        args.seq_length = 1

    # Pick the cell class for the requested model type.
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    def make_cell():
        """Return a fresh cell of ``rnn_size`` units for one layer."""
        # Only the LSTM cell takes state_is_tuple; passing it to the basic
        # RNN or GRU constructors is a TypeError.
        if args.model == 'lstm':
            return cell_fn(args.rnn_size, state_is_tuple=True)
        return cell_fn(args.rnn_size)

    # Stack num_layers cells.  Each layer gets its OWN cell object:
    # `[cell] * n` would put the same instance in every slot, which newer
    # TensorFlow 1.x releases either reject or treat as weight sharing
    # between layers.
    self.cell = cell = rnn_cell.MultiRNNCell(
        [make_cell() for _ in range(args.num_layers)], state_is_tuple=True)

    # int32 placeholders (NOT floats) of shape batch_size x seq_length:
    # input_data receives the input batches, targets is what the loss
    # compares against.
    self.input_data = tf.placeholder(tf.int32,
                                     [args.batch_size, args.seq_length])
    self.targets = tf.placeholder(tf.int32,
                                  [args.batch_size, args.seq_length])

    # All-zero state that can be swapped in to reset the network.
    self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

    with tf.variable_scope('rnnlm'):
        # Output projection from the top layer (rnn_size) to the vocabulary.
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        # [TODO: Why specify CPU? Same as the TF translation tutorial.]
        with tf.device("/cpu:0"):
            # Trainable embedding from character ids to the RNN input; its
            # role is the conceptual inverse of softmax_w.
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            # Look up embeddings (batch_size x seq_length x rnn_size) and
            # split along the time axis into seq_length tensors of shape
            # batch_size x 1 x rnn_size.
            inputs = tf.split(
                tf.nn.embedding_lookup(embedding, self.input_data),
                args.seq_length,
                axis=1)
            # Drop the degenerate time axis: each becomes
            # batch_size x rnn_size.
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        """Map the previous output to the embedding of its argmax symbol.

        Only passed to the decoder when ``infer`` is true.  Because
        seq_length is 1 in that mode there are no steps after the first, so
        the decoder presumably never invokes it -- TODO confirm against
        seq2seq.rnn_decoder.
        """
        # Project the previous output to character logits.
        prev = tf.matmul(prev, softmax_w) + softmax_b
        # Greedy choice (argmax, no sampling); no gradient flows through it.
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        # Re-embed the chosen symbol as the next step's input.
        return tf.nn.embedding_lookup(embedding, prev_symbol)

    # Unroll the network.  `outputs` holds one [batch_size x rnn_size] tensor
    # per time step: raw top-layer outputs, not yet projected to the
    # vocabulary.  The RNN weights are created by this call.
    outputs, self.final_state = seq2seq.rnn_decoder(
        inputs,
        self.initial_state,
        cell,
        loop_function=loop if infer else None,
        scope='rnnlm')

    # Concatenate along the feature axis to
    # [batch_size x (seq_length * rnn_size)], then reshape to
    # [(batch_size * seq_length) x rnn_size] with rows ordered
    # (batch 0, seq 0), (batch 0, seq 1), ..., (batch 1, seq 0), ...
    output = tf.reshape(tf.concat(outputs, axis=1), [-1, args.rnn_size])

    # Logits: [(batch_size * seq_length) x vocab_size].
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    # Probabilities, same shape as logits; [1 x vocab_size] when sampling.
    self.probs = tf.nn.softmax(self.logits)

    # Per-position loss.  Targets are flattened in the same
    # (batch 0, seq 0), (batch 0, seq 1), ... order as the logits rows.
    # NOTE(review): the fourth positional argument is kept exactly as
    # before; confirm which parameter it binds to in the seq2seq module this
    # file imports.
    loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)

    # Mean loss over every position; this is what the optimizer minimizes.
    self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length
    tf.summary.scalar("cost", self.cost)

    # Non-trainable bookkeeping variables.
    self.lr = tf.Variable(args.learning_rate, trainable=False)
    self.global_epoch_fraction = tf.Variable(0.0, trainable=False)
    self.global_seconds_elapsed = tf.Variable(0.0, trainable=False)

    # Clip gradients by global norm, then apply them with Adam at the
    # current learning rate.
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
    self.summary_op = tf.summary.merge_all()
def __init__(self, img_shape, pth_size, g_size, l_size, glimpse_output_size,
             loc_dim, variance, cell_size, num_glimpses, num_classes,
             learning_rate, learning_rate_decay_factor, min_learning_rate,
             training_steps_per_epoch, max_gradient_norm, is_training=False):
    """Assemble the recurrent attention classifier graph.

    Creates the image/label placeholders, a decayed learning rate floored at
    ``min_learning_rate``, a glimpse network, a location network and an LSTM
    core unrolled with ``rnn_decoder`` for ``num_glimpses`` steps after the
    initial glimpse.  The last core output is classified; when
    ``is_training`` is true a hybrid loss (policy-gradient term +
    cross-entropy + baseline MSE) and a clipped Adam update are added.

    Attributes written: ``is_training``, ``img_ph``, ``lbl_ph``,
    ``global_step``, ``learning_rate``, ``locs``, ``prediction``,
    ``softmax``, ``saver`` and, in training mode, ``xent``, ``advantage``,
    ``reward``, ``baselines_mse``, ``loss`` and ``train_op``.
    """
    self.is_training = is_training
    self.img_ph = tf.placeholder(
        tf.float32, [None, img_shape[0], img_shape[1], img_shape[2]])
    self.lbl_ph = tf.placeholder(tf.int64, [None])
    self.global_step = tf.Variable(0, trainable=False)

    decayed_lr = tf.train.exponential_decay(
        learning_rate,
        self.global_step,
        training_steps_per_epoch,
        learning_rate_decay_factor,
        staircase=True)
    self.learning_rate = tf.maximum(decayed_lr, min_learning_rate)

    cell = BasicLSTMCell(cell_size)

    with tf.variable_scope('GlimpseNetwork'):
        glimpse_network = GlimpseNetwork(img_shape, pth_size, loc_dim,
                                         g_size, l_size, glimpse_output_size)
    with tf.variable_scope('LocationNetwork'):
        location_network = LocationNetwork(
            loc_dim=loc_dim,
            rnn_output_size=cell.output_size,
            variance=variance,
            is_sampling=self.is_training)

    # Core network: random start location, zero LSTM state, first glimpse.
    batch_size = tf.shape(self.img_ph)[0]
    init_loc = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
    init_state = cell.zero_state(batch_size, tf.float32)
    init_glimpse = glimpse_network(self.img_ph, init_loc)

    # Only the first decoder input is real; the zeros are placeholders that
    # rnn_decoder replaces with loop_function's output.
    rnn_inputs = [init_glimpse] + [0] * num_glimpses

    locs, loc_means = [], []

    def loop_function(prev, _):
        """Turn the previous core output into the next glimpse."""
        loc, loc_mean = location_network(prev, self.is_training)
        locs.append(loc)
        loc_means.append(loc_mean)
        return glimpse_network(self.img_ph, loc)

    rnn_outputs, _ = rnn_decoder(
        rnn_inputs, init_state, cell, loop_function=loop_function)

    # Exposed so the visited locations can be displayed.
    self.locs = locs

    # Time-independent baselines: one linear read-out shared by all steps
    # after the initial glimpse.
    with tf.variable_scope('Baseline'):
        baseline_w = _weight_variable((cell.output_size, 1))
        baseline_b = _bias_variable((1, ))
    per_step = [
        tf.squeeze(tf.nn.xw_plus_b(step_output, baseline_w, baseline_b))
        for step_output in rnn_outputs[1:]
    ]
    baselines = tf.transpose(tf.stack(per_step))  # [batch_sz, timesteps]

    # Classification uses the final core output only.
    final_output = rnn_outputs[-1]
    with tf.variable_scope('Classification'):
        logit_w = _weight_variable((cell.output_size, num_classes))
        logit_b = _bias_variable((num_classes, ))
    logits = tf.nn.xw_plus_b(final_output, logit_w, logit_b)
    self.prediction = tf.argmax(logits, 1)
    self.softmax = tf.nn.softmax(logits)

    if self.is_training:
        # Supervised classification loss.
        self.xent = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.lbl_ph, logits=logits))

        # Reward is 1 for a correct final prediction, repeated per glimpse.
        reward = tf.cast(
            tf.equal(self.prediction, self.lbl_ph), tf.float32)
        rewards = tf.tile(
            tf.expand_dims(reward, 1),
            (1, num_glimpses))  # [batch_sz, timesteps]
        advantages = rewards - tf.stop_gradient(baselines)
        self.advantage = tf.reduce_mean(advantages)
        logll = _log_likelihood(loc_means, locs, variance)
        logllratio = tf.reduce_mean(logll * advantages)
        self.reward = tf.reduce_mean(reward)

        # Regress the baselines onto the rewards.
        self.baselines_mse = tf.reduce_mean(tf.square((rewards - baselines)))

        # Hybrid objective.
        self.loss = -logllratio + self.xent + self.baselines_mse

        trainables = tf.trainable_variables()
        raw_grads = tf.gradients(self.loss, trainables)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads,
                                                  max_gradient_norm)
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.train_op = optimizer.apply_gradients(
            zip(clipped_grads, trainables), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def __init__(self, imgSize, vocabSize, embedSize, use_lstm, rnnHiddenSize,
             rnnLayers, start, end, batch_size, learning_rate,
             learning_rate_decay_factor, min_learning_rate,
             training_steps_per_epoch, keep_prob=0.5, max_gradient_norm=5.0,
             is_training=True):
    """Build the 10-round question-answering encoder/decoder graph.

    For each of the 10 rounds the graph encodes the question, fuses it with
    the image feature and a running history state, and decodes an answer of
    up to 21 tokens.  In training mode the decoder is teacher-forced with the
    ground-truth answer and a sequence loss is accumulated; otherwise the
    decoder feeds back its own argmax token and per-token log-probabilities
    are collected.

    Args:
        imgSize: Width of the image feature vector.
        vocabSize: Vocabulary size; also the decoder output width.
        embedSize: Word-embedding width.
        use_lstm: Use ``BasicLSTMCell`` when true, ``GRUCell`` otherwise.
        rnnHiddenSize: Units per RNN cell.
        rnnLayers: Number of stacked cells; a ``MultiRNNCell`` is used
            when greater than 1.
        start: Id of the START token fed as the first decoder input.
        end: Id of the END token.
        batch_size: Fixed batch size baked into every placeholder.
        learning_rate, learning_rate_decay_factor, min_learning_rate,
        training_steps_per_epoch: Exponential-decay schedule (training
            only), floored at ``min_learning_rate``.
        keep_prob: Dropout keep probability on the fused vector (training).
        max_gradient_norm: Global-norm gradient clipping threshold.
        is_training: Selects the training or the inference graph.

    Attributes written: the ``*_ph`` placeholders and ``saver``; in training
    mode ``global_step``, ``learning_rate``, ``loss`` and ``opt_op``;
    otherwise ``ans_word_probs``.
    """
    if is_training:
        self.global_step = tf.Variable(0, trainable=False)
        self.learning_rate = tf.maximum(
            tf.train.exponential_decay(
                learning_rate,
                self.global_step,
                training_steps_per_epoch,
                learning_rate_decay_factor,
                staircase=True), min_learning_rate)

    self.answers_ph = tf.placeholder(
        tf.int32, shape=[batch_size, 10, 20], name="answers")
    self.answer_lengths_ph = tf.placeholder(
        tf.int32, shape=[batch_size, 10], name="answer_lengths")
    self.targets_ph = tf.placeholder(
        tf.int32, shape=[batch_size, 10, 21], name="targets")
    self.image_feature_ph = tf.placeholder(
        tf.float32, shape=[batch_size, imgSize], name="image_feature")
    self.caption_ph = tf.placeholder(
        tf.int32, shape=[batch_size, 40], name="caption")
    self.caption_length_ph = tf.placeholder(
        tf.int32, shape=[batch_size], name="caption_length")
    self.questions_ph = tf.placeholder(
        tf.int32, shape=[batch_size, 10, 20], name="questions")
    self.question_lengths_ph = tf.placeholder(
        tf.int32, shape=[batch_size, 10], name="question_lengths")

    START = tf.constant(value=[start] * batch_size)
    END = tf.constant(value=[end] * batch_size)

    # Shared embedding matrix.
    with ops.device("/cpu:0"):
        if vs.get_variable_scope().initializer:
            initializer = vs.get_variable_scope().initializer
        else:
            # Default initializer for embeddings should have variance=1;
            # Uniform(-sqrt(3), sqrt(3)) has variance=1.
            sqrt3 = math.sqrt(3)
            initializer = init_ops.random_uniform_initializer(-sqrt3, sqrt3)
        embedding = vs.get_variable(
            "embedding", [vocabSize, embedSize],
            initializer=initializer,
            dtype=tf.float32)
    START_EMB = embedding_ops.embedding_lookup(embedding, START)
    END_EMB = embedding_ops.embedding_lookup(embedding, END)

    # Split the per-round axis and embed: 10 tensors of
    # [batch_size, 20, embedSize].
    questions = tf.split(
        value=self.questions_ph, num_or_size_splits=10, axis=1)
    questions = [
        tf.squeeze(input=question, axis=1) for question in questions
    ]
    questions = [
        embedding_ops.embedding_lookup(embedding, question)
        for question in questions
    ]
    # Squeeze ONLY the split axis.  An unrestricted squeeze would also drop
    # the batch axis when batch_size == 1 and hand dynamic_rnn a scalar
    # sequence_length.
    question_lengths = tf.split(
        value=self.question_lengths_ph, num_or_size_splits=10, axis=1)
    question_lengths = [
        tf.squeeze(input=question_length, axis=1)
        for question_length in question_lengths
    ]

    if is_training:
        answers = tf.split(
            value=self.answers_ph, num_or_size_splits=10, axis=1)
        answers = [tf.squeeze(input=answer, axis=1) for answer in answers]
        answers = [
            embedding_ops.embedding_lookup(embedding, answer)
            for answer in answers
        ]
        # Same axis-restricted squeeze as for the question lengths.
        answer_lengths = tf.split(
            value=self.answer_lengths_ph, num_or_size_splits=10, axis=1)
        answer_lengths = [
            tf.squeeze(input=answer_length, axis=1)
            for answer_length in answer_lengths
        ]
        targets = tf.split(
            value=self.targets_ph, num_or_size_splits=10, axis=1)
        targets = [tf.squeeze(input=target, axis=1) for target in targets]

        # Loss mask per round: position i counts while i <= answer_length,
        # i.e. answer_length + 1 positions out of 21.
        weights = []
        for r in range(10):
            answer_length = answer_lengths[r]
            mask = [
                tf.greater_equal(x=answer_length, y=i) for i in range(21)
            ]
            weights.append(
                tf.cast(x=tf.stack(values=mask, axis=1),
                        dtype=tf.float32))  # [batch_size, 21]

    # RNN cell factories.
    def single_cell():
        """Return one recurrent cell of the configured type."""
        if use_lstm:
            return BasicLSTMCell(rnnHiddenSize, state_is_tuple=False)
        return GRUCell(rnnHiddenSize)

    def make_cell():
        """Return a single cell, or a stack when rnnLayers > 1."""
        if rnnLayers > 1:
            return MultiRNNCell(
                [single_cell() for _ in range(rnnLayers)],
                state_is_tuple=False)
        return single_cell()

    encoder_cell = make_cell()
    decoder_cell = OutputProjectionWrapper(
        cell=make_cell(), output_size=vocabSize, activation=None)

    # Caption feature.  caption_length_ph is already rank 1, so it is used
    # directly; squeezing it would turn it into a scalar when
    # batch_size == 1.
    caption = embedding_ops.embedding_lookup(
        embedding, self.caption_ph)  # [batch_size, 40, embedSize]
    caption_length = self.caption_length_ph
    with tf.variable_scope('EncoderRNN') as varscope:
        _, captionState = dynamic_rnn(
            cell=encoder_cell,
            inputs=caption,
            sequence_length=caption_length,
            dtype=tf.float32,
            scope=varscope)  # [batch_size, encoder_cell.state_size]

    if is_training:
        losses = []
    else:
        ans_word_probs = []

    for r in range(10):
        # 1. question
        with tf.variable_scope('EncoderRNN', reuse=True) as varscope:
            _, questionState = dynamic_rnn(
                cell=encoder_cell,
                inputs=questions[r],
                sequence_length=question_lengths[r],
                dtype=tf.float32,
                scope=varscope)

        # 2. history: the first round starts from the caption encoding.
        if r == 0:
            historyState = captionState

        # 3. fusion
        concat = tf.concat(
            values=[self.image_feature_ph, questionState, historyState],
            axis=1)
        if is_training:
            concat = tf.nn.dropout(x=concat, keep_prob=keep_prob)
        with tf.variable_scope('Fusion', reuse=(r > 0)) as varscope:
            encoder_state = tf.contrib.layers.fully_connected(
                inputs=concat,
                num_outputs=decoder_cell.state_size,
                activation_fn=tf.nn.tanh,
                scope=varscope)

        # 4. decoder
        with tf.variable_scope('DecoderRNN', reuse=(r > 0)) as varscope:
            if is_training:
                # Teacher forcing: START followed by the 20 answer tokens.
                answer = [
                    tf.squeeze(input=word, axis=1) for word in tf.split(
                        value=answers[r], num_or_size_splits=20, axis=1)
                ]
                decoder_outputs, _ = rnn_decoder(
                    decoder_inputs=[START_EMB] + answer,
                    initial_state=encoder_state,
                    cell=decoder_cell,
                    loop_function=None,
                    scope=varscope)
            else:
                self_answer = []
                self_answer_emb = []

                def loop_function(prev, _):
                    """Feed back the argmax token and record it."""
                    prev_symbol = math_ops.argmax(prev, 1)
                    self_answer.append(
                        tf.cast(x=prev_symbol, dtype=tf.int32))
                    emb_prev = embedding_ops.embedding_lookup(
                        embedding, prev_symbol)
                    self_answer_emb.append(emb_prev)
                    return emb_prev

                # Only the first input is used as-is; later steps take
                # loop_function's output.
                decoder_outputs, _ = rnn_decoder(
                    decoder_inputs=[START_EMB] * 21,
                    initial_state=encoder_state,
                    cell=decoder_cell,
                    loop_function=loop_function,
                    scope=varscope)

        # 5. update history with the question, then the answer.
        with tf.variable_scope('EncoderRNN', reuse=True) as varscope:
            _, historyState = dynamic_rnn(
                cell=encoder_cell,
                inputs=questions[r],
                sequence_length=question_lengths[r],
                initial_state=historyState,
                scope=varscope)
            if is_training:
                _, historyState = dynamic_rnn(
                    cell=encoder_cell,
                    inputs=answers[r],
                    sequence_length=answer_lengths[r],
                    initial_state=historyState,
                    scope=varscope)
            else:
                # Append END so an END position always exists; the index of
                # the first END is the generated answer's length.
                self_answer = tf.stack(
                    values=self_answer + [END], axis=1)  # [batch_size, 21]
                self_answer_length = tf.argmax(
                    input=tf.cast(
                        x=tf.equal(x=self_answer, y=end), dtype=tf.float32),
                    axis=1)
                self_answer_emb = tf.stack(
                    values=self_answer_emb,
                    axis=1)  # [batch_size, 20, embSize]
                _, historyState = dynamic_rnn(
                    cell=encoder_cell,
                    inputs=self_answer_emb,
                    sequence_length=self_answer_length,
                    initial_state=historyState,
                    scope=varscope)

        if is_training:
            decoder_outputs = tf.stack(
                values=decoder_outputs,
                axis=1)  # [batch_size, 21, vocabSize]
            loss = tf.contrib.seq2seq.sequence_loss(
                logits=decoder_outputs,
                targets=targets[r],
                weights=weights[r],
                average_across_batch=False)  # [batch_size]
            losses.append(loss)
        else:
            decoder_outputs = [
                tf.log(tf.nn.softmax(decoder_output))
                for decoder_output in decoder_outputs
            ]
            ans_word_probs.append(
                tf.stack(values=decoder_outputs,
                         axis=1))  # [batch_size, 21, vocabSize]

    if is_training:
        losses = tf.stack(values=losses, axis=1)  # [batch_size, 10]
        self.loss = tf.reduce_mean(losses)

        params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.opt_op = tf.train.AdamOptimizer(
            self.learning_rate).apply_gradients(
                zip(clipped_gradients, params),
                global_step=self.global_step)
    else:
        self.ans_word_probs = tf.stack(
            values=ans_word_probs,
            axis=1)  # [batch_size, 10, 21, vocabSize]

    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)
def build(self):
    """Build the model graph: placeholders, forward pass, loss, accuracy, train op.

    Hyper-parameters are read from ``self.params`` and the answer vocabulary
    size from ``self.words.vocab_size``; ``self.global_step`` is handed to the
    optimizer. Nothing is returned. The placeholders are stored on the
    instance as ``x``, ``q``, ``y``, ``mask`` and ``is_training``; the result
    tensors as ``total_loss``, ``num_corrects``, ``accuracy`` and ``opt_op``.
    """
    params = self.params
    N, L, Q, F = (params.batch_size, params.max_sent_size,
                  params.max_ques_size, params.max_fact_count)
    V, d, A = params.glove_size, params.hidden_size, self.words.vocab_size

    # Placeholders. The local is called `inputs` so the builtin `input` is
    # not shadowed; the placeholder's graph name is still 'x'.
    inputs = tf.placeholder(
        tf.float32, shape=[N, L, V],
        name='x')  # [num_batch, sentence_len, glove_dim]
    question = tf.placeholder(
        tf.float32, shape=[N, Q, V],
        name='q')  # [num_batch, question_len, glove_dim]
    answer = tf.placeholder(
        tf.int64, shape=[N], name='y')  # [num_batch] - one word answer
    input_mask = tf.placeholder(
        tf.bool, shape=[N, L], name='x_mask')  # [num_batch, sentence_len]
    is_training = tf.placeholder(tf.bool)

    # A single GRU cell object is used by the input, question and episodic
    # modules below.
    gru = rnn_cell.GRUCell(d)

    # Input module
    with tf.variable_scope('input') as scope:
        input_list = self.make_decoder_batch_input(inputs)
        input_states, _ = seq2seq.rnn_decoder(
            input_list, gru.zero_state(N, tf.float32), gru)

        # Question module: reuses the variables created for the input module.
        scope.reuse_variables()
        ques_list = self.make_decoder_batch_input(question)
        questions, _ = seq2seq.rnn_decoder(
            ques_list, gru.zero_state(N, tf.float32), gru)
        question_vec = questions[-1]  # output of the last question step

    # Masking: extract the fact vectors at the end of each sentence
    # (details in the paper).
    input_states = tf.transpose(tf.stack(input_states), [1, 0, 2])  # [N, L, D]
    facts = []
    for n in range(N):
        filtered = tf.boolean_mask(input_states[n, :, :],
                                   input_mask[n, :])  # [?, D]
        # Zero-pad every example up to F rows so they can be stacked.
        padding = tf.zeros(tf.stack([F - tf.shape(filtered)[0], d]))
        # NOTE(review): axis-first tf.concat is the pre-1.0 TensorFlow
        # argument order -- confirm against the TF version this file targets.
        facts.append(tf.concat(0, [filtered, padding]))  # [F, D]

    stacked = tf.stack(facts)  # stack so the fact axis can be moved to the front
    facts = tf.unstack(tf.transpose(stacked, [1, 0, 2]), num=F)  # F x [N, D]

    # Episodic Memory
    with tf.variable_scope('episodic') as scope:
        episode = EpisodeModule(d, question_vec, facts)
        memory = tf.identity(question_vec)
        for _ in range(params.memory_step):
            memory = gru(episode.new(memory), memory)[0]
            # Later memory steps share the variables of the first one.
            scope.reuse_variables()

    # Regularizations
    if params.batch_norm:
        memory = batch_norm(memory, is_training=is_training)
    memory = dropout(memory, params.keep_prob, is_training)

    with tf.name_scope('Answer'):
        # Answer module: feed-forward version (the answer is a single word).
        w_a = weight('w_a', [d, A])
        logits = tf.matmul(memory, w_a)  # [N, A]

    with tf.name_scope('Loss'):
        # Cross-entropy loss plus weight decay over the 'l2' collection.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, answer)
        loss = tf.reduce_mean(cross_entropy)
        total_loss = loss + params.weight_decay * tf.add_n(
            tf.get_collection('l2'))

    with tf.variable_scope('Accuracy'):
        # tf.equal needs both operands to have the same dtype. `answer` is an
        # int64 placeholder, so the prediction is cast to the label dtype
        # rather than to int32.
        predicts = tf.cast(tf.argmax(logits, 1), answer.dtype)
        corrects = tf.equal(predicts, answer)
        num_corrects = tf.reduce_sum(tf.cast(corrects, tf.float32))
        accuracy = tf.reduce_mean(tf.cast(corrects, tf.float32))

    # Training
    optimizer = tf.train.AdadeltaOptimizer(params.learning_rate)
    opt_op = optimizer.minimize(total_loss, global_step=self.global_step)

    # placeholders
    self.x = inputs
    self.q = question
    self.y = answer
    self.mask = input_mask
    self.is_training = is_training

    # tensors
    self.total_loss = total_loss
    self.num_corrects = num_corrects
    self.accuracy = accuracy
    self.opt_op = opt_op
def __init__(self, args, infer=False):
    """Build the RNN language-model graph described by ``args``.

    Args:
        args: Settings object. Reads ``model`` ('rnn', 'gru' or 'lstm'),
            ``rnn_size``, ``batch_size``, ``seq_length``, ``vocab_size`` and
            ``grad_clip``. When ``infer`` is true, ``batch_size`` and
            ``seq_length`` are overwritten with 1 on this object.
        infer: If true, the decoder is given a loop function that embeds
            the argmax of the projected previous output; otherwise no loop
            function is used.

    Raises:
        Exception: If ``args.model`` is not one of the supported names.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    # Map the model name to the cell class name; the attribute is looked up
    # only for the selected entry.
    cell_class_names = {
        'rnn': 'BasicRNNCell',
        'gru': 'GRUCell',
        'lstm': 'LayerNormBasicLSTMCell',
    }
    if args.model not in cell_class_names:
        raise Exception("model type not supported: {}".format(args.model))
    cell = getattr(rnn_cell, cell_class_names[args.model])(args.rnn_size)

    # The single cell is used directly (no MultiRNNCell wrapper).
    self.cell = cell

    batch_shape = [args.batch_size, args.seq_length]
    self.input_data = tf.placeholder(tf.int32, batch_shape)
    self.targets = tf.placeholder(tf.int32, batch_shape)
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)

    # Bookkeeping variables (not trainable).
    self.batch_pointer = tf.Variable(
        0, name="batch_pointer", trainable=False, dtype=tf.int32)
    self.inc_batch_pointer_op = tf.assign(self.batch_pointer,
                                          self.batch_pointer + 1)
    self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False)
    self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
    tf.summary.scalar("time_batch", self.batch_time)

    def summarize(tensor):
        """Add mean, max and min scalar summaries for ``tensor``."""
        with tf.name_scope('summaries'):
            tf.summary.scalar('mean', tf.reduce_mean(tensor))
            tf.summary.scalar('max', tf.reduce_max(tensor))
            tf.summary.scalar('min', tf.reduce_min(tensor))

    with tf.variable_scope('rnnlm'):
        proj_w = tf.get_variable("softmax_w",
                                 [args.rnn_size, args.vocab_size])
        summarize(proj_w)
        proj_b = tf.get_variable("softmax_b", [args.vocab_size])
        summarize(proj_b)
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [args.vocab_size, args.rnn_size])
            embedded = tf.nn.embedding_lookup(embedding, self.input_data)
            # One tensor per time step, with the length-1 step axis removed.
            steps = tf.split(axis=1,
                             num_or_size_splits=args.seq_length,
                             value=embedded)
            inputs = [tf.squeeze(step, [1]) for step in steps]

    def feed_previous(prev, _):
        """Embed the argmax word of the projected output ``prev``."""
        scores = tf.matmul(prev, proj_w) + proj_b
        best = tf.stop_gradient(tf.argmax(scores, 1))
        return tf.nn.embedding_lookup(embedding, best)

    loop_function = feed_previous if infer else None
    outputs, last_state = seq2seq.rnn_decoder(
        inputs,
        self.initial_state,
        cell,
        loop_function=loop_function,
        scope='rnnlm')

    flat_outputs = tf.reshape(tf.concat(axis=1, values=outputs),
                              [-1, args.rnn_size])
    self.logits = tf.matmul(flat_outputs, proj_w) + proj_b
    self.probs = tf.nn.softmax(self.logits)

    per_example_loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    # Summed loss divided by batch size and by sequence length.
    self.cost = (tf.reduce_sum(per_example_loss) / args.batch_size /
                 args.seq_length)
    tf.summary.scalar("cost", self.cost)
    self.final_state = last_state

    self.lr = tf.Variable(0.0, trainable=False)
    trainable = tf.trainable_variables()
    raw_grads = tf.gradients(self.cost, trainable)
    clipped, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
    self.train_op = tf.train.AdamOptimizer(self.lr).apply_gradients(
        zip(clipped, trainable))
def __init__(self, args, embedding):
    """Build the 'STAND' language-model graph on top of a given embedding.

    Two decoders are created under the 'STAND' variable scope, the second
    with ``reuse=True``: one is fed the ground-truth inputs and yields
    ``self.logits`` / ``self.probs`` and the training loss; the other is
    given a loop function that embeds its own argmax prediction and yields
    ``self.self_feed_probs``.

    Args:
        args: Settings object. Reads ``model`` ('rnn', 'gru' or 'lstm'),
            ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
            ``vocab_size`` and ``grad_clip``.
        embedding: Embedding matrix used for the input lookup; also stored
            as ``self.embedding``.

    Raises:
        Exception: If ``args.model`` is not one of the supported names.
    """
    self.args = args
    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    else:
        raise Exception("model type not supported: {}".format(args.model))

    cell = cell_fn(args.rnn_size)
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * args.num_layers)

    self.input_data = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length], name='STAND_input')
    self.targets = tf.placeholder(
        tf.int32, [args.batch_size, args.seq_length], name='STAND_targets')
    self.initial_state = cell.zero_state(args.batch_size, tf.float32)
    self.embedding = embedding

    with tf.variable_scope('STAND'):
        softmax_w = tf.get_variable("softmax_w",
                                    [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        # NOTE(review): axis-first tf.split / tf.concat is the pre-1.0
        # TensorFlow argument order -- confirm the targeted TF version.
        inputs = tf.split(
            1, args.seq_length,
            tf.nn.embedding_lookup(self.embedding, self.input_data))
        squeezed = [tf.squeeze(input_, [1]) for input_ in inputs]
        # A real list, not a lazy `map` object: both decoders below iterate
        # over `inputs`, and on Python 3 a map iterator would already be
        # exhausted when the second decoder reads it.
        inputs = [tf.nn.l2_normalize(step, 1) for step in squeezed]

    def loop(prev, i):
        """Return the normalized embedding of the argmax word for ``prev``."""
        prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.l2_normalize(
            tf.nn.embedding_lookup(embedding, prev_symbol), 1)

    # Decoder fed with the ground-truth inputs.
    o, _ = seq2seq.rnn_decoder(inputs,
                               self.initial_state,
                               cell,
                               loop_function=None,
                               scope='STAND')
    # Decoder with the self-feeding loop function, reusing 'STAND' variables.
    with tf.variable_scope('STAND', reuse=True) as scope:
        sf_o, _ = seq2seq.rnn_decoder(inputs,
                                      self.initial_state,
                                      cell,
                                      loop_function=loop,
                                      scope=scope)

    output = tf.reshape(tf.concat(1, o), [-1, args.rnn_size])
    self.logits = tf.matmul(output, softmax_w) + softmax_b
    self.probs = tf.nn.softmax(self.logits)

    sf_output = tf.reshape(tf.concat(1, sf_o), [-1, args.rnn_size])
    self_feed_logits = tf.matmul(sf_output, softmax_w) + softmax_b
    self.self_feed_probs = tf.nn.softmax(self_feed_logits)

    loss = seq2seq.sequence_loss_by_example(
        [self.logits], [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])], args.vocab_size)
    self.loss = tf.reduce_sum(loss) / args.batch_size / args.seq_length

    self.lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars),
                                      args.grad_clip)
    # Print the trainable variable names. The call form of print is valid on
    # both Python 2 and Python 3.
    for v in tvars:
        print(v.name)
    optimizer = tf.train.AdamOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, args, infer=False):
    """Build the (optionally attention-based) RNN language-model graph.

    Args:
        args: Settings object. Reads ``rnncell`` ('rnn', 'gru' or 'lstm'),
            ``rnn_size``, ``num_layers``, ``batch_size``, ``seq_length``,
            ``vocab_size``, ``embedding_size``, ``attention``, ``grad_clip``
            and ``log_dir``. When ``infer`` is true, ``batch_size`` and
            ``seq_length`` are overwritten with 1 on this object.
        infer: If true, the decoder is given a loop function that embeds
            the argmax of the projected previous output.

    Raises:
        Exception: If ``args.rnncell`` is not one of the supported names.
    """
    self.args = args
    if infer:
        args.batch_size = 1
        args.seq_length = 1

    # Build the base cell for the requested type.
    kind = args.rnncell
    if kind == 'rnn':
        base_cell = rnn_cell.BasicRNNCell(args.rnn_size)
    elif kind == 'gru':
        base_cell = GRUCell(args.rnn_size)
    elif kind == 'lstm':
        base_cell = core_rnn_cell_impl.BasicLSTMCell(args.rnn_size)
    else:
        raise Exception("rnncell type not supported: {}".format(
            args.rnncell))
    self.cell = MultiRNNCell([base_cell] * args.num_layers)

    token_shape = [args.batch_size, args.seq_length]
    self.input_data = tf.placeholder(tf.int32, token_shape)
    self.targets = tf.placeholder(tf.int32, token_shape)
    self.initial_state = self.cell.zero_state(args.batch_size, tf.float32)

    # Fixed attention dimensions and the placeholder that carries the
    # attention states.
    self.attn_length = 5
    self.attn_size = 32
    self.attention_states = tf.placeholder(
        tf.float32, [args.batch_size, self.attn_length, self.attn_size])

    with tf.variable_scope('rnnlm'):
        proj_w = build_weight([args.rnn_size, args.vocab_size],
                              name='soft_w')
        proj_b = build_weight([args.vocab_size], name='soft_b')
        self.word_embedding = build_weight(
            [args.vocab_size, args.embedding_size], name='word_embedding')
        embedded = tf.nn.embedding_lookup(self.word_embedding,
                                          self.input_data)
        # One tensor per time step, with the length-1 step axis removed.
        inputs_list = [
            tf.squeeze(step, [1])
            for step in tf.split(embedded, args.seq_length, 1)
        ]

    def feed_previous(prev, _):
        """Embed the argmax word of the projected output ``prev``."""
        scores = tf.matmul(prev, proj_w) + proj_b
        best = tf.stop_gradient(tf.argmax(scores, 1))
        return tf.nn.embedding_lookup(self.word_embedding, best)

    loop_function = feed_previous if infer else None
    if args.attention:
        outputs, last_state = attention_decoder(
            inputs_list,
            self.initial_state,
            self.attention_states,
            self.cell,
            loop_function=loop_function,
            scope='rnnlm')
    else:
        outputs, last_state = seq2seq.rnn_decoder(
            inputs_list,
            self.initial_state,
            self.cell,
            loop_function=loop_function,
            scope='rnnlm')
    self.final_state = last_state

    flat_outputs = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
    self.logits = tf.matmul(flat_outputs, proj_w) + proj_b
    self.probs = tf.nn.softmax(self.logits)

    per_example_loss = seq2seq.sequence_loss_by_example(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([args.batch_size * args.seq_length])],
        args.vocab_size)
    # Average loss per word per time step.
    self.cost = (tf.reduce_sum(per_example_loss) / args.batch_size /
                 args.seq_length)

    self.lr = tf.Variable(0.0, trainable=False)
    self.var_trainable_op = tf.trainable_variables()
    raw_grads = tf.gradients(self.cost, self.var_trainable_op)
    clipped, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
    self.train_op = tf.train.AdamOptimizer(self.lr).apply_gradients(
        zip(clipped, self.var_trainable_op))
    self.initial_op = tf.global_variables_initializer()

    # Log file name: log_dir followed by the timestamp with spaces and
    # slashes stripped, plus '.txt'.
    stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    file_name = (stamp + '.txt').replace(' ', '').replace('/', '')
    self.logfile = args.log_dir + file_name

    self.var_op = tf.global_variables()
    self.saver = tf.train.Saver(self.var_op,
                                max_to_keep=4,
                                keep_checkpoint_every_n_hours=1)
def build_graph(self, test):
    """Build the TensorFlow graph for the LSTM language model.

    Args:
        test: If true, ``self.batch_size`` and ``self.seq_len`` are set to 1
            and the decoder is given a loop function that embeds the argmax
            of the projected previous output.
    """
    if test:
        self.batch_size = 1
        self.seq_len = 1

    ##
    # Cells
    ##

    base_cell = rnn_cell.BasicLSTMCell(self.cell_size)
    self.cell = rnn_cell.MultiRNNCell([base_cell] * self.num_layers)

    ##
    # Data
    ##

    # inputs and targets are 2D int tensors of shape [batch_size, seq_len]
    token_shape = [self.batch_size, self.seq_len]
    self.inputs = tf.placeholder(tf.int32, token_shape)
    self.targets = tf.placeholder(tf.int32, token_shape)
    self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

    ##
    # Variables
    ##

    with tf.variable_scope('lstm_vars'):
        self.ws = tf.get_variable('ws', [self.cell_size, self.vocab_size])
        self.bs = tf.get_variable('bs', [self.vocab_size])  # TODO: initializer?
        # The embedding table and lookup are pinned to the CPU.
        with tf.device('/cpu:0'):
            self.embeddings = tf.get_variable(
                'embeddings', [self.vocab_size, self.cell_size])

            # Embed every input word, cut the result into seq_len slices
            # along axis 1, then squeeze away that length-1 axis so each
            # slice is a 2D tensor.
            embedded = tf.nn.embedding_lookup(self.embeddings, self.inputs)
            step_slices = tf.split(embedded, self.seq_len, 1)
            step_inputs = [tf.squeeze(piece, [1]) for piece in step_slices]

    def feed_previous(prev, _):
        """Embed the argmax word of the projected output ``prev``."""
        scores = tf.matmul(prev, self.ws) + self.bs
        best = tf.stop_gradient(tf.argmax(scores, 1))
        return tf.nn.embedding_lookup(self.embeddings, best)

    loop_function = feed_previous if test else None
    step_outputs, self.final_state = seq2seq.rnn_decoder(
        step_inputs,
        self.initial_state,
        self.cell,
        loop_function=loop_function,
        scope='lstm_vars')

    flat_outputs = tf.reshape(tf.concat(step_outputs, 1),
                              [-1, self.cell_size])
    scores = tf.matmul(flat_outputs, self.ws) + self.bs
    self.probs = tf.nn.softmax(scores)

    ##
    # Train
    ##

    per_example_loss = seq2seq.sequence_loss_by_example(
        [scores],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([self.batch_size * self.seq_len])],
        self.vocab_size)
    # Summed loss divided by batch size and by sequence length.
    self.loss = (tf.reduce_sum(per_example_loss) / self.batch_size /
                 self.seq_len)

    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    self.optimizer = tf.train.AdamOptimizer(learning_rate=c.L_RATE,
                                            name='optimizer')
    self.train_op = self.optimizer.minimize(self.loss,
                                            global_step=self.global_step,
                                            name='train_op')
def __init__(self, config, decay_step, is_training=False, is_translate=False):
    """Build the glimpse / actor / LSTM / critic / classifier graph.

    Args:
        config: Settings object. This method reads ``input_img_size``,
            ``img_size``, ``learning_rate``, ``decay_factor``,
            ``min_learning_rate``, ``cell_size``, ``loc_dim``,
            ``num_glimpses`` and (when training) ``max_gradient_norm``.
        decay_step: Decay step count passed to
            ``tf.train.exponential_decay``.
        is_training: If true, the total loss and the Adam training op are
            built as well; the flag is also forwarded to ``ActorNetwork``
            as ``is_sampling``.
        is_translate: If true, the input image is passed through
            ``self._translate_image`` before being used.
    """
    # self.image: flattened input images, [batch_size, input_img_size^2]
    # self.label: integer class labels (not one-hot), [batch_size]
    self.config = config
    self.decay_step = decay_step
    self.is_training = is_training
    self.is_translate = is_translate

    # input data placeholders
    with tf.name_scope('input'):
        self.image = tf.placeholder(
            tf.float32,
            [None, config.input_img_size * config.input_img_size])
        self.label = tf.placeholder(tf.int64, [None])

    with tf.name_scope('image_translate'):
        # translate the MNIST data if requested
        if self.is_translate:
            # flat image -> [batch_size, input_img_size, input_img_size, 1]
            img = tf.reshape(self.image, [
                tf.shape(self.image)[0], config.input_img_size,
                config.input_img_size, 1
            ],
                             name='2D_2_4D')
            self.proc_image = self._translate_image(img)
            # reshape into 2D tensor: [batch_size, img_size^2]
            # new_img_size = self.proc_image.get_shape().as_list()
            # print(new_img_size)
            self.proc_image = tf.reshape(self.proc_image, [
                tf.shape(self.image)[0], config.img_size * config.img_size
            ],
                                         name='4D_2_2D')
        else:
            self.proc_image = self.image

    with tf.name_scope('global_step'):
        self.global_step = tf.Variable(0, trainable=False)

    # define learning rate: staircase exponential decay, floored at
    # config.min_learning_rate
    with tf.name_scope('learning_rate'):
        self.learning_rate = tf.maximum(
            tf.train.exponential_decay(config.learning_rate,
                                       self.global_step,
                                       decay_step,
                                       config.decay_factor,
                                       staircase=True),
            config.min_learning_rate)
        tf.summary.scalar("learning_rate", self.learning_rate)

    # Glimpse Network
    with tf.name_scope('glimpse_net'):
        self.glimpse_network = GlimpseNetwork(
            config=config, is_translate=self.is_translate)

    # Actor Network
    with tf.name_scope('actor_net'):
        self.actor_network = ActorNetwork(config=config,
                                          rnn_output_size=config.cell_size,
                                          is_sampling=self.is_training)

    # LSTM Network
    with tf.name_scope('lstm'):
        cell = BasicLSTMCell(config.cell_size, name='basic_lstm_cell')

        with tf.name_scope('initialization'):
            with tf.name_scope('batch_size'):
                # dynamic batch size, taken from the fed image tensor
                batch_size = tf.shape(self.image)[0]
            with tf.name_scope('init_locs'):
                # first location sampled uniformly from [-1, 1)
                init_locs = tf.random_uniform(
                    shape=[batch_size, config.loc_dim],
                    minval=-1,
                    maxval=1,
                    name='sampling')
            with tf.name_scope('init_state'):
                init_state = cell.zero_state(batch_size, tf.float32)
            # transfer glimpse network output into 2D list
            # rnn_inputs: 3D list [[batch_size, 256], ...]
            with tf.name_scope('init_glimpse'):
                init_glimpse = self.glimpse_network(
                    self.proc_image, init_locs)
            with tf.name_scope('rnn_inputs'):
                rnn_inputs = [init_glimpse]
                # The remaining entries are 0 placeholders that only fix the
                # number of steps.
                # NOTE(review): presumably rnn_decoder replaces them with
                # loop_function's return value -- confirm against the
                # rnn_decoder implementation in use.
                rnn_inputs.extend([0] * config.num_glimpses)
            with tf.name_scope('init_list'):
                # filled by loop_function each time it is called
                self.locs, self.loc_means, self.retina_reprsent = [], [], []

        # with tf.name_scope('rnn_decoder'):
        def loop_function(prev, _):
            """Pick a location from ``prev`` and return the glimpse taken there.

            Appends the location, its mean and the retina representation to
            ``self.locs``, ``self.loc_means`` and ``self.retina_reprsent``.
            """
            loc, loc_mean = self.actor_network(prev)
            self.locs.append(loc)
            self.loc_means.append(loc_mean)
            glimpse = self.glimpse_network(self.proc_image, loc)
            self.retina_reprsent.append(
                self.glimpse_network.retina_sensor.retina_reprsent)
            return glimpse

        self.rnn_outputs, _ = rnn_decoder(rnn_inputs,
                                          init_state,
                                          cell,
                                          loop_function=loop_function)

    # Critic Network
    with tf.name_scope('critic_net'):
        self.critic_network = CriticNetwork(
            config=config, rnn_output_size=cell.output_size)

    # Classify Network
    with tf.name_scope('classify_net'):
        self.classify_network = ClassifyNetwork(
            config=config, rnn_output_size=cell.output_size)
        # classification uses only the last RNN output
        rnn_last_output = self.rnn_outputs[-1]
        self.logits = self.classify_network(rnn_last_output)
        with tf.name_scope('argmax'):
            self.prediction = tf.argmax(self.logits, 1)  # [batch_size]
        with tf.name_scope('softmax'):
            self.softmax = tf.nn.softmax(self.logits)

    if is_training:
        # hybrid loss: classification loss, RL reward, baseline loss
        with tf.name_scope('total_loss'):
            self.loss = self.total_loss()
            tf.summary.scalar("total_loss", self.loss)

        with tf.name_scope('train'):
            # gradients are clipped by global norm before Adam applies them
            var_list = tf.trainable_variables()
            gradients = tf.gradients(self.loss, var_list)
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, config.max_gradient_norm)
            self.train_op = tf.train.AdamOptimizer(
                self.learning_rate).apply_gradients(
                    zip(clipped_gradients, var_list),
                    global_step=self.global_step)

    with tf.name_scope('merge'):
        self.merged = tf.summary.merge_all()