def __init__(self):
    """Build the per-frame feature extractor + bidirectional LSTM classifier graph.

    A clip placeholder of shape ``[None, 5, 224, 224, 3]`` is split along
    axis 1 into 5 frames, each frame is passed through
    ``self.get_features``, and the resulting list is fed to
    ``static_bidirectional_rnn``.  The output of the last time step is
    projected to ``classes`` logits.

    Relies on ``hidden_size``, ``batch_size`` and ``classes``, which are not
    defined in this method and must be available at module level.

    Attributes created:
        training: boolean placeholder named ``'training'``.
        inputs_placeholder: the 5-D clip placeholder (feed this one).
        inputs: list of per-frame tensors obtained by unstacking the clip.
        sequence_length: int32 placeholder of shape ``[None]``.
        LSTM_inputs: list with one ``get_features`` result per frame.
        predict: logits produced by the dense layer.
        probabilities: softmax over ``predict``.
        finally_pre: predicted class index per example.
        targets: int32 label placeholder of shape ``[None]``.
        loss: mean sparse softmax cross-entropy.
        train_op: Adam minimisation op, run after the ``UPDATE_OPS``
            collection.
    """
    self.training = tf.placeholder(tf.bool, name='training')

    # Keep a handle on the placeholder itself: `self.inputs` is rebound to
    # the unstacked list below (a list cannot be used as a feed_dict key).
    self.inputs_placeholder = tf.placeholder(
        dtype=tf.float32, shape=[None, 5, 224, 224, 3])
    self.inputs = tf.unstack(self.inputs_placeholder, axis=1)
    self.sequence_length = tf.placeholder(dtype=tf.int32, shape=[None])

    # One feature tensor per frame.
    self.LSTM_inputs = [self.get_features(frame) for frame in self.inputs]
    print('Image feature extraction is successful')

    lstm_f_cell = BasicLSTMCell(num_units=hidden_size)
    lstm_b_cell = BasicLSTMCell(num_units=hidden_size)
    init_fw = lstm_f_cell.zero_state(batch_size, dtype=tf.float32)
    init_bw = lstm_b_cell.zero_state(batch_size, dtype=tf.float32)
    outputs, output_state_fw, output_state_bw = static_bidirectional_rnn(
        lstm_f_cell,
        lstm_b_cell,
        self.LSTM_inputs,
        initial_state_fw=init_fw,
        initial_state_bw=init_bw,
        sequence_length=self.sequence_length)

    self.predict = tf.layers.dense(outputs[-1], classes)
    self.probabilities = tf.nn.softmax(self.predict)
    # The class axis is axis 1.  Without an explicit axis, argmax reduces
    # over axis 0 (the batch), which yields one index per class instead of
    # one predicted class per example.
    self.finally_pre = tf.argmax(self.predict, axis=1)

    self.targets = tf.placeholder(dtype=tf.int32, shape=[None])
    self.loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.targets, logits=self.predict))

    # Run pending update ops (e.g. those registered by layers inside
    # get_features) before every optimisation step.
    with tf.control_dependencies(
            tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
def BasicLSTM_init_state(batch_size, output_size):
    """Return the zero initial state of a ``BasicLSTMCell``.

    A throw-away cell with ``output_size`` units is created under the
    ``'LSTM_init_state'`` name scope purely to ask it for its float32
    ``zero_state`` for ``batch_size`` examples.
    """
    with tf.name_scope('LSTM_init_state'):
        throwaway_cell = BasicLSTMCell(output_size)
        return throwaway_cell.zero_state(batch_size=batch_size,
                                         dtype='float32')
def add_cell(self):
    """Create the LSTM layer and unroll it over ``self.l_in_y``.

    Stores the zero initial state in ``self.cell_init_state`` and the
    ``tf.nn.dynamic_rnn`` results in ``self.cell_outputs`` and
    ``self.cell_final_state``.  Inputs are batch-major
    (``time_major=False``).
    """
    cell = BasicLSTMCell(self.cell_size)
    zero_state = cell.zero_state(self.batch_size, dtype=tf.float32)
    self.cell_init_state = zero_state

    rnn_outputs, rnn_final_state = tf.nn.dynamic_rnn(
        cell,
        self.l_in_y,
        initial_state=zero_state,
        time_major=False)
    self.cell_outputs = rnn_outputs
    self.cell_final_state = rnn_final_state
def rnn(features, mode, params):
    """Recurrent model.

    Args:
        features: mapping with a ``'feature'`` tensor and, when
            ``params.per_frame`` is set, a ``'sequence_length'`` tensor.
        mode: a ``tf.estimator.ModeKeys`` value; dropout is only active for
            ``TRAIN``.
        params: hyper-parameters.  Reads ``model``, ``hidden_size``,
            ``batch_size``, ``per_frame``, ``max_length``,
            ``feature_length``, ``dropout`` and ``num_classes``.

    Returns:
        The logits tensor produced by the final dense layer
        (``params.num_classes`` units, no activation).
    """
    if params.model == "LSTM":
        cell = BasicLSTMCell(params.hidden_size)
    elif params.model == "GRU":
        cell = GRUCell(params.hidden_size)
    else:
        cell = BasicRNNCell(params.hidden_size)

    # NOTE(review): the state is float64, so the input features are
    # presumably float64 as well -- confirm against the input pipeline.
    initial_state = cell.zero_state(params.batch_size, dtype=tf.float64)

    if params.per_frame:
        # convert input from (batch_size, max_time, ...) to
        # (max_time, batch_size, ...)
        inputs = tf.transpose(features['feature'], [1, 0, 2])
        sequence_length = tf.reshape(features['sequence_length'],
                                     shape=(params.batch_size, ))
        outputs, state = tf.nn.dynamic_rnn(cell,
                                           inputs=inputs,
                                           initial_state=initial_state,
                                           sequence_length=sequence_length,
                                           time_major=True)

        # Pick, for every example, the output at its own last valid step.
        # Indexing with the first example's length (the previous behaviour)
        # returns the wrong time step for every other example whenever the
        # lengths in a batch differ.
        last_step = tf.maximum(tf.cast(sequence_length, tf.int32) - 1, 0)
        batch_index = tf.range(params.batch_size, dtype=tf.int32)
        outputs = tf.gather_nd(outputs,
                               tf.stack([last_step, batch_index], axis=1))
    else:
        # reshape MFCC vector to fit in one time step
        inputs = tf.reshape(features['feature'],
                            shape=(1, params.batch_size,
                                   params.max_length * params.feature_length))
        outputs, state = tf.nn.dynamic_rnn(cell,
                                           inputs=inputs,
                                           initial_state=initial_state,
                                           time_major=True)
        outputs = tf.reshape(outputs,
                             shape=(params.batch_size, params.hidden_size))

    # apply dropout
    dropout = tf.layers.dropout(
        outputs,
        rate=params.dropout,
        training=(mode == tf.estimator.ModeKeys.TRAIN))
    logits = tf.layers.dense(dropout,
                             units=params.num_classes,
                             activation=None)
    return logits
def _add_bilstm_cell(self):
    """Create the forward/backward LSTM cells and unroll them bidirectionally.

    Attributes created:
        fw_init_state, bw_init_state: float32 zero states of the two cells
            for ``self.batch_size`` examples.
        bilstm_output: forward and backward outputs concatenated on axis 2.
        fw_final_state, bw_final_state: final states of the two directions.
    """
    # init the lstm cells. one for fwlstm, another for bwlstm.
    fw_lstm = BasicLSTMCell(num_units=self.hidden_layer,
                            forget_bias=1.0,
                            state_is_tuple=True)
    bw_lstm = BasicLSTMCell(num_units=self.hidden_layer,
                            forget_bias=1.0,
                            state_is_tuple=True)

    # define the init state for bwlstm and fwlstm
    self.fw_init_state = fw_lstm.zero_state(self.batch_size,
                                            dtype=tf.float32)
    self.bw_init_state = bw_lstm.zero_state(self.batch_size,
                                            dtype=tf.float32)

    # `sequence_length` must be a vector with one length per example; the
    # batch size was being passed there by mistake.  No per-example lengths
    # are available in this method, so every sequence is processed over its
    # full time axis.
    # NOTE(review): if the class tracks true sequence lengths elsewhere,
    # pass that vector as `sequence_length=` here.
    output, final_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=fw_lstm,
        cell_bw=bw_lstm,
        inputs=self.input_layer_data,
        initial_state_fw=self.fw_init_state,
        initial_state_bw=self.bw_init_state)

    self.bilstm_output = tf.concat(output, 2)
    self.fw_final_state, self.bw_final_state = final_state
class RecurrentController(BaseController):
    """Controller whose network is a single 256-unit ``BasicLSTMCell``."""

    def network_vars(self):
        """Create the LSTM cell and its float32 zero state."""
        cell = BasicLSTMCell(256)
        self.lstm_cell = cell
        self.state = cell.zero_state(self.batch_size, tf.float32)

    def network_op(self, X, state):
        """Apply the cell to ``X`` (converted to a tensor) with ``state``."""
        return self.lstm_cell(tf.convert_to_tensor(X), state)

    def get_state(self):
        """Return the state created in ``network_vars``."""
        return self.state

    def update_state(self, new_state):
        """Return a no-op; ``new_state`` is not stored."""
        return tf.no_op()
class DilatedLSTM(object):
    """Applies one shared ``BasicLSTMCell`` for ``max_steps`` iterations,
    using state slot ``i % num_cores`` at iteration ``i``.

    NOTE(review): this class does not look runnable as written; see the
    review notes in ``_build_ops`` and ``zero_state``.  The code is left
    untouched because the intended design cannot be determined from here.
    """

    def __init__(self,
                 inputs,
                 initial_state,
                 hidden_state_size,
                 max_steps,
                 num_cores=10,
                 pool_size=10):
        """Store the configuration and build the graph ops.

        Args:
            inputs: initial loop input handed to the shared cell.
            initial_state: per-core state collection, indexed by
                ``i % num_cores`` inside the loop body.
            hidden_state_size: number of units of the shared cell.
            max_steps: loop bound compared against the step counter.
            num_cores: number of state slots cycled through.
            pool_size: window size passed to the pooling call.
        """
        self.shared_cell = BasicLSTMCell(hidden_state_size)
        self.initial_state = initial_state
        self.max_steps = max_steps
        self.num_cores = num_cores
        self.pool_size = pool_size
        self.inputs = inputs
        self._build_ops()

    def _build_ops(self):
        """Build the while-loop over the shared cell and the pooled output."""
        i0 = tf.constant(0, dtype=tf.int32)
        loop_condition = lambda i, inputs, state: tf.less(i, self.max_steps)

        def body(i, inputs, full_state):
            # The cell output becomes the next iteration's input.
            # NOTE(review): `idx` is derived from the loop counter; if that
            # is a tensor, indexing/assigning a Python list with it will not
            # work -- confirm what `full_state` is expected to be.
            idx = i % self.num_cores
            prev_state = full_state[idx]
            inputs, full_state[idx] = self.shared_cell(inputs, prev_state)
            return i + 1, inputs, full_state

        _, inputs, full_state = tf.while_loop(
            loop_condition,
            body,
            loop_vars=[i0, self.inputs, self.initial_state])
        # NOTE(review): 256 is hard-coded here although the cell size comes
        # from `hidden_state_size` -- confirm they are meant to match.
        lstm_outputs = tf.reshape(tf.concat(full_state, 1), [-1, 256])
        # NOTE(review): `tf.expand` and `tf.avg_pool` do not appear to be
        # public TensorFlow 1.x names (likely `tf.expand_dims` and
        # `tf.nn.avg_pool`) -- confirm.  The attribute name `outpus` also
        # looks like a typo for `outputs`; check callers before renaming.
        self.outpus = tf.avg_pool(tf.expand(lstm_outputs, -1),
                                  [1, self.pool_size, 1, 1],
                                  strides=[1, 1, 1, 1],
                                  padding='SAME')

    def zero_state(self):
        """Return a list of float32 zero states of the shared cell.

        NOTE(review): `self.stride` is never assigned in this class
        (presumably `self.num_cores` was intended), and the batch size is
        taken from `tf.shape(self.max_steps)[0]` although `max_steps` is
        used as the loop bound above -- verify both.
        """
        return [
            self.shared_cell.zero_state(
                tf.shape(self.max_steps)[0], tf.float32)
            for _ in range(self.stride)
        ]
def _outputs(self):
    """Run the LSTM over ``self.Input_data`` and project every step to class scores.

    Uses the module-level ``hidden_size``, ``batch_size``,
    ``sequence_length`` and ``num_classes``.

    Returns:
        Tensor reshaped to ``[batch_size, sequence_length, num_classes]``.
    """
    lstm = BasicLSTMCell(num_units=hidden_size)
    rnn_out, _ = tf.nn.dynamic_rnn(
        lstm,
        self.Input_data,
        initial_state=lstm.zero_state(batch_size, tf.float32),
        dtype=tf.float32)

    # Flatten batch and time so the dense layer is applied to every step.
    flat_steps = tf.reshape(rnn_out, [-1, hidden_size])
    step_scores = fully_connected(inputs=flat_steps,
                                  num_outputs=num_classes,
                                  activation_fn=None)
    return tf.reshape(step_scores,
                      [batch_size, sequence_length, num_classes])
# Build the vocabulary; both the LSTM width and the number of output classes
# are set to the vocabulary size.
X_train_vocab, X_train_vocab_rev = create_vocabulary(X_train)
hidden_size = len(X_train_vocab)
num_classes = len(X_train_vocab)

# Next-token setup: inputs are all ids but the last, targets are all ids but
# the first.  Both are wrapped in a list, i.e. a batch of one sequence.
X_train_ids = sentence_to_token_ids(X_train, X_train_vocab)
X_data = X_train_ids[:-1]
Y_data = X_train_ids[1:]
X_data_one_hot = [token_ids_to_one_hot(X_data, num_classes)]
Y_data = [Y_data]

# ==============================================================================
# `sequence_length` and `batch_size` are not defined in this section and must
# already exist at module level.
X = tf.placeholder(tf.float32, [None, sequence_length, hidden_size])
Y = tf.placeholder(tf.int32, [None, sequence_length])

cell = BasicLSTMCell(num_units=hidden_size)
initial_state = cell.zero_state(batch_size, tf.float32)
outputs, _states = tf.nn.dynamic_rnn(cell,
                                     X,
                                     initial_state=initial_state,
                                     dtype=tf.float32)

# Flatten batch and time, apply the dense layer, then restore the 3-D shape.
X_for_fc = tf.reshape(outputs, [-1, hidden_size])
outputs = fully_connected(inputs=X_for_fc,
                          num_outputs=num_classes,
                          activation_fn=None)
outputs = tf.reshape(outputs, [batch_size, sequence_length, num_classes])

# Uniform weights: every position contributes equally to the loss.
weights = tf.ones([batch_size, sequence_length])
# NOTE(review): this rebinds the name `sequence_loss` from the callable to
# its result, so the function cannot be called again under that name after
# this line.
sequence_loss = sequence_loss(logits=outputs, targets=Y, weights=weights)
loss = tf.reduce_mean(sequence_loss)
def build_model(self):
    """Assemble encoder, Kalman filter, decoder and the generation outputs.

    The encoder output sequence is used as the Kalman filter observations;
    smoothing and filtering are run, the smoothed state is mapped back
    through ``C``, and the decoder is applied (to that mapped sequence when
    ``config.sample_z`` is set, otherwise to the encoder sequence).
    Everything produced is collected in ``self.model_vars``.

    Returns:
        ``self``, so calls can be chained.
    """
    cfg = self.config

    # Encoder q(a|x)
    a_seq, a_mu, a_var = self.encoder(self.x)
    a_vae = a_seq

    # Initial state for the alpha RNN: a cell is built only to obtain its
    # zero state.
    if cfg.learn_u:
        alpha_cell_units = cfg.alpha_units * 2
    else:
        alpha_cell_units = cfg.alpha_units
    alpha_cell = BasicLSTMCell(alpha_cell_units)
    alpha_state0 = alpha_cell.zero_state(cfg.batch_size, tf.float32)

    # Initialize Kalman filter (LGSSM)
    params = self.init_vars
    self.kf = KalmanFilter(
        dim_z=cfg.dim_z,
        dim_y=cfg.dim_a,
        dim_u=cfg.dim_u,
        dim_k=cfg.K,
        A=params['A'],  # state transition function
        B=params['B'],  # control matrix
        C=params['C'],  # Measurement function
        R=params['R'],  # measurement noise
        Q=params['Q'],  # process noise
        y=a_seq,  # output
        u=None,
        mask=self.mask,
        mu=params['mu'],
        Sigma=params['Sigma'],
        y_0=params['a_0'],
        alpha=self.alpha,
        state=alpha_state0)

    # Get smoothed posterior over z
    smooth, A, B, C, alpha_plot = self.kf.smooth()

    # Get filtered posterior, used only for imputation plots
    filtered, _, _, C_filter, _ = self.kf.filter()

    # Get a from the prior z (for plotting)
    smoothed_column = tf.expand_dims(smooth[0], 2)
    a_mu_pred = tf.matmul(C, smoothed_column, transpose_b=True)
    pred_shape = tf.stack((-1, self.ph_steps, cfg.dim_a))
    a_mu_pred_seq = tf.reshape(a_mu_pred, pred_shape)
    if cfg.sample_z:
        a_seq = a_mu_pred_seq

    # Decoder p(x|a)
    x_hat, x_mu, x_var = self.decoder(a_seq)

    # Compute variables for generation from the model (for plotting)
    self.n_steps_gen = cfg.n_steps_gen  # We sample for this many iterations,
    self.out_gen_det = self.kf.sample_generative_tf(
        smooth,
        self.n_steps_gen,
        deterministic=True,
        init_fixed_steps=cfg.t_init_mask)
    self.out_gen = self.kf.sample_generative_tf(
        smooth,
        self.n_steps_gen,
        deterministic=False,
        init_fixed_steps=cfg.t_init_mask)
    self.out_gen_det_impute = self.kf.sample_generative_tf(
        smooth,
        self.test_data.timesteps,
        deterministic=True,
        init_fixed_steps=cfg.t_init_mask)
    self.out_alpha, _, _, _ = self.alpha(self.a_prev,
                                         state=alpha_state0,
                                         u=None,
                                         init_buffer=True,
                                         reuse=True)

    # Collect generated model variables
    self.model_vars = dict(x_hat=x_hat,
                           x_mu=x_mu,
                           x_var=x_var,
                           a_seq=a_seq,
                           a_mu=a_mu,
                           a_var=a_var,
                           a_vae=a_vae,
                           smooth=smooth,
                           A=A,
                           B=B,
                           C=C,
                           alpha_plot=alpha_plot,
                           a_mu_pred_seq=a_mu_pred_seq,
                           filter=filtered,
                           C_filter=C_filter)
    return self
def _add_cell(self):
    """Create the LSTM cell and unroll it over ``self.input_layer_data``.

    Sets ``self.init_state`` (float32 zero state for ``self.batch_size``
    examples), ``self.cell_outputs`` and ``self.cell_final_state``.  Inputs
    are batch-major (``time_major=False``).
    """
    cell = BasicLSTMCell(num_units=self.cell_size,
                         forget_bias=1.0,
                         state_is_tuple=True)
    zero_state = cell.zero_state(self.batch_size, dtype=tf.float32)
    self.init_state = zero_state

    rnn_outputs, rnn_final_state = tf.nn.dynamic_rnn(
        cell=cell,
        inputs=self.input_layer_data,
        initial_state=zero_state,
        time_major=False)
    self.cell_outputs = rnn_outputs
    self.cell_final_state = rnn_final_state
def main(model, T, n_epochs, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, decay):
    """Train a character-level recurrent model and log losses to a text file.

    Builds the graph (one-hot input -> recurrent cell -> linear output),
    trains with RMSProp, runs validation every 5000 steps of an epoch and
    finally evaluates on the test split.  Validation and test losses are
    written to ``./output/character/text8/T=<T>/<model>_N=<n_hidden>...txt``.

    Args:
        model: one of ``"LSTM"``, ``"GRU"``, ``"RNN"``, ``"EURNN"``,
            ``"GORU"``.
        T: width of the input/target placeholders; also forwarded to
            ``file_data``.
        n_epochs: forwarded to ``file_data`` for the training split.
        n_batch: batch size, forwarded to ``file_data`` and ``zero_state``.
        n_hidden: number of hidden units of the recurrent cell.
        capacity: forwarded to ``EURNNCell`` / ``GORUCell``; part of the
            output file name when ``FFT`` is false.
        comp: forwarded to ``EURNNCell`` / ``GORUCell``; when true the RNN
            runs in complex64 and only the real part of its output is used.
        FFT: forwarded to ``EURNNCell`` / ``GORUCell``.
        learning_rate: RMSProp learning rate.
        decay: RMSProp decay.

    Raises:
        ValueError: if ``model`` is not one of the supported names.

    Note:
        ``notstates`` is read but not defined in this function; it must be
        provided at module level.
    """
    supported_models = ("LSTM", "GRU", "RNN", "EURNN", "GORU")
    if model not in supported_models:
        # Fail before loading any data instead of hitting an unbound local
        # much later in graph construction.
        raise ValueError("Unknown model %r; expected one of: %s" %
                         (model, ", ".join(supported_models)))

    # --- Set data params ----------------
    # Create Data
    max_len_data = 100000000
    epoch_train, vocab_to_idx = file_data('train', n_batch, max_len_data, T,
                                          n_epochs, None)
    n_input = len(vocab_to_idx)
    epoch_val, _ = file_data('valid', n_batch, max_len_data, T, 10000,
                             vocab_to_idx)
    epoch_test, _ = file_data('test', n_batch, max_len_data, T, 1,
                              vocab_to_idx)
    n_output = n_input

    # --- Create graph and compute gradients ----------------------
    x = tf.placeholder("int32", [None, T])
    y = tf.placeholder("int64", [None, T])
    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # Input to hidden layer
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    elif model == "EURNN":
        cell = EURNNCell(n_hidden, capacity, FFT, comp)
    else:  # "GORU"
        cell = GORUCell(n_hidden, capacity, FFT, comp)

    # NOTE(review): `h` is used below as a feed_dict key for carrying state
    # between batches, but it is not passed to dynamic_rnn as
    # `initial_state` -- confirm whether feeding it has the intended effect.
    h = cell.zero_state(n_batch, tf.float32)
    if model in ("EURNN", "GORU") and comp:
        hidden_out_comp, states = tf.nn.dynamic_rnn(cell,
                                                    input_data,
                                                    dtype=tf.complex64)
        hidden_out = tf.real(hidden_out_comp)
    else:
        hidden_out, states = tf.nn.dynamic_rnn(cell,
                                               input_data,
                                               dtype=tf.float32)

    # Hidden Layer to Output
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)
    V_weights = tf.get_variable(
        "V_weights",
        shape=[n_hidden, n_output],
        dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-V_init_val, V_init_val))
    V_bias = tf.get_variable("V_bias",
                             shape=[n_output],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))

    # Apply the output projection to every time step, then go back to
    # batch-major layout before adding the bias.
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack(
        [tf.matmul(step_out, V_weights) for step_out in hidden_out_list])
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # define evaluate process
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    correct_pred = tf.equal(tf.argmax(output_data, 2), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # --- Initialization ----------------------
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          decay=decay).minimize(cost)
    init = tf.global_variables_initializer()

    for variable in tf.global_variables():
        print(variable.name)

    # --- save result ----------------------
    filename = ("./output/character/text8/T=" + str(T) + "/" + model +
                "_N=" + str(n_hidden))
    if model == "EURNN" or model == "GORU":
        print(model)
        if FFT:
            filename += "_FFT"
        else:
            filename = filename + "_L=" + str(capacity)
    filename = filename + ".txt"

    if not os.path.exists(os.path.dirname(filename)):
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise

    def do_validation():
        """Evaluate the first validation epoch and log its mean loss.

        Closes over `f`, `sess`, `validation_losses` and `t`, which are
        bound further down before this function is first called.
        """
        j = 0
        val_losses = []
        for val in epoch_val:
            j += 1
            if j >= 2:
                break
            print("Running validation...")
            val_state = None
            for X_val, Y_val in val:
                val_dict = {x: X_val, y: Y_val}
                if val_state is not None:
                    # This needs to be initialized from the original net
                    # creation.
                    val_dict[h] = val_state
                if notstates:
                    _, val_loss = sess.run([accuracy, cost],
                                           feed_dict=val_dict)
                else:
                    _, val_loss, val_state = sess.run(
                        [accuracy, cost, states], feed_dict=val_dict)
                val_losses.append(val_loss)
        print("Validations:", )
        validation_losses.append(sum(val_losses) / len(val_losses))
        print("Validation Loss= " + \
              "{:.6f}".format(validation_losses[-1]))
        f.write("%d\t%f\n" % (t, validation_losses[-1]))
        f.flush()

    # The context manager guarantees the result file is closed even when
    # training is interrupted or fails.
    with open(filename, 'w') as f:
        f.write("########\n\n")
        f.write("## \tModel: %s with N=%d" % (model, n_hidden))
        if model == "EURNN" or model == "GORU":
            if FFT:
                f.write(" FFT")
            else:
                f.write(" L=%d" % (capacity))
        f.write("\n\n")
        f.write("########\n\n")

        # --- Training Loop ------------------------------------------------
        with tf.Session(config=tf.ConfigProto(
                log_device_placement=False,
                allow_soft_placement=False)) as sess:
            print("Session Created")

            steps = []
            losses = []
            accs = []
            validation_losses = []

            sess.run(init)

            training_state = None
            i = 0  # epoch counter
            t = 0  # global step counter across epochs
            for epoch in epoch_train:
                print("Epoch: ", i)
                for step, (X, Y) in enumerate(epoch):
                    myfeed_dict = {x: X, y: Y}
                    if training_state is not None:
                        myfeed_dict[h] = training_state

                    if notstates:
                        _, acc, loss = sess.run([optimizer, accuracy, cost],
                                                feed_dict=myfeed_dict)
                    else:
                        _, acc, loss, training_state = sess.run(
                            [optimizer, accuracy, cost, states],
                            feed_dict=myfeed_dict)

                    print("Iter " + str(step) + ", Minibatch Loss= " + \
                          "{:.6f}".format(loss) + ", Training Accuracy= " + \
                          "{:.5f}".format(acc))

                    steps.append(t)
                    losses.append(loss)
                    accs.append(acc)
                    t += 1

                    if step % 5000 == 4999:
                        do_validation()
                i += 1

            print("Optimization Finished!")

            # Only the first test epoch is evaluated.
            j = 0
            test_losses = []
            for test in epoch_test:
                j += 1
                if j >= 2:
                    break
                print("Running validation...")
                for X_test, Y_test in test:
                    test_dict = {x: X_test, y: Y_test}
                    _, test_loss = sess.run([accuracy, cost],
                                            feed_dict=test_dict)
                    test_losses.append(test_loss)
            print("test:", )
            # The mean is appended to the same list and read back as [-1].
            test_losses.append(sum(test_losses) / len(test_losses))
            print("test Loss= " + \
                  "{:.6f}".format(test_losses[-1]))
            f.write("Test result: %d\t%f\n" % (t, test_losses[-1]))
X = np.reshape(dataX, (n_patterns, seq_length, 1)) # normalize X = X / float(n_vocab) # one hot encode the output variable Y = to_categorical(dataY) ''' Create TF model ''' units = 256 seq_length = 100 data_x = tf.placeholder(tf.float32, shape=(None, seq_length, 1)) data_y = tf.placeholder(tf.float32, shape=(None, n_vocab)) batch_size = tf.shape(data_x)[0] #Create tf cell, api refrence: https://www.tensorflow.org/api_docs/python/tf/contrib/rnn rnn_cell = BasicLSTMCell(num_units=units, forget_bias=1) initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32) #Compute RNN outputs, state = tf.nn.dynamic_rnn(cell=rnn_cell, inputs=data_x, initial_state=initial_state, dtype=tf.float32) #Got from: https://danijar.com/introduction-to-recurrent-networks-in-tensorflow/ outputs = tf.transpose(outputs, [1, 0, 2]) last = tf.gather(outputs, int(outputs.get_shape()[0]) - 1) to_forward = tf.nn.dropout(x=last, keep_prob=0.2) #Dens: activation(dot(input, kernel) + bias) w_kernel = tf.Variable(tf.random_uniform(shape=(units, n_vocab)))
n_hidden_units, ])), 'out': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[ n_classes, ])) } #X==>[128,28,28] X = tf.reshape(x, [-1, n_input]) #x==>[128*28,28] X_in = tf.matmul(X, weights['in']) + biases['in'] #X_in ==>[128*28,128] X_in = tf.reshape(X_in, [-1, n_step, n_hidden_units]) #X_in ==>[128,28,128] cell = BasicLSTMCell(n_hidden_units) init_state = cell.zero_state(batch_size, dtype=tf.float32) outputs, final_state = tf.nn.dynamic_rnn(cell, X_in, initial_state=init_state) outputs = tf.unstack(tf.transpose(outputs, [1, 0, 2])) pred = tf.matmul(outputs[-1], weights['out']) + biases['out'] loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) train_op = tf.train.AdamOptimizer(0.001).minimize(loss) correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32)) init = tf.global_variables_initializer() with tf.Session() as sess: sess.run(init) step = 0
def __init__(self, **kwargs):
    '''Build the embedding -> LSTM -> dense classification graph.

    The following keyword arguments are required:

    Parameters
    ----------
    vocab_size : int
        Size of the vocabulary for creating embeddings
    embedding_size : int
        Dimensionality of the embedding space
    memory_size : int
        LSTM memory size
    keep_prob : float
        Keep probability (1 - dropout rate) for embedding and LSTM output;
        only applied while ``is_training`` is fed as true
    subsequence_length : int
        Length of the subsequences (all embeddings are padded to this length)
    optimizer : OptimizerSpec
        Provides ``create()``, ``learning_rate`` and ``step_counter``

    Raises
    ------
    KeyError
        If any of the keyword arguments above is missing.
    '''
    # --- Hyperparameters ---------------------------------------------------
    vocab_size = kwargs['vocab_size']
    embedding_size = kwargs['embedding_size']
    memory_size = kwargs['memory_size']
    keep_prob = kwargs['keep_prob']
    subsequence_length = kwargs['subsequence_length']
    optimizer_spec = kwargs['optimizer']
    optimizer = optimizer_spec.create()
    self.learning_rate = optimizer_spec.learning_rate
    self.step_counter = optimizer_spec.step_counter

    # --- Net inputs --------------------------------------------------------
    self.batch_size = placeholder(tf.int32, shape=[], name='batch_size')
    self.is_training = placeholder(tf.bool, shape=[], name='is_training')
    self.word_ids = placeholder(tf.int32,
                                shape=(None, subsequence_length),
                                name='word_ids')
    self.labels = placeholder(tf.int32, shape=(None, ), name='labels')
    self.hidden_state = placeholder(tf.float32,
                                    shape=(None, memory_size),
                                    name='hidden_state')
    self.cell_state = placeholder(tf.float32,
                                  shape=(None, memory_size),
                                  name='cell_state')
    lengths = sequence_lengths(self.word_ids)

    # --- Embedding ---------------------------------------------------------
    self.embedding_matrix, _bias = get_weights_and_bias(
        (vocab_size, embedding_size))
    # Dropout on the embeddings is only active during training.
    embeddings = cond(
        self.is_training,
        lambda: nn.dropout(nn.embedding_lookup(self.embedding_matrix,
                                               self.word_ids),
                           keep_prob=keep_prob),
        lambda: nn.embedding_lookup(self.embedding_matrix, self.word_ids))

    # --- LSTM layer --------------------------------------------------------
    cell = BasicLSTMCell(memory_size, activation=tf.nn.tanh)
    # during inference, use entire ensemble
    # (kept under its own name so the Python float `keep_prob` is not
    # rebound to a tensor)
    lstm_keep_prob = cond(self.is_training,
                          lambda: constant(keep_prob),
                          lambda: constant(1.0))
    cell = DropoutWrapper(cell, output_keep_prob=lstm_keep_prob)
    self.zero_state = cell.zero_state(self.batch_size, tf.float32)

    # The cell-state placeholder feeds `c` and the hidden-state placeholder
    # feeds `h`; they were previously wired the other way round.
    state = LSTMStateTuple(c=self.cell_state, h=self.hidden_state)

    # dynamic_rnn takes the per-example lengths, so the embedding tensor
    # does not need to be unstacked into rows.
    # NOTE(review): the outputs of ALL time steps (including padded ones)
    # are flattened into the dense layer below; confirm whether padded
    # positions should be masked.
    outputs, self.state = nn.dynamic_rnn(cell,
                                         embeddings,
                                         sequence_length=lengths,
                                         initial_state=state)

    # Flatten the time and feature axes into one vector per example.
    outputs = reshape(concat(outputs, 1),
                      [-1, subsequence_length * memory_size])
    self.outputs = reduce_mean(outputs)

    # --- Fully connected layer, loss, and training -------------------------
    ff1 = fully_connected(outputs, 2, with_activation=False, use_bias=True)
    loss = reduce_mean(
        nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                    logits=ff1))
    self.train_step = optimizer.minimize(loss,
                                         global_step=self.step_counter)
    self.predictions = nn.softmax(ff1)
    correct_prediction = equal(cast(argmax(self.predictions, 1), tf.int32),
                               self.labels)
    self.accuracy = reduce_mean(cast(correct_prediction, tf.float32))

    # --- Summaries ---------------------------------------------------------
    with tf.variable_scope('summary'):
        self.summary_loss = tf.summary.scalar('loss', loss)
        self.summary_accuracy = tf.summary.scalar('accuracy', self.accuracy)
def __init__(self, args, batch_size, mode='train'):
    """Build the multi-layer LSTM language-model graph.

    Args:
        args: object exposing ``config`` and ``vocab_size``.
        batch_size: batch size used for the placeholders, the zero states
            and the cost normalisation.
        mode: ``'train'`` enables input dropout and wraps every cell in a
            ``DropoutWrapper``; ``'eval'`` returns right after the cost is
            defined, so no learning-rate, optimizer or saver attributes are
            created in that mode.

    NOTE(review): several ``tf.Variable`` objects below are created without
    explicit names; their auto-generated names presumably depend on
    creation order, so keep the statement order stable if checkpoints must
    stay loadable -- confirm.
    """
    logger = logging.getLogger(__name__)
    self.args = args
    self.config = config = self.args.config

    # Defining the epoch variables
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_incr = self.epoch.assign(self.epoch + 1)
    self.global_step = tf.Variable(0, trainable=False)

    # Used to update training schedule
    # Each tracked value is a non-trainable variable paired with a scalar
    # placeholder and an assign op that writes the placeholder into it.
    self.best_ppl = tf.Variable(10000.0, trainable=False, dtype=tf.float32)
    self.best_ppl_new = tf.placeholder(tf.float32, shape=())
    self.best_ppl_assign = self.best_ppl.assign(self.best_ppl_new)

    self.margin_ppl = tf.Variable(10000.0, trainable=False, dtype=tf.float32)
    self.margin_ppl_new = tf.placeholder(tf.float32, shape=())
    self.margin_ppl_assign = self.margin_ppl.assign(self.margin_ppl_new)

    self.last_ppl_update = tf.Variable(0, trainable=False)
    self.last_ppl_update_new = tf.placeholder(tf.int32, shape=())
    self.last_ppl_update_assign = self.last_ppl_update.assign(self.last_ppl_update_new)

    # Defining the loss interpolation constant
    self.l1 = tf.Variable(1.0, trainable=False, dtype=tf.float32)
    self.l1_new = tf.placeholder(tf.float32, shape=())
    self.l1_assign = self.l1.assign(self.l1_new)
    self.l2 = 1.0 - self.l1

    self.input_data = tf.placeholder(tf.int32, [batch_size, config.timesteps])
    self.targets = tf.placeholder(tf.int32, [batch_size, config.timesteps])

    # Taking inputs, applying dropout, passing through embeddings
    self.embedding = embedding = tf.get_variable("embedding", [args.vocab_size, config.rnn_size])
    inputs = tf.nn.embedding_lookup(embedding, self.input_data)
    if mode == 'train':
        inputs = tf.nn.dropout(inputs, keep_prob=config.input_keep_prob)

    # The whole BasicLSTMCell network
    # One cell (and one zero initial state) per layer.
    cells = []
    initial_states = []
    for i in range(config.num_layers):
        cell = BasicLSTMCell(
            config.rnn_size, forget_bias=0.0, state_is_tuple=True,
            reuse=tf.get_variable_scope().reuse
        )
        if mode == 'train':
            cell = DropoutWrapper(
                cell=cell, output_keep_prob=config.intra_keep_prob,
                state_keep_prob=config.state_keep_prob,
                variational_recurrent=True, dtype=tf.float32
            )
        cells.append(cell)
        initial_states.append(cell.zero_state(batch_size, tf.float32))
    self.cells = tuple(cells)
    self.initial_states = tuple(initial_states)

    # The actual LSTM computation, `self.initial_states` will be fed later on
    # Each layer consumes the previous layer's output (`inputs` is rebound).
    final_states = []
    outputs = []
    for i in range(config.num_layers):
        with tf.variable_scope("layer%d" % i):
            inputs, final_state = tf.nn.dynamic_rnn(
                self.cells[i], inputs, initial_state=self.initial_states[i]
            )
        outputs.append(inputs)
        final_states.append(final_state)
    self.final_states = tuple(final_states)

    # Skip connections to make training easier
    # (the outputs of all layers are summed)
    self.outputs = tf.add_n(outputs)

    with tf.variable_scope('logits'):
        # Layer of logits before softmax after RNN
        if config.shared_embeddings is True:
            # Reuse the transposed embedding matrix as the output projection.
            self.softmax_w = softmax_w = tf.transpose(embedding, [1, 0])
        else:
            self.softmax_w = softmax_w = tf.get_variable("softmax_w", [config.rnn_size, args.vocab_size])
        self.softmax_b = softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
        # The output dropout has been applied in the DropoutWrapper
        output = tf.reshape(self.outputs, [-1, config.rnn_size])
        self.logits = tf.nn.xw_plus_b(output, softmax_w, softmax_b)

    # Store the actual probability values.
    # Used by evaluation function in some cases
    self.probs = tf.nn.softmax(self.logits)
    # Converting the distribution to a one hot vector
    self.distro1 = tf.reshape(tf.one_hot(self.targets, args.vocab_size), [-1, args.vocab_size])
    # Finding 1-D cross entropy loss tensor
    self.loss = tf.nn.softmax_cross_entropy_with_logits(labels=tf.stop_gradient(self.distro1), logits=self.logits)

    # Summed loss divided by the batch size.  `self.l1` / `self.l2` are not
    # applied here: `final_cost` is the plain cost.
    self.cost = tf.reduce_sum(self.loss) / batch_size
    self.final_cost = self.cost

    if mode == 'eval':
        return

    # Defining the learning rate variables
    self.lr = tf.Variable(config.lr, trainable=False)
    self.lr_decay = self.lr.assign(self.lr * config.lr_decay)

    # Standard tricks to train LSTMs
    # (gradients are clipped by global norm before being applied)
    tvars = tf.trainable_variables()
    for variable in tvars:
        logger.info("%s - %s", variable.name, str(variable.get_shape()))
    self.grads, _ = tf.clip_by_global_norm(
        tf.gradients(self.final_cost, tvars), config.grad_clip
    )
    if config.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(self.lr)
    else:
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self.train_op = optimizer.apply_gradients(
        zip(self.grads, tvars), global_step=self.global_step
    )

    # Model savers
    # Two savers over the same variables, each keeping a single checkpoint.
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
    self.best_saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
class LanguageModel(object):
    """LSTM language model that owns its own TensorFlow graph and session.

    The constructor builds the whole graph (embeddings, unrolled LSTM, masked
    cross-entropy loss, optimizer, single-step sentence-completion ops, saver
    and summaries) and then either restores or initialises the variables.
    """

    built = False
    sen_comp_setup = False

    def __init__(self, dataset, lstm_hidden_size, pretrained=False,
                 embedding_size=100, project_size=512, project=False,
                 restore_from=None, model_dir=None, log_dir=None):
        """
        Parameters
        ----------
        dataset: Dataset,
            Dataset instance holding train, test, eval datasets
        lstm_hidden_size: int,
            Number of hidden units in the LSTM
        pretrained: bool, default False
            Whether to use pretrained embeddings
        embedding_size: int, default 100
            Dimension of the word embeddings
        project: bool, default False
            Whether to project after using larger LSTM
        project_size: int, default 512
            Final size to project to
        restore_from: str, default None
            Path to restore model from
        model_dir: str, default None
            Directory to save model to
        log_dir: str, default None
            Directory to write summaries to
        """
        graph = tf.Graph()
        graph.seed = SEED

        self.dataset = dataset
        self.lstm_hidden_size = lstm_hidden_size
        self.embedding_size = embedding_size
        # Honour the caller's choice; this used to be hard-wired to False,
        # which silently disabled the projection layer.
        self.project = bool(project)
        self.project_size = project_size
        self.session = tf.Session(graph=graph)
        self.len_corpus = len(dataset.vocab)
        # Inputs are tokens [0, T-1], targets are tokens [1, T].
        self.time_steps = dataset.train.shape[1] - 1
        self.model_dir = model_dir

        with self.session.graph.as_default():
            self._embeddings(pretrained=pretrained)
            self._compute_cross_entropy_loss()
            self._optimizer()
            self._sentence_completion_setup()
            self._savers(log_dir=log_dir)
            self._summaries()

            if restore_from is not None:
                self.saver.restore(self.session, restore_from)
            else:
                # NOTE(review): if load_embedding() assigns the pretrained
                # values through the session right away, this initializer
                # would overwrite them -- confirm against load_embedding.
                self.session.run(tf.global_variables_initializer())

    def _savers(self, log_dir=None):
        """Creates saver and summary writer.

        Parameters
        ----------
        log_dir: str, default None
            Directory to log results to
        """
        self.summary_writer = tf.summary.FileWriter(log_dir)
        self.summary_writer.add_graph(self.session.graph)
        self.saver = tf.train.Saver(max_to_keep=1000)

    def _embeddings(self, pretrained=False, scope_name=None):
        """Compute word embeddings for sentence.

        Parameters
        ----------
        pretrained: bool, default False
            Whether to use pretrained embeddings
        scope_name: str, default None
            Variable scope
        """
        if not scope_name:
            scope_name = "Embedding"

        self.sentence_ph = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.time_steps + 1],
                                          name="Sentence_placeholder")

        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.embedding_matrix = tf.get_variable(
                name="embedding_matrix",
                shape=[self.len_corpus, self.embedding_size],
                initializer=xav_init()
            )
            if pretrained:
                print("Loading pretrained embeddings...")
                load_embedding(session=self.session,
                               vocab=self.dataset.word_to_idx,
                               emb=self.embedding_matrix,
                               path=self.dataset.embedding_file,
                               vocab_size=self.len_corpus,
                               dim_embedding=self.embedding_size)
            self.word_embeddings = tf.nn.embedding_lookup(self.embedding_matrix,
                                                          self.sentence_ph)

    def _build_rnn(self, trainable_zero_state=False, scope_name=None):
        """Sets up the LSTM and its unrolling."""
        if not scope_name:
            scope_name = "LSTM"

        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.lstm = BasicLSTMCell(num_units=self.lstm_hidden_size)
            # Dynamic batch size so the same graph serves any batch.
            batch_size = tf.shape(self.sentence_ph)[0]
            if not trainable_zero_state:
                state = self.lstm.zero_state(batch_size=batch_size,
                                             dtype=tf.float32)
            else:
                # NOTE(review): _trainable_zero_state is not defined in this
                # class as visible here -- confirm it exists before enabling.
                state = self._trainable_zero_state()
            if self.project:
                self._projection_layer()
            self._unroll_lstm(state=state)
            self._output_layer()
        self.built = True

    def _projection_layer(self, scope_name=None):
        """Creates the weight matrix for projection, when a larger LSTM is used."""
        if scope_name is None:
            scope_name = "Projection"
        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.project_W = tf.get_variable(
                name="proj_weights",
                shape=[self.lstm_hidden_size, self.project_size],
                dtype=tf.float32,
                initializer=xav_init()
            )

    def _unroll_lstm(self, state):
        """Unrolls the LSTM over `self.time_steps` and stores `self.output`."""
        outputs = []
        for time_step in range(self.time_steps):
            out, state = self.lstm(self.word_embeddings[:, time_step, :], state)
            outputs.append(out)
        # (batch, time_steps, lstm_hidden_size)
        self.output = tf.stack(outputs, axis=1)
        if self.project:
            self.output = tf.tensordot(self.output, self.project_W, axes=1)

    def _output_layer(self, scope_name=None):
        """Creates the weights and bias mapping LSTM outputs to vocabulary logits."""
        if scope_name is None:
            scope_name = "Output_layer"
        in_size = self.project_size if self.project else self.lstm_hidden_size
        with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE):
            self.output_layer = dict()
            self.output_layer['weights'] = tf.get_variable(
                name="weights",
                shape=[in_size, self.len_corpus],
                dtype=tf.float32,
                initializer=xav_init())
            self.output_layer['bias'] = tf.get_variable(
                name='bias',
                shape=[self.len_corpus],
                dtype=tf.float32,
                initializer=xav_init())

    def _compute_cross_entropy_loss(self):
        """Computes the loss for the LSTM. Masks out <pad> tokens from final loss."""
        if not self.built:
            print("Building the RNN Graph...")
            self._build_rnn()

        # Shape: (batch, time_steps, len_corpus)
        logits = tf.tensordot(self.output, self.output_layer["weights"], axes=1)
        logits = tf.add(logits, self.output_layer["bias"])

        # Targets are the input sentence shifted left by one token.
        targets = self.sentence_ph[:, 1:]
        # Shape: (batch, time_steps)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=targets)

        # Mask with 0s wherever the <pad> token is present. Uses the index
        # from the vocabulary rather than a hard-coded literal.
        pad_index = self.dataset.word_to_idx["<pad>"]
        self.not_pads = tf.cast(tf.not_equal(targets, pad_index),
                                cross_entropy.dtype)
        self.cross_entropy_masked = tf.multiply(cross_entropy, self.not_pads)
        self.sentence_lengths = tf.reduce_sum(self.not_pads, axis=1)

        # Shape: (batch,)
        cross_entropy_batch = tf.reduce_sum(self.cross_entropy_masked, axis=1)
        self.batch_loss = cross_entropy_batch / self.sentence_lengths
        self.batch_perplexity = tf.exp(self.batch_loss)

        self.loss_avg = tf.reduce_mean(self.batch_loss)
        # Batch averaged perplexity
        self.perplexity_avg = tf.reduce_mean(self.batch_perplexity)

    def _summaries(self):
        """Creates summaries to log."""
        # Train summaries
        self.train_loss_summary = tf.summary.scalar(
            'train/batch_averaged_loss', self.loss_avg)
        self.train_perplexity_summary = tf.summary.scalar(
            'train/batch_averaged_perplexity', self.perplexity_avg)
        train_summaries = [self.train_loss_summary,
                           self.train_perplexity_summary]
        self.train_summaries = tf.summary.merge(train_summaries,
                                                name="train_summaries")

        # Eval summaries are fed from Python because the means are computed
        # over the whole eval set outside the graph.
        self.eval_loss_ph = tf.placeholder(tf.float32)
        self.eval_perplexity_ph = tf.placeholder(tf.float32)
        self.eval_loss_summary = tf.summary.scalar(
            'eval/averaged_loss', self.eval_loss_ph)
        self.eval_perplexity_summary = tf.summary.scalar(
            'eval/averaged_perplexity', self.eval_perplexity_ph)
        eval_summaries = [self.eval_loss_summary, self.eval_perplexity_summary]
        self.eval_summaries = tf.summary.merge(eval_summaries,
                                               name="eval_summaries")

    def _optimizer(self):
        """Defines the optimizer (Adam with global-norm gradient clipping)."""
        with tf.variable_scope("Optimizer", reuse=tf.AUTO_REUSE):
            self.optimizer = tf.train.AdamOptimizer()
            gradients, variables = zip(
                *self.optimizer.compute_gradients(self.loss_avg))
            gradients, _ = tf.clip_by_global_norm(gradients, clip_norm=5.0)
            self.optimize_op = self.optimizer.apply_gradients(
                zip(gradients, variables))

    def evaluate(self, batch_size=64, timestep=None, verbose=False):
        """Computes loss and perplexity on the eval dataset.

        Parameters
        ----------
        batch_size: int, default 64
            Batch size used to iterate over the eval data
        timestep: int, default None
            Global step under which the eval summaries are written
        verbose: bool, default False
            Whether to print the mean loss and perplexity
        """
        losses, perplexities = [], []
        fetches = [self.batch_loss, self.batch_perplexity]
        for batch in self.dataset.batch_generator(mode="eval",
                                                  batch_size=batch_size):
            batch_loss, batch_perplexity = self.session.run(
                fetches=fetches, feed_dict={self.sentence_ph: batch})
            losses.extend(batch_loss)
            perplexities.extend(batch_perplexity)

        mean_eval_loss = np.mean(losses)
        mean_eval_perplexity = np.mean(perplexities)

        feed_dict = {self.eval_loss_ph: mean_eval_loss,
                     self.eval_perplexity_ph: mean_eval_perplexity}
        eval_summaries = self.session.run(fetches=self.eval_summaries,
                                          feed_dict=feed_dict)
        self.summary_writer.add_summary(eval_summaries, timestep)

        if verbose:
            print("Evaluation Loss: {0:.3f}".format(mean_eval_loss))
            print("Evaluation Perplexity: {0:.3f}".format(mean_eval_perplexity))

    def fit(self, num_epochs=10, batch_size=64, eval_every=10, verbose=False):
        """Trains the LSTM and saves a checkpoint after every epoch.

        Parameters
        ----------
        num_epochs: int, default 10
            Number of passes over the training data
        batch_size: int, default 64
            Training batch size
        eval_every: int, default 10
            Write train summaries and run evaluation every this many batches
        verbose: bool, default False
            Whether to print progress
        """
        fetches = [self.loss_avg, self.perplexity_avg,
                   self.optimize_op, self.train_summaries]

        for epoch in range(num_epochs):
            model_dir_epoch = os.path.join(self.model_dir, str(epoch + 1))
            if not os.path.exists(model_dir_epoch):
                os.makedirs(model_dir_epoch)

            batches = self.dataset.batch_generator(mode="train",
                                                   batch_size=batch_size,
                                                   shuffle=True)
            for n_batch, train_batch in enumerate(batches):
                # Summary steps are integers; truncate explicitly instead of
                # handing a float to the writer.
                timestep = int(self.dataset.train.shape[0] / batch_size
                               * epoch) + n_batch
                loss, perplexity, _, train_summaries = self.session.run(
                    fetches=fetches, feed_dict={self.sentence_ph: train_batch})

                if (n_batch + 1) % eval_every == 0:
                    self.summary_writer.add_summary(train_summaries, timestep)
                    if verbose:
                        print("Epoch {}, Batch: {}".format(epoch + 1,
                                                           n_batch + 1))
                        print("Training loss: {0:.3f}".format(loss))
                        print("Training perplexity: {0:.3f}".format(perplexity))
                        print("Computing loss and perplexity on eval data. "
                              "Epoch {}, Timestep: {}".format(epoch + 1,
                                                              timestep))
                    self.evaluate(timestep=timestep, verbose=verbose)
                    if verbose:
                        print()

            model_savepath = os.path.join(model_dir_epoch, "model.ckpt")
            self.saver.save(sess=self.session, save_path=model_savepath)

    def _sentence_completion_setup(self):
        """Setup for the sentence completion task.

        Builds a single-step graph: one word index and an explicit LSTM state
        go in, the next state and the vocabulary logits come out.
        """
        self.state_c = tf.placeholder(tf.float32, [1, self.lstm_hidden_size])
        self.state_h = tf.placeholder(tf.float32, [1, self.lstm_hidden_size])
        self.word_ph = tf.placeholder(dtype=tf.int32, shape=[1],
                                      name="Word_placeholder")
        self.word_embedding = tf.nn.embedding_lookup(self.embedding_matrix,
                                                     self.word_ph)

        state = tf.contrib.rnn.LSTMStateTuple(self.state_c, self.state_h)
        out, self.next_state = self.lstm(self.word_embedding, state)
        if self.project:
            # Same projection matrix the unrolled training graph uses.
            out = tf.matmul(out, self.project_W)

        logits_word = tf.matmul(out, self.output_layer["weights"])
        logits_word = tf.add(logits_word, self.output_layer["bias"])
        # Flatten (1, len_corpus) -> (len_corpus,) using the real vocab size.
        self.logits = tf.reshape(logits_word, [self.len_corpus])
        self.sen_comp_setup = True

    def _predict_next(self, word, state_c, state_h):
        """Feeds one word through the single-step graph.

        Returns the most likely next word and the updated (c, h) state.
        """
        unk_idx = self.dataset.word_to_idx["<unk>"]
        word_idx = self.dataset.word_to_idx.get(word, unk_idx)
        feed_dict = {self.word_ph: np.array([word_idx]),
                     self.state_c: state_c,
                     self.state_h: state_h}
        state, logits = self.session.run([self.next_state, self.logits],
                                         feed_dict)
        # Never emit the tokens at indices 0, 2 and 3.
        # NOTE(review): presumably <bos>, <pad>, <unk> -- confirm against the
        # vocabulary ordering.
        logits[0] = -np.inf
        logits[2:4] = -np.inf
        next_word = self.dataset.idx_to_word[np.argmax(logits)]
        return next_word, state.c, state.h

    def complete_sentence(self, words, max_len=20):
        """Completes a sentence, given the initial words.

        Parameters
        ----------
        words: list,
            List of starting words
        max_len: int, default 20
            Maximum length of sentence if <eos> is not generated.

        Returns
        -------
        str, the given words followed by the generated words (including
        <eos> when it is generated), joined by single spaces.
        """
        sentence = []
        state_c = np.zeros((1, self.lstm_hidden_size))
        state_h = np.zeros((1, self.lstm_hidden_size))
        word_predicted = None

        # Warm up the state on <bos> plus the given prefix.
        for word in ["<bos>"] + list(words):
            sentence.append(word)
            word_predicted, state_c, state_h = self._predict_next(
                word, state_c, state_h)

        # <bos> does not count towards the length.
        sentence_length = len(sentence) - 1
        while sentence_length < max_len:
            # Record the prediction *before* feeding it back; otherwise the
            # first generated word is lost from the output.
            sentence.append(word_predicted)
            sentence_length += 1
            if word_predicted == "<eos>" or sentence_length >= max_len:
                break
            word_predicted, state_c, state_h = self._predict_next(
                word_predicted, state_c, state_h)

        return " ".join(sentence[1:])

    def complete_sentences(self, data_filename, sol_filename, max_len=20,
                           log_every=100):
        """Completes the sentences in given file.

        Parameters
        ----------
        data_filename: str,
            Filename containing the sentences to complete
        sol_filename: str,
            Filename to write the completed sentence to
        max_len: int, default 20
            Maximum allowed length of sentence.
        log_every: int, default 100
            Print progress every this many sentences
        """
        if not self.sen_comp_setup:
            self._sentence_completion_setup()

        print("Starting to write sentences...")
        # Context managers close both files even if completion raises.
        with open(sol_filename, "w") as out_file, \
                open(data_filename, "r") as in_file:
            for num_lines, line in enumerate(in_file, start=1):
                words = line.strip().split(" ")
                completed_sentence = self.complete_sentence(words,
                                                            max_len=max_len)
                out_file.write(completed_sentence + "\n")
                if num_lines % log_every == 0:
                    print("Finished writing {} sentences.".format(num_lines))
        print("Finished writing sentences.")

    def compute_perplexity(self, batch):
        """Returns the perplexity averaged over the sentences in `batch`.

        With a batch of one sentence this is that sentence's perplexity.
        """
        return self.session.run(self.perplexity_avg,
                                {self.sentence_ph: batch})

    def save_perplexity_to_file(self, filename, log_every=100):
        """Saves perplexity computations to file, one test sentence per line.

        Parameters
        ----------
        filename: str,
            File to write perplexity values to
        log_every: int, default 100
            Print progress every this many sentences
        """
        print("Starting to save perplexity values...")
        with open(filename, "w") as f:
            test_batches = self.dataset.batch_generator(mode="test",
                                                        batch_size=1,
                                                        shuffle=False)
            for num_lines, test_sentence in enumerate(test_batches, start=1):
                perplexity = self.compute_perplexity(test_sentence)
                f.write(str(perplexity) + "\n")
                if num_lines % log_every == 0:
                    print("Finished calculating perplexity for {} sentences."
                          .format(num_lines))
        print("Finished writing perplexity values.")
# Script fragment: small 1-D CNN followed by a two-step LSTM, run once to
# print the shapes of a few intermediate tensors. `input_X`, `batch_size`,
# `df_graph`, `X` and `Y` are defined before this fragment.
input_Y = tf.placeholder(tf.float32, (None, 2, 1), 'input_Y')

# Two conv layers (2 filters, kernel 13), then average pooling by 2.
conv1_out = tf.layers.conv1d(input_X, 2, 13, activation=tf.nn.relu, name='conv1')
conv2_out = tf.layers.conv1d(conv1_out, 2, 13, activation=tf.nn.relu, name='conv2')
pooling_out = tf.layers.average_pooling1d(conv2_out, 2, 2, name='pooling')

# Two conv layers (4 filters, kernel 5), then average pooling by 2.
conv3_out = tf.layers.conv1d(pooling_out, 4, 5, activation=tf.nn.relu, name='conv3')
conv4_out = tf.layers.conv1d(conv3_out, 4, 5, activation=tf.nn.relu, name='conv4')
pooling1_out = tf.layers.average_pooling1d(conv4_out, 2, 2, name='pooling1')

# Swap the first two axes so that indexing axis 0 selects one step.
resort_out = tf.transpose(pooling1_out, [1, 0, 2], name='resort')

# Manually unroll a single-unit LSTM over the first two steps, carrying the
# state from one step to the next.
lstm_layer = BasicLSTMCell(1)
state = lstm_layer.zero_state(batch_size, tf.float32)
out = []
for i in range(2):
    output, state = lstm_layer(resort_out[i], state)
    out.append(output)

out_gather = [conv4_out, pooling1_out, resort_out, out]

init_op = tf.global_variables_initializer()
sess = tf.Session(graph=df_graph)
sess.run(init_op)
train_writer = tf.summary.FileWriter('./cnn_lstm', sess.graph, flush_secs=5)

out_run = sess.run(out_gather,
                   feed_dict={input_X: X, input_Y: Y, batch_size: X.shape[0]})
# Plain loop: a comprehension here would only build a throwaway list of None.
for x in out_run:
    print(np.array(x).shape)
class LstmAgent(AbstractAgent):
    """Agent whose observation network is a single BasicLSTMCell.

    The recurrent state is exposed through the placeholder tuple `self.S`, so
    callers pass the state in explicitly on every `train_step` and
    `get_actions` call.
    """

    @property
    def seq_len(self):
        """Number of time steps `network` splits its input into (default 8)."""
        if self._seq_len:
            return self._seq_len
        return 8

    def __init__(self, batch_size: int, layer_size: int, device_num: int,
                 **kwargs):
        """Create the state placeholders and LSTM cell, then build the parent.

        :param batch_size: first dimension of the state placeholders
        :param layer_size: number of LSTM units
        :param device_num: index of the GPU the placeholders and cell go on
        :param kwargs: forwarded unchanged to the parent constructor
        """
        self.batch_size = batch_size
        self.layer_size = layer_size
        with tf.device('/gpu:' + str(device_num)):
            state_args = tf.float32, [batch_size, layer_size]
            self.S = LSTMStateTuple(c=tf.placeholder(*state_args, name='C'),
                                    h=tf.placeholder(*state_args, name='H'))
            self.lstm = BasicLSTMCell(layer_size)
        super().__init__(batch_size=batch_size,
                         layer_size=layer_size,
                         device_num=device_num,
                         **kwargs)
        # Concrete zero state, used as a shape reference for incoming states.
        self.initial_state = self.sess.run(
            self.lstm.zero_state(batch_size, tf.float32))
        assert np.shape(self.initial_state) == (2, batch_size, layer_size)
        assert self.S.c.shape == self.S.h.shape == (batch_size, layer_size)

    def network(self, inputs: tf.Tensor, reuse=False) -> tf.Tensor:
        """Run the LSTM over `inputs` split into `seq_len` steps along axis 1.

        Starts from the fed state `self.S` and carries the state from each
        step into the next; previously every step restarted from `self.S`.

        :return: NetworkOutput built from the last step's (output, state)
        """
        split_inputs = tf.split(inputs, self.seq_len, axis=1)
        s = self.S
        for x in split_inputs:
            x = tf.squeeze(x, axis=1)
            output, s = self.lstm(x, s)
        return NetworkOutput(output, s)

    def state_feed(self, states):
        """Map the (c, h) placeholders in `self.S` to the given state values."""
        return dict(zip(self.S, states))

    def train_step(self, step: Step, feed_dict: dict = None) -> dict:
        """Run one training step, feeding the recurrent state with the batch.

        :param step: transition batch; `step.s` must have the same shape as
            `self.initial_state`
        :param feed_dict: optional ready-made feed dict; built from `step`
            when omitted
        """
        assert np.shape(step.s) == np.shape(self.initial_state)
        if feed_dict is None:
            feed_dict = {
                **self.state_feed(step.s),
                self.O1: step.o1,
                self.A: step.a,
                self.R: np.array(step.r) * self.reward_scale,
                self.O2: step.o2,
                self.T: step.t,
            }
        # NOTE(review): assumes the parent's train_step accepts the feed dict
        # as its second argument -- confirm against AbstractAgent. Without
        # forwarding it the state placeholders would never be fed.
        return super().train_step(step, feed_dict)

    def q_network(self, o: tf.Tensor, a: tf.Tensor, name: str,
                  reuse: bool = None) -> tf.Tensor:
        """Q-value head: LSTM output concatenated with the action, then a
        single dense unit, flattened to one value per row."""
        with tf.variable_scope(name, reuse=reuse):
            o = self.network(o).output
            oa = tf.concat([o, a], axis=1)
            return tf.reshape(tf.layers.dense(oa, 1, name='q'), [-1])

    def get_actions(self, o: ArrayLike, sample: bool = True, state=None) \
            -> Tuple[np.ndarray, LSTMStateTuple]:
        """Return the action for a single observation and the new state.

        :param o: one rank-1 observation
        :param sample: sample an action if True, else take the
            maximum-likelihood action
        :param state: recurrent state with the shape of `self.initial_state`
        """
        assert len(np.shape(o)) == 1
        assert np.shape(state) == np.shape(self.initial_state)
        feed_dict = {self.O1: [[o]], **self.state_feed(state)}
        A = self.A_sampled1 if sample else self.A_max_likelihood
        return self.sess.run([A[0], self.S_new], feed_dict)
def build_graph(self):
    """Build the twin-LSTM similarity graph.

    Two id sequences (English and Chinese topics) are embedded with one
    embedding table per topic position, each sequence is run through its own
    LSTM, and the last outputs are compared with an angular similarity
    ``1 - acos(cosine) / pi``. The loss is the mean squared error against the
    fed scores.

    Sets ``self.global_step``, ``self.learning_rate``, ``self.loss_op``,
    ``self.train_op``, ``self.prediction`` and ``self.init``.
    """
    import math

    self.logger.info("start building graph")
    english_input = tf.placeholder(tf.int32,
                                   [self.batch_size, self.topic_num],
                                   name="english_input")
    chinese_input = tf.placeholder(tf.int32,
                                   [self.batch_size, self.topic_num],
                                   name="chinese_input")
    Y = tf.placeholder(tf.float32, [self.batch_size], name="scores")

    # Embedding layer: a separate table for every topic position.
    with tf.variable_scope("embdding"):
        en_embeddings = []
        zh_embeddings = []
        for i in range(self.topic_num):
            english_ids = tf.slice(english_input, [0, i],
                                   [self.batch_size, 1])
            chinese_ids = tf.slice(chinese_input, [0, i],
                                   [self.batch_size, 1])
            embedding_en = tf.Variable(
                tf.random_normal([self.fea_dim, self.hidden_size]),
                name="en_topic_%d_embedding" % (i + 1),
                dtype=tf.float32)
            embedding_zh = tf.Variable(
                tf.random_normal([self.fea_dim, self.hidden_size]),
                name="zh_topic_%d_embedding" % (i + 1),
                dtype=tf.float32)
            en_embeddings.append(
                tf.nn.embedding_lookup(embedding_en, english_ids))
            zh_embeddings.append(
                tf.nn.embedding_lookup(embedding_zh, chinese_ids))

        sequence_shape = [self.batch_size, self.topic_num, self.hidden_size]
        english_embedding = tf.reshape(tf.concat(en_embeddings, 1),
                                       sequence_shape)
        chinese_embedding = tf.reshape(tf.concat(zh_embeddings, 1),
                                       sequence_shape)

    # LSTM layer: one independent LSTM per language ("lstm-a", "lstm-b").
    two_lstm_outputs = []
    for i, X in enumerate((english_embedding, chinese_embedding)):
        with tf.variable_scope("lstm-%s" % chr(ord('a') + i)):
            cell = BasicLSTMCell(num_units=self.hidden_size)
            initial_state = cell.zero_state(self.batch_size, tf.float32)
            outputs, _states = tf.nn.dynamic_rnn(
                cell, X, initial_state=initial_state, dtype=tf.float32)
            # Keep only the output at the last topic position.
            outputs = tf.slice(outputs, [0, self.topic_num - 1, 0],
                               [self.batch_size, 1, self.hidden_size])
            two_lstm_outputs.append(
                tf.reshape(outputs, [-1, self.hidden_size]))

    lstm_a_output = two_lstm_outputs[0]
    lstm_b_output = two_lstm_outputs[1]

    # Cosine similarity mapped to [0, 1] through the angle between vectors.
    eps = 1e-7
    numerator = tf.reduce_sum(lstm_a_output * lstm_b_output, 1)
    denominator = tf.sqrt(tf.reduce_sum(
        tf.square(lstm_a_output), 1)) * tf.sqrt(
            tf.reduce_sum(tf.square(lstm_b_output), 1))
    # Guard against 0/0 for zero-norm outputs, and keep the cosine strictly
    # inside (-1, 1): rounding can push it past +/-1, where acos is NaN and
    # its gradient is infinite.
    cosine = numerator / tf.maximum(denominator, eps)
    cosine = tf.clip_by_value(cosine, -1.0 + eps, 1.0 - eps)
    y = 1 - (tf.acos(cosine) / math.pi)
    y = tf.reshape(y, [self.batch_size])

    self.global_step = tf.Variable(0, trainable=False)
    # NOTE(review): this schedule is exposed but not passed to the optimizer
    # below, which uses a fixed 0.1. With decay_rate=2 it would grow rather
    # than decay -- confirm the intent before wiring it in.
    self.learning_rate = tf.train.exponential_decay(0.1,
                                                    self.global_step,
                                                    10,
                                                    2,
                                                    staircase=False)
    self.loss_op = tf.reduce_mean(tf.square(y - Y))
    self.train_op = tf.train.AdamOptimizer(learning_rate=0.1).minimize(
        self.loss_op, global_step=self.global_step)

    tf.summary.scalar("loss", self.loss_op)
    tf.summary.histogram("prediction", y)
    tf.summary.histogram("labels", Y)

    self.prediction = y
    self.init = tf.global_variables_initializer()
    self.logger.info("Done! building graph")