class LSTMLM_Model(LanguageModel):
    """Multi-layer LSTM language model built with the TensorFlow 1.x graph API.

    The graph is assembled in ``__init__``: data loading, placeholders,
    embedding lookup, the unrolled LSTM stack, the softmax projection, the
    sequence loss and the gradient-descent training op.
    """

    def _encode_file(self, filename, encoding):
        """Return the tokens of `filename` as an int32 array of vocabulary ids."""
        return np.array(
            [self.vocab.encode(word)
             for word in get_datafile(filename, encoding=encoding)],
            dtype=np.int32)

    def load_own_data(self, filename, filename2, filename3, debug=False,
                      encoding='utf-8'):
        """Build the vocabulary and load the encoded train/dev/test data.

        Args:
            filename: Training data file; also used to construct the vocabulary.
            filename2: Validation data file.
            filename3: Test data file.
            debug: When true, keep only the first 1024 tokens of each split.
            encoding: Text encoding forwarded to ``get_datafile``.
        """
        self.vocab = Vocab()
        # Read the vocabulary source with the same encoding as the data that
        # is encoded below, so both passes see identically decoded tokens.
        self.vocab.construct(get_datafile(filename, encoding=encoding))

        self.encoded_train = self._encode_file(filename, encoding)
        self.encoded_valid = self._encode_file(filename2, encoding)
        self.encoded_test = self._encode_file(filename3, encoding)

        if debug:
            num_debug = 1024
            self.encoded_train = self.encoded_train[:num_debug]
            self.encoded_valid = self.encoded_valid[:num_debug]
            self.encoded_test = self.encoded_test[:num_debug]

    def add_placeholders(self):
        """Create the input, target, dropout and learning-rate placeholders."""
        self.input_placeholder = tf.placeholder(
            tf.int32, [None, self.config.num_steps], name='Input')
        self.labels_placeholder = tf.placeholder(
            tf.int32, [None, self.config.num_steps], name='Target')
        self.dropout_placeholder = tf.placeholder(tf.float32, name='Dropout')
        self._new_lr = tf.placeholder(
            tf.float32, shape=[], name="new_learning_rate")

    def add_embedding(self):
        """Look up embeddings for ``input_placeholder``.

        The embedding variable has shape ``[len(vocab), embed_size]`` and is
        pinned to the CPU device.

        Returns:
            The result of ``tf.nn.embedding_lookup`` on the input placeholder.
        """
        with tf.device('/cpu:0'):
            embedding = tf.get_variable(
                'Embedding', [len(self.vocab), self.config.embed_size],
                trainable=True)
            inputs = tf.nn.embedding_lookup(embedding, self.input_placeholder)
        return inputs

    def add_projection(self, lstm_output):
        """Project LSTM outputs onto the vocabulary.

        Args:
            lstm_output: Tensor multiplied by the ``[hidden_size, vocab_size]``
                softmax weight matrix.

        Returns:
            Logits reshaped to ``[batch_size, num_steps, vocab_size]``, the
            3-D layout passed to the sequence loss.
        """
        with tf.variable_scope('Projection'):
            size = self.config.hidden_size
            vocab_size = len(self.vocab)
            softmax_w = tf.get_variable(
                "softmax_w", [size, vocab_size], dtype=data_type())
            softmax_b = tf.get_variable(
                "softmax_b", [vocab_size], dtype=data_type())
            logits = tf.nn.xw_plus_b(lstm_output, softmax_w, softmax_b)
            # Reshape logits to be a 3-D tensor for sequence loss.
            logits = tf.reshape(
                logits,
                [self.config.batch_size, self.config.num_steps, vocab_size])
        return logits

    def add_loss_op(self, output):
        """Build the sequence loss and store its sum in ``self.cost``.

        The loss is averaged across the batch but not across time steps, then
        summed; the result is added to the ``'total_loss'`` collection.

        Args:
            output: Logits as returned by ``add_projection``.

        Returns:
            The sum of every tensor in the ``'total_loss'`` collection.
        """
        weights = tf.ones(
            [self.config.batch_size, self.config.num_steps], dtype=data_type())
        per_step_loss = tf.contrib.seq2seq.sequence_loss(
            output,
            self.labels_placeholder,
            weights,
            average_across_timesteps=False,
            average_across_batch=True)
        self.cost = tf.reduce_sum(per_step_loss)
        tf.add_to_collection('total_loss', self.cost)
        return tf.add_n(tf.get_collection('total_loss'))

    def assign_lr(self, session, lr_value):
        """Set the learning-rate variable to `lr_value` by running the update op."""
        session.run(self._lr_update, feed_dict={self._new_lr: lr_value})

    def add_training_op(self):
        """Create the SGD training op with global-norm gradient clipping.

        Also creates the non-trainable learning-rate variable ``self._lr``
        (initialised to 0.0) and the ``self._lr_update`` op used by
        ``assign_lr``.

        Returns:
            The op that applies the clipped gradients of ``self.cost``.
        """
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(self.cost, tvars), self.config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.train.get_or_create_global_step())
        self._lr_update = tf.assign(self._lr, self._new_lr)
        return train_op

    def _get_lstm_cell(self, is_training):
        """Return a ``BasicLSTMCell`` that reuses variables when not training."""
        return tf.contrib.rnn.BasicLSTMCell(
            self.config.hidden_size,
            forget_bias=0.0,
            state_is_tuple=True,
            reuse=not is_training)

    def add_model(self, inputs, is_training):
        """Create the LSTM model, unrolled for ``config.num_steps`` steps.

        Sets ``self.initial_state`` (zero state) and ``self.final_state``.

        Args:
            inputs: Tensor indexed as ``inputs[:, time_step, :]``.
            is_training: When true and ``config.dropout < 1``, dropout is
                applied to the inputs and to each cell's output.

        Returns:
            The per-step cell outputs concatenated along axis 1 and reshaped
            to ``[-1, hidden_size]``.
        """
        print(inputs.shape)
        use_dropout = is_training and self.config.dropout < 1

        with tf.variable_scope('InputDropout'):
            if use_dropout:
                inputs = tf.nn.dropout(inputs, self.config.dropout)

        # NOTE(review): the nesting of "RNNV" inside "LSTMMODEL" was inferred
        # from a whitespace-collapsed source; it determines variable names, so
        # confirm against existing checkpoints before relying on it.
        with tf.variable_scope('LSTMMODEL'):

            def make_cell():
                cell = self._get_lstm_cell(is_training)
                if use_dropout:
                    cell = tf.contrib.rnn.DropoutWrapper(
                        cell, output_keep_prob=self.config.dropout)
                return cell

            cell = tf.contrib.rnn.MultiRNNCell(
                [make_cell() for _ in range(self.config.num_layers)],
                state_is_tuple=True)

            self.initial_state = cell.zero_state(
                self.config.batch_size, data_type())
            state = self.initial_state

            outputs = []
            with tf.variable_scope("RNNV"):
                for time_step in range(self.config.num_steps):
                    # Every step after the first shares the first step's
                    # variables.
                    if time_step > 0:
                        tf.get_variable_scope().reuse_variables()
                    cell_output, state = cell(inputs[:, time_step, :], state)
                    outputs.append(cell_output)

            output = tf.reshape(
                tf.concat(outputs, 1), [-1, self.config.hidden_size])
            self.final_state = state
        return output

    def run_epoch(self, session, data, train_op=None, verbose=10):
        """Run one pass over `data` and return the perplexity.

        Args:
            session: Session used to run the graph.
            data: Encoded token array handed to ``ptb_iterator``.
            train_op: Op run with every batch. When ``None`` a no-op is used
                and the dropout value fed is 1.
            verbose: Progress is written to stdout every `verbose` steps;
                a falsy value disables progress output.

        Returns:
            ``exp(total cost / total number of time steps processed)``.
        """
        config = self.config
        dp = config.dropout
        # Compare against None explicitly: truth-testing a TensorFlow tensor
        # raises, and the intent is only "no training op supplied".
        if train_op is None:
            train_op = tf.no_op()
            dp = 1

        total_steps = sum(
            1 for _ in ptb_iterator(data, config.batch_size, config.num_steps))

        costs = 0.0
        iters = 0
        batches = ptb_iterator(data, config.batch_size, config.num_steps)
        for step, (x, y) in enumerate(batches):
            # The initial state is not fed, so no state is carried over from
            # the previous batch.
            feed = {
                self.input_placeholder: x,
                self.labels_placeholder: y,
                self.dropout_placeholder: dp,
            }
            _loss, _state, cost, _ = session.run(
                [self.calculate_loss, self.final_state, self.cost, train_op],
                feed_dict=feed)
            costs += cost
            iters += config.num_steps

            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(costs / iters)))
                sys.stdout.flush()

        if verbose:
            sys.stdout.write('\r')
        return np.exp(costs / iters)

    def __init__(self, config):
        """Load the data from ``./data/`` and build the full graph.

        Args:
            config: Configuration object; this class reads ``num_steps``,
                ``batch_size``, ``hidden_size``, ``embed_size``,
                ``num_layers``, ``dropout``, ``max_grad_norm`` and
                ``is_training`` from it.
        """
        self.config = config
        dirname = "./data/"
        self.load_own_data(
            filename=dirname + "train_data",
            filename2=dirname + "dev_data",
            filename3=dirname + "test_data",
            debug=False,
            encoding='Latin-1')
        self.add_placeholders()

        self.inputs = self.add_embedding()
        self.lstm_outputs = self.add_model(self.inputs, self.config.is_training)
        self.outputs = self.add_projection(self.lstm_outputs)

        # Softmax is taken in float64 over flattened logits, then restored to
        # 3-D and transposed to [num_steps, batch_size, vocab_size].
        vocab_size = len(self.vocab)
        flat_logits = tf.reshape(
            self.outputs,
            [self.config.batch_size * self.config.num_steps, vocab_size])
        flat_probs = tf.nn.softmax(tf.cast(flat_logits, tf.float64))
        probs = tf.reshape(
            flat_probs,
            [self.config.batch_size, self.config.num_steps, vocab_size])
        self.predictions = tf.transpose(probs, [1, 0, 2])

        self.calculate_loss = self.add_loss_op(self.outputs)
        self.train_step = self.add_training_op()