def __init__(self, args):
    """Set up the recurrent cell and the filesystem layout for run artifacts.

    Args:
        args: run configuration; may carry an explicit ``output_path``.
    """
    self.cell = GRUCell(Config.embed_size, Config.hidden_size)
    # Honour an explicit output_path; otherwise derive a fresh
    # timestamped results directory so runs never overwrite each other.
    if "output_path" in args:
        self.output_path = args.output_path
    else:
        self.output_path = "results/{}/{:%Y%m%d_%H%M%S}/".format(
            "RNN", datetime.now())
    base = self.output_path
    self.model_output = base + "model.weights"
    self.eval_output = base + "results.txt"
    self.log_output = base + "log"
def add_prediction_op(self):
    """Build the unrolled recurrent network and return per-step logits.

    For each time step t: o_t, h_t = cell(x_t, h_{t-1}); the dropped
    output is projected through U and b2 to class scores.

    Returns:
        preds: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    inputs = self.add_embedding()
    keep_prob = self.dropout_placeholder

    # Pick the recurrent cell from the configuration.
    if Config.cell_type == "gru":
        cell = GRUCell(Config.embed_size, Config.hidden_size)
    elif Config.cell_type == "rnn":
        cell = RNNCell(Config.embed_size, Config.hidden_size)
    else:
        assert False, "Cell type undefined"

    # Output projection shared across time steps.
    self.U = tf.get_variable(
        'U', [Config.hidden_size, Config.n_classes],
        initializer=tf.contrib.layers.xavier_initializer())
    self.b2 = tf.get_variable(
        'b2', [Config.n_classes, ],
        initializer=tf.contrib.layers.xavier_initializer())

    state = tf.zeros([tf.shape(inputs)[0], Config.hidden_size])
    step_logits = []
    with tf.variable_scope("RNN"):
        for t in range(Config.max_length):
            # Reuse the cell's variables after the first unrolled step.
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            output, state = cell(inputs[:, t, :], state)
            dropped = tf.nn.dropout(output, keep_prob)
            step_logits.append(tf.matmul(dropped, self.U) + self.b2)

    # Stacking yields (T, batch, classes); transpose to batch-major.
    self.raw_preds = tf.stack(step_logits)
    return tf.reshape(tf.transpose(self.raw_preds, [1, 0, 2]),
                      [-1, Config.max_length, Config.n_classes])
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    Remember:
        * Use the xavier initialization for matrices.
        * tf.nn.dropout takes the keep probability (1 - p_drop) as an
          argument; the keep probability should be the value of
          self.dropout_placeholder.

    Returns:
        preds: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x = self.add_embedding()
    if Config.cell_type == "lstm":
        print("lstm")
        batch_size = tf.shape(x)[0]
        cell_state = tf.zeros([batch_size, Config.hidden_size])
        hidden_state = tf.zeros([batch_size, Config.hidden_size])
        init_state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, hidden_state)
        cell = tf.nn.rnn_cell.BasicLSTMCell(
            Config.hidden_size, state_is_tuple=True)
        # FIX: use the modern tf.split(value, num, axis) signature, and
        # squeeze each (batch, 1, embed) slice to rank 2 — static_rnn
        # expects a list of (batch, embed) tensors.
        inputs_series = tf.split(x, Config.max_length, 1)
        inputs_series = [
            tf.reshape(one_input, [-1, Config.embed_size])
            for one_input in inputs_series
        ]
        outputs, current_state = tf.nn.static_rnn(
            cell, inputs_series, init_state)
        self.U = tf.get_variable(
            'U', [Config.hidden_size, Config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b2 = tf.get_variable(
            'b2', [Config.n_classes, ],
            initializer=tf.contrib.layers.xavier_initializer())
        preds = [tf.matmul(o, self.U) + self.b2 for o in outputs]
        # FIX: tf.stack produces (T, batch, classes); transpose to
        # (batch, T, classes) before reshaping, otherwise batch and
        # time entries are interleaved in the result.
        preds = tf.stack(preds)
        preds = tf.reshape(tf.transpose(preds, [1, 0, 2]),
                           [-1, Config.max_length, Config.n_classes])
        return preds
    else:
        # Keep probability for tf.nn.dropout (NOT the drop probability).
        dropout_rate = self.dropout_placeholder
        preds = []  # Predicted output at each timestep should go here!
        if Config.cell_type == "rnn":
            cell = RNNCell(Config.embed_size, Config.hidden_size)
        elif Config.cell_type == "gru":
            cell = GRUCell(Config.embed_size, Config.hidden_size)
        else:
            assert False, "Cell type undefined"
        # Output projection variables, xavier-initialized.
        self.U = tf.get_variable(
            'U', [Config.hidden_size, Config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b2 = tf.get_variable(
            'b2', [Config.n_classes, ],
            initializer=tf.contrib.layers.xavier_initializer())
        # Initial hidden state is all zeros.
        h = tf.zeros([tf.shape(x)[0], Config.hidden_size])
        with tf.variable_scope("RNN"):
            for time_step in range(Config.max_length):
                # Share cell weights across the unrolled time steps.
                if time_step >= 1:
                    tf.get_variable_scope().reuse_variables()
                o, h = cell(x[:, time_step, :], h)
                o_drop = tf.nn.dropout(o, dropout_rate)
                preds.append(tf.matmul(o_drop, self.U) + self.b2)
        # FIX: tf.pack -> tf.stack (consistent with the rest of the file)
        # and transpose (T, batch, classes) -> (batch, T, classes) before
        # the final reshape.
        preds = tf.stack(preds)
        preds = tf.reshape(tf.transpose(preds, [1, 0, 2]),
                           [-1, Config.max_length, Config.n_classes])
        return preds
def add_prediction_op(self):
    """Build the unrolled recurrent network and return per-step logits.

    Dispatches on Config.cell_type: "lstm" uses the TF built-in
    BasicLSTMCell via static_rnn; "rnn"/"gru" unroll the project cells
    manually with dropout on each step's output.

    Returns:
        preds: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x = self.add_embedding()
    if Config.cell_type == "lstm":
        print("lstm")
        batch_size = tf.shape(x)[0]
        cell_state = tf.zeros([batch_size, Config.hidden_size])
        hidden_state = tf.zeros([batch_size, Config.hidden_size])
        init_state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, hidden_state)
        cell = tf.nn.rnn_cell.BasicLSTMCell(
            Config.hidden_size, state_is_tuple=True)
        # Split (batch, T, embed) into T rank-2 (batch, embed) slices
        # as required by static_rnn.
        inputs_series = tf.split(x, Config.max_length, 1)
        inputs_series = [
            tf.reshape(one_input, [-1, Config.embed_size])
            for one_input in inputs_series
        ]
        outputs, current_state = tf.nn.static_rnn(
            cell, inputs_series, init_state)
        self.U = tf.get_variable(
            'U', [Config.hidden_size, Config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b2 = tf.get_variable(
            'b2', [Config.n_classes, ],
            initializer=tf.contrib.layers.xavier_initializer())
        preds = [tf.matmul(o, self.U) + self.b2 for o in outputs]
        # (T, batch, classes) -> (batch, T, classes).
        preds = tf.stack(preds)
        preds = tf.reshape(tf.transpose(preds, [1, 0, 2]),
                           [-1, Config.max_length, Config.n_classes])
        return preds
    else:
        # Keep probability for tf.nn.dropout.
        dropout_rate = self.dropout_placeholder
        self.raw_preds = []  # Predicted output at each timestep goes here.
        if Config.cell_type == "rnn":
            cell = RNNCell(Config.embed_size, Config.hidden_size)
        elif Config.cell_type == "gru":
            cell = GRUCell(Config.embed_size, Config.hidden_size)
        else:
            assert False, "Cell type undefined"
        self.U = tf.get_variable(
            'U', [Config.hidden_size, Config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        self.b2 = tf.get_variable(
            'b2', [Config.n_classes, ],
            initializer=tf.contrib.layers.xavier_initializer())
        h = tf.zeros([tf.shape(x)[0], Config.hidden_size])
        with tf.variable_scope("RNN"):
            # FIX: was `config.max_length` (lowercase) — a NameError;
            # every other reference in this file uses `Config`.
            for time_step in range(Config.max_length):
                # Share cell weights across the unrolled time steps.
                if time_step >= 1:
                    tf.get_variable_scope().reuse_variables()
                o, h = cell(x[:, time_step, :], h)
                o_drop = tf.nn.dropout(o, dropout_rate)
                self.raw_preds.append(tf.matmul(o_drop, self.U) + self.b2)
        # (T, batch, classes) -> (batch, T, classes).
        preds = tf.stack(self.raw_preds)
        preds = tf.reshape(tf.transpose(preds, [1, 0, 2]),
                           [-1, Config.max_length, Config.n_classes])
        return preds