def __init__(self,
             num_symbols,
             num_embed_units,
             num_units,
             num_layers,
             num_labels,
             embed,
             learning_rate=0.5,
             max_gradient_norm=5.0,
             model='LSTM'):
    """Build the graph of an RNN text classifier (TF 1.x graph mode).

    Args:
        num_symbols: Vocabulary size; only used to shape the embedding
            table when ``embed`` is None.
        num_embed_units: Embedding width; only used when ``embed`` is None.
        num_units: Hidden size of every recurrent cell.
        num_layers: Number of stacked recurrent layers.
        num_labels: Number of output classes.
        embed: Initial value of the embedding table, or None to let
            TensorFlow initialise a [num_symbols, num_embed_units] table.
        learning_rate: Initial learning rate of the SGD optimizer.
        max_gradient_norm: Global-norm threshold used to clip gradients.
        model: Recurrent cell type, one of 'LSTM', 'RNN' or 'GRU'.

    Raises:
        ValueError: If ``model`` is not a supported cell type. (The
            previous code printed a message and returned, leaving an
            object without loss/update/saver attributes.)
    """
    cell_types = {'LSTM': BasicLSTMCell, 'RNN': BasicRNNCell, 'GRU': GRUCell}
    if model not in cell_types:
        raise ValueError(
            "Wrong model! Expected one of %s, got %r"
            % (sorted(cell_types), model))

    # Placeholders fed by the caller.
    self.texts = tf.placeholder(dtype=tf.string,
                                shape=[None, None])  # shape: batch*len
    self.texts_length = tf.placeholder(dtype=tf.int32,
                                       shape=None)  # shape: batch
    self.labels = tf.placeholder(dtype=tf.int64, shape=None)  # shape: batch
    self.keep_prob = tf.placeholder(dtype=tf.float32)

    # Vocabulary table (string to index); unknown strings map to UNK_ID.
    self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                         value_dtype=tf.int64,
                                         default_value=UNK_ID,
                                         shared_name="in_table",
                                         name="in_table",
                                         checkpoint=True)

    # Training bookkeeping.
    self.learning_rate = tf.Variable(float(learning_rate),
                                     trainable=False,
                                     dtype=tf.float32)
    self.global_step = tf.Variable(0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

    # Embedding table (index to vector).
    if embed is None:
        # initialize the embedding randomly
        self.embed = tf.get_variable('embed',
                                     [num_symbols, num_embed_units],
                                     tf.float32)
    else:
        # initialize the embedding by pre-trained word vectors
        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)

    self.embed_input = tf.nn.embedding_lookup(
        self.embed, self.index_input)  # batch*len*embed_unit

    def make_cell():
        # One fresh cell (with output dropout) per call, so that stacked
        # layers never share a cell object or its weights.
        return tf.nn.rnn_cell.DropoutWrapper(
            cell_types[model](num_units),
            input_keep_prob=1.0,
            output_keep_prob=self.keep_prob)

    if num_layers == 1:
        _, final_state = dynamic_rnn(make_cell(),
                                     self.embed_input,
                                     self.texts_length,
                                     dtype=tf.float32,
                                     scope="rnn")
    else:
        multi_cell = tf.contrib.rnn.MultiRNNCell(
            [make_cell() for _ in range(num_layers)],
            state_is_tuple=True)
        # No explicit initial_state: passing dtype lets dynamic_rnn build a
        # zero state for whatever batch size is fed (the old code fixed
        # the batch size to 16).
        _, layer_states = tf.nn.dynamic_rnn(multi_cell,
                                            self.embed_input,
                                            self.texts_length,
                                            dtype=tf.float32,
                                            scope="rnn",
                                            time_major=False)
        # Use the top layer's final state rather than outputs[:, -1, :],
        # which is zero padding for sequences shorter than the batch max.
        final_state = layer_states[-1]

    # An LSTM state is a (c, h) tuple; classify from the hidden state h.
    h_state = final_state.h if model == 'LSTM' else final_state

    logits = tf.layers.dense(h_state, num_labels)

    # self.loss is the summed loss over the batch; the mean drives training.
    self.loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                       logits=logits),
        name='loss')
    mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                    dtype=tf.float32)
    predict_labels = tf.argmax(logits, 1, 'predict_labels')
    # Number (not ratio) of correct predictions in the batch.
    self.accuracy = tf.reduce_sum(
        tf.cast(tf.equal(self.labels, predict_labels), tf.int32),
        name='accuracy')

    self.params = tf.trainable_variables()

    # Gradient computation, global-norm clipping and SGD update.
    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    gradients = tf.gradients(mean_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                      global_step=self.global_step)

    tf.summary.scalar('loss/step', self.loss)
    for each in tf.trainable_variables():
        tf.summary.histogram(each.name, each)

    self.merged_summary_op = tf.summary.merge_all()

    self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                max_to_keep=3,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)
def __init__(self,
             num_symbols,
             num_embed_units,
             num_units,
             num_layers,
             num_labels,
             embed,
             learning_rate=0.5,
             max_gradient_norm=5.0,
             model='lstm'):
    """Build the graph of an RNN text classifier (TF 1.x graph mode).

    With ``num_layers == 1`` a single RNN reads ``texts``. For any other
    value, two single-layer RNNs are built, one over ``texts`` and one over
    the extra ``reverse_texts`` placeholder, and their final states are
    summed.

    Args:
        num_symbols: Vocabulary size; only used to shape the embedding
            table when ``embed`` is None.
        num_embed_units: Embedding width; only used when ``embed`` is None.
        num_units: Hidden size of every recurrent cell.
        num_layers: Selects the architecture as described above.
        num_labels: Number of output classes.
        embed: Initial value of the embedding table, or None to let
            TensorFlow initialise a [num_symbols, num_embed_units] table.
        learning_rate: Initial learning rate of the SGD optimizer.
        max_gradient_norm: Global-norm threshold used to clip gradients.
        model: Recurrent cell type, one of 'rnn', 'gru' or 'lstm'
            (default 'lstm', the value that used to be hard-coded).

    Raises:
        ValueError: If ``model`` is not a supported cell type.
    """
    cell_types = {'rnn': BasicRNNCell, 'gru': GRUCell, 'lstm': BasicLSTMCell}
    if model not in cell_types:
        raise ValueError("Unsupported model %r; expected one of %s"
                         % (model, sorted(cell_types)))

    # Placeholders fed by the caller.
    self.texts = tf.placeholder(tf.string, [None, None],
                                name="texts")  # shape: batch*len
    self.texts_length = tf.placeholder(tf.int64, [None],
                                       name="texts_length")  # shape: batch
    self.labels = tf.placeholder(tf.int64, [None],
                                 name="labels")  # shape: batch
    # Dropout keep probability. Defaults to FLAGS.keep_prob, so callers
    # that feed nothing get the old behaviour; evaluation code can feed 1.0
    # to switch dropout off.
    self.keep_prob = tf.placeholder_with_default(
        tf.constant(FLAGS.keep_prob, dtype=tf.float32),
        shape=[],
        name="keep_prob")

    # Vocabulary table (string to index); unknown strings map to UNK_ID.
    self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                         value_dtype=tf.int64,
                                         default_value=UNK_ID,
                                         shared_name="in_table",
                                         name="in_table",
                                         checkpoint=True)

    # Training bookkeeping.
    self.learning_rate = tf.Variable(float(learning_rate),
                                     trainable=False,
                                     dtype=tf.float32)
    learning_rate_decay_factor = 0.9
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

    # Embedding table (index to vector).
    if embed is None:
        # initialize the embedding randomly
        self.embed = tf.get_variable('embed',
                                     [num_symbols, num_embed_units],
                                     tf.float32)
    else:
        # initialize the embedding by pre-trained word vectors
        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)

    self.embed_input = tf.nn.embedding_lookup(
        self.embed, self.index_input)  # batch*len*embed_unit

    def make_cell():
        # Fresh cell wrapped with output dropout.
        return tf.nn.rnn_cell.DropoutWrapper(
            cell_types[model](num_units),
            input_keep_prob=1.0,
            output_keep_prob=self.keep_prob)

    def hidden(state):
        # An LSTM state is a (c, h) tuple; classify from the hidden state h.
        return state.h if model == 'lstm' else state

    if num_layers == 1:
        _, final_state = dynamic_rnn(make_cell(),
                                     self.embed_input,
                                     self.texts_length,
                                     dtype=tf.float32,
                                     scope="rnn")
        states = hidden(final_state)
    else:
        self.reverse_texts = tf.placeholder(
            tf.string, [None, None],
            name="reverse_texts")  # shape: batch*len
        self.index_reverse_input = self.symbol2index.lookup(
            self.reverse_texts)
        self.embed_reverse_input = tf.nn.embedding_lookup(
            self.embed, self.index_reverse_input)  # batch*len*embed_unit

        _, forward_state = dynamic_rnn(make_cell(),
                                       self.embed_input,
                                       self.texts_length,
                                       dtype=tf.float32,
                                       scope="rnn")
        # The second RNN needs its own variable scope: building it under
        # "rnn" again collides with the forward RNN's variables.
        _, reverse_state = dynamic_rnn(make_cell(),
                                       self.embed_reverse_input,
                                       self.texts_length,
                                       dtype=tf.float32,
                                       scope="rnn_reverse")
        states = hidden(forward_state) + hidden(reverse_state)

    # Output size follows num_labels instead of a hard-coded 5.
    logits = tf.layers.dense(inputs=states, units=num_labels,
                             activation=None)

    # self.loss is the summed loss over the batch; the mean drives training.
    self.loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                       logits=logits),
        name='loss')
    mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                    dtype=tf.float32)
    predict_labels = tf.argmax(logits, 1, 'predict_labels')
    # Number (not ratio) of correct predictions in the batch.
    self.accuracy = tf.reduce_sum(
        tf.cast(tf.equal(self.labels, predict_labels), tf.int32),
        name='accuracy')

    self.params = tf.trainable_variables()

    # Gradient computation, global-norm clipping and SGD update.
    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    gradients = tf.gradients(mean_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                      global_step=self.global_step)

    tf.summary.scalar('loss/step', self.loss)
    for each in tf.trainable_variables():
        tf.summary.histogram(each.name, each)

    self.merged_summary_op = tf.summary.merge_all()

    self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                max_to_keep=3,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)
def __init__(self,
             num_symbols,
             num_embed_units,
             num_units,
             num_layers,
             num_labels,
             embed,
             learning_rate=0.001,
             max_gradient_norm=5.0,
             learning_rate_decay_factor=0.1):
    """Build the graph of a bidirectional RNN text classifier (TF 1.x).

    ``num_layers == 1`` builds a bidirectional LSTM; any other value builds
    a bidirectional stack of ``num_layers`` GRU cells. In both cases the
    forward and backward final hidden states are summed and fed to a
    linear output layer.

    Args:
        num_symbols: Vocabulary size; only used to shape the embedding
            table when ``embed`` is None.
        num_embed_units: Embedding width; only used when ``embed`` is None.
        num_units: Hidden size of every recurrent cell.
        num_layers: Number of recurrent layers per direction.
        num_labels: Number of output classes.
        embed: Initial value of the embedding table, or None to let
            TensorFlow initialise a [num_symbols, num_embed_units] table.
        learning_rate: Initial learning rate of the SGD optimizer.
        max_gradient_norm: Global-norm threshold used to clip gradients.
        learning_rate_decay_factor: Factor the learning rate is multiplied
            by each time ``learning_rate_update_op`` is run.
    """
    # Placeholders fed by the caller.
    self.texts = tf.placeholder(dtype=tf.string,
                                shape=[None, None])  # shape: batch*len
    self.texts_length = tf.placeholder(dtype=tf.int32,
                                       shape=[None])  # shape: batch
    self.labels = tf.placeholder(dtype=tf.int32,
                                 shape=[None])  # shape: batch
    self.output_keep_prob = tf.placeholder(dtype=tf.float32, shape=[])

    # Vocabulary table (string to index); unknown strings map to UNK_ID.
    self.symbol2index = MutableHashTable(key_dtype=tf.string,
                                         value_dtype=tf.int64,
                                         default_value=UNK_ID,
                                         shared_name="in_table",
                                         name="in_table",
                                         checkpoint=True)

    # Training bookkeeping.
    self.learning_rate = tf.Variable(float(learning_rate),
                                     trainable=False,
                                     dtype=tf.float32)
    self.learning_rate_update_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

    # Embedding table (index to vector).
    if embed is None:
        # initialize the embedding randomly
        self.embed = tf.get_variable('embed',
                                     [num_symbols, num_embed_units],
                                     tf.float32)
    else:
        # initialize the embedding by pre-trained word vectors
        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)

    self.embed_input = tf.nn.embedding_lookup(
        self.embed, self.index_input)  # batch*len*embed_unit

    def with_dropout(cell):
        # Output dropout driven by the fed placeholder. The stacked branch
        # used to reference an undefined bare name `output_keep_prob`.
        return tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=self.output_keep_prob)

    if num_layers == 1:
        cell_fw = with_dropout(BasicLSTMCell(num_units))
        cell_bw = with_dropout(BasicLSTMCell(num_units))
        _, (state_fw, state_bw) = bidirectional_dynamic_rnn(
            cell_fw,
            cell_bw,
            self.embed_input,
            self.texts_length,
            dtype=tf.float32,
            scope="rnn")
        # LSTM states are (c, h) tuples; sum the two hidden states h.
        states = state_fw.h + state_bw.h
    else:
        cell_fw = tf.contrib.rnn.MultiRNNCell(
            [with_dropout(GRUCell(num_units)) for _ in range(num_layers)],
            state_is_tuple=True)
        cell_bw = tf.contrib.rnn.MultiRNNCell(
            [with_dropout(GRUCell(num_units)) for _ in range(num_layers)],
            state_is_tuple=True)
        _, (state_fw, state_bw) = bidirectional_dynamic_rnn(
            cell_fw,
            cell_bw,
            self.embed_input,
            self.texts_length,
            dtype=tf.float32,
            scope="stacked_rnn")
        # Each direction's state is a tuple with one entry per layer;
        # sum the top layers.
        states = state_fw[num_layers - 1] + state_bw[num_layers - 1]

    # Linear output layer.
    self.w1 = tf.Variable(
        tf.random_normal(shape=[num_units, num_labels],
                         stddev=tf.sqrt(2.0 / (num_units + num_labels))))
    self.b1 = tf.Variable(tf.constant(0.0, shape=[num_labels]))
    logits = tf.matmul(states, self.w1) + self.b1

    # self.loss is the summed loss over the batch; the mean drives training.
    self.loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                       logits=logits),
        name='loss')
    mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                    dtype=tf.float32)
    self.predict_labels = tf.argmax(logits,
                                    1,
                                    'predict_labels',
                                    output_type=tf.int32)
    # Number (not ratio) of correct predictions in the batch.
    self.accuracy = tf.reduce_sum(
        tf.cast(tf.equal(self.labels, self.predict_labels), tf.int32),
        name='accuracy')

    self.params = tf.trainable_variables()

    # Gradient computation, global-norm clipping and SGD update.
    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    gradients = tf.gradients(mean_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                      global_step=self.global_step)

    tf.summary.scalar('loss/step', self.loss)
    for each in tf.trainable_variables():
        tf.summary.histogram(each.name, each)

    self.merged_summary_op = tf.summary.merge_all()

    self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                max_to_keep=3,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)
def __init__(self,
             num_symbols,
             num_embed_units,
             num_units,
             num_layers,
             num_labels,
             embed,
             learning_rate=0.5,
             max_gradient_norm=5.0,
             keep_prob=1.,
             weight_decay=1e-10,
             RNN_type="BasicRNN"):
    """Build the graph of a bidirectional RNN text classifier (TF 1.x).

    The forward and backward final hidden states are summed, passed
    through dropout and projected to ``num_labels`` logits. The loss is
    the summed cross-entropy plus an L2 penalty over all trainable
    variables.

    Args:
        num_symbols: Vocabulary size; only used to shape the embedding
            table when ``embed`` is None.
        num_embed_units: Embedding width; only used when ``embed`` is None.
        num_units: Hidden size of each recurrent cell.
        num_layers: Accepted for interface compatibility; not used here.
        num_labels: Number of output classes.
        embed: Initial value of the embedding table, or None to let
            TensorFlow initialise a [num_symbols, num_embed_units] table.
        learning_rate: Initial learning rate of the SGD optimizer.
        max_gradient_norm: Global-norm threshold used to clip gradients.
        keep_prob: Initial value of the (non-trainable) dropout
            keep-probability variable.
        weight_decay: Initial value of the L2 penalty coefficient.
        RNN_type: "LSTM" selects BasicLSTMCell; any other value (including
            the default) selects GRUCell.
    """
    # Placeholders fed by the caller.
    self.texts = tf.placeholder(dtype=tf.string,
                                shape=[None, None])  # shape: batch*len
    self.texts_length = tf.placeholder(dtype=tf.int32,
                                       shape=[None])  # shape: batch
    self.labels = tf.placeholder(dtype=tf.int64,
                                 shape=[None])  # shape: batch

    # Vocabulary table (string to index); unknown strings map to UNK_ID.
    self.symbol2index = MutableHashTable(
        key_dtype=tf.string,
        value_dtype=tf.int64,
        default_value=UNK_ID,
        shared_name="in_table",
        name="in_table",
        checkpoint=True)

    # Training bookkeeping and hyper-parameters kept as graph variables.
    self.learning_rate = tf.Variable(float(learning_rate),
                                     trainable=False,
                                     dtype=tf.float32)
    self.weight_decay = tf.Variable(float(weight_decay),
                                    trainable=False,
                                    dtype=tf.float32)
    self.keep_prob = tf.Variable(float(keep_prob),
                                 trainable=False,
                                 dtype=tf.float32)
    self.global_step = tf.Variable(0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

    # Embedding table (index to vector).
    if embed is None:
        # initialize the embedding randomly
        self.embed = tf.get_variable('embed',
                                     [num_symbols, num_embed_units],
                                     tf.float32)
    else:
        # initialize the embedding by pre-trained word vectors
        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)

    self.embed_input = tf.nn.embedding_lookup(
        self.embed, self.index_input)  # batch*len*embed_unit

    use_lstm = RNN_type == "LSTM"

    # Scope names are kept exactly as they were (typos included) so that
    # any graph names derived from them do not change.
    with tf.variable_scope("foward_cell"):
        fw_cell = BasicLSTMCell(num_units) if use_lstm else GRUCell(num_units)
    with tf.variable_scope("barkward_cell"):
        bw_cell = BasicLSTMCell(num_units) if use_lstm else GRUCell(num_units)

    _, (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
        fw_cell,
        bw_cell,
        self.embed_input,
        self.texts_length,
        dtype=tf.float32,
        scope="bi_lstm")

    if use_lstm:
        # LSTM states are (c, h) tuples; sum the two hidden states h.
        self.y0 = fw_state.h + bw_state.h
    else:
        # A GRU state is already the hidden-state tensor. Indexing it with
        # [1], as was done for every cell type, picks batch row 1 instead.
        self.y0 = fw_state + bw_state

    self.y0_dp = tf.nn.dropout(self.y0, keep_prob=self.keep_prob)
    # NOTE(review): y1 is built but the logits below read y0_dp, not y1, so
    # this hidden layer only contributes to the L2 term. Kept unchanged
    # because the intent is unclear - confirm whether y2 should take y1.
    self.y1 = tf.layers.dense(inputs=self.y0_dp,
                              units=128,
                              activation=tf.nn.sigmoid)
    self.y2 = tf.layers.dense(inputs=self.y0_dp, units=num_labels)
    logits = self.y2

    with tf.name_scope("l2_loss"):
        trainable = tf.trainable_variables()
        self.lossL2 = tf.add_n(
            [tf.nn.l2_loss(v) for v in trainable]) * self.weight_decay

    # self.loss is the summed cross-entropy over the batch plus the L2 term.
    self.loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                       logits=logits),
        name='loss') + self.lossL2
    mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                    dtype=tf.float32)
    predict_labels = tf.argmax(logits, 1, 'predict_labels')
    # Number (not ratio) of correct predictions in the batch.
    self.accuracy = tf.reduce_sum(
        tf.cast(tf.equal(self.labels, predict_labels), tf.int32),
        name='accuracy')

    self.params = tf.trainable_variables()

    # Gradient computation, global-norm clipping and SGD update.
    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
    gradients = tf.gradients(mean_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.train_op = opt.apply_gradients(zip(clipped_gradients, self.params),
                                        global_step=self.global_step)

    tf.summary.scalar('loss/step', self.loss)
    for each in tf.trainable_variables():
        tf.summary.histogram(each.name, each)

    self.merged_summary_op = tf.summary.merge_all()

    self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                max_to_keep=3,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)
def __init__(self,
             num_symbols,
             num_embed_units,
             num_units,
             num_layers,
             num_labels,
             embed,
             learning_rate=0.5,
             max_gradient_norm=5.0):
    """Build the graph of an LSTM text classifier (TF 1.x graph mode).

    The classifier reads the RNN output at each sequence's last valid time
    step, projects it to ``num_labels`` logits and is trained with Adam on
    the summed cross-entropy plus an L2 penalty.

    Args:
        num_symbols: Vocabulary size; only used to shape the embedding
            table when ``embed`` is None.
        num_embed_units: Embedding width; only used when ``embed`` is None.
        num_units: Hidden size of every LSTM cell.
        num_layers: Number of stacked LSTM layers. Only 1 was usable
            before; larger values now build a stacked cell.
        num_labels: Number of output classes.
        embed: Initial value of the embedding table, or None to let
            TensorFlow initialise a [num_symbols, num_embed_units] table.
        learning_rate: Initial learning rate of the Adam optimizer.
        max_gradient_norm: Global-norm threshold used to clip gradients.
    """
    # Placeholders fed by the caller.
    self.texts = tf.placeholder(tf.string, [None, None],
                                name='texts')  # shape: batch*len
    self.texts_length = tf.placeholder(tf.int32, [None],
                                       name='texts_length')  # shape: batch
    self.labels = tf.placeholder(tf.int32, [None], 'labels')  # shape: batch
    # Dropout keep probability for RNN inputs and outputs. Defaults to the
    # previously hard-coded 0.5, so callers that feed nothing see no
    # change; evaluation code can feed 1.0 to switch dropout off.
    self.keep_prob = tf.placeholder_with_default(0.5, shape=[],
                                                 name='keep_prob')

    # Vocabulary table (string to index); unknown strings map to UNK_ID.
    self.symbol2index = MutableHashTable(
        key_dtype=tf.string,
        value_dtype=tf.int64,
        default_value=UNK_ID,
        shared_name="in_table",
        name="in_table",
        checkpoint=True)

    # Training bookkeeping.
    self.learning_rate = tf.Variable(float(learning_rate),
                                     trainable=False,
                                     dtype=tf.float32)
    self.global_step = tf.Variable(0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    self.index_input = self.symbol2index.lookup(self.texts)  # batch*len

    # Embedding table (index to vector).
    if embed is None:
        # initialize the embedding randomly
        self.embed = tf.get_variable('embed',
                                     [num_symbols, num_embed_units],
                                     tf.float32)
    else:
        # initialize the embedding by pre-trained word vectors
        self.embed = tf.get_variable('embed',
                                     dtype=tf.float32,
                                     initializer=embed)

    self.embed_input = tf.nn.embedding_lookup(
        self.embed, self.index_input)  # batch*len*embed_unit

    def make_cell():
        # Fresh LSTM cell with dropout on both its input and its output.
        return tf.nn.rnn_cell.DropoutWrapper(
            BasicLSTMCell(num_units),
            input_keep_prob=self.keep_prob,
            output_keep_prob=self.keep_prob)

    if num_layers == 1:
        # Kept as a bare cell (no MultiRNNCell wrapper) so the single-layer
        # graph is built exactly as before.
        cell_dr = make_cell()
    else:
        cell_dr = tf.nn.rnn_cell.MultiRNNCell(
            [make_cell() for _ in range(num_layers)])

    outputs, states = dynamic_rnn(cell_dr,
                                  self.embed_input,
                                  self.texts_length,
                                  dtype=tf.float32,
                                  scope="rnn")

    # Pick, for every example, the output at its last valid time step.
    # The index is clamped at 0 so a zero-length sequence does not produce
    # an out-of-range index of -1.
    last_step = tf.maximum(self.texts_length - 1, 0)
    indices = tf.stack([tf.range(tf.shape(outputs)[0]), last_step], axis=1)
    last_output = tf.gather_nd(outputs, indices)
    self.outputs = outputs
    self.states = states
    self.last_output = last_output

    # Output size follows num_labels instead of a hard-coded 5.
    logits = tf.layers.dense(last_output, num_labels)
    self.logits = logits

    # self.loss is the summed cross-entropy over the batch plus L2.
    self.loss = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels,
                                                       logits=logits),
        name='loss')
    # L2 penalty over trainable variables whose name contains neither
    # "noreg" nor "Bias".
    l2 = tf.cast(0.005, tf.float32) * sum(
        tf.nn.l2_loss(tf_var)
        for tf_var in tf.trainable_variables()
        if not ("noreg" in tf_var.name or "Bias" in tf_var.name))
    self.loss += l2
    mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0],
                                    dtype=tf.float32)
    predict_labels = tf.cast(tf.argmax(logits, 1, 'predict_labels'),
                             tf.int32)
    self.predict_labels = predict_labels
    # Number (not ratio) of correct predictions in the batch.
    self.accuracy = tf.reduce_sum(
        tf.cast(tf.equal(self.labels, predict_labels), tf.int32),
        name='accuracy')

    self.params = tf.trainable_variables()

    # Gradient computation, global-norm clipping and Adam update.
    opt = tf.train.AdamOptimizer(self.learning_rate)
    gradients = tf.gradients(mean_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
        gradients, max_gradient_norm)
    self.update = opt.apply_gradients(zip(clipped_gradients, self.params),
                                      global_step=self.global_step)

    tf.summary.scalar('loss/step', self.loss)
    for each in tf.trainable_variables():
        tf.summary.histogram(each.name, each)
    tf.summary.histogram('logits', logits)
    tf.summary.histogram('gradient_norm', self.gradient_norm)
    tf.summary.histogram('rnn_output', self.last_output)
    # Log the clipped gradients themselves; these summaries used to record
    # the parameter values a second time.
    for grad, param in zip(clipped_gradients, self.params):
        if grad is None:
            continue
        # Sparse (embedding) gradients arrive as IndexedSlices; log only
        # the rows that were actually updated.
        grad_values = (grad.values
                       if isinstance(grad, tf.IndexedSlices) else grad)
        tf.summary.histogram('clipped_gradients/%s' % param.name,
                             grad_values)

    self.merged_summary_op = tf.summary.merge_all()

    self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                                max_to_keep=3,
                                pad_step_number=True,
                                keep_checkpoint_every_n_hours=1.0)