def create_model(max_word_id, is_test=False):
    """Build the embedding-RNN seq2seq graph and wrap it in a TFLearn DNN.

    The graph reads one int32 input named "XY" whose width is
    ``max_seq_len + max_seq_len``: the first ``max_seq_len`` columns are
    sliced off as encoder inputs and the next ``max_seq_len`` columns as
    decoder inputs.

    Relies on names defined outside this function: ``max_seq_len``,
    ``learning_rate``, ``sequence_loss`` and ``accuracy``.

    Args:
        max_word_id: Largest word id. ``max_word_id + 1`` is used as the GO
            marker, and the symbol counts are derived from it.
        is_test: Forwarded as ``feed_previous`` to
            ``seq2seq.embedding_rnn_seq2seq``.

    Returns:
        The ``tflearn.DNN`` instance built around the regression layer.
    """
    # GO marker: one past the largest word id.
    GO_VALUE = max_word_id + 1

    network = tflearn.input_data(
        shape=[None, max_seq_len + max_seq_len], dtype=tf.int32, name="XY")

    # Left half of "XY" -> list of per-step encoder tensors.
    encoder_inputs = tf.slice(network, [0, 0], [-1, max_seq_len], name="enc_in")
    encoder_inputs = tf.unpack(encoder_inputs, axis=1)

    # Right half of "XY" -> list of per-step decoder tensors.
    decoder_inputs = tf.slice(
        network, [0, max_seq_len], [-1, max_seq_len], name="dec_in")
    decoder_inputs = tf.unpack(decoder_inputs, axis=1)

    # Prepend a GO step and drop the last decoder step so the list length
    # stays at max_seq_len.
    go_input = tf.mul(tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[:max_seq_len - 1]

    num_encoder_symbols = max_word_id + 1  # ids start at 0
    num_decoder_symbols = max_word_id + 2  # includes GO

    cell = rnn_cell.BasicLSTMCell(16 * max_seq_len, state_is_tuple=True)
    model_outputs, states = seq2seq.embedding_rnn_seq2seq(
        encoder_inputs,
        decoder_inputs,
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=max_word_id,
        feed_previous=is_test)

    # Re-join the per-step outputs along axis 1.
    network = tf.pack(model_outputs, axis=1)

    targetY = tf.placeholder(shape=[None, max_seq_len], dtype=tf.int32, name="Y")
    network = tflearn.regression(
        network,
        placeholder=targetY,
        optimizer='adam',
        learning_rate=learning_rate,
        loss=sequence_loss,
        metric=accuracy,
        name="Y")

    # Single-argument call form prints the same text on Python 2 and 3;
    # the former print statements were a syntax error on Python 3.
    print("begin create DNN model")
    model = tflearn.DNN(network, tensorboard_verbose=0, checkpoint_path=None)
    print("create DNN model finish")
    return model
def model(self, mode="train", num_layers=1, cell_size=32, cell_type="BasicLSTMCell",
          embedding_size=20, learning_rate=0.0001, tensorboard_verbose=0,
          checkpoint_path=None):
    '''
    Build tensor specifying graph of operations for the seq2seq neural network model
    and wrap it in a TFLearn DNN.

    mode = string, either "train" or "predict" (anything else fails the assertion);
           "train" sets feed_previous to False, "predict" sets it to True
    cell_type = attribute of rnn_cell specifying which RNN cell type to use; the class
                is called as cls(cell_size, state_is_tuple=True)
    cell_size = size for the hidden layer in the RNN cell
    num_layers = number of RNN cell layers to use; 1 uses a bare cell, any other value
                 wraps that many separately constructed cells in a MultiRNNCell
    embedding_size = forwarded to the seq2seq builder
    learning_rate = forwarded to tflearn.regression (optimizer is 'adam')
    tensorboard_verbose = forwarded to tflearn.DNN
    checkpoint_path = forwarded to tflearn.DNN; when falsy, "s2s_checkpoint.tfl" is
                      used, prefixed with self.data_dir + "/" if self.data_dir is set

    Side effects: sets self.n_input_symbols and self.n_output_symbols.

    Raises Exception if self.seq2seq_model is neither "embedding_rnn" nor
    "embedding_attention".

    Return TFLearn model instance.  Use DNN model for this.
    '''
    assert mode in ["train", "predict"]

    checkpoint_path = checkpoint_path or (
        "%s%ss2s_checkpoint.tfl" % (self.data_dir or "", "/" if self.data_dir else ""))
    GO_VALUE = self.out_max_int + 1  # unique integer value used to trigger decoder outputs in the seq2seq RNN

    network = tflearn.input_data(
        shape=[None, self.in_seq_len + self.out_seq_len], dtype=tf.int32, name="XY")
    encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len], name="enc_in")  # get encoder inputs
    encoder_inputs = tf.unstack(encoder_inputs, axis=1)  # transform into list of self.in_seq_len elements, each [-1]

    decoder_inputs = tf.slice(
        network, [0, self.in_seq_len], [-1, self.out_seq_len], name="dec_in")  # get decoder inputs
    decoder_inputs = tf.unstack(decoder_inputs, axis=1)  # transform into list of self.out_seq_len elements, each [-1]

    # insert "GO" symbol as the first decoder input; drop the last decoder input
    go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]

    feed_previous = not (mode == "train")

    if self.verbose > 3:
        print("feed_previous = %s" % str(feed_previous))
        print("encoder inputs: %s" % str(encoder_inputs))
        print("decoder inputs: %s" % str(decoder_inputs))
        print("len decoder inputs: %s" % len(decoder_inputs))

    self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

    def _new_cell():
        # Build one cell object per layer. Repeating a single instance
        # ([cell] * n) hands the same object to every layer, which newer
        # TensorFlow releases reject or treat as weight sharing.
        return getattr(rnn_cell, cell_type)(cell_size, state_is_tuple=True)

    if num_layers == 1:
        cell = _new_cell()
    else:
        cell = rnn_cell.MultiRNNCell([_new_cell() for _ in range(num_layers)])

    if self.seq2seq_model == "embedding_rnn":
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % self.seq2seq_model)

    tf.add_to_collection(
        tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
        model_outputs)  # for TFLearn to know what to save and restore

    # model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape
    # [batch_size x output_size] containing the generated outputs.
    if self.verbose > 2:
        print("model outputs: %s" % model_outputs)
    network = tf.stack(model_outputs, axis=1)  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
    if self.verbose > 2:
        print("packed model outputs: %s" % network)

    if self.verbose > 3:
        all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
        print("all_vars = %s" % all_vars)

    with tf.name_scope("TargetsData"):  # placeholder for target variable (i.e. trainY input)
        targetY = tf.placeholder(shape=[None, self.out_seq_len], dtype=tf.int32, name="Y")

    network = tflearn.regression(network,
                                 placeholder=targetY,
                                 optimizer='adam',
                                 learning_rate=learning_rate,
                                 loss=self.sequence_loss,
                                 metric=self.accuracy,
                                 name="Y")

    model = tflearn.DNN(network,
                        tensorboard_verbose=tensorboard_verbose,
                        checkpoint_path=checkpoint_path)
    return model
decoder_inputs = tf.slice(network, [0, max_input_len], [-1, max_output_len], name="dec_in") decoder_inputs = tf.unpack(decoder_inputs, axis=1) go_input = tf.mul(tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE) decoder_inputs = [go_input] + decoder_inputs[:max_output_len - 1] num_encoder_symbols = max_int + 1 # 从0起始 num_decoder_symbols = max_int + 2 # 包括GO print encoder_inputs print decoder_inputs cell = rnn_cell.BasicLSTMCell(16, state_is_tuple=True) model_outputs, states = seq2seq.embedding_rnn_seq2seq( encoder_inputs, decoder_inputs, cell, num_encoder_symbols=num_encoder_symbols, num_decoder_symbols=num_decoder_symbols, embedding_size=embedding_size, feed_previous=False) network = tf.pack(model_outputs, axis=1) def sequence_loss(y_pred, y_true): logits = tf.unpack(y_pred, axis=1) targets = tf.unpack(y_true, axis=1) weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets] return seq2seq.sequence_loss(logits, targets, weights) def accuracy(y_pred, y_true, x_in):
for t in range(seq_length) ] weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels] # Decoder input: prepend some "GO" token and drop the final # token of the encoder input dec_inp = ([tf.zeros_like(enc_inp[0], dtype=np.int32, name="GO")] + enc_inp[:-1]) # Initial memory value for recurrence. prev_mem = tf.zeros((batch_size, memory_dim)) cell = rnn_cell.GRUCell(memory_dim) dec_outputs, dec_memory = seq2seq.embedding_rnn_seq2seq( enc_inp, dec_inp, cell, vocab_size, vocab_size, embedding_dim) loss = seq2seq.sequence_loss(dec_outputs, labels, weights, vocab_size) tf.scalar_summary("loss", loss) magnitude = tf.sqrt(tf.reduce_sum(tf.square(dec_memory[1]))) tf.scalar_summary("magnitude at t=1", magnitude) summary_op = tf.merge_all_summaries() learning_rate = 0.05 momentum = 0.9 optimizer = tf.train.MomentumOptimizer(learning_rate, momentum) train_op = optimizer.minimize(loss) logdir = tempfile.mkdtemp() print(logdir) summary_writer = tf.train.SummaryWriter(logdir, sess.graph)
def model(self, mode="train", num_layers=1, cell_size=32, cell_type="BasicLSTMCell",
          embedding_size=20, learning_rate=0.0001, tensorboard_verbose=0,
          checkpoint_path=None):
    '''
    Build tensor specifying graph of operations for the seq2seq neural network model
    and wrap it in a TFLearn DNN.

    mode = string, either "train" or "predict" (anything else fails the assertion);
           "train" sets feed_previous to False, "predict" sets it to True
    cell_type = attribute of rnn_cell specifying which RNN cell type to use; the class
                is called as cls(cell_size, state_is_tuple=True)
    cell_size = size for the hidden layer in the RNN cell
    num_layers = number of RNN cell layers to use; 1 uses a bare cell, any other value
                 wraps that many separately constructed cells in a MultiRNNCell
    embedding_size = forwarded to the seq2seq builder
    learning_rate = forwarded to tflearn.regression (optimizer is 'adam')
    tensorboard_verbose = forwarded to tflearn.DNN
    checkpoint_path = forwarded to tflearn.DNN; when falsy, "s2s_checkpoint.tfl" is
                      used, prefixed with self.data_dir + "/" if self.data_dir is set

    Side effects: sets self.n_input_symbols and self.n_output_symbols.

    Raises Exception if self.seq2seq_model is neither "embedding_rnn" nor
    "embedding_attention".

    Return TFLearn model instance.  Use DNN model for this.
    '''
    assert mode in ["train", "predict"]

    checkpoint_path = checkpoint_path or (
        "%s%ss2s_checkpoint.tfl" % (self.data_dir or "", "/" if self.data_dir else ""))
    GO_VALUE = self.out_max_int + 1  # unique integer value used to trigger decoder outputs in the seq2seq RNN

    network = tflearn.input_data(
        shape=[None, self.in_seq_len + self.out_seq_len], dtype=tf.int32, name="XY")
    encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len], name="enc_in")  # get encoder inputs
    encoder_inputs = tf.unstack(encoder_inputs, axis=1)  # transform into list of self.in_seq_len elements, each [-1]

    decoder_inputs = tf.slice(
        network, [0, self.in_seq_len], [-1, self.out_seq_len], name="dec_in")  # get decoder inputs
    decoder_inputs = tf.unstack(decoder_inputs, axis=1)  # transform into list of self.out_seq_len elements, each [-1]

    # insert "GO" symbol as the first decoder input; drop the last decoder input
    go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[: self.out_seq_len - 1]

    feed_previous = not (mode == "train")

    if self.verbose > 3:
        print ("feed_previous = %s" % str(feed_previous))
        print ("encoder inputs: %s" % str(encoder_inputs))
        print ("decoder inputs: %s" % str(decoder_inputs))
        print ("len decoder inputs: %s" % len(decoder_inputs))

    self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

    def _new_cell():
        # Build one cell object per layer. Repeating a single instance
        # ([cell] * n) hands the same object to every layer, which newer
        # TensorFlow releases reject or treat as weight sharing.
        return getattr(rnn_cell, cell_type)(cell_size, state_is_tuple=True)

    if num_layers == 1:
        cell = _new_cell()
    else:
        cell = rnn_cell.MultiRNNCell([_new_cell() for _ in range(num_layers)])

    if self.seq2seq_model == "embedding_rnn":
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % self.seq2seq_model)

    tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
                         model_outputs)  # for TFLearn to know what to save and restore

    # model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape
    # [batch_size x output_size] containing the generated outputs.
    if self.verbose > 2:
        print ("model outputs: %s" % model_outputs)
    network = tf.stack(model_outputs, axis=1)  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
    if self.verbose > 2:
        print ("packed model outputs: %s" % network)

    if self.verbose > 3:
        all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
        print ("all_vars = %s" % all_vars)

    with tf.name_scope("TargetsData"):  # placeholder for target variable (i.e. trainY input)
        targetY = tf.placeholder(shape=[None, self.out_seq_len], dtype=tf.int32, name="Y")

    network = tflearn.regression(network,
                                 placeholder=targetY,
                                 optimizer='adam',
                                 learning_rate=learning_rate,
                                 loss=self.sequence_loss,
                                 metric=self.accuracy,
                                 name="Y")

    model = tflearn.DNN(network,
                        tensorboard_verbose=tensorboard_verbose,
                        checkpoint_path=checkpoint_path)
    return model
def seq2seq_function(encoder_input, decoder_input):
    """Run the embedding RNN seq2seq builder on one encoder/decoder pair.

    Uses ``cell``, ``self`` and ``test_mode`` from the surrounding scope:
    ``self.n_classes`` fills both symbol-count positions,
    ``self.embedding_dim`` is the embedding size, and ``test_mode`` is
    forwarded as ``feed_previous``.

    Returns whatever ``seq2seq.embedding_rnn_seq2seq`` returns.
    """
    n_symbols = self.n_classes
    result = seq2seq.embedding_rnn_seq2seq(
        encoder_input,
        decoder_input,
        cell,
        n_symbols,
        n_symbols,
        self.embedding_dim,
        feed_previous=test_mode,
    )
    return result
def _prepare_model(self):
    """Create the placeholders, seq2seq graph, loss and training op.

    Two decoders are built inside the "decoders" variable scope from the
    same inputs and cell: one with ``feed_previous=False`` (its outputs
    feed the loss) and, after ``scope.reuse_variables()``, one with
    ``feed_previous=True`` (stored as ``self.decode_outs_test``).
    ``self.params.attention`` selects between
    ``seq2seq.embedding_attention_seq2seq`` and
    ``seq2seq.embedding_rnn_seq2seq``.

    Attributes set on ``self``:
        encode_in: list of ``self.seq_in_len`` int32 placeholders "ei_<i>".
        labels: list of ``self.seq_out_len`` int32 placeholders "l_<i>".
        keep_prob: float placeholder, only created when ``self.params`` has
            a ``keep_probability`` attribute.
        decode_outs_test: outputs of the ``feed_previous=True`` decoder.
        loss: result of ``seq2seq.sequence_loss`` on the training decoder.
        optimizer: ``tf.train.AdamOptimizer(1e-4)``.
        train_op: ``optimizer.minimize(loss)``.
    """
    self.encode_in = [
        tf.placeholder(tf.int32, shape=(None, ), name="ei_%i" % i)
        for i in range(self.seq_in_len)
    ]
    self.labels = [
        tf.placeholder(tf.int32, shape=(None, ), name="l_%i" % i)
        for i in range(self.seq_out_len)
    ]
    loss_weights = [tf.ones_like(l, dtype=tf.float32) for l in self.labels]

    # Decoder input is a zero-valued "GO" step followed by every label
    # except the last one.
    decode_in = [
        tf.zeros_like(self.encode_in[0], dtype=np.int32, name="GO")
    ] + self.labels[:-1]

    use_dropout = hasattr(self.params, 'keep_probability')
    if use_dropout:
        self.keep_prob = tf.placeholder("float")

    def _new_cell():
        # Build one (optionally dropout-wrapped) GRU cell object.
        unit = rnn_cell.GRUCell(self.cell_units)
        if use_dropout:
            unit = rnn_cell.DropoutWrapper(unit, output_keep_prob=self.keep_prob)
        return unit

    if hasattr(self.params, 'num_layers'):
        # One cell object per layer. Repeating a single instance
        # ([cell] * n) hands the same object to every layer, which newer
        # TensorFlow releases reject or treat as weight sharing.
        cell = rnn_cell.MultiRNNCell(
            [_new_cell() for _ in range(self.params.num_layers)])
    else:
        cell = _new_cell()

    # Both variants take the same keyword arguments here, so pick the
    # builder once instead of duplicating the two call pairs.
    if self.params.attention:
        build_seq2seq = seq2seq.embedding_attention_seq2seq
    else:
        build_seq2seq = seq2seq.embedding_rnn_seq2seq

    shared_kwargs = dict(
        encoder_inputs=self.encode_in,
        decoder_inputs=decode_in,
        cell=cell,
        num_encoder_symbols=self.vocab_in_size,
        num_decoder_symbols=self.vocab_out_size,
        embedding_size=self.embedding_dim,
    )

    with tf.variable_scope("decoders") as scope:
        # Training decoder.
        decode_outs, _ = build_seq2seq(feed_previous=False, **shared_kwargs)
        # Second decoder reuses the variables created above.
        scope.reuse_variables()
        self.decode_outs_test, _ = build_seq2seq(
            feed_previous=True, **shared_kwargs)

    # NOTE(review): the meaning of the fourth positional argument depends on
    # the installed seq2seq.sequence_loss signature - confirm it is still
    # the decoder symbol count for the TensorFlow version in use.
    self.loss = seq2seq.sequence_loss(decode_outs, self.labels, loss_weights,
                                      self.vocab_out_size)
    self.optimizer = tf.train.AdamOptimizer(1e-4)
    self.train_op = self.optimizer.minimize(self.loss)
encoder_inputs = tf.unpack(encoder_inputs, axis=1) decoder_inputs = tf.slice(network, [0, max_input_len], [-1, max_output_len], name="dec_in") decoder_inputs = tf.unpack(decoder_inputs, axis=1) go_input = tf.mul( tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE ) decoder_inputs = [go_input] + decoder_inputs[: max_output_len-1] num_encoder_symbols = max_int + 1 # 从0起始 num_decoder_symbols = max_int + 2 # 包括GO print encoder_inputs print decoder_inputs cell = rnn_cell.BasicLSTMCell(16, state_is_tuple=True) model_outputs, states = seq2seq.embedding_rnn_seq2seq( encoder_inputs, decoder_inputs, cell, num_encoder_symbols=num_encoder_symbols, num_decoder_symbols=num_decoder_symbols, embedding_size=embedding_size, feed_previous=False) network = tf.pack(model_outputs, axis=1) def sequence_loss(y_pred, y_true): logits = tf.unpack(y_pred, axis=1) targets = tf.unpack(y_true, axis=1) weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets] return seq2seq.sequence_loss(logits, targets, weights) def accuracy(y_pred, y_true, x_in):
# labels[:-1] will always be an '_', so no need of putting it in decode_input decode_input = [tf.zeros_like(encode_input[0], dtype=np.int32, name="GO") ] + labels[:-1] keep_prob = tf.placeholder("float") # Define Model Layers cells = [ rnn_cell.DropoutWrapper(rnn_cell.BasicLSTMCell(embedding_dim), output_keep_prob=keep_prob) for i in range(4) ] stacked_lstm = rnn_cell.MultiRNNCell(cells) with tf.variable_scope("decoders") as scope: decode_outputs, decode_state = seq2seq.embedding_rnn_seq2seq( encode_input, decode_input, stacked_lstm, input_vocab_size, output_vocab_size, 1) scope.reuse_variables() decode_outputs_test, decode_state_test = seq2seq.embedding_rnn_seq2seq( encode_input, decode_input, stacked_lstm, input_vocab_size, output_vocab_size, 1, feed_previous=True) # Model loss optimizers loss_weights = [tf.ones_like(l, dtype=tf.float32) for l in labels]