def EmbeddingRNNSeq2SeqNoTupleF(enc_inp, dec_inp, feed_previous):
    cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
    return seq2seq_lib.embedding_rnn_seq2seq(
        enc_inp,
        dec_inp,
        cell,
        num_encoder_symbols,
        num_decoder_symbols,
        embedding_size=2,
        feed_previous=feed_previous)

def EmbeddingRNNSeq2SeqF(enc_inp, dec_inp, feed_previous):
    cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=True)
    return seq2seq_lib.embedding_rnn_seq2seq(
        enc_inp,
        dec_inp,
        cell,
        num_encoder_symbols,
        num_decoder_symbols,
        embedding_size=2,
        feed_previous=feed_previous)
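# A minimal sketch of exercising the two constructors above inside a test
# session, mirroring the shapes used in the tests later in this section.
# num_encoder_symbols / num_decoder_symbols are assumed to be module-level
# constants -- they are referenced above but not defined here.
enc_inp = [constant_op.constant(1, dtypes.int32, shape=[2]) for _ in range(2)]
dec_inp = [constant_op.constant(0, dtypes.int32, shape=[2]) for _ in range(3)]
dec, mem = EmbeddingRNNSeq2SeqF(enc_inp, dec_inp, feed_previous=True)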
def rnn_model(model, input_data, output_data, labels, vocab_size,
              batch_size=64, rnn_size=128):
    """
    Construct an RNN seq2seq model.
    :param model: cell type to use: 'rnn', 'gru', or 'lstm'
    :param input_data: list of encoder input tensors, one per time step
    :param output_data: list of decoder input tensors, one per time step
    :param labels: list of target tensors, one per decoder time step
    :param vocab_size: size of the vocabulary
    :param batch_size: batch size
    :param rnn_size: number of units in the RNN cell
    :return:
    """
    end_points = {}
    if model == 'rnn':
        cell_fun = tf.contrib.rnn.BasicRNNCell
    elif model == 'gru':
        cell_fun = tf.contrib.rnn.GRUCell
    elif model == 'lstm':
        cell_fun = tf.contrib.rnn.BasicLSTMCell

    cell = cell_fun(rnn_size)
    # cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

    # Weight every target position equally in the loss.
    weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels]
    outputs, last_state = seq2seq.embedding_rnn_seq2seq(
        input_data, output_data, cell, vocab_size, vocab_size,
        len(input_data))  # embedding_size is set to the encoder length here
    # legacy_seq2seq.sequence_loss takes (logits, targets, weights).
    loss = seq2seq.sequence_loss(outputs, labels, weights)
    tf.summary.scalar("loss", loss)
    magnitude = tf.sqrt(tf.reduce_sum(tf.square(last_state[1])))
    tf.summary.scalar("magnitude at t=1", magnitude)
    summary_op = tf.summary.merge_all()

    learning_rate = 0.05
    momentum = 0.9
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    train_op = optimizer.minimize(loss)

    logdir = tempfile.mkdtemp()  # requires `import tempfile`
    print(logdir)
    # `sess` must already exist in the enclosing scope.
    summary_writer = tf.summary.FileWriter(logdir, sess.graph)

    # if output_data is not None:
    #     initial_state = cell.zero_state(batch_size, tf.float32)
    # else:
    #     initial_state = cell.zero_state(1, tf.float32)
    # with tf.device("/cpu:0"):
    #     embedding = tf.get_variable('embedding', initializer=tf.random_uniform(
    #         [vocab_size + 1, rnn_size], -1.0, 1.0))
    #     inputs = tf.nn.embedding_lookup(embedding, input_data)
    #     decoder_inputs = tf.nn.embedding_lookup(embedding, output_data)
    # # [batch_size, ?, rnn_size] = [64, ?, 128]
    # # outputs, last_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state)
    # outputs, last_state = basic_rnn_seq2seq(inputs, decoder_inputs, cell)
    # output = tf.reshape(outputs, [-1, rnn_size])
    # weights = tf.Variable(tf.truncated_normal([rnn_size, vocab_size + 1]))
    # bias = tf.Variable(tf.zeros(shape=[vocab_size + 1]))
    # logits = tf.nn.bias_add(tf.matmul(output, weights), bias=bias)
    # # [?, vocab_size+1]
    # if output_data is not None:
    #     # output_data must be one-hot encoded
    #     labels = tf.one_hot(tf.reshape(output_data, [-1]), depth=vocab_size + 1)
    #     # should be [?, vocab_size+1]
    #     loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    #     # loss shape should be [?, vocab_size+1]
    #     total_loss = tf.reduce_mean(loss)
    #     train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
    #     end_points['initial_state'] = initial_state
    #     end_points['output'] = output
    #     end_points['train_op'] = train_op
    #     end_points['total_loss'] = total_loss
    #     end_points['loss'] = loss
    #     end_points['last_state'] = last_state
    # else:
    #     prediction = tf.nn.softmax(logits)
    #     end_points['initial_state'] = initial_state
    #     end_points['last_state'] = last_state
    #     end_points['prediction'] = prediction
    # return end_points
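# A minimal sketch of how the step-major `input_data`, `output_data`, and
# `labels` lists that rnn_model() expects might be built. The sequence
# lengths and vocabulary size below are illustrative assumptions, not
# values taken from the snippet above.
import tensorflow as tf

enc_len, dec_len, vocab_size = 10, 10, 6000
input_data = [tf.placeholder(tf.int32, [None], name="enc%d" % t)
              for t in range(enc_len)]
output_data = [tf.placeholder(tf.int32, [None], name="dec%d" % t)
               for t in range(dec_len)]
# Targets are the decoder inputs shifted left by one step, padded with zeros.
labels = output_data[1:] + [tf.zeros_like(output_data[0])]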
def model(self, mode="train", num_layers=1, cell_size=128, cell_type="BasicLSTMCell", embedding_size=20, learning_rate=0.001, tensorboard_verbose=0, checkpoint_path=None): ''' Build tensor specifying graph of operations for the seq2seq neural network model. mode = string, either "train" or "predict" cell_type = attribute of rnn_cell specifying which RNN cell type to use cell_size = size for the hidden layer in the RNN cell num_layers = number of RNN cell layers to use Return TFLearn model instance. Use DNN model for this. ''' assert mode in ["train", "predict"] checkpoint_path = checkpoint_path or ( "%s%ss2s_checkpoint.tfl" % (self.data_dir or "", "/" if self.data_dir else "")) GO_VALUE = self.out_max_int + 1 # unique integer value used to trigger decoder outputs in the seq2seq RNN tflearn.config.init_graph(seed=None, log_device=False, num_cores=int(cpu_count() * 2 / 3), gpu_memory_fraction=0, soft_placement=True) network = tflearn.input_data( shape=[None, self.in_seq_len + self.out_seq_len], dtype=tf.int32, name="XY") encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len], name="enc_in") # get encoder inputs encoder_inputs = tf.unstack( encoder_inputs, axis=1 ) # transform into list of self.in_seq_len elements, each [-1] decoder_inputs = tf.slice(network, [0, self.in_seq_len], [-1, self.out_seq_len], name="dec_in") # get decoder inputs decoder_inputs = tf.unstack( decoder_inputs, axis=1 ) # transform into list of self.out_seq_len elements, each [-1] go_input = tf.multiply( tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE ) # insert "GO" symbol as the first decoder input; drop the last decoder input decoder_inputs = [ go_input ] + decoder_inputs[:self.out_seq_len - 1] # insert GO as first; drop last decoder input feed_previous = not (mode == "train") if self.verbose > 3: print("feed_previous = %s" % str(feed_previous)) print("encoder inputs: %s" % str(encoder_inputs)) print("decoder inputs: %s" % str(decoder_inputs)) print("len decoder inputs: %s" % len(decoder_inputs)) self.n_input_symbols = self.in_max_int + 1 # default is integers from 0 to 9 self.n_output_symbols = self.out_max_int + 2 # extra "GO" symbol for decoder inputs single_cell = getattr(tf.nn.rnn_cell, cell_type)(cell_size, state_is_tuple=True) if num_layers == 1: print("rnn net later number:{}".format(num_layers)) cell = single_cell else: print("rnn net later number:{}".format(num_layers)) cell = rnn_cell.MultiRNNCell([single_cell] * num_layers) if self.seq2seq_model == "embedding_rnn": model_outputs, states = seq2seq.embedding_rnn_seq2seq( encoder_inputs, # encoder_inputs: A list of 2D Tensors [batch_size, input_size]. decoder_inputs, cell, num_encoder_symbols=self.n_input_symbols, num_decoder_symbols=self.n_output_symbols, embedding_size=embedding_size, feed_previous=feed_previous) elif self.seq2seq_model == "embedding_attention": model_outputs, states = seq2seq.embedding_attention_seq2seq( encoder_inputs, # encoder_inputs: A list of 2D Tensors [batch_size, input_size]. 
decoder_inputs, cell, num_encoder_symbols=self.n_input_symbols, num_decoder_symbols=self.n_output_symbols, embedding_size=embedding_size, num_heads=1, initial_state_attention=False, feed_previous=feed_previous) else: raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % self.seq2seq_model) tf.add_to_collection( tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model", model_outputs) # for TFLearn to know what to save and restore # model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x output_size] containing the generated outputs. if self.verbose > 2: print("model outputs: %s" % model_outputs) network = tf.stack( model_outputs, axis=1 ) # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols] if self.verbose > 2: print("packed model outputs: %s" % network) if self.verbose > 3: all_vars = tf.get_collection(tf.GraphKeys.VARIABLES) print("all_vars = %s" % all_vars) with tf.name_scope( "TargetsData" ): # placeholder for target variable (i.e. trainY input) targetY = tf.placeholder(shape=[None, self.out_seq_len], dtype=tf.int32, name="Y") network = tflearn.regression(network, placeholder=targetY, optimizer='adam', learning_rate=learning_rate, loss=self.sequence_loss, metric=self.accuracy, name="Y") model = tflearn.DNN(network, tensorboard_verbose=tensorboard_verbose, checkpoint_path=checkpoint_path) return model
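# A hedged usage sketch for the model() method above. The class name
# TFLearnSeq2Seq and the XY data layout (each row is the input sequence
# concatenated with the target sequence, matching the slicing inside
# model()) are assumptions; adapt them to the actual class.
import numpy as np

s2s = TFLearnSeq2Seq()                       # hypothetical instance
m = s2s.model(mode="train")                  # returns a tflearn.DNN
XY = np.random.randint(0, 10, (64, s2s.in_seq_len + s2s.out_seq_len))
Y = XY[:, s2s.in_seq_len:]                   # targets are the output half
m.fit(XY, Y, n_epoch=1, batch_size=64)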
def create_network(self):
    self.seq2seq_model = "embedding_attention"
    mode = "train"
    GO_VALUE = self.out_max_int + 1

    self.net = tflearn.input_data(shape=[None, self.in_seq_len],
                                  dtype=tf.int32,
                                  name="XY")
    encoder_inputs = tf.slice(self.net, [0, 0], [-1, self.in_seq_len],
                              name="enc_in")  # get encoder inputs
    encoder_inputs = tf.unstack(
        encoder_inputs,
        axis=1)  # transform to list of self.in_seq_len elements, each [-1]

    # Note: unlike model() above, the decoder inputs are sliced from the
    # start of the same tensor, so they overlap the encoder inputs.
    decoder_inputs = tf.slice(self.net, [0, 0], [-1, self.out_seq_len],
                              name="dec_in")
    decoder_inputs = tf.unstack(
        decoder_inputs,
        axis=1)  # transform into list of self.out_seq_len elements

    go_input = tf.multiply(
        tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]  # insert GO as first; drop last decoder input

    feed_previous = not (mode == "train")

    self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

    # Eight stacked GRU layers of 128 units each.
    cell = rnn.MultiRNNCell([rnn.GRUCell(128) for _ in range(8)])

    if self.seq2seq_model == "embedding_rnn":
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=200,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=200,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                        self.seq2seq_model)

    tf.add_to_collection(
        tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
        model_outputs)  # for TFLearn to know what to save and restore

    self.net = tf.stack(model_outputs, axis=1)  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]

    with tf.name_scope("TargetsData"):  # placeholder for target variable (i.e. trainY input)
        targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                 dtype=tf.int32,
                                 name="Y")

    self.net = tflearn.regression(self.net,
                                  placeholder=targetY,
                                  optimizer='adam',
                                  learning_rate=0.00005,
                                  loss=self.sequence_loss,
                                  metric=self.accuracy,
                                  name="Y")
    self.model = tflearn.DNN(self.net)
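# Both snippets above hand tflearn.regression the callables
# self.sequence_loss and self.accuracy without defining them. A plausible
# sketch of both, assuming TFLearn's (y_pred, y_true) loss convention and
# the legacy seq2seq sequence_loss; treat the exact signatures as
# assumptions, not the project's actual code. Assumes `import tensorflow as tf`.
def sequence_loss(self, y_pred, y_true):
    # y_pred: [batch, out_seq_len, n_output_symbols] logits
    # y_true: [batch, out_seq_len] integer targets
    logits = tf.unstack(y_pred, axis=1)  # list of [batch, n_symbols]
    targets = tf.unstack(tf.cast(y_true, tf.int32), axis=1)
    weights = [tf.ones_like(t, dtype=tf.float32) for t in targets]
    return tf.contrib.legacy_seq2seq.sequence_loss(logits, targets, weights)

def accuracy(self, y_pred, y_true, x_in):
    # Fraction of positions where the argmax prediction matches the target.
    pred_ids = tf.cast(tf.argmax(y_pred, axis=-1), tf.int32)
    correct = tf.equal(pred_ids, tf.cast(y_true, tf.int32))
    return tf.reduce_mean(tf.cast(correct, tf.float32))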
def testEmbeddingRNNSeq2Seq(self):
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            enc_inp = [
                constant_op.constant(1, dtypes.int32, shape=[2])
                for i in range(2)
            ]
            dec_inp = [
                constant_op.constant(i, dtypes.int32, shape=[2])
                for i in range(3)
            ]
            cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=True)
            dec, mem = seq2seq_lib.embedding_rnn_seq2seq(
                enc_inp,
                dec_inp,
                cell,
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 5), res[0].shape)

            res = sess.run([mem])
            self.assertEqual((2, 2), res[0].c.shape)
            self.assertEqual((2, 2), res[0].h.shape)

            # Test with state_is_tuple=False.
            with variable_scope.variable_scope("no_tuple"):
                cell1 = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
                dec, mem = seq2seq_lib.embedding_rnn_seq2seq(
                    enc_inp,
                    dec_inp,
                    cell1,
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 5), res[0].shape)

            res = sess.run([mem])
            self.assertEqual((2, 4), res[0].shape)

            # Test externally provided output projection.
            w = variable_scope.get_variable("proj_w", [2, 5])
            b = variable_scope.get_variable("proj_b", [5])
            with variable_scope.variable_scope("proj_seq2seq"):
                dec, _ = seq2seq_lib.embedding_rnn_seq2seq(
                    enc_inp,
                    dec_inp,
                    cell,
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2,
                    output_projection=(w, b))
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 2), res[0].shape)

            # Test that previous-feeding model ignores inputs after the first.
            dec_inp2 = [
                constant_op.constant(0, dtypes.int32, shape=[2])
                for _ in range(3)
            ]
            with variable_scope.variable_scope("other"):
                d3, _ = seq2seq_lib.embedding_rnn_seq2seq(
                    enc_inp,
                    dec_inp2,
                    cell,
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2,
                    feed_previous=constant_op.constant(True))
            sess.run([variables.global_variables_initializer()])
            variable_scope.get_variable_scope().reuse_variables()
            d1, _ = seq2seq_lib.embedding_rnn_seq2seq(
                enc_inp,
                dec_inp,
                cell,
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2,
                feed_previous=True)
            d2, _ = seq2seq_lib.embedding_rnn_seq2seq(
                enc_inp,
                dec_inp2,
                cell,
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2,
                feed_previous=True)
            res1 = sess.run(d1)
            res2 = sess.run(d2)
            res3 = sess.run(d3)
            self.assertAllClose(res1, res2)
            self.assertAllClose(res1, res3)
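# With output_projection=(w, b), embedding_rnn_seq2seq returns raw cell
# outputs of size 2 (hence the (2, 2) assertion above) instead of
# vocabulary logits. A minimal sketch of recovering the (2, 5) logits with
# the same projection, assuming `import tensorflow as tf` at module level:
logits = [tf.matmul(out, w) + b for out in dec]  # each step: [batch, 5]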
def _build_model(self):
    """
    Builds a model either for training or testing
    :return:
    """
    cell = self._set_cell_type()
    self._build_inputs()
    output_projection = None

    print("Embedding size: ", self.embedding_size)
    if self.use_attn:
        if self.copy:
            print("Using attention of form ", self.attn_type, " with copy mechanism...")
        else:
            print("Using attention of form ", self.attn_type)
        self.outputs, self.states, self.attn_outputs = embedding_attention_seq2seq(
            self.encoder_inputs,
            self.decoder_inputs,
            cell,
            num_encoder_symbols=self.vocab_size,
            num_decoder_symbols=self.vocab_size,
            embedding_size=self.embedding_size,
            output_projection=output_projection,
            feed_previous=self.do_decode,
            dtype=tf.float32,
            copy=self.copy,
            attn_type=self.attn_type)
    else:
        print("Using vanilla seq2seq...")
        self.outputs, self.states = embedding_rnn_seq2seq(
            self.encoder_inputs,
            self.decoder_inputs,
            cell,
            num_encoder_symbols=self.vocab_size,
            num_decoder_symbols=self.vocab_size,
            embedding_size=self.embedding_size,
            output_projection=output_projection,
            feed_previous=self.do_decode,
            dtype=tf.float32)
        self.attn_outputs = None

    # Compute loss -- averaged across batch, with l2 loss added
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    # Only get non-bias terms
    non_bias_vars = [v for v in trainable_vars if "Bias" not in v.name]
    l2_loss = tf.add_n(
        [self.l2_reg * tf.nn.l2_loss(nb) for nb in non_bias_vars])

    self.total_loss = sequence_loss(self.outputs, self.decoder_inputs,
                                    self.target_weights) + l2_loss
    self.training_op = tf.train.AdamOptimizer(
        learning_rate=0.0001).minimize(self.total_loss)

    # Greedy predictions: argmax over the vocabulary axis of the stacked
    # (time-major) outputs, transposed to batch-major [batch, out_seq_len].
    self.predictions = tf.transpose(
        tf.argmax(tf.stack(self.outputs), axis=-1), [1, 0])
    self.dec_prediction = self.predictions

    self.saver = tf.train.Saver(max_to_keep=10)
    self.increment_global_step = tf.assign_add(
        self.global_step, 1, name='increment_global_step')
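# _build_model() relies on helpers that are not shown in this section. A
# hedged sketch of what _build_inputs() plausibly creates -- the attribute
# names mirror those used above, but self.max_seq_len and the placeholder
# naming are illustrative assumptions. Assumes `import tensorflow as tf`.
def _build_inputs(self):
    self.encoder_inputs = [
        tf.placeholder(tf.int32, [None], name="enc%d" % t)
        for t in range(self.max_seq_len)
    ]
    self.decoder_inputs = [
        tf.placeholder(tf.int32, [None], name="dec%d" % t)
        for t in range(self.max_seq_len)
    ]
    # Per-step loss weights: typically 1.0 for real tokens, 0.0 for padding.
    self.target_weights = [
        tf.placeholder(tf.float32, [None], name="w%d" % t)
        for t in range(self.max_seq_len)
    ]
    self.global_step = tf.Variable(0, trainable=False, name="global_step")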
def testEmbeddingRNNSeq2Seq(self):
    # Variant of the previous test for TF versions in which RNN cells own
    # their variables and cannot be reused across variable scopes, hence
    # the cell_fn factory that builds a fresh cell per model.
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            enc_inp = [
                constant_op.constant(1, dtypes.int32, shape=[2])
                for i in range(2)
            ]
            dec_inp = [
                constant_op.constant(i, dtypes.int32, shape=[2])
                for i in range(3)
            ]
            cell_fn = lambda: core_rnn_cell_impl.BasicLSTMCell(2)
            cell = cell_fn()
            dec, mem = seq2seq_lib.embedding_rnn_seq2seq(
                enc_inp,
                dec_inp,
                cell,
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 5), res[0].shape)

            res = sess.run([mem])
            self.assertEqual((2, 2), res[0].c.shape)
            self.assertEqual((2, 2), res[0].h.shape)

            # Test with state_is_tuple=False.
            with variable_scope.variable_scope("no_tuple"):
                cell_nt = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
                dec, mem = seq2seq_lib.embedding_rnn_seq2seq(
                    enc_inp,
                    dec_inp,
                    cell_nt,
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 5), res[0].shape)

            res = sess.run([mem])
            self.assertEqual((2, 4), res[0].shape)

            # Test externally provided output projection.
            w = variable_scope.get_variable("proj_w", [2, 5])
            b = variable_scope.get_variable("proj_b", [5])
            with variable_scope.variable_scope("proj_seq2seq"):
                dec, _ = seq2seq_lib.embedding_rnn_seq2seq(
                    enc_inp,
                    dec_inp,
                    cell_fn(),
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2,
                    output_projection=(w, b))
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            self.assertEqual(3, len(res))
            self.assertEqual((2, 2), res[0].shape)

            # Test that previous-feeding model ignores inputs after the first.
            dec_inp2 = [
                constant_op.constant(0, dtypes.int32, shape=[2])
                for _ in range(3)
            ]
            with variable_scope.variable_scope("other"):
                d3, _ = seq2seq_lib.embedding_rnn_seq2seq(
                    enc_inp,
                    dec_inp2,
                    cell_fn(),
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2,
                    feed_previous=constant_op.constant(True))
            sess.run([variables.global_variables_initializer()])
            variable_scope.get_variable_scope().reuse_variables()
            d1, _ = seq2seq_lib.embedding_rnn_seq2seq(
                enc_inp,
                dec_inp,
                cell_fn(),
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2,
                feed_previous=True)
            d2, _ = seq2seq_lib.embedding_rnn_seq2seq(
                enc_inp,
                dec_inp2,
                cell_fn(),
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2,
                feed_previous=True)
            res1 = sess.run(d1)
            res2 = sess.run(d2)
            res3 = sess.run(d3)
            self.assertAllClose(res1, res2)
            self.assertAllClose(res1, res3)
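# The same fresh-cell pattern applies when stacking layers: build a new
# cell per layer instead of repeating one instance. A minimal sketch
# (num_layers is an illustrative assumption):
import tensorflow as tf

num_layers = 3
stacked = tf.nn.rnn_cell.MultiRNNCell(
    [tf.nn.rnn_cell.BasicLSTMCell(2) for _ in range(num_layers)])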
display_step = 10

# Network Parameters
n_input = 8    # data input
n_hidden = 5   # hidden layer num of features
dec_steps = 20  # number of decoder steps
learning_rate = 0.001  # assumed; not defined in the original fragment

# tf Graph input. `word_lenght` (the sequence length) is assumed to be
# defined earlier in the script; encoder/decoder symbols are byte values
# (0-255), so token ids are fed directly rather than one-hot vectors.
x = tf.placeholder(tf.int32, [None, word_lenght], name="x")
y = tf.placeholder(tf.int32, [None, dec_steps], name="y")

with tf.variable_scope("train_test", reuse=None):
    x_seq = tf.unstack(x, word_lenght, 1)  # list of word_lenght [batch] tensors
    # Decoder inputs must be int32 tensors, not Python ints; feed zeros
    # (acting as GO/PAD) for every step.
    dec_seq = [tf.zeros_like(x_seq[0])] * dec_steps
    outputs, states = embedding_rnn_seq2seq(
        encoder_inputs=x_seq,
        decoder_inputs=dec_seq,
        cell=core_rnn_cell_impl.LSTMCell(n_hidden),
        num_encoder_symbols=256,
        num_decoder_symbols=256,
        embedding_size=100,
        output_projection=None,
        feed_previous=False)

# Define loss and optimizer. `outputs` is a list of [batch, 256] logits, so
# stack it to [batch, dec_steps, 256] and use the sparse cross-entropy
# against the integer targets.
logits = tf.stack(outputs, axis=1)
cost = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(logits, 2), tf.cast(y, tf.int64))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Launch the graph
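# A hedged completion of the fragment above: a minimal training loop. The
# batch generator next_batch() and the step count are assumptions.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1, 1001):
        batch_x, batch_y = next_batch()  # hypothetical data source
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % display_step == 0:
            loss_val, acc_val = sess.run([cost, accuracy],
                                         feed_dict={x: batch_x, y: batch_y})
            print("step %d: loss=%.4f acc=%.4f" % (step, loss_val, acc_val))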
def model(self, mode="train", num_layers=1, cell_size=128, cell_type="BasicLSTMCell", embedding_size=20, learning_rate=0.001, tensorboard_verbose=0, checkpoint_path=None): ''' Build tensor specifying graph of operations for the seq2seq neural network model. mode = string, either "train" or "predict" cell_type = attribute of rnn_cell specifying which RNN cell type to use cell_size = size for the hidden layer in the RNN cell num_layers = number of RNN cell layers to use Return TFLearn model instance. Use DNN model for this. ''' assert mode in ["train", "predict"] checkpoint_path = checkpoint_path or ( "%s%ss2s_checkpoint.tfl" % (self.data_dir or "", "/" if self.data_dir else "")) GO_VALUE = self.out_max_int + 1 # unique integer value used to trigger decoder outputs in the seq2seq RNN tflearn.config.init_graph(seed=None, log_device=False, num_cores=int(cpu_count() * 2 / 3), gpu_memory_fraction=0, soft_placement=True) network = tflearn.input_data(shape=[None, self.in_seq_len + self.out_seq_len], dtype=tf.int32, name="XY") encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len], name="enc_in") # get encoder inputs encoder_inputs = tf.unstack(encoder_inputs, axis=1) # transform into list of self.in_seq_len elements, each [-1] decoder_inputs = tf.slice(network, [0, self.in_seq_len], [-1, self.out_seq_len], name="dec_in") # get decoder inputs decoder_inputs = tf.unstack(decoder_inputs, axis=1) # transform into list of self.out_seq_len elements, each [-1] go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE) # insert "GO" symbol as the first decoder input; drop the last decoder input decoder_inputs = [go_input] + decoder_inputs[ : self.out_seq_len - 1] # insert GO as first; drop last decoder input feed_previous = not (mode == "train") if self.verbose > 3: print("feed_previous = %s" % str(feed_previous)) print("encoder inputs: %s" % str(encoder_inputs)) print("decoder inputs: %s" % str(decoder_inputs)) print("len decoder inputs: %s" % len(decoder_inputs)) self.n_input_symbols = self.in_max_int + 1 # default is integers from 0 to 9 self.n_output_symbols = self.out_max_int + 2 # extra "GO" symbol for decoder inputs single_cell = getattr(tf.nn.rnn_cell, cell_type)(cell_size, state_is_tuple=True) if num_layers == 1: print("rnn net later number:{}".format(num_layers)) cell = single_cell else: print("rnn net later number:{}".format(num_layers)) cell = rnn_cell.MultiRNNCell([single_cell] * num_layers) if self.seq2seq_model == "embedding_rnn": model_outputs, states = seq2seq.embedding_rnn_seq2seq(encoder_inputs, # encoder_inputs: A list of 2D Tensors [batch_size, input_size]. decoder_inputs, cell, num_encoder_symbols=self.n_input_symbols, num_decoder_symbols=self.n_output_symbols, embedding_size=embedding_size, feed_previous=feed_previous) elif self.seq2seq_model == "embedding_attention": model_outputs, states = seq2seq.embedding_attention_seq2seq(encoder_inputs, # encoder_inputs: A list of 2D Tensors [batch_size, input_size]. 
decoder_inputs, cell, num_encoder_symbols=self.n_input_symbols, num_decoder_symbols=self.n_output_symbols, embedding_size=embedding_size, num_heads=1, initial_state_attention=False, feed_previous=feed_previous) else: raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % self.seq2seq_model) tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model", model_outputs) # for TFLearn to know what to save and restore # model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x output_size] containing the generated outputs. if self.verbose > 2: print("model outputs: %s" % model_outputs) network = tf.stack(model_outputs, axis=1) # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols] if self.verbose > 2: print("packed model outputs: %s" % network) if self.verbose > 3: all_vars = tf.get_collection(tf.GraphKeys.VARIABLES) print("all_vars = %s" % all_vars) with tf.name_scope("TargetsData"): # placeholder for target variable (i.e. trainY input) targetY = tf.placeholder(shape=[None, self.out_seq_len], dtype=tf.int32, name="Y") network = tflearn.regression(network, placeholder=targetY, optimizer='adam', learning_rate=learning_rate, loss=self.sequence_loss, metric=self.accuracy, name="Y") model = tflearn.DNN(network, tensorboard_verbose=tensorboard_verbose, checkpoint_path=checkpoint_path) return model