Пример #1
0
def decode_text(session, encode_input, pad_index, decoder_len, cell, em_dim,
                vocab_size, en_placeholders, de_placeholders,
                target_placeholders):
    """Greedy-decode one input sequence and return predicted token ids.

    Rebuilds the attention seq2seq graph with reuse=True so it shares the
    already-trained weights, runs it with feed_previous=True, and takes
    the argmax of each decoder step for batch element 0.
    """
    current_scope = variable_scope.get_variable_scope()
    with variable_scope.variable_scope(current_scope, reuse=True):
        outputs, _ = embedding_attention_seq2seq(
            en_placeholders, de_placeholders, cell, vocab_size, vocab_size,
            em_dim, output_projection=None, feed_previous=True)

        # Pad the decoder input out to decoder_len with the PAD symbol.
        padding = [pad_index] * (decoder_len - len(encode_input))
        decode_input = encode_input + padding
        feed_dict_test = generate_feed_dict(
            [encode_input], [decode_input], pad_index, en_placeholders,
            de_placeholders, target_placeholders)

        # Each per-step output tensor needs the feed dict when evaluated.
        return [
            np.argmax(step.eval(feed_dict_test, session=session), axis=1)[0]
            for step in outputs
        ]
Пример #2
0
    def define(self,
               char_num,
               rnn_dim,
               emb_dim,
               max_x,
               max_y,
               write_trans_model=True):
        """Build the attention seq2seq translation graph.

        Args:
            char_num: vocabulary size (shared by encoder and decoder).
            rnn_dim: hidden size of the LSTM cell.
            emb_dim: embedding dimension.
            max_x: maximum encoder (input) sequence length.
            max_y: maximum decoder (output) sequence length.
            write_trans_model: when True, pickle the hyper-parameters to
                ``self.trained + '_model'`` so the graph can be rebuilt.
        """
        self.decode_step = max_y
        self.encode_step = max_x
        self.en_vec = [
            tf.placeholder(tf.int32, [None], name='en_input' + str(i))
            for i in range(max_x)
        ]
        self.trans_labels = [
            tf.placeholder(tf.int32, [None], name='de_input' + str(i))
            for i in range(max_y)
        ]
        # Loss weight 1 for every non-zero label, 0 where the label is 0
        # (presumably the padding id — confirm against the vocabulary).
        weights = [
            tf.cast(tf.sign(ot_t), tf.float32) for ot_t in self.trans_labels
        ]
        # Decoder inputs: an all-zeros first step, then the labels shifted
        # right by one time step.
        self.de_vec = [tf.zeros_like(self.trans_labels[0], tf.int32)
                       ] + self.trans_labels[:-1]
        self.feed_previous = tf.placeholder(tf.bool)
        self.trans_l_rate = tf.placeholder(tf.float32, [],
                                           name='learning_rate')
        seq_cell = tf.nn.rnn_cell.BasicLSTMCell(rnn_dim, state_is_tuple=True)
        self.trans_output, states = seq2seq.embedding_attention_seq2seq(
            self.en_vec,
            self.de_vec,
            seq_cell,
            char_num,
            char_num,
            emb_dim,
            feed_previous=self.feed_previous)

        loss = seq2seq.sequence_loss(self.trans_output, self.trans_labels,
                                     weights)
        optimizer = tf.train.AdagradOptimizer(learning_rate=self.trans_l_rate)

        # Clip gradients at global norm 5.0 before applying them.
        params = tf.trainable_variables()
        gradients = tf.gradients(loss, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients, 5.0)
        self.trans_train = optimizer.apply_gradients(
            zip(clipped_gradients, params))

        self.saver = tf.train.Saver()

        if write_trans_model:
            param_dic = {
                'char_num': char_num,
                'rnn_dim': rnn_dim,
                'emb_dim': emb_dim,
                'max_x': max_x,
                'max_y': max_y,
            }
            # pickle requires a binary-mode handle; opening with 'w' breaks
            # under Python 3. 'with' guarantees the file is closed.
            with open(self.trained + '_model', 'wb') as f_model:
                pickle.dump(param_dic, f_model)
Пример #3
0
 def seq2seq_f(encoder_inputs, decoder_inputs, feed_previous):
     """Delegate to embedding_attention_seq2seq with the closed-over cell,
     vocabulary sizes, embedding size and output projection."""
     fixed_kwargs = dict(
         cell=cell,
         num_encoder_symbols=source_vocab_size,
         num_decoder_symbols=target_vocab_size,
         embedding_size=size,
         output_projection=output_projection,
         feed_previous=feed_previous,
     )
     return seq2seq.embedding_attention_seq2seq(
         encoder_inputs, decoder_inputs, **fixed_kwargs)
Пример #4
0
 def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
     """Attention seq2seq wrapper; ``do_decode`` enables feed_previous,
     i.e. each decoder step consumes the previous step's output."""
     enc_vocab = config.ENC_VOCAB
     dec_vocab = config.DEC_VOCAB
     return legacy_seq2seq.embedding_attention_seq2seq(
         encoder_inputs,
         decoder_inputs,
         self.cell,
         num_encoder_symbols=enc_vocab,
         num_decoder_symbols=dec_vocab,
         embedding_size=config.HIDDEN_SIZE,
         output_projection=self.output_projection,
         feed_previous=do_decode)
Пример #5
0
 def seq2seq_function(encoder_inputs, decoder_inputs, do_decode):
     """One-call wrapper around embedding_attention_seq2seq using the same
     vocabulary for both encoder and decoder symbols."""
     shared = dict(
         num_encoder_symbols=vocabulary_size,
         num_decoder_symbols=vocabulary_size,
         embedding_size=hidden_size,
         output_projection=output_projection,
         feed_previous=do_decode,
     )
     return legacy_seq2seq.embedding_attention_seq2seq(
         encoder_inputs, decoder_inputs, cell, **shared)
Пример #6
0
 def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
     """Attention seq2seq with a fixed 256-dimensional embedding."""
     embedding_dim = 256
     return legacy_seq2seq.embedding_attention_seq2seq(
         encoder_inputs,
         decoder_inputs,
         cell,
         num_encoder_symbols=source_vocab_size,
         num_decoder_symbols=target_vocab_size,
         embedding_size=embedding_dim,
         output_projection=output_projection,
         feed_previous=do_decode)
Пример #7
0
 def seq2seq_f(self, encoder_inputs, decoder_inputs, do_decode):
     """Build the attention seq2seq graph for one input/output pair,
     feeding back predictions when ``do_decode`` is true."""
     rnn_cell = BaseSeq2Seq2ModelTF.get_cell_definition(
         self.M, self.num_layers, self.use_lstm)
     return legacy_seq2seq.embedding_attention_seq2seq(
         encoder_inputs,
         decoder_inputs,
         rnn_cell,
         num_encoder_symbols=self.src_vocab_size,
         num_decoder_symbols=self.tgt_vocab_size,
         embedding_size=self.M,
         output_projection=self.output_projection,
         feed_previous=do_decode)
Пример #8
0
 def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
     """Attention seq2seq built on a deep copy of the shared cell so this
     graph does not reuse the original cell object."""
     from copy import deepcopy
     cell_copy = deepcopy(cell)
     return legacy_seq2seq.embedding_attention_seq2seq(
         encoder_inputs,
         decoder_inputs,
         cell_copy,
         num_encoder_symbols=self.source_vocab_size,
         num_decoder_symbols=self.target_vocab_size,
         embedding_size=self.lsmt_size,
         output_projection=output_projection,
         feed_previous=do_decode)
Пример #9
0
 def seq2seq_f(encoder_inputs, decoder_inputs):
     """Shared-vocabulary attention seq2seq; feed_previous is tied to
     self.is_chatting (predictions are fed back while chatting).

     NOTE(review): the library uses separate embeddings for encoder and
     decoder, and projects outputs to vocab_size even when output_proj is
     None (it wraps the cell in its own OutputProjectionWrapper) — keep in
     mind when reasoning about output shapes.
     """
     return embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                        num_encoder_symbols=self.vocab_size,
                                        num_decoder_symbols=self.vocab_size,
                                        embedding_size=self.state_size,
                                        output_projection=output_proj,
                                        feed_previous=self.is_chatting,
                                        dtype=tf.float32)
Пример #10
0
    def model(self):
        """Build the seq2seq graph.

        Creates a stacked LSTM (with output dropout while training), the
        input/target placeholders, and either exposes the raw decoder
        outputs (inference) or a sequence loss plus Adam train op
        (training).
        """
        def _make_cell():
            # Each layer needs its own cell instance: putting the same
            # BasicLSTMCell object into MultiRNNCell multiple times makes
            # the layers share (or clash over) one set of variables.
            layer_cell = BasicLSTMCell(self.hidden_layer_size)
            if self.is_train:
                layer_cell = DropoutWrapper(layer_cell,
                                            input_keep_prob=1.0,
                                            output_keep_prob=0.8)
            return layer_cell

        multi_cell = MultiRNNCell(
            [_make_cell() for _ in range(self.num_layers)])
        # The first dimension is the batch size.
        self.x = tf.placeholder(tf.int32, [None, self.seq_input_length])
        self.y = tf.placeholder(tf.int32, [None, self.output_size_length])

        outputs, state = embedding_attention_seq2seq(
            self.x, self.y, multi_cell, self.vocab_size, self.vocab_size,
            self.embedding_size)

        if not self.is_train:
            self.output = outputs
        else:
            self.loss_func = sequence_loss(outputs, self.y, self.weight)
            opt = tf.train.AdamOptimizer(self.learning_rate)
            self.opt_op = opt.minimize(self.loss_func)
Пример #11
0
def get_model(fee_previous=False):
    """Build the attention seq2seq training graph and return its handles.

    Returns a tuple of (encoder_inputs, decoder_inputs, target_weights,
    outputs, loss, opt, saver, learning_rate_decay_op, learning_rate).
    """
    learning_rate = tf.Variable(float(init_learning_rate),
                                trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name='encoder{0}'.format(i))
        for i in range(input_seq_len)
    ]
    decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name='decoder{0}'.format(i))
        for i in range(output_seq_len + 1)
    ]
    target_weights = [
        tf.placeholder(tf.float32, shape=[None], name='weight{0}'.format(i))
        for i in range(output_seq_len)
    ]

    # Targets are the decoder inputs shifted left by one time step.
    targets = decoder_inputs[1:output_seq_len + 1]
    cell = rnn.BasicLSTMCell(size)

    outputs, _ = legacy_seq2seq.embedding_attention_seq2seq(
        encoder_inputs=encoder_inputs,
        decoder_inputs=decoder_inputs[:output_seq_len],
        cell=cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=fee_previous,
        dtype=tf.float32)

    loss = legacy_seq2seq.sequence_loss(outputs, targets, target_weights)
    opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    saver = tf.train.Saver(max_to_keep=1)

    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            opt, saver, learning_rate_decay_op, learning_rate)
Пример #12
0
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            """Build the per-bucket attention seq2seq graph.

            The shared cell is deep-copied so the encoder and decoder get
            two structurally identical RNN cells without sharing
            parameters. ``output_projection`` maps the decoder's
            output_size down from the (possibly large) target vocabulary;
            ``do_decode`` makes each step consume the previous step's
            output — both matter only at prediction time.
            """
            print("当前桶的Seq2Seq模型构建.....")
            decoder_cell = copy.deepcopy(cell)
            return legacy_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                decoder_cell,
                num_encoder_symbols=source_vocab_size,
                num_decoder_symbols=target_vocab_size,
                embedding_size=size,
                output_projection=output_projection,
                feed_previous=do_decode,
                dtype=dtype)
Пример #13
0
# Placeholder so the dropout keep probability can be fed at run time.
with tf.name_scope('dropout'):
    keep_prob = tf.placeholder("float", name='keep_prob')


# In[5]:

# Stacked LSTM with output dropout on every layer.
# NOTE(review): the DropoutWrapper uses keep_prob_val, not the keep_prob
# placeholder defined above — presumably a fixed constant rate; confirm
# this is intended.
cells = [
    DropoutWrapper(
        BasicLSTMCell(num_hidden), output_keep_prob=keep_prob_val
    ) for i in range(num_layers)
]

stacked_lstm = MultiRNNCell(cells)

# Two instantiations of the same seq2seq graph: the training graph, then
# (after scope.reuse_variables(), so weights are shared) a test graph with
# feed_previous=True, which decodes from its own previous predictions.
with tf.variable_scope("decoders") as scope:
    decode_outputs, decode_state = seq2seq.embedding_attention_seq2seq(encode_input, decode_input, stacked_lstm, vocab_size, vocab_size, num_hidden, dtype=float_type)

    scope.reuse_variables()

    decode_outputs_test, decode_state_test = seq2seq.embedding_attention_seq2seq(encode_input, decode_input, stacked_lstm, vocab_size, vocab_size, num_hidden, dtype=float_type, feed_previous=True)
    

# In[6]:

# Sequence loss over the training decoder outputs with every position
# weighted 1 (no padding mask here).
with tf.name_scope('loss'):
    loss_weights = [tf.ones_like(l, dtype=float_type) for l in labels]
    loss = seq2seq.sequence_loss(decode_outputs, labels, loss_weights, vocab_size)

tf.summary.scalar('loss', loss)

Пример #14
0
def chat(input_text):
    """Generate a chatbot reply for ``input_text``.

    Rebuilds the attention seq2seq graph with hard-coded
    hyper-parameters, restores the trained weights from MODEL_FILE, and
    delegates the actual decoding to generate_response().
    """
    word_cnt, train_dict, train_reverse_dict = load_dict(DICT_FILE)

    # Special-token strings and their ids in the training vocabulary.
    LINE_BREAK = u'<Break>'
    WORD_DELIMITER = u'/'
    UNK_WORD = u'<UNK>'
    PADDING_WORD = u'<PAD>'
    START_WORD = u'<GO>'
    END_WORD = u'<EOS>'

    START_ID = train_dict[START_WORD]
    END_ID = train_dict[END_WORD]
    PAD_ID = train_dict[PADDING_WORD]
    UNK_ID = train_dict[UNK_WORD]

    #Attenion
    # Start from a clean graph so repeated calls do not collide.
    tf.reset_default_graph()

    RNN_CELL_TYPE = 'LSTMCell_Attention'
    learning_rate = 1.0

    # Fixed sequence lengths / embedding size — must match the values the
    # checkpoint was trained with.
    encoder_length = 15
    decoder_length = 20
    embed_dim = 128

    cell = tf.contrib.rnn.LSTMCell(embed_dim)
    num_encoder_symbols = VOCAB_SIZE
    num_decoder_symbols = VOCAB_SIZE
    embedding_size = embed_dim

    encoder_len_placeholder = tf.placeholder(tf.int32)

    # One placeholder per time step, as legacy_seq2seq expects.
    encoder_placeholders = [
        tf.placeholder(tf.int32, shape=[None], name="encoder_%d" % i)
        for i in range(encoder_length)
    ]
    decoder_placeholders = [
        tf.placeholder(tf.int32, shape=[None], name="decoder_%d" % i)
        for i in range(decoder_length)
    ]
    target_placeholders = [
        tf.placeholder(tf.int32, shape=[None], name="target_%d" % i)
        for i in range(decoder_length)
    ]
    target_weights_placeholders = [
        tf.placeholder(tf.float32, shape=[None], name="decoder_weight_%d" % i)
        for i in range(decoder_length)
    ]
    outputs, states = embedding_attention_seq2seq(encoder_placeholders,
                                                  decoder_placeholders,
                                                  cell,
                                                  num_encoder_symbols,
                                                  num_decoder_symbols,
                                                  embedding_size,
                                                  output_projection=None,
                                                  feed_previous=False)

    # NOTE(review): loss/train_step are never run here; presumably they
    # are built so the Saver's variable set (incl. Adagrad slots) matches
    # the checkpoint — confirm before removing.
    loss = sequence_loss(outputs, target_placeholders,
                         target_weights_placeholders)
    #train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    #train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    train_step = tf.train.AdagradOptimizer(learning_rate).minimize(loss)

    saver = tf.train.Saver()
    sess = tf.Session()

    #sess.run(tf.global_variables_initializer())
    saved_model = MODEL_FILE
    #print('Loading model from:', saved_model)

    #t0 = time.time()
    saver.restore(sess, saved_model)
    #t1 = time.time()
    #print(t1-t0)

    #input_text = u'你要去哪?'
    output_text = generate_response(sess, input_text, train_dict,
                                    train_reverse_dict, encoder_length,
                                    decoder_length, PAD_ID, UNK_ID, START_ID,
                                    END_ID, cell, embed_dim, VOCAB_SIZE,
                                    encoder_placeholders, decoder_placeholders,
                                    target_weights_placeholders)
    #print(output_text.encode("utf-8"))
    return output_text
Пример #15
0
    def create_network(self):
        """Build the tflearn seq2seq network and wrap it in a DNN.

        Constructs encoder/decoder input lists from one input tensor,
        runs an embedding (attention) seq2seq over a 3-layer cell stack,
        and attaches a tflearn regression head on the stacked outputs.
        """
        self.seq2seq_model = "embedding_attention"
        mode = "train"
        # Unique integer outside the output vocabulary used as the GO
        # symbol that triggers decoding.
        GO_VALUE = self.out_max_int + 1

        self.net = tflearn.input_data(shape=[None, self.in_seq_len],
                                      dtype=tf.int32,
                                      name="XY")
        encoder_inputs = tf.slice(self.net, [0, 0], [-1, self.in_seq_len],
                                  name="enc_in")  # get encoder inputs
        encoder_inputs = tf.unstack(
            encoder_inputs,
            axis=1)  # transform to list of self.in_seq_len elements, each [-1]

        # NOTE(review): the decoder slice starts at [0, 0], i.e. the same
        # columns as the encoder inputs (cf. the variant that offsets by
        # in_seq_len) — confirm this is intended.
        decoder_inputs = tf.slice(self.net, [0, 0], [-1, self.out_seq_len],
                                  name="dec_in")
        decoder_inputs = tf.unstack(
            decoder_inputs,
            axis=1)  # transform into list of self.out_seq_len elements

        go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32),
                               GO_VALUE)
        decoder_inputs = [
            go_input
        ] + decoder_inputs[:self.out_seq_len -
                           1]  # insert GO as first; drop last decoder input

        # Training feeds the true decoder inputs; prediction would feed
        # each step's previous output back in.
        feed_previous = not (mode == "train")

        self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
        self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

        # Three stacked cells of width 128.
        cells = []
        for _ in range(3):
            cells.append(self.getCell(128))

        cell = rnn.MultiRNNCell(cells)

        if self.seq2seq_model == "embedding_rnn":
            model_outputs, states = seq2seq.embedding_rnn_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=1000,
                feed_previous=feed_previous)
        elif self.seq2seq_model == "embedding_attention":
            model_outputs, states = seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=1000,
                num_heads=4,
                initial_state_attention=False,
                feed_previous=feed_previous)
        else:
            raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                            self.seq2seq_model)

        tf.add_to_collection(
            tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
            model_outputs)  # for TFLearn to know what to save and restore
        self.net = tf.stack(
            model_outputs, axis=1
        )  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]

        with tf.name_scope(
                "TargetsData"
        ):  # placeholder for target variable (i.e. trainY input)
            targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                     dtype=tf.int32,
                                     name="Y")

        self.net = tflearn.regression(self.net,
                                      placeholder=targetY,
                                      optimizer='adam',
                                      learning_rate=0.001,
                                      loss=self.sequence_loss,
                                      metric=self.accuracy,
                                      name="Y")

        self.model = tflearn.DNN(self.net)
Пример #16
0
    def model(self,
              mode="train",
              num_layers=2,
              cell_size=32,
              cell_type="BasicLSTMCell",
              embedding_size=20,
              learning_rate=0.0001,
              tensorboard_verbose=0,
              checkpoint_path=None):
        """Build and return the tflearn seq2seq DNN.

        Args:
            mode: "train" or "predict"; "predict" makes the decoder feed
                each step's previous output back in (feed_previous=True).
            num_layers: number of stacked RNN layers.
            cell_size: hidden units per cell.
            cell_type: name of a cell class looked up on core_rnn_cell.
            embedding_size: symbol embedding dimension.
            learning_rate: Adam learning rate.
            tensorboard_verbose: tflearn tensorboard verbosity level.
            checkpoint_path: checkpoint file path; defaults to a file
                under self.data_dir.

        Returns:
            A tflearn.DNN wrapping the regression network.
        """

        assert mode in ["train", "predict"]

        checkpoint_path = checkpoint_path or (
            "%s%ss2s_checkpoint.tfl" %
            (self.data_dir or "", "/" if self.data_dir else ""))
        GO_VALUE = self.out_max_int + 1  # unique integer value used to trigger decoder outputs in the seq2seq RNN

        network = tflearn.input_data(
            shape=[None, self.in_seq_len + self.out_seq_len],
            dtype=tf.int32,
            name="XY")
        encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len],
                                  name="enc_in")  # get encoder inputs
        encoder_inputs = tf.unstack(
            encoder_inputs, axis=1
        )  # transform into list of self.in_seq_len elements, each [-1]

        decoder_inputs = tf.slice(network, [0, self.in_seq_len],
                                  [-1, self.out_seq_len],
                                  name="dec_in")  # get decoder inputs
        decoder_inputs = tf.unstack(
            decoder_inputs, axis=1
        )  # transform into list of self.out_seq_len elements, each [-1]

        go_input = tf.multiply(
            tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE
        )  # insert "GO" symbol as the first decoder input; drop the last decoder input
        decoder_inputs = [
            go_input
        ] + decoder_inputs[:self.out_seq_len -
                           1]  # insert GO as first; drop last decoder input

        feed_previous = not (mode == "train")

        self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
        self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

        def _make_cell():
            # One fresh cell per layer: passing the same cell object to
            # MultiRNNCell for every layer ties the layers to one set of
            # variables (and raises an error in newer TF 1.x releases).
            return getattr(core_rnn_cell, cell_type)(cell_size,
                                                     state_is_tuple=True)

        if num_layers == 1:
            cell = _make_cell()
        else:
            cell = core_rnn_cell.MultiRNNCell(
                [_make_cell() for _ in range(num_layers)])

        if self.seq2seq_model == "embedding_rnn":
            model_outputs, states = legacy_seq2seq.embedding_rnn_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=embedding_size,
                feed_previous=feed_previous)
        elif self.seq2seq_model == "embedding_attention":
            model_outputs, states = legacy_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
                decoder_inputs,
                cell,
                num_encoder_symbols=self.n_input_symbols,
                num_decoder_symbols=self.n_output_symbols,
                embedding_size=embedding_size,
                num_heads=1,
                initial_state_attention=False,
                feed_previous=feed_previous)
        else:
            raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                            self.seq2seq_model)

        tf.add_to_collection(
            tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
            model_outputs)  # for TFLearn to know what to save and restore

        # model_outputs: list of the same length as decoder_inputs of 2D Tensors with shape [batch_size x output_size] containing the generated outputs.

        network = tf.stack(
            model_outputs, axis=1
        )  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]

        with tf.name_scope(
                "TargetsData"
        ):  # placeholder for target variable (i.e. trainY input)
            targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                     dtype=tf.int32,
                                     name="Y")

        network = tflearn.regression(network,
                                     placeholder=targetY,
                                     optimizer='adam',
                                     learning_rate=learning_rate,
                                     loss=self.sequence_loss,
                                     metric=self.accuracy,
                                     name="Y")

        model = tflearn.DNN(network,
                            tensorboard_verbose=tensorboard_verbose,
                            checkpoint_path=checkpoint_path)
        return model
Пример #17
0
    def create_network(self, in_vocab_size, out_vocab_size, model_name="bidirectional_attention_rnn",
                       in_seq_len=15, out_seq_len=1, num_layers=4, memory_size=128, embedding_size=200, num_heads=4, scope="asdl"):
        """Build a seq2seq network (bidirectional-attention or plain
        embedding-attention) and wrap it in a tflearn DNN.

        Args:
            in_vocab_size / out_vocab_size: vocabulary sizes; the output
                vocabulary gets an extra GO symbol (out_vocab_size + 1).
            model_name: "bidirectional_attention_rnn" or
                "embedding_attention".
            in_seq_len / out_seq_len: sequence lengths.
            num_layers, memory_size: GRU stack depth and width.
            embedding_size, num_heads: embedding dim / attention heads.
            scope: prefix for tensor names and the variable collection.

        Returns:
            A tflearn.DNN wrapping the regression network.

        Raises:
            Exception: if model_name is not one of the supported models.
        """

        GO_VALUE = out_vocab_size + 1

        def get_cell(size):
            return rnn.GRUCell(size)

        def cell_layers(layers=num_layers, mem=memory_size):
            # Fresh GRU cells each call so separate stacks never share
            # cell objects.
            cells = []
            for _ in range(layers):
                cells.append(get_cell(mem))
            return rnn.MultiRNNCell(cells)

        net = tflearn.input_data(shape=[None, in_seq_len], dtype=tf.int32, name=scope+"XY")
        encoder_inputs = tf.slice(net, [0, 0], [-1, in_seq_len], name=scope+"enc_in")

        # transform to list of self.in_seq_len elements, each [-1]
        encoder_inputs = tf.unstack(encoder_inputs, axis=1)

        decoder_inputs = tf.slice(net, [0, 0], [-1, out_seq_len], name=scope+"dec_in")
        # transform to list of self.in_seq_len elements, each [-1]
        decoder_inputs = tf.unstack(decoder_inputs, axis=1)

        # insert GO as first; drop last decoder input
        go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32), GO_VALUE)
        decoder_inputs = [go_input] + decoder_inputs[: out_seq_len - 1]

        n_input_symbols = in_vocab_size + 1
        n_output_symbols = out_vocab_size + 2  # extra "GO" symbol for decoder inputs

        if model_name == "bidirectional_attention_rnn":
            model_outputs, states = self.embedding_attention_bidirectional_seq2seq(encoder_inputs,
                                                                                   decoder_inputs,
                                                                                   cell_layers(),
                                                                                   cell_layers(),
                                                                                   cell_layers(),
                                                                                   num_encoder_symbols=n_input_symbols,
                                                                                   num_decoder_symbols=n_output_symbols,
                                                                                   embedding_size=embedding_size,
                                                                                   num_heads=num_heads,
                                                                                   feed_previous=True,
                                                                                   scope=scope)
        elif model_name == "embedding_attention":
            model_outputs, states = seq2seq.embedding_attention_seq2seq(encoder_inputs,
                                                                        decoder_inputs,
                                                                        cell_layers(),
                                                                        num_encoder_symbols=n_input_symbols,
                                                                        num_decoder_symbols=n_output_symbols,
                                                                        embedding_size=embedding_size,
                                                                        num_heads=num_heads,
                                                                        initial_state_attention=False,
                                                                        feed_previous=True)
        else:
            # Report the value that was actually tested (model_name);
            # self.seq2seq_model may not exist here and would raise an
            # AttributeError that masks the real problem.
            raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' % model_name)

        # for TFLearn to know what to save and restore
        tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + scope + "_seq2seq_model", model_outputs)

        # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
        net = tf.stack(model_outputs, axis=1)

        # placeholder for target variable (i.e. trainY input)
        with tf.name_scope(scope + "TargetsData"):
            targetY = tf.placeholder(shape=[None, out_seq_len], dtype=tf.int32, name=scope+"Y")

        net = tflearn.regression(net, placeholder=targetY,
                                      optimizer='adam',
                                      learning_rate=0.00005,
                                      loss=self.sequence_loss,
                                      metric=self.accuracy,
                                      name=scope+"Y")
        return tflearn.DNN(net)