def get_model(feed_previous=False):
    """Build the attention seq2seq training graph.

    feed_previous: if True, each decoder step consumes the previous step's
    output (inference mode); if False it consumes the ground-truth
    decoder-input placeholders (training mode).

    Returns (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
    update, saver, learning_rate_decay_op, learning_rate).
    """
    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(step))
        for step in range(input_seq_len)
    ]
    # One extra decoder slot so targets can be the inputs shifted left by one.
    decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(step))
        for step in range(output_seq_len + 1)
    ]
    target_weights = [
        tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(step))
        for step in range(output_seq_len)
    ]
    targets = decoder_inputs[1:output_seq_len + 1]

    cell = tf.contrib.rnn.BasicLSTMCell(size)
    # The returned final state is not used here.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Saver over all variables for checkpointing.
    saver = tf.train.Saver(tf.global_variables())
    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
def get_model(feed_previous=False):
    """Build the stacked-LSTM attention seq2seq model graph.

    feed_previous: if True, each decoder step consumes the previous step's
    output (inference); if False it consumes the ground-truth decoder inputs.

    Returns (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
    opt, saver, learning_rate_decay_op, learning_rate).
    """
    learning_rate = tf.Variable(float(init_learning_rate), trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)
    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    for i in range(input_seq_len):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in range(output_seq_len + 1):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
    for i in range(output_seq_len):
        target_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    # Targets are the decoder inputs shifted left by one time step.
    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    dropout = 1  # output_keep_prob; 1 effectively disables dropout
    num_layers = 3  # depth of the stacked LSTM

    def make_cell():
        # BUG FIX: the old code built ONE BasicLSTMCell and stacked it via
        # MultiRNNCell([cell] * num_layers); every layer then pointed at the
        # same cell object, which TF >= 1.1 rejects (or silently forces weight
        # sharing across layers). Each layer now gets its own instance.
        base = tf.contrib.rnn.BasicLSTMCell(size)
        return tf.contrib.rnn.DropoutWrapper(base, output_keep_prob=dropout)

    cell = tf.contrib.rnn.MultiRNNCell([make_cell() for _ in range(num_layers)])

    # The final state is not needed here.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Adam optimizer; the train op directly minimizes the loss.
    opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    # Saver for model persistence.
    saver = tf.train.Saver(tf.global_variables())
    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            opt, saver, learning_rate_decay_op, learning_rate)
def get_model(feed_previous=False):
    """Build the seq2seq graph.

    feed_previous selects where the decoder inputs come from: when False the
    decoder_inputs placeholders (training data) are fed in directly; when
    True each step reuses the previous RNN cell's output, mapped through a
    Wx + b linear transform.
    """
    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(step))
        for step in range(input_seq_len)
    ]
    # One extra slot so that targets can be the decoder inputs shifted left.
    decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(step))
        for step in range(output_seq_len + 1)
    ]
    target_weights = [
        tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(step))
        for step in range(output_seq_len)
    ]
    targets = decoder_inputs[1:output_seq_len + 1]

    cell = tf.contrib.rnn.BasicLSTMCell(size)
    # The returned state is not used.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Plain SGD; the update op applies the computed gradients.
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Checkpointing over every variable.
    saver = tf.train.Saver(tf.global_variables())
    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
def EmbeddingAttentionSeq2SeqNoTuple(enc_inp, dec_inp, feed_previous):
    """Attention seq2seq built on a non-tuple-state (concatenated c/h) LSTM."""
    lstm = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=False)
    return seq2seq_lib.embedding_attention_seq2seq(
        enc_inp,
        dec_inp,
        lstm,
        num_encoder_symbols,
        num_decoder_symbols,
        embedding_size=2,
        feed_previous=feed_previous)
def GRUSeq2Seq(enc_inp, dec_inp):
    """Embedding attention seq2seq over a 2-layer GRU stack."""
    # BUG FIX: previously [core_rnn_cell_impl.GRUCell(24)] * 2 stacked the
    # SAME GRUCell object twice; TF >= 1.1 rejects that (or silently shares
    # weights between layers). Build a distinct cell per layer, matching the
    # sibling GRUSeq2Seq variant in this file.
    cell = core_rnn_cell_impl.MultiRNNCell(
        [core_rnn_cell_impl.GRUCell(24) for _ in range(2)],
        state_is_tuple=True)
    return seq2seq_lib.embedding_attention_seq2seq(
        enc_inp,
        dec_inp,
        cell,
        num_encoder_symbols=classes,
        num_decoder_symbols=classes,
        embedding_size=24)
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
    """Wrap embedding_attention_seq2seq with this model's fixed settings.

    do_decode=True makes the decoder feed on its own previous outputs
    (beam-search / inference); False feeds the supplied decoder inputs.
    """
    return seq2seq.embedding_attention_seq2seq(
        encoder_inputs=encoder_inputs,
        decoder_inputs=decoder_inputs,
        cell=cell,
        num_encoder_symbols=self.vocab_size,
        num_decoder_symbols=self.vocab_size,
        embedding_size=embedding_size,
        output_projection=output_projection,
        feed_previous=do_decode,
        beam_search=beam_search,
        beam_size=beam_size)
def GRUSeq2Seq(enc_inp, dec_inp):
    """Two-layer GRU attention seq2seq with an external output projection."""
    # One distinct GRUCell instance per layer.
    stacked = rnn_cell.MultiRNNCell(
        [rnn_cell.GRUCell(24) for _ in range(2)], state_is_tuple=True)
    return seq2seq_lib.embedding_attention_seq2seq(
        enc_inp,
        dec_inp,
        stacked,
        num_encoder_symbols=classes,
        num_decoder_symbols=classes,
        embedding_size=24,
        output_projection=(w, b))
def get_model():
    """Build the attention seq2seq training graph.

    Returns (encoder_inputs, decoder_inputs, targets_weigh, outputs, loss,
    update, saver).
    """
    # embedding_attention_seq2seq signature, for reference:
    # embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
    #     num_encoder_symbols, num_decoder_symbols, embedding_size,
    #     num_heads=1, output_projection=None, feed_previous=False,
    #     dtype=None, scope=None, initial_state_attention=False)
    encoder_inputs = []
    decoder_inputs = []
    targets_weigh = []
    for i in range(input_seq_len):
        encoder_inputs.append(
            tf.placeholder(shape=[None], dtype=tf.int32,
                           name="encoder{0}".format(i)))
    for i in range(output_seq_len):
        decoder_inputs.append(
            tf.placeholder(shape=[None], dtype=tf.int32,
                           name="decode{0}".format(i)))
    for i in range(output_seq_len):
        targets_weigh.append(
            tf.placeholder(shape=[None], dtype=tf.float32,
                           name="weight{0}".format(i)))
    # Targets are the decoder inputs shifted left by one step, padded with a
    # zero vector at the end.
    targets = [decoder_inputs[i] for i in range(1, output_seq_len)]
    # BUG FIX: the padding used to be np.zeros(shape=[2], dtype=np.int32),
    # which hard-coded a batch size of 2; zeros_like follows the runtime
    # batch size of the placeholders instead.
    targets.append(tf.zeros_like(decoder_inputs[0]))

    cell = tf.nn.rnn_cell.BasicLSTMCell(size)
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs,
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=False,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence.
    loss = seq2seq.sequence_loss(outputs, targets, targets_weigh)
    opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Saver over all variables for checkpointing.
    saver = tf.train.Saver(tf.global_variables())
    return encoder_inputs, decoder_inputs, targets_weigh, outputs, loss, update, saver
def get_model(feed_previous=False):
    """Assemble the attention seq2seq graph using the TF1 compat API.

    feed_previous=True wires each decoder step to the previous step's output
    (inference); False feeds the ground-truth decoder-input placeholders.
    """
    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = [
        tf.compat.v1.placeholder(tf.int32, shape=[None],
                                 name="encoder{0}".format(step))
        for step in range(input_seq_len)
    ]
    # Extra slot so targets can be the decoder inputs shifted left by one.
    decoder_inputs = [
        tf.compat.v1.placeholder(tf.int32, shape=[None],
                                 name="decoder{0}".format(step))
        for step in range(output_seq_len + 1)
    ]
    target_weights = [
        tf.compat.v1.placeholder(tf.float32, shape=[None],
                                 name="weight{0}".format(step))
        for step in range(output_seq_len)
    ]
    targets = decoder_inputs[1:output_seq_len + 1]

    cell = tf.contrib.rnn.BasicLSTMCell(size)
    # The final state is not needed here.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        # output_projection would be a (W, B) tuple: W with shape
        # [output_size x num_decoder_symbols], B with shape
        # [num_decoder_symbols].
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Cross-entropy loss over the sequence.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Gradient-descent optimizer; update applies the computed gradients.
    opt = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Persist the model: save every variable.
    saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables())
    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
def get_model(feed_previous=False):
    """Build the attention seq2seq graph (Adam-trained, LSTMCell variant)."""
    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    # Gentle decay: each invocation multiplies the rate by 0.99.
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.99)

    encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(step))
        for step in range(input_seq_len)
    ]
    # One extra slot so targets can be the decoder inputs shifted left.
    decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(step))
        for step in range(output_seq_len + 1)
    ]
    target_weights = [
        tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(step))
        for step in range(output_seq_len)
    ]
    targets = decoder_inputs[1:output_seq_len + 1]

    cell = tf.contrib.rnn.LSTMCell(size)
    # The final state is not used.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Adam; the update op directly minimizes the loss.
    update = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    # Checkpointing over every variable.
    saver = tf.train.Saver(tf.global_variables())
    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
def get_model(feed_previous=False):
    """Build the attention seq2seq graph (variant without a decaying
    learning-rate variable; uses the module-level learning_rate).

    feed_previous: if True, each decoder step consumes the previous step's
    output (inference); if False it consumes the ground-truth decoder inputs.

    Returns (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
    update, saver).
    """
    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    # PORTABILITY FIX: range() instead of xrange() — xrange does not exist on
    # Python 3, and the sibling builders in this file already use range().
    for i in range(input_seq_len):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in range(output_seq_len + 1):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
    for i in range(output_seq_len):
        target_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    # Targets are the decoder inputs shifted left by one time step.
    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    cell = tf.contrib.rnn.BasicLSTMCell(size)
    # The final state is not needed here.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Gradient-descent optimizer; update applies the computed gradients.
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Saver for model persistence.
    saver = tf.train.Saver(tf.global_variables())
    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, update, saver
def get_model(feed_previous=False):
    """Build the attention seq2seq graph with a decaying learning rate.

    feed_previous: if True, each decoder step consumes the previous step's
    output (inference); if False it consumes the ground-truth decoder inputs.

    Returns (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
    update, saver, learning_rate_decay_op, learning_rate).
    """
    learning_rate = tf.Variable(float(init_learning_rate), trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)
    encoder_inputs = []
    decoder_inputs = []
    target_weights = []
    # PORTABILITY FIX: range() instead of xrange() — xrange does not exist on
    # Python 3, and the sibling builders in this file already use range().
    for i in range(input_seq_len):
        encoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
    for i in range(output_seq_len + 1):
        decoder_inputs.append(
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
    for i in range(output_seq_len):
        target_weights.append(
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
    # Targets are the decoder inputs shifted left by one time step.
    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    cell = tf.contrib.rnn.BasicLSTMCell(size)
    # The final state is not needed here.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the output sequence.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Gradient-descent optimizer; update applies the computed gradients.
    opt = tf.train.GradientDescentOptimizer(learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    # Saver for model persistence.
    saver = tf.train.Saver(tf.global_variables())
    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
def model(self, mode="train", num_layers=1, cell_size=128,
          cell_type="BasicLSTMCell", embedding_size=20, learning_rate=0.001,
          tensorboard_verbose=0, checkpoint_path=None):
    '''
    Build tensor specifying graph of operations for the seq2seq neural network model.

    mode = string, either "train" or "predict"
    cell_type = attribute of rnn_cell specifying which RNN cell type to use
    cell_size = size for the hidden layer in the RNN cell
    num_layers = number of RNN cell layers to use

    Return TFLearn model instance.  Use DNN model for this.
    '''
    assert mode in ["train", "predict"]

    checkpoint_path = checkpoint_path or (
        "%s%ss2s_checkpoint.tfl" %
        (self.data_dir or "", "/" if self.data_dir else ""))
    # unique integer value used to trigger decoder outputs in the seq2seq RNN
    GO_VALUE = self.out_max_int + 1

    tflearn.config.init_graph(seed=None, log_device=False,
                              num_cores=int(cpu_count() * 2 / 3),
                              gpu_memory_fraction=0, soft_placement=True)

    # One input placeholder carries X and Y concatenated along the time axis.
    network = tflearn.input_data(
        shape=[None, self.in_seq_len + self.out_seq_len],
        dtype=tf.int32, name="XY")
    encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len],
                              name="enc_in")  # get encoder inputs
    # transform into list of self.in_seq_len elements, each [-1]
    encoder_inputs = tf.unstack(encoder_inputs, axis=1)
    decoder_inputs = tf.slice(network, [0, self.in_seq_len],
                              [-1, self.out_seq_len],
                              name="dec_in")  # get decoder inputs
    # transform into list of self.out_seq_len elements, each [-1]
    decoder_inputs = tf.unstack(decoder_inputs, axis=1)
    # insert "GO" symbol as the first decoder input; drop the last decoder input
    go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32),
                           GO_VALUE)
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]
    feed_previous = not (mode == "train")

    if self.verbose > 3:
        print("feed_previous = %s" % str(feed_previous))
        print("encoder inputs: %s" % str(encoder_inputs))
        print("decoder inputs: %s" % str(decoder_inputs))
        print("len decoder inputs: %s" % len(decoder_inputs))

    self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

    def make_cell():
        # One fresh cell object per layer (see the BUG FIX note below).
        return getattr(tf.nn.rnn_cell, cell_type)(cell_size,
                                                  state_is_tuple=True)

    # (log message typo fixed: "later" -> "layer")
    if num_layers == 1:
        print("rnn net layer number:{}".format(num_layers))
        cell = make_cell()
    else:
        print("rnn net layer number:{}".format(num_layers))
        # BUG FIX: previously [single_cell] * num_layers stacked the SAME cell
        # object, which TF >= 1.1 rejects (or silently shares weights between
        # layers); each layer now gets its own instance.
        cell = rnn_cell.MultiRNNCell([make_cell() for _ in range(num_layers)])

    if self.seq2seq_model == "embedding_rnn":
        # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                        self.seq2seq_model)

    # for TFLearn to know what to save and restore
    tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
                         model_outputs)
    # model_outputs: list of the same length as decoder_inputs of 2D Tensors
    # with shape [batch_size x output_size] containing the generated outputs.
    if self.verbose > 2:
        print("model outputs: %s" % model_outputs)
    # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
    network = tf.stack(model_outputs, axis=1)
    if self.verbose > 2:
        print("packed model outputs: %s" % network)

    if self.verbose > 3:
        all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
        print("all_vars = %s" % all_vars)

    # placeholder for target variable (i.e. trainY input)
    with tf.name_scope("TargetsData"):
        targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                 dtype=tf.int32, name="Y")

    network = tflearn.regression(network,
                                 placeholder=targetY,
                                 optimizer='adam',
                                 learning_rate=learning_rate,
                                 loss=self.sequence_loss,
                                 metric=self.accuracy,
                                 name="Y")

    model = tflearn.DNN(network, tensorboard_verbose=tensorboard_verbose,
                        checkpoint_path=checkpoint_path)
    return model
def create_network(self):
    """Build the TFLearn attention seq2seq network; stores it on self.net
    and wraps it in a tflearn.DNN on self.model."""
    self.seq2seq_model = "embedding_attention"
    # mode is fixed here, so feed_previous (below) is always False.
    mode = "train"
    # Unique integer used as the decoder "GO" trigger symbol.
    GO_VALUE = self.out_max_int + 1
    self.net = tflearn.input_data(shape=[None, self.in_seq_len],
                                  dtype=tf.int32, name="XY")
    encoder_inputs = tf.slice(self.net, [0, 0], [-1, self.in_seq_len],
                              name="enc_in")  # get encoder inputs
    # transform to list of self.in_seq_len elements, each [-1]
    encoder_inputs = tf.unstack(encoder_inputs, axis=1)
    # NOTE(review): the decoder slice also starts at [0, 0], i.e. it overlaps
    # the encoder columns (the input tensor is only in_seq_len wide, with no
    # separate Y region). Other variants of this code slice the decoder at
    # [0, in_seq_len] from an X+Y tensor — confirm this overlap is intentional.
    decoder_inputs = tf.slice(self.net, [0, 0], [-1, self.out_seq_len],
                              name="dec_in")
    # transform into list of self.out_seq_len elements
    decoder_inputs = tf.unstack(decoder_inputs, axis=1)
    go_input = tf.multiply(tf.ones_like(decoder_inputs[0], dtype=tf.int32),
                           GO_VALUE)
    # insert GO as first; drop last decoder input
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]
    feed_previous = not (mode == "train")
    self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs
    # Eight stacked GRU layers of width 128 (each a distinct cell instance).
    cell = rnn.MultiRNNCell([
        rnn.GRUCell(128),
        rnn.GRUCell(128),
        rnn.GRUCell(128),
        rnn.GRUCell(128),
        rnn.GRUCell(128),
        rnn.GRUCell(128),
        rnn.GRUCell(128),
        rnn.GRUCell(128)
    ])
    if self.seq2seq_model == "embedding_rnn":
        # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=200,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=200,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                        self.seq2seq_model)
    # for TFLearn to know what to save and restore
    tf.add_to_collection(tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
                         model_outputs)
    # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
    self.net = tf.stack(model_outputs, axis=1)
    # placeholder for target variable (i.e. trainY input)
    with tf.name_scope("TargetsData"):
        targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                 dtype=tf.int32, name="Y")
    self.net = tflearn.regression(self.net,
                                  placeholder=targetY,
                                  optimizer='adam',
                                  learning_rate=0.00005,
                                  loss=self.sequence_loss,
                                  metric=self.accuracy,
                                  name="Y")
    self.model = tflearn.DNN(self.net)
def model(self, mode="train", num_layers=1, cell_size=128,
          cell_type="BasicLSTMCell", embedding_size=20, learning_rate=0.001,
          tensorboard_verbose=0, checkpoint_path=None):
    '''
    Build tensor specifying graph of operations for the seq2seq neural network model.

    mode = string, either "train" or "predict"
    cell_type = attribute of rnn_cell specifying which RNN cell type to use
    cell_size = size for the hidden layer in the RNN cell
    num_layers = number of RNN cell layers to use

    Return TFLearn model instance.  Use DNN model for this.
    '''
    assert mode in ["train", "predict"]

    checkpoint_path = checkpoint_path or (
        "%s%ss2s_checkpoint.tfl" %
        (self.data_dir or "", "/" if self.data_dir else ""))
    GO_VALUE = self.out_max_int + 1  # unique integer value used to trigger decoder outputs in the seq2seq RNN

    tflearn.config.init_graph(seed=None, log_device=False,
                              num_cores=int(cpu_count() * 2 / 3),
                              gpu_memory_fraction=0, soft_placement=True)

    # Single placeholder with X and Y concatenated along the time axis.
    network = tflearn.input_data(
        shape=[None, self.in_seq_len + self.out_seq_len],
        dtype=tf.int32, name="XY")
    encoder_inputs = tf.slice(network, [0, 0], [-1, self.in_seq_len],
                              name="enc_in")  # get encoder inputs
    encoder_inputs = tf.unstack(encoder_inputs, axis=1)  # transform into list of self.in_seq_len elements, each [-1]
    decoder_inputs = tf.slice(network, [0, self.in_seq_len],
                              [-1, self.out_seq_len],
                              name="dec_in")  # get decoder inputs
    decoder_inputs = tf.unstack(decoder_inputs, axis=1)  # transform into list of self.out_seq_len elements, each [-1]
    go_input = tf.multiply(
        tf.ones_like(decoder_inputs[0], dtype=tf.int32),
        GO_VALUE)  # insert "GO" symbol as the first decoder input; drop the last decoder input
    decoder_inputs = [go_input] + decoder_inputs[:self.out_seq_len - 1]  # insert GO as first; drop last decoder input
    feed_previous = not (mode == "train")

    if self.verbose > 3:
        print("feed_previous = %s" % str(feed_previous))
        print("encoder inputs: %s" % str(encoder_inputs))
        print("decoder inputs: %s" % str(decoder_inputs))
        print("len decoder inputs: %s" % len(decoder_inputs))

    self.n_input_symbols = self.in_max_int + 1  # default is integers from 0 to 9
    self.n_output_symbols = self.out_max_int + 2  # extra "GO" symbol for decoder inputs

    def make_cell():
        # Fresh cell instance per layer (see BUG FIX note below).
        return getattr(tf.nn.rnn_cell, cell_type)(cell_size,
                                                  state_is_tuple=True)

    # (log message typo fixed: "later" -> "layer")
    if num_layers == 1:
        print("rnn net layer number:{}".format(num_layers))
        cell = make_cell()
    else:
        print("rnn net layer number:{}".format(num_layers))
        # BUG FIX: previously [single_cell] * num_layers stacked the SAME cell
        # object, which TF >= 1.1 rejects (or silently shares weights between
        # layers); each layer now gets its own instance.
        cell = rnn_cell.MultiRNNCell([make_cell() for _ in range(num_layers)])

    if self.seq2seq_model == "embedding_rnn":
        model_outputs, states = seq2seq.embedding_rnn_seq2seq(
            encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            feed_previous=feed_previous)
    elif self.seq2seq_model == "embedding_attention":
        model_outputs, states = seq2seq.embedding_attention_seq2seq(
            encoder_inputs,  # encoder_inputs: A list of 2D Tensors [batch_size, input_size].
            decoder_inputs,
            cell,
            num_encoder_symbols=self.n_input_symbols,
            num_decoder_symbols=self.n_output_symbols,
            embedding_size=embedding_size,
            num_heads=1,
            initial_state_attention=False,
            feed_previous=feed_previous)
    else:
        raise Exception('[TFLearnSeq2Seq] Unknown seq2seq model %s' %
                        self.seq2seq_model)

    tf.add_to_collection(
        tf.GraphKeys.LAYER_VARIABLES + '/' + "seq2seq_model",
        model_outputs)  # for TFLearn to know what to save and restore
    # model_outputs: list of the same length as decoder_inputs of 2D Tensors
    # with shape [batch_size x output_size] containing the generated outputs.
    if self.verbose > 2:
        print("model outputs: %s" % model_outputs)
    network = tf.stack(model_outputs, axis=1)  # shape [-1, n_decoder_inputs (= self.out_seq_len), num_decoder_symbols]
    if self.verbose > 2:
        print("packed model outputs: %s" % network)

    if self.verbose > 3:
        all_vars = tf.get_collection(tf.GraphKeys.VARIABLES)
        print("all_vars = %s" % all_vars)

    with tf.name_scope("TargetsData"):  # placeholder for target variable (i.e. trainY input)
        targetY = tf.placeholder(shape=[None, self.out_seq_len],
                                 dtype=tf.int32, name="Y")

    network = tflearn.regression(network,
                                 placeholder=targetY,
                                 optimizer='adam',
                                 learning_rate=learning_rate,
                                 loss=self.sequence_loss,
                                 metric=self.accuracy,
                                 name="Y")

    model = tflearn.DNN(network, tensorboard_verbose=tensorboard_verbose,
                        checkpoint_path=checkpoint_path)
    return model
def testEmbeddingAttentionSeq2Seq(self):
    """Smoke-test embedding_attention_seq2seq with a tuple-state LSTM, a
    non-tuple-state LSTM, and an externally supplied output projection."""
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            # 2 encoder steps and 3 decoder steps, batch size 2.
            enc_inp = [
                constant_op.constant(1, dtypes.int32, shape=[2])
                for i in range(2)
            ]
            dec_inp = [
                constant_op.constant(i, dtypes.int32, shape=[2])
                for i in range(3)
            ]
            cell_fn = lambda: core_rnn_cell_impl.BasicLSTMCell(2)
            cell = cell_fn()
            dec, mem = seq2seq_lib.embedding_attention_seq2seq(
                enc_inp,
                dec_inp,
                cell,
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            # One [batch, num_decoder_symbols] logit tensor per decoder step.
            self.assertEqual(3, len(res))
            self.assertEqual((2, 5), res[0].shape)

            res = sess.run([mem])
            # Tuple LSTM state: separate c and h, each [batch, cell_size].
            self.assertEqual((2, 2), res[0].c.shape)
            self.assertEqual((2, 2), res[0].h.shape)

            # Test with state_is_tuple=False.
            with variable_scope.variable_scope("no_tuple"):
                cell_fn = functools.partial(
                    core_rnn_cell_impl.BasicLSTMCell, 2, state_is_tuple=False)
                cell_nt = cell_fn()
                dec, mem = seq2seq_lib.embedding_attention_seq2seq(
                    enc_inp,
                    dec_inp,
                    cell_nt,
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2)
                sess.run([variables.global_variables_initializer()])
                res = sess.run(dec)
                self.assertEqual(3, len(res))
                self.assertEqual((2, 5), res[0].shape)

                res = sess.run([mem])
                # Non-tuple state: c and h concatenated into [batch, 2*cell_size].
                self.assertEqual((2, 4), res[0].shape)

            # Test externally provided output projection.
            w = variable_scope.get_variable("proj_w", [2, 5])
            b = variable_scope.get_variable("proj_b", [5])
            with variable_scope.variable_scope("proj_seq2seq"):
                dec, _ = seq2seq_lib.embedding_attention_seq2seq(
                    enc_inp,
                    dec_inp,
                    cell_fn(),
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2,
                    output_projection=(w, b))
                sess.run([variables.global_variables_initializer()])
                res = sess.run(dec)
                self.assertEqual(3, len(res))
                # With a projection, raw outputs stay at cell size (2), not 5.
                self.assertEqual((2, 2), res[0].shape)
# add one more target targets.append(tf.placeholder(dtype = tf.int32, shape = [None], name = 'last_target')) target_weights = [tf.placeholder(dtype = tf.float32, shape = [None], name = 'target_w{}'.format(i)) for i in range(output_seq_len)] # output projection size = 512 w_t = tf.get_variable('proj_w', [en_vocab_size, size], tf.float32) b = tf.get_variable('proj_b', [en_vocab_size], tf.float32) w = tf.transpose(w_t) output_projection = (w, b) outputs, states = seq2seq_lib.embedding_attention_seq2seq( encoder_inputs, decoder_inputs, BasicLSTMCell(size), num_encoder_symbols = zh_vocab_size, num_decoder_symbols = en_vocab_size, embedding_size = 100, feed_previous = False, output_projection = output_projection, dtype = tf.float32) # define our loss function def sampled_loss(labels, logits): return tf.nn.sampled_softmax_loss( weights = w_t, biases = b, labels = tf.reshape(labels, [-1, 1]), inputs = logits, num_sampled = 512, num_classes = en_vocab_size)
def testEmbeddingAttentionSeq2Seq(self):
    """Smoke-test embedding_attention_seq2seq: tuple/non-tuple LSTM state,
    external output projection, and feed_previous decoding behavior."""
    with self.test_session() as sess:
        with variable_scope.variable_scope(
                "root", initializer=init_ops.constant_initializer(0.5)):
            # 2 encoder steps and 3 decoder steps, batch size 2.
            enc_inp = [
                constant_op.constant(1, dtypes.int32, shape=[2])
                for i in range(2)
            ]
            dec_inp = [
                constant_op.constant(i, dtypes.int32, shape=[2])
                for i in range(3)
            ]
            cell = core_rnn_cell_impl.BasicLSTMCell(2, state_is_tuple=True)
            dec, mem = seq2seq_lib.embedding_attention_seq2seq(
                enc_inp,
                dec_inp,
                cell,
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2)
            sess.run([variables.global_variables_initializer()])
            res = sess.run(dec)
            # One [batch, num_decoder_symbols] logit tensor per decoder step.
            self.assertEqual(3, len(res))
            self.assertEqual((2, 5), res[0].shape)

            res = sess.run([mem])
            # Tuple LSTM state: separate c and h, each [batch, cell_size].
            self.assertEqual((2, 2), res[0].c.shape)
            self.assertEqual((2, 2), res[0].h.shape)

            # Test with state_is_tuple=False.
            with variable_scope.variable_scope("no_tuple"):
                cell = core_rnn_cell_impl.BasicLSTMCell(2,
                                                        state_is_tuple=False)
                dec, mem = seq2seq_lib.embedding_attention_seq2seq(
                    enc_inp,
                    dec_inp,
                    cell,
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2)
                sess.run([variables.global_variables_initializer()])
                res = sess.run(dec)
                self.assertEqual(3, len(res))
                self.assertEqual((2, 5), res[0].shape)

                res = sess.run([mem])
                # Non-tuple state: c and h concatenated into [batch, 2*cell_size].
                self.assertEqual((2, 4), res[0].shape)

            # Test externally provided output projection.
            w = variable_scope.get_variable("proj_w", [2, 5])
            b = variable_scope.get_variable("proj_b", [5])
            with variable_scope.variable_scope("proj_seq2seq"):
                dec, _ = seq2seq_lib.embedding_attention_seq2seq(
                    enc_inp,
                    dec_inp,
                    cell,
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2,
                    output_projection=(w, b))
                sess.run([variables.global_variables_initializer()])
                res = sess.run(dec)
                self.assertEqual(3, len(res))
                # With a projection, raw outputs stay at cell size (2), not 5.
                self.assertEqual((2, 2), res[0].shape)

            # Test that previous-feeding model ignores inputs after the first.
            dec_inp2 = [
                constant_op.constant(0, dtypes.int32, shape=[2])
                for _ in range(3)
            ]
            with variable_scope.variable_scope("other"):
                d3, _ = seq2seq_lib.embedding_attention_seq2seq(
                    enc_inp,
                    dec_inp2,
                    cell,
                    num_encoder_symbols=2,
                    num_decoder_symbols=5,
                    embedding_size=2,
                    feed_previous=constant_op.constant(True))
            sess.run([variables.global_variables_initializer()])
            variable_scope.get_variable_scope().reuse_variables()
            d1, _ = seq2seq_lib.embedding_attention_seq2seq(
                enc_inp,
                dec_inp,
                cell,
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2,
                feed_previous=True)
            d2, _ = seq2seq_lib.embedding_attention_seq2seq(
                enc_inp,
                dec_inp2,
                cell,
                num_encoder_symbols=2,
                num_decoder_symbols=5,
                embedding_size=2,
                feed_previous=True)
            res1 = sess.run(d1)
            res2 = sess.run(d2)
            res3 = sess.run(d3)
            # With feed_previous, only the first decoder input matters, so
            # different decoder inputs must produce identical outputs.
            self.assertAllClose(res1, res2)
            self.assertAllClose(res1, res3)