def stacked_rnn(inputs, hidden_sizes, cell_fn, scope=None, dtype=dtypes.float32, reuse=False):
    """Stack one RNN cell per entry of `hidden_sizes` and unroll it with `dynamic_rnn`.

    Each layer's cell is created by `cell_fn(hidden_size)`. When a layer's size
    differs from the size of the layer that follows it, the cell is wrapped in
    an `OutputProjectionWrapper` so that its output has the next layer's size.
    The last layer is never projected.

    Args:
        inputs: Input tensor passed to `dynamic_rnn` with `time_major=False`.
        hidden_sizes: Non-empty sequence of layer sizes. Any sequence type is
            accepted (list, tuple, array); it is copied into a list first.
        cell_fn: Callable mapping a hidden size to an RNN cell.
        scope: Optional variable scope; defaults to "stacked_rnn".
        dtype: dtype used for the initial-state placeholders and `dynamic_rnn`.
        reuse: Forwarded to `variable_scope`.

    Returns:
        A tuple `(outputs, states, initial_states, zero_state_fn)` where
        `outputs` and `states` come from `dynamic_rnn`, `initial_states` is the
        tuple of per-layer values returned by
        `create_initial_state_placeholder`, and `zero_state_fn` is the bound
        `zero_state` method of the stacked `MultiRNNCell`.

    Raises:
        IndexError: If `hidden_sizes` is empty.
    """
    with variable_scope.variable_scope(scope or "stacked_rnn", reuse=reuse) as varscope:
        # Create a new scope in which the caching device is either
        # determined by the parent scope, or is set to place the cached
        # Variable using the same placement as for the rest of the RNN.
        if not context.executing_eagerly():
            if varscope.caching_device is None:
                varscope.set_caching_device(lambda op: op.device)

        # Copy into a real list: `tuple + list` raises TypeError and
        # `ndarray + list` would silently add element-wise instead of append.
        sizes = list(hidden_sizes)
        # Pair every layer with the size of its successor; the last layer is
        # paired with its own size so it never gets a projection.
        next_sizes = sizes[1:] + [sizes[-1]]

        cells = []
        for hidden_size, next_size in zip(sizes, next_sizes):
            cell = cell_fn(hidden_size)
            if hidden_size != next_size:
                cell = rnn.OutputProjectionWrapper(cell, next_size)
            cells.append(cell)

        initial_states = tuple(
            create_initial_state_placeholder(cell.state_size, dtype)
            for cell in cells)
        multi_cell = rnn.MultiRNNCell(cells)
        outputs, states = rnn_ops.dynamic_rnn(multi_cell,
                                              inputs,
                                              initial_state=initial_states,
                                              dtype=dtype,
                                              time_major=False)
        return outputs, states, initial_states, multi_cell.zero_state
def rnn_model(self):
    """Build the language-model graph and return the vocabulary logits.

    Reads `self.n_units`, `self.n_layers`, `self.vocab_size` and `self.inputs`.
    The batch and time dimensions of the embedded inputs must be statically
    known, because they are converted with `int(...)` below.

    Returns:
        The logits tensor produced by `outputs @ w + b`, whose last dimension
        is `self.vocab_size`.
    """
    # One cell object per layer. Repeating a single instance
    # (`[cell] * n_layers`) hands the same object to every layer, which
    # TensorFlow either rejects or turns into weights shared across layers,
    # depending on the version.
    layer_cells = [
        rnn.BasicLSTMCell(num_units=self.n_units)
        for _ in range(self.n_layers)
    ]
    multi_cell = rnn.MultiRNNCell(layer_cells)
    # Only one output value per step is needed (the next word index), so the
    # stacked cell's output is projected down to size 1.
    cell_wrapped = rnn.OutputProjectionWrapper(multi_cell, output_size=1)

    # Input embedding, uniformly initialised in [-1, 1).
    embedding = tf.Variable(initial_value=tf.random_uniform(
        [self.vocab_size, self.n_units], -1.0, 1.0))
    inputs = tf.nn.embedding_lookup(embedding, self.inputs)

    outputs, states = tf.nn.dynamic_rnn(cell_wrapped,
                                        inputs=inputs,
                                        dtype=tf.float32)
    # Drop the trailing size-1 projection axis: one value per (batch, step).
    outputs = tf.reshape(
        outputs, [int(outputs.get_shape()[0]),
                  int(inputs.get_shape()[1])])

    w = tf.Variable(
        tf.truncated_normal([int(inputs.get_shape()[1]), self.vocab_size]))
    b = tf.Variable(tf.zeros([self.vocab_size]))
    logits = tf.nn.bias_add(tf.matmul(outputs, w), b)
    return logits
def embedding_rnn_seq2seq(encoder_inputs,
                          decoder_inputs,
                          cell,
                          num_encoder_symbols,
                          num_decoder_symbols,
                          embedding_size,
                          output_projection=None,
                          feed_previous=False,
                          dtype=None,
                          scope=None):
    """Embedding RNN sequence-to-sequence model.

    Encodes `encoder_inputs` with `cell` wrapped in an `EmbeddingWrapper`,
    then runs `embedding_rnn_decoder` on `decoder_inputs`, starting from the
    final encoder state.

    Args:
        encoder_inputs: List of encoder input tensors fed to `static_rnn`.
        decoder_inputs: List of decoder input tensors.
        cell: RNN cell used for the encoder and (optionally projected) decoder.
        num_encoder_symbols: Number of embedding classes on the encoder side.
        num_decoder_symbols: Number of symbols on the decoder side.
        embedding_size: Size of the embedding vectors.
        output_projection: None or a projection handed to the decoder. When
            None, the decoder cell is wrapped in an `OutputProjectionWrapper`
            of size `num_decoder_symbols`.
        feed_previous: Python bool or boolean Tensor. A Tensor builds both
            decoder variants and selects between them with `cond`.
        dtype: Optional dtype; when None the scope's dtype is used.
        scope: Optional variable scope; defaults to "embedding_rnn_seq2seq".

    Returns:
        A tuple `(outputs, state)` from the decoder.
    """
    with variable_scope.variable_scope(scope or "embedding_rnn_seq2seq") as scope:
        if dtype is not None:
            scope.set_dtype(dtype)
        else:
            dtype = scope.dtype

        # Encoder
        encoder_cell = rnn.EmbeddingWrapper(cell,
                                            embedding_classes=num_encoder_symbols,
                                            embedding_size=embedding_size)
        _, encoder_state = rnn.static_rnn(encoder_cell, encoder_inputs, dtype=dtype)

        # Decoder
        if output_projection is None:
            cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)

        if isinstance(feed_previous, bool):
            return embedding_rnn_decoder(decoder_inputs,
                                         encoder_state,
                                         cell,
                                         num_decoder_symbols,
                                         embedding_size,
                                         output_projection=output_projection,
                                         feed_previous=feed_previous)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            # The first branch (True) creates the variables; the second reuses them.
            reuse = None if feed_previous_bool else True
            # `get_variable_scope` is a function of the variable_scope module,
            # not an attribute of the `variable_scope` context manager.
            with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                               reuse=reuse):
                outputs, state = embedding_rnn_decoder(
                    decoder_inputs,
                    encoder_state,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False)
                # `cond` needs a flat list of tensors, so flatten nested state.
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        # Restore the nested structure, using the encoder state as template.
        if nest.is_sequence(encoder_state):
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
def rnn_segment(features, targets, mode, params):
    """Model function for an LSTM sequence tagger.

    Args:
        features: Dict with 'seq_feature' (integer ids looked up in the
            embedding table) and 'seq_length' (per-example lengths).
        targets: Integer class labels compared against the predictions.
        mode: A `ModeKeys` value; dropout is only applied for `ModeKeys.TRAIN`.
        params: Dict read for 'num_char', 'emb_size', 'k', 'rnn_units',
            'input_keep_prob', 'output_keep_prob', 'num_class',
            'learning_rate' and 'grad_clip'.

    Returns:
        A `learn.ModelFnOps` with the predicted classes, the masked sequence
        loss, the training op and streaming precision/recall metrics.
    """
    seq_feature = features['seq_feature']
    seq_length = features['seq_length']

    with tf.variable_scope("emb"):
        embeddings = tf.get_variable(
            "char_emb", shape=[params['num_char'], params['emb_size']])
        seq_emb = tf.nn.embedding_lookup(embeddings, seq_feature)

    batch_size = tf.shape(seq_feature)[0]
    time_step = tf.shape(seq_feature)[1]
    # Merge the (k + 1) embeddings of each step into one feature vector.
    # presumably seq_feature is [batch, time, k + 1] — TODO confirm
    flat_seq_emb = tf.reshape(
        seq_emb,
        shape=[batch_size, time_step, (params['k'] + 1) * params['emb_size']])

    cell = rnn.LSTMCell(params['rnn_units'])
    if mode == ModeKeys.TRAIN:
        cell = rnn.DropoutWrapper(cell, params['input_keep_prob'],
                                  params['output_keep_prob'])
    projection_cell = rnn.OutputProjectionWrapper(cell, params['num_class'])
    logits, _ = tf.nn.dynamic_rnn(projection_cell,
                                  flat_seq_emb,
                                  sequence_length=seq_length,
                                  dtype=tf.float32)

    # Pin the mask width to the padded time dimension. Without `maxlen` the
    # mask is only as wide as the longest sequence in the batch, which no
    # longer matches the logits when the batch is padded beyond that length.
    weight_mask = tf.to_float(tf.sequence_mask(seq_length, maxlen=time_step))
    loss = seq2seq.sequence_loss(logits, targets, weights=weight_mask)
    train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer=tf.train.AdamOptimizer,
        clip_gradients=params['grad_clip'],
        summaries=[
            "learning_rate",
            "loss",
            "gradients",
            "gradient_norm",
        ])

    pred_classes = tf.to_int32(tf.argmax(input=logits, axis=2))
    # Classes 0 and 3 form the positive set for precision/recall.
    # NOTE(review): presumably these are the word-boundary tags — confirm.
    pred_words = tf.logical_or(tf.equal(pred_classes, 0),
                               tf.equal(pred_classes, 3))
    target_words = tf.logical_or(tf.equal(targets, 0), tf.equal(targets, 3))
    precision = metrics.streaming_precision(pred_words,
                                            target_words,
                                            weights=weight_mask)
    recall = metrics.streaming_recall(pred_words,
                                      target_words,
                                      weights=weight_mask)

    predictions = {"classes": pred_classes}
    eval_metric_ops = {"precision": precision, "recall": recall}
    return learn.ModelFnOps(mode,
                            predictions,
                            loss,
                            train_op,
                            eval_metric_ops=eval_metric_ops)
def rnn_model(self):
    """Build the language-model graph and return `(logits, states)`.

    Reads `self.n_units`, `self.n_layers`, `self.vocab_size`, `self.inputs`
    and `self.labels`. The batch and time dimensions of the embedded inputs
    must be statically known, because they are converted with `int(...)`.

    Returns:
        A tuple `(logits, states)`: the vocabulary logits and the final RNN
        state returned by `dynamic_rnn`.
    """
    # One BasicLSTMCell per layer. Repeating a single instance
    # (`[cell] * n_layers`) hands the same object to every layer, which
    # TensorFlow either rejects or turns into weights shared across layers,
    # depending on the version.
    layer_cells = [
        rnn.BasicLSTMCell(num_units=self.n_units)
        for _ in range(self.n_layers)
    ]
    # MultiRNNCell stacks the cells: each layer's output feeds the next layer.
    multi_cell = rnn.MultiRNNCell(layer_cells)
    # Only one output value per step is needed (the next word index), so the
    # stacked cell's output is projected down to size 1.
    cell_wrapped = rnn.OutputProjectionWrapper(multi_cell, output_size=1)

    # Input embedding: a [vocab_size, n_units] matrix drawn uniformly from
    # [-1, 1).
    embedding = tf.Variable(initial_value=tf.random_uniform(
        [self.vocab_size, self.n_units], -1.0, 1.0))
    # Pick the embedding rows addressed by the ids in self.inputs.
    inputs = tf.nn.embedding_lookup(embedding, self.inputs)

    # An explicit zero initial state is passed to dynamic_rnn; the original
    # author reported worse results without it (cause not investigated).
    if self.labels is not None:
        # Labels present: use the static batch size of the inputs.
        initial_state = cell_wrapped.zero_state(int(inputs.get_shape()[0]),
                                                tf.float32)
    else:
        # No labels: a batch of one.
        initial_state = cell_wrapped.zero_state(1, tf.float32)

    outputs, states = tf.nn.dynamic_rnn(cell_wrapped,
                                        inputs=inputs,
                                        dtype=tf.float32,
                                        initial_state=initial_state)
    # Drop the trailing size-1 projection axis: one value per (batch, step).
    outputs = tf.reshape(
        outputs, [int(outputs.get_shape()[0]),
                  int(inputs.get_shape()[1])])

    # Output layer weights, initialised from a truncated normal distribution.
    w = tf.Variable(
        tf.truncated_normal([int(inputs.get_shape()[1]), self.vocab_size]))
    b = tf.Variable(tf.zeros([self.vocab_size]))
    logits = tf.nn.bias_add(tf.matmul(outputs, w), b)
    return logits, states
def __init__(self, desynth, coding_size, neuron_count):
    """Run `desynth` through a projected, dropout-wrapped LSTM.

    Args:
        desynth: Input tensor unrolled by `tf.nn.dynamic_rnn`.
        coding_size: Output size of the projection wrapper.
        neuron_count: Number of LSTM units.

    Sets `self.outputs` and `self.states` to the two values returned by
    `dynamic_rnn`.
    """
    lstm = rnn.LSTMCell(
        num_units=neuron_count,
        initializer=tf.variance_scaling_initializer(),
        activation=tf.nn.elu,
    )
    # Input dropout uses a fixed keep probability of 0.7.
    lstm_with_dropout = rnn.DropoutWrapper(lstm, input_keep_prob=0.7)
    projected_cell = rnn.OutputProjectionWrapper(
        lstm_with_dropout,
        output_size=coding_size,
    )
    rnn_outputs, rnn_states = tf.nn.dynamic_rnn(projected_cell,
                                                desynth,
                                                dtype=tf.float32)
    self.outputs = rnn_outputs
    self.states = rnn_states
def testOutputProjectionWrapper(self):
    """Smoke test: a GRUCell(3) projected to 2 outputs yields the expected shapes/values."""
    half_init = init_ops.constant_initializer(0.5)
    with self.test_session() as sess:
        with variable_scope.variable_scope("root", initializer=half_init):
            inputs = array_ops.zeros([1, 3])
            state = array_ops.zeros([1, 3])
            gru = rnn_cell_impl.GRUCell(3)
            projected = contrib_rnn.OutputProjectionWrapper(gru, 2)
            output, next_state = projected(inputs, state)
            sess.run([variables_lib.global_variables_initializer()])
            # The zero tensors are overridden by name through the feed dict.
            feed = {
                inputs.name: np.array([[1., 1., 1.]]),
                state.name: np.array([[0.1, 0.1, 0.1]]),
            }
            output_val, state_val = sess.run([output, next_state], feed)
            # The state keeps the GRU size; only the output is projected.
            self.assertEqual(state_val.shape, (1, 3))
            # The numbers in results were not calculated, this is just a smoke test.
            self.assertAllClose(output_val, [[0.231907, 0.231907]])
def embedding_attention_bidirectional_seq2seq(self,
                                              encoder_inputs,
                                              decoder_inputs,
                                              input_cell1,
                                              input_cell2,
                                              output_cell,
                                              num_encoder_symbols,
                                              num_decoder_symbols,
                                              embedding_size,
                                              num_heads=4,
                                              output_projection=None,
                                              feed_previous=False,
                                              dtype=None,
                                              scope=None,
                                              initial_state_attention=False):
    """Attention seq2seq model with a bidirectional embedding encoder.

    The two input cells are each wrapped in an `EmbeddingWrapper` and run as
    the forward/backward halves of `static_bidirectional_rnn`. The encoder
    outputs become the attention states of `seq2seq.embedding_attention_decoder`.

    `feed_previous` must be a Python bool (asserted).

    Returns:
        Whatever `seq2seq.embedding_attention_decoder` returns.
    """
    default_scope = "embedding_attention_bidirectional_seq2seq"
    with tf.variable_scope(scope or default_scope) as scope:
        # Encoder.
        forward_cell = core_rnn_cell.EmbeddingWrapper(
            input_cell1,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        backward_cell = core_rnn_cell.EmbeddingWrapper(
            input_cell2,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        # NOTE(review): the `dtype` argument is not used; float32 is fixed here.
        bidirectional_result = core_rnn.static_bidirectional_rnn(
            forward_cell, backward_cell, encoder_inputs, dtype=tf.float32)
        encoder_outputs, encoder_state1, encoder_state2 = bidirectional_result

        # First calculate a concatenation of encoder outputs to put attention on.
        top_states = []
        for step_output in encoder_outputs:
            step_shape = [-1, 1, input_cell1.output_size + input_cell2.output_size]
            top_states.append(array_ops.reshape(step_output, step_shape))
        attention_states = array_ops.concat(top_states, 1)

        # Combine the states of both encoders with `+`.
        # NOTE(review): this concatenates only if the states are tuples; for
        # plain tensors `+` is element-wise addition — confirm the intent.
        encoder_state = encoder_state1 + encoder_state2

        # Decoder.
        if output_projection is None:
            output_cell = rnn.OutputProjectionWrapper(output_cell,
                                                      num_decoder_symbols)
            output_size = num_decoder_symbols
        else:
            output_size = None

        assert isinstance(feed_previous, bool)
        return seq2seq.embedding_attention_decoder(
            decoder_inputs,
            encoder_state,
            attention_states,
            output_cell,
            num_decoder_symbols,
            embedding_size,
            num_heads=num_heads,
            output_size=output_size,
            output_projection=output_projection,
            feed_previous=feed_previous,
            initial_state_attention=initial_state_attention)
def _decoder(self, keep_prob, encoder_output, encoder_state, batch_size, scope, helper, reuse=None):
    """Build the attention decoder and return the result of `dynamic_decode`.

    Args:
        keep_prob: Forwarded to `self._cell` for every stacked layer.
        encoder_output: Memory attended over by the Bahdanau attention.
        encoder_state: Cell state the decoder's initial state is cloned with.
        batch_size: Batch size used to build the zero state.
        scope: Variable scope for everything created here.
        helper: seq2seq helper handed to `BasicDecoder`.
        reuse: Forwarded to the variable scope and the projection wrapper.

    Returns:
        The first element of the `seq2seq.dynamic_decode` result.
    """
    with tf.variable_scope(scope, reuse=reuse):
        stacked_layers = []
        for _ in range(self.lstm_dims):
            stacked_layers.append(self._cell(keep_prob))
        base_cell = rnn.MultiRNNCell(stacked_layers)

        # attention
        attention = seq2seq.BahdanauAttention(self.hidden_size, encoder_output)
        attended_cell = seq2seq.AttentionWrapper(
            base_cell,
            attention,
            attention_layer_size=self.hidden_size // 2)
        projected_cell = rnn.OutputProjectionWrapper(
            attended_cell,
            self.hidden_size,
            reuse=reuse,
            activation=tf.nn.leaky_relu)

        # Start from the zero state with its cell state replaced by the
        # encoder's state.
        zero_state = projected_cell.zero_state(batch_size, tf.float32)
        start_state = zero_state.clone(cell_state=encoder_state)

        vocab_layer = tf.layers.Dense(
            self.num_words,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            activation=tf.nn.leaky_relu)
        basic_decoder = seq2seq.BasicDecoder(projected_cell,
                                             helper,
                                             start_state,
                                             output_layer=vocab_layer)
        output, _final_state, _lengths = seq2seq.dynamic_decode(
            basic_decoder,
            maximum_iterations=self.max_sentence_length,
            impute_finished=True)
        # tf.summary.histogram('decoder', output)
        return output
def decoding_layer(self, rnn_inputs, encoder_output, encoder_state):
    """Build the attention decoder and return training and inference logits.

    Args:
        rnn_inputs: Decoder inputs passed to `self.train_decoding_layer`.
        encoder_output: Memory attended over by the Bahdanau attention
            (with `self.in_length` as the third positional argument).
        encoder_state: Encoder state used as the decoder's initial cell state.

    Returns:
        A tuple `(train_logits, inference_logits)`; both decoders share
        variables through the "decode" scope.
    """
    decoder_cell = build_multicell(self.uni_layers, self.cell_size,
                                   self.keep_prob)
    attention_mech = seq2seq.BahdanauAttention(self.cell_size, encoder_output,
                                               self.in_length)
    # Floor division keeps the attention layer size an integer on Python 3.
    attention_cell = seq2seq.AttentionWrapper(decoder_cell, attention_mech,
                                              self.cell_size // 2)
    decoder_cell = rnn.OutputProjectionWrapper(attention_cell,
                                               self.vocab_length)

    initial_state = decoder_cell.zero_state(self.batch_size, tf.float32)
    # `clone` returns a new state and leaves the original untouched, so the
    # result must be re-bound; otherwise the decoder starts from zeros and
    # the encoder state is never used.
    initial_state = initial_state.clone(cell_state=encoder_state)

    with tf.variable_scope("decode"):
        train_logits = self.train_decoding_layer(rnn_inputs, decoder_cell,
                                                 initial_state)
    with tf.variable_scope("decode", reuse=True):
        inference_logits = self.inference_decoding_layer(
            self.embeddings, decoder_cell, initial_state)
    return train_logits, inference_logits
def embedding_tied_rnn_seq2seq(encoder_inputs,
                               decoder_inputs,
                               cell,
                               num_symbols,
                               embedding_size,
                               num_decoder_symbols=None,
                               output_projection=None,
                               feed_previous=False,
                               dtype=None,
                               scope=None):
    """Embedding seq2seq model whose encoder and decoder share one embedding.

    Both input lists are embedded with the same "embedding" variable and then
    passed to `tied_rnn_seq2seq`.

    Args:
        encoder_inputs: List of integer id tensors for the encoder.
        decoder_inputs: List of integer id tensors for the decoder.
        cell: RNN cell shared by encoder and decoder.
        num_symbols: Number of rows of the shared embedding.
        embedding_size: Size of the embedding vectors.
        num_decoder_symbols: Optional output vocabulary size; defaults to
            `num_symbols`.
        output_projection: None or a pair `(W, B)`. When given, `W` must be
            compatible with `[None, num_symbols]` and `B` with
            `[num_symbols]`. When None, the cell is wrapped in an
            `OutputProjectionWrapper` instead.
        feed_previous: Python bool or boolean Tensor. A Tensor builds both
            decoder variants and selects between them with `cond`.
        dtype: dtype for the variable scope.
        scope: Optional variable scope; defaults to
            "embedding_tied_rnn_seq2seq".

    Returns:
        A tuple `(outputs, state)`.
    """
    with variable_scope.variable_scope(scope or "embedding_tied_rnn_seq2seq",
                                       dtype=dtype) as scope:
        dtype = scope.dtype

        # Validate the projection only when one is supplied; None is the
        # default and is handled by the OutputProjectionWrapper below.
        if output_projection is not None:
            proj_weights = ops.convert_to_tensor(output_projection[0], dtype=dtype)
            proj_weights.get_shape().assert_is_compatible_with([None, num_symbols])
            proj_biases = ops.convert_to_tensor(output_projection[1], dtype=dtype)
            proj_biases.get_shape().assert_is_compatible_with([num_symbols])

        # `get_variable` is a function of the variable_scope module, not an
        # attribute of the `variable_scope` context manager.
        embedding = variable_scope.get_variable("embedding",
                                                [num_symbols, embedding_size],
                                                dtype=dtype)

        emb_encoder_inputs = [
            embedding_ops.embedding_lookup(embedding, x) for x in encoder_inputs
        ]
        emb_decoder_inputs = [
            embedding_ops.embedding_lookup(embedding, x) for x in decoder_inputs
        ]

        output_symbols = num_symbols
        if num_decoder_symbols is not None:
            output_symbols = num_decoder_symbols
        if output_projection is None:
            cell = rnn.OutputProjectionWrapper(cell, output_symbols)

        if isinstance(feed_previous, bool):
            loop_function = _extract_argmax_and_embed(
                embedding, output_projection, True) if feed_previous else None
            return tied_rnn_seq2seq(emb_encoder_inputs,
                                    emb_decoder_inputs,
                                    cell,
                                    loop_function=loop_function,
                                    dtype=dtype)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            loop_function = _extract_argmax_and_embed(
                embedding, output_projection,
                False) if feed_previous_bool else None
            # The first branch (True) creates the variables; the second reuses them.
            reuse = None if feed_previous_bool else True
            with variable_scope.variable_scope(variable_scope.get_variable_scope(),
                                               reuse=reuse):
                outputs, state = tied_rnn_seq2seq(emb_encoder_inputs,
                                                  emb_decoder_inputs,
                                                  cell,
                                                  loop_function=loop_function,
                                                  dtype=dtype)
                # `cond` needs a flat list of tensors, so flatten nested state.
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]

        # Calculate zero-state to know its structure.
        static_batch_size = encoder_inputs[0].get_shape()[0]
        for inp in encoder_inputs[1:]:
            static_batch_size.merge_with(inp.get_shape()[0])
        batch_size = static_batch_size.value
        if batch_size is None:
            # Fall back to the dynamic batch size.
            batch_size = array_ops.shape(encoder_inputs[0])[0]
        zero_state = cell.zero_state(batch_size, dtype)
        if nest.is_sequence(zero_state):
            state = nest.pack_sequence_as(structure=zero_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
def _lstmnet(
        features,  # This is batch_features from input_fn
        labels,  # This is batch_labels from input_fn
        mode,  # An instance of tf.estimator.ModeKeys
        params,
        is_test):
    """Build a GRU sequence autoencoder: encoder -> bottleneck -> decoder.

    The encoder runs a stacked GRU over the reshaped input and a fully
    connected ReLU layer maps its last output to the bottleneck. The decoder
    runs a second stacked GRU over the bottleneck tiled along the time axis
    and projects every step back to `params['dimension']`.

    Args:
        features: Features passed to `tf.feature_column.input_layer`.
        labels: Not referenced in this function.
        mode: A `tf.estimator.ModeKeys` value; dropout is active only for
            TRAIN when `is_test` is false.
        params: Dict read for 'pkeep', 'feature_columns', 'sequence_length',
            'dimension', 'encoder_Hin', 'encoder_hidden_layer_size',
            'encoder_hidden_layer_depth', 'parallel_iters',
            'bottleneck_size', 'decoder_Hin', 'decoder_hidden_layer_size'
            and 'decoder_hidden_layer_depth'.
        is_test: When true, both variable scopes reuse existing variables
            and dropout is disabled.

    Returns:
        A tuple `(decoded_Yr, encoded_V)`: the decoder outputs and the
        bottleneck tensor.
    """
    with tf.variable_scope('EncoderNet') as scope:
        if is_test:
            scope.reuse_variables()

        # Keep probability 1.0 disables dropout outside of training.
        if (mode == tf.estimator.ModeKeys.TRAIN and not is_test):
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])

        # NOTE(review): the batch dimension is taken from the static shape;
        # this presumably requires a statically known batch size — confirm.
        X = tf.reshape(x,
                       shape=[
                           x.get_shape()[0], params['sequence_length'],
                           params['dimension']
                       ])
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION]

        encoder_Hin = params['encoder_Hin']
        # encoder_Hin: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS]

        encoder_cells = [
            rnn.GRUBlockCell(params['encoder_hidden_layer_size'])
            for _ in range(params['encoder_hidden_layer_depth'])
        ]

        # "naive dropout" implementation
        encoder_dropcells = [
            rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
            for cell in encoder_cells
        ]
        encoder_multicell = rnn.MultiRNNCell(encoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        encoder_multicell = rnn.DropoutWrapper(encoder_multicell,
                                               output_keep_prob=pkeep)

        encoder_Yr, encoder_H = tf.nn.dynamic_rnn(
            encoder_multicell,
            X,
            dtype=tf.float32,
            initial_state=encoder_Hin,
            scope='EncoderNet',
            parallel_iterations=params['parallel_iters'])
        encoder_H = tf.identity(encoder_H,
                                name='encoder_H')  # just to give it a name
        # encoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTHLEN, ENCODER_INTERNALSIZE ]
        # encoder_H: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS ] # this is the last state in the sequence

        # Select last output.
        # Transpose to time-major so the last time step can be gathered by index.
        encoder_output = tf.transpose(encoder_Yr, [1, 0, 2])
        # encoder_output: [ SEEQLEN, BATCH_SIZE, ENCODER_INTERNALSIZE ]
        last = tf.gather(encoder_output, int(encoder_output.get_shape()[0]) - 1)
        # last: [ BATCH_SIZE , ENCODER_INTERNALSIZE ]

        # Last layer to evaluate INTERNALSIZE LSTM output to bottleneck representation
        bottleneck = layers.fully_connected(last,
                                            params['bottleneck_size'],
                                            activation_fn=tf.nn.relu)
        encoded_V = bottleneck
        # bottleneck: [ BATCH_SIZE, BOTTLENECK_SIZE ]

    with tf.variable_scope('NetDecoder') as scope:
        if is_test:
            scope.reuse_variables()

        # Same dropout rule as in the encoder.
        if (mode == tf.estimator.ModeKeys.TRAIN and not is_test):
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        decoder_Hin = params['decoder_Hin']
        # decoder_Hin: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS]

        # tile bottleneck layer
        # The same bottleneck vector is fed to the decoder at every time step.
        tiled_bottleneck = tf.tile(tf.expand_dims(bottleneck, axis=1),
                                   multiples=[1, params['sequence_length'], 1])
        # bottleneck_tiled: [ BATCH_SIZE, SEQUENCE_LENGTH, BOTTLENECK_SIZE ]

        decoder_cells = [
            rnn.GRUBlockCell(params['decoder_hidden_layer_size'])
            for _ in range(params['decoder_hidden_layer_depth'])
        ]

        # "naive dropout" implementation
        decoder_dropcells = [
            rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
            for cell in decoder_cells
        ]
        decoder_multicell = rnn.MultiRNNCell(decoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)

        # dense layer to adjust dimensions
        decoder_multicell = rnn.OutputProjectionWrapper(
            decoder_multicell, params['dimension'])

        decoded_Yr, decoded_H = tf.nn.dynamic_rnn(
            decoder_multicell,
            tiled_bottleneck,
            dtype=tf.float32,
            initial_state=decoder_Hin,
            scope='NetDecoder',
            parallel_iterations=params['parallel_iters'])
        decoded_H = tf.identity(decoded_H,
                                name='decoded_H')  # just to give it a name
        # decoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTHLEN, DIMENSION ]
        # decoder_H: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ] # this is the last state in the sequence

    return decoded_Yr, encoded_V  # = bottleneck
def embedding_attention_seq2seq(encoder_inputs,
                                decoder_inputs,
                                enc_cell,
                                dec_cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,
                                output_projection=None,
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

    The encoder inputs are embedded by wrapping `enc_cell` in an
    `EmbeddingWrapper` and run through `static_rnn`. The encoder outputs of
    every step are kept and concatenated into the attention states. The
    decoder inputs are then decoded by `embedding_attention_decoder` using
    `dec_cell`, starting from the final encoder state and attending to the
    encoder outputs.

    Warning: when output_projection is None, the size of the attention
    vectors and variables will be made proportional to num_decoder_symbols,
    which can be large.

    Args:
      encoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: A list of 1D int32 Tensors of shape [batch_size].
      enc_cell: RNN cell used by the encoder.
      dec_cell: RNN cell used by the decoder; wrapped in an
        OutputProjectionWrapper when output_projection is None.
      num_encoder_symbols: Integer; number of symbols on the encoder side.
      num_decoder_symbols: Integer; number of symbols on the decoder side.
      embedding_size: Integer, the length of the embedding vector for each
        symbol.
      num_heads: Number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights
        and biases; W has shape [output_size x num_decoder_symbols] and B has
        shape [num_decoder_symbols]; if provided and feed_previous=True, each
        fed previous output will first be multiplied by W and added B.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
        of decoder_inputs will be used (the "GO" symbol), and all other
        decoder inputs will be taken from previous outputs. If False,
        decoder_inputs are used as given (the standard decoder case).
      dtype: The dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_attention_seq2seq".
      initial_state_attention: If False (default), initial attentions are
        zero. If True, initialize the attentions from the initial state and
        attention states.

    Returns:
      A tuple of the form (outputs, state), where:
        outputs: A list of the same length as decoder_inputs of 2D Tensors
          with shape [batch_size x num_decoder_symbols] containing the
          generated outputs.
        state: The state of each decoder cell at the final time-step.
    """
    with tf.variable_scope(scope or "embedding_attention_seq2seq",
                           dtype=dtype) as scope:
        dtype = scope.dtype

        # Encoder.
        encoder_cell = rnn.EmbeddingWrapper(
            enc_cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(encoder_cell,
                                                        encoder_inputs,
                                                        dtype=dtype)

        # Concatenate the per-step encoder outputs along a new time axis;
        # this is what the decoder attends to.
        top_states = []
        for step_output in encoder_outputs:
            top_states.append(
                tf.reshape(step_output, [-1, 1, encoder_cell.output_size]))
        attention_states = tf.concat(top_states, 1)

        # Decoder.
        if output_projection is None:
            dec_cell = rnn.OutputProjectionWrapper(dec_cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        else:
            output_size = None

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                dec_cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention)

        # feed_previous is a Tensor: build both decoder graphs and let `cond`
        # choose between them at run time.
        def build_branch(use_previous):
            # The first branch (True) creates the variables; the second reuses them.
            if use_previous:
                reuse = None
            else:
                reuse = True
            with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
                branch_outputs, branch_state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    dec_cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=use_previous,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention)
                # `cond` needs a flat list of tensors, so flatten nested state.
                if nest.is_sequence(branch_state):
                    flat_state = nest.flatten(branch_state)
                else:
                    flat_state = [branch_state]
                return branch_outputs + flat_state

        outputs_and_state = tf.cond(feed_previous,
                                    lambda: build_branch(True),
                                    lambda: build_branch(False))
        # There are as many outputs as decoder inputs; the rest is state.
        outputs_len = len(decoder_inputs)
        state_list = outputs_and_state[outputs_len:]
        if nest.is_sequence(encoder_state):
            # Restore the nested structure, using the encoder state as template.
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        else:
            state = state_list[0]
        return outputs_and_state[:outputs_len], state
# Train a single-layer GRU classifier on MNIST: each flat image is reshaped to
# NUM_STEPS rows of INPUT_SIZE values and the projected output of the last
# step is used as the class logits.
INPUT_SIZE = 28  # values fed to the RNN per time step
OUTPUT_SIZE = 10  # size of the projected output / label vector
NUM_STEPS = 28  # time steps per example
BATCH_SIZE = 50
LEARNING_RATE = 0.0003
ITERATIONS = 2000  # number of training batches

x = tf.placeholder(dtype=tf.float32,shape=[None,NUM_STEPS * INPUT_SIZE])
# NOTE(review): labels are rank 3 ([batch, 1, OUTPUT_SIZE]) while the logits
# below are rank 2 ([batch, OUTPUT_SIZE]) — confirm this pairing is intended.
y = tf.placeholder(dtype=tf.float32,shape=[None,1,OUTPUT_SIZE])

# Turn each flat input row into a [NUM_STEPS, INPUT_SIZE] sequence.
reshape = tf.reshape(x,shape=[-1,NUM_STEPS,INPUT_SIZE])

cell = rnn.GRUCell(num_units=50,activation=tf.nn.relu)
# Project the 50-unit GRU output down to OUTPUT_SIZE values per step.
cell = rnn.OutputProjectionWrapper(cell,OUTPUT_SIZE)
outputs,states = tf.nn.dynamic_rnn(cell,reshape,dtype=tf.float32)
# Keep only the output of the last time step.
outputs = outputs[:,-1,:]

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=outputs,labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
train = optimizer.minimize(loss)

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for i in range(ITERATIONS) :
        # `mnist` is defined outside this snippet.
        batch_x,batch_y = mnist.train.next_batch(BATCH_SIZE)
        # Add the singleton middle axis expected by the `y` placeholder.
        batch_y = batch_y.reshape([-1,1,OUTPUT_SIZE])
        sess.run(train,feed_dict={x:batch_x,y:batch_y})
def _build_forward(self):
    """Build the forward graph: embeddings, answer-span prediction, question generation.

    Stages, in order:
      1. "emb": optional character-CNN and word embeddings for the context
         (`self.x`/`self.cx`) and the question (`self.q`/`self.cq`),
         optionally followed by a highway network.
      2. "prepro": bidirectional LSTM encodings `u` (question) and `h`
         (context).
      3. "main": attention plus stacked bidirectional LSTMs producing the
         two logit sets stored in `self.logits`/`self.logits2`, with an
         optional no-answer bias when `config.na` is set.
      4. "q_gen": a GRU encoder over the sentence selected from `self.y` and
         an attention decoder (teacher-forced in train mode, beam search
         otherwise) producing `self.gen_q`, `self.gen_q_prob` and
         `self.gen_q_states`.

    Results are stored on `self` and in `self.tensor_dict`; nothing is
    returned.
    """
    config = self.config
    N, M, JX, JQ, VW, VC, d, W = \
        config.batch_size, config.max_num_sents, config.max_sent_size, \
        config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \
        config.max_word_size
    beam_width = config.beam_width
    GO_TOKEN = 0
    EOS_TOKEN = 1
    # JX, JQ and M are replaced by the dynamic sizes of the fed tensors;
    # N stays the configured Python value.
    JX = tf.shape(self.x)[2]
    JQ = tf.shape(self.q)[1]
    M = tf.shape(self.x)[1]
    dc, dw, dco = config.char_emb_size, config.word_emb_size, config.char_out_size

    with tf.variable_scope("emb"):
        if config.use_char_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                char_emb_mat = tf.get_variable("char_emb_mat",
                                               shape=[VC, dc],
                                               dtype='float')

            with tf.variable_scope("char"):
                Acx = tf.nn.embedding_lookup(char_emb_mat,
                                             self.cx)  # [N, M, JX, W, dc]
                Acq = tf.nn.embedding_lookup(char_emb_mat,
                                             self.cq)  # [N, JQ, W, dc]
                Acx = tf.reshape(Acx, [-1, JX, W, dc])
                Acq = tf.reshape(Acq, [-1, JQ, W, dc])

                filter_sizes = list(
                    map(int, config.out_channel_dims.split(',')))
                heights = list(map(int, config.filter_heights.split(',')))
                # The CNN output channels must add up to the char output size.
                assert sum(filter_sizes) == dco, (filter_sizes, dco)
                with tf.variable_scope("conv"):
                    xx = multi_conv1d(Acx,
                                      filter_sizes,
                                      heights,
                                      "VALID",
                                      self.is_train,
                                      config.keep_prob,
                                      scope="xx")
                    if config.share_cnn_weights:
                        # Reuse the context CNN ("xx") for the question.
                        tf.get_variable_scope().reuse_variables()
                        qq = multi_conv1d(Acq,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="xx")
                    else:
                        qq = multi_conv1d(Acq,
                                          filter_sizes,
                                          heights,
                                          "VALID",
                                          self.is_train,
                                          config.keep_prob,
                                          scope="qq")
                    xx = tf.reshape(xx, [-1, M, JX, dco])
                    qq = tf.reshape(qq, [-1, JQ, dco])

        if config.use_word_emb:
            with tf.variable_scope("emb_var"), tf.device("/cpu:0"):
                if config.mode == 'train':
                    word_emb_mat = tf.get_variable(
                        "word_emb_mat",
                        dtype='float',
                        shape=[VW, dw],
                        initializer=get_initializer(config.emb_mat),
                        trainable=True)
                else:
                    word_emb_mat = tf.get_variable("word_emb_mat",
                                                   shape=[VW, dw],
                                                   dtype='float')
                if config.use_glove_for_unk:
                    # Append the extra embedding rows after the trained ones.
                    word_emb_mat = tf.concat(
                        axis=0, values=[word_emb_mat, self.new_emb_mat])

            with tf.name_scope("word"):
                Ax = tf.nn.embedding_lookup(word_emb_mat,
                                            self.x)  # [N, M, JX, d]
                Aq = tf.nn.embedding_lookup(word_emb_mat,
                                            self.q)  # [N, JQ, d]
                self.tensor_dict['x'] = Ax
                self.tensor_dict['q'] = Aq
            if config.use_char_emb:
                xx = tf.concat(axis=3, values=[xx, Ax])  # [N, M, JX, di]
                qq = tf.concat(axis=2, values=[qq, Aq])  # [N, JQ, di]
            else:
                xx = Ax
                qq = Aq

    # highway network
    if config.highway:
        with tf.variable_scope("highway"):
            xx = highway_network(xx,
                                 config.highway_num_layers,
                                 True,
                                 wd=config.wd,
                                 is_train=self.is_train)
            # The question goes through the same highway weights.
            tf.get_variable_scope().reuse_variables()
            qq = highway_network(qq,
                                 config.highway_num_layers,
                                 True,
                                 wd=config.wd,
                                 is_train=self.is_train)

    self.tensor_dict['xx'] = xx
    self.tensor_dict['qq'] = qq

    # Four forward/backward LSTM pairs, each with a dropout-wrapped variant
    # that is switched by self.is_train.
    cell_fw = BasicLSTMCell(d, state_is_tuple=True)
    cell_bw = BasicLSTMCell(d, state_is_tuple=True)
    d_cell_fw = SwitchableDropoutWrapper(
        cell_fw, self.is_train, input_keep_prob=config.input_keep_prob)
    d_cell_bw = SwitchableDropoutWrapper(
        cell_bw, self.is_train, input_keep_prob=config.input_keep_prob)
    cell2_fw = BasicLSTMCell(d, state_is_tuple=True)
    cell2_bw = BasicLSTMCell(d, state_is_tuple=True)
    d_cell2_fw = SwitchableDropoutWrapper(
        cell2_fw, self.is_train, input_keep_prob=config.input_keep_prob)
    d_cell2_bw = SwitchableDropoutWrapper(
        cell2_bw, self.is_train, input_keep_prob=config.input_keep_prob)
    cell3_fw = BasicLSTMCell(d, state_is_tuple=True)
    cell3_bw = BasicLSTMCell(d, state_is_tuple=True)
    d_cell3_fw = SwitchableDropoutWrapper(
        cell3_fw, self.is_train, input_keep_prob=config.input_keep_prob)
    d_cell3_bw = SwitchableDropoutWrapper(
        cell3_bw, self.is_train, input_keep_prob=config.input_keep_prob)
    cell4_fw = BasicLSTMCell(d, state_is_tuple=True)
    cell4_bw = BasicLSTMCell(d, state_is_tuple=True)
    d_cell4_fw = SwitchableDropoutWrapper(
        cell4_fw, self.is_train, input_keep_prob=config.input_keep_prob)
    d_cell4_bw = SwitchableDropoutWrapper(
        cell4_bw, self.is_train, input_keep_prob=config.input_keep_prob)
    # Sequence lengths are recovered by counting the set mask entries.
    x_len = tf.reduce_sum(tf.cast(self.x_mask, 'int32'), 2)  # [N, M]
    q_len = tf.reduce_sum(tf.cast(self.q_mask, 'int32'), 1)  # [N]

    with tf.variable_scope("prepro"):
        (fw_u, bw_u), ((_, fw_u_f), (_, bw_u_f)) = bidirectional_dynamic_rnn(
            d_cell_fw, d_cell_bw, qq, q_len, dtype='float',
            scope='u1')  # [N, J, d], [N, d]
        u = tf.concat(axis=2, values=[fw_u, bw_u])
        if config.share_lstm_weights:
            # Encode the context with the question encoder's weights ('u1').
            tf.get_variable_scope().reuse_variables()
            (fw_h, bw_h), ((_, fw_h_f), (_, bw_h_f)) = bidirectional_dynamic_rnn(
                cell_fw, cell_bw, xx, x_len, dtype='float',
                scope='u1')  # [N, M, JX, 2d]
            h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
        else:
            (fw_h, bw_h), ((_, fw_h_f), (_, bw_h_f)) = bidirectional_dynamic_rnn(
                cell_fw, cell_bw, xx, x_len, dtype='float',
                scope='h1')  # [N, M, JX, 2d]
            h = tf.concat(axis=3, values=[fw_h, bw_h])  # [N, M, JX, 2d]
        self.tensor_dict['u'] = u
        self.tensor_dict['h'] = h

    with tf.variable_scope("main"):
        if config.dynamic_att:
            # Attention is computed inside the RNN cells: tile the question
            # encoding and mask once per sentence.
            p0 = h
            u = tf.reshape(tf.tile(tf.expand_dims(u, 1), [1, M, 1, 1]),
                           [N * M, JQ, 2 * d])
            q_mask = tf.reshape(
                tf.tile(tf.expand_dims(self.q_mask, 1), [1, M, 1]),
                [N * M, JQ])
            first_cell_fw = AttentionCell(
                cell2_fw,
                u,
                mask=q_mask,
                mapper='sim',
                input_keep_prob=self.config.input_keep_prob,
                is_train=self.is_train)
            first_cell_bw = AttentionCell(
                cell2_bw,
                u,
                mask=q_mask,
                mapper='sim',
                input_keep_prob=self.config.input_keep_prob,
                is_train=self.is_train)
            second_cell_fw = AttentionCell(
                cell3_fw,
                u,
                mask=q_mask,
                mapper='sim',
                input_keep_prob=self.config.input_keep_prob,
                is_train=self.is_train)
            second_cell_bw = AttentionCell(
                cell3_bw,
                u,
                mask=q_mask,
                mapper='sim',
                input_keep_prob=self.config.input_keep_prob,
                is_train=self.is_train)
        else:
            # Attention is computed once, up front, by attention_layer.
            p0 = attention_layer(config,
                                 self.is_train,
                                 h,
                                 u,
                                 h_mask=self.x_mask,
                                 u_mask=self.q_mask,
                                 scope="p0",
                                 tensor_dict=self.tensor_dict)
            first_cell_fw = d_cell2_fw
            second_cell_fw = d_cell3_fw
            first_cell_bw = d_cell2_bw
            second_cell_bw = d_cell3_bw

        (fw_g0, bw_g0), _ = bidirectional_dynamic_rnn(
            first_cell_fw,
            first_cell_bw,
            p0,
            x_len,
            dtype='float',
            scope='g0')  # [N, M, JX, 2d]
        g0 = tf.concat(axis=3, values=[fw_g0, bw_g0])
        (fw_g1, bw_g1), _ = bidirectional_dynamic_rnn(
            second_cell_fw,
            second_cell_bw,
            g0,
            x_len,
            dtype='float',
            scope='g1')  # [N, M, JX, 2d]
        g1 = tf.concat(axis=3, values=[fw_g1, bw_g1])

        logits = get_logits([g1, p0],
                            d,
                            True,
                            wd=config.wd,
                            input_keep_prob=config.input_keep_prob,
                            mask=self.x_mask,
                            is_train=self.is_train,
                            func=config.answer_func,
                            scope='logits1')
        # Summarise g1 with the first logits and broadcast the summary back
        # to every context position.
        a1i = softsel(tf.reshape(g1, [N, M * JX, 2 * d]),
                      tf.reshape(logits, [N, M * JX]))
        a1i = tf.tile(tf.expand_dims(tf.expand_dims(a1i, 1), 1),
                      [1, M, JX, 1])

        (fw_g2, bw_g2), _ = bidirectional_dynamic_rnn(
            d_cell4_fw,
            d_cell4_bw,
            tf.concat(axis=3, values=[p0, g1, a1i, g1 * a1i]),
            x_len,
            dtype='float',
            scope='g2')  # [N, M, JX, 2d]
        g2 = tf.concat(axis=3, values=[fw_g2, bw_g2])
        logits2 = get_logits([g2, p0],
                             d,
                             True,
                             wd=config.wd,
                             input_keep_prob=config.input_keep_prob,
                             mask=self.x_mask,
                             is_train=self.is_train,
                             func=config.answer_func,
                             scope='logits2')

        flat_logits = tf.reshape(logits, [-1, M * JX])
        flat_yp = tf.nn.softmax(flat_logits)  # [-1, M*JX]
        flat_logits2 = tf.reshape(logits2, [-1, M * JX])
        flat_yp2 = tf.nn.softmax(flat_logits2)

        if config.na:
            # Prepend a learned "no answer" logit, take the softmax over the
            # extended vector, then split it back into the no-answer
            # probability and the per-position probabilities.
            na_bias = tf.get_variable("na_bias", shape=[], dtype='float')
            na_bias_tiled = tf.tile(tf.reshape(na_bias, [1, 1]),
                                    [N, 1])  # [N, 1]
            concat_flat_logits = tf.concat(
                axis=1, values=[na_bias_tiled, flat_logits])
            concat_flat_yp = tf.nn.softmax(concat_flat_logits)
            na_prob = tf.squeeze(tf.slice(concat_flat_yp, [0, 0], [-1, 1]),
                                 [1])
            flat_yp = tf.slice(concat_flat_yp, [0, 1], [-1, -1])

            concat_flat_logits2 = tf.concat(
                axis=1, values=[na_bias_tiled, flat_logits2])
            concat_flat_yp2 = tf.nn.softmax(concat_flat_logits2)
            na_prob2 = tf.squeeze(
                tf.slice(concat_flat_yp2, [0, 0], [-1, 1]), [1])  # [N]
            flat_yp2 = tf.slice(concat_flat_yp2, [0, 1], [-1, -1])

            self.concat_logits = concat_flat_logits
            self.concat_logits2 = concat_flat_logits2
            self.na_prob = na_prob * na_prob2

        yp = tf.reshape(flat_yp, [-1, M, JX])
        yp2 = tf.reshape(flat_yp2, [-1, M, JX])
        wyp = tf.nn.sigmoid(logits2)

        self.tensor_dict['g1'] = g1
        self.tensor_dict['g2'] = g2

        self.logits = flat_logits
        self.logits2 = flat_logits2
        self.yp = yp
        self.yp2 = yp2
        self.wyp = wyp

    with tf.variable_scope("q_gen"):
        # Question Generation Using (Paragraph & Predicted Ans Pos)
        NM = config.max_num_sents * config.batch_size

        # Separated encoder
        #ss = tf.reshape(xx, (-1, JX, dw+dco))
        q_worthy = tf.reduce_sum(
            tf.to_int32(self.y), axis=2
        )  # so we get probability distribution of answer-likely. (N, M)
        q_worthy = tf.expand_dims(tf.to_int32(tf.argmax(q_worthy, axis=1)),
                                  axis=1)  # (N) -> (N, 1)
        # Pair each batch index with its selected sentence index so that
        # gather_nd picks one sentence per example.
        q_worthy = tf.concat([
            tf.expand_dims(tf.range(0, N, dtype=tf.int32), axis=1), q_worthy
        ],
                             axis=1)
        # example : [0, 9], [1, 11], [2, 8], [3, 5], [4, 0], [5, 1] ...
        ss = tf.gather_nd(xx, q_worthy)
        syp = tf.expand_dims(tf.gather_nd(yp, q_worthy), axis=-1)
        syp2 = tf.expand_dims(tf.gather_nd(yp2, q_worthy), axis=-1)
        # Append the two predicted answer probabilities to every word feature.
        ss_with_ans = tf.concat([ss, syp, syp2], axis=2)

        qg_dim = 600
        # Note: this rebinds cell_fw / cell_bw, previously the LSTM cells.
        cell_fw, cell_bw = rnn.DropoutWrapper(rnn.GRUCell(qg_dim), input_keep_prob=config.input_keep_prob), \
            rnn.DropoutWrapper(rnn.GRUCell(qg_dim), input_keep_prob=config.input_keep_prob)
        s_outputs, s_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, ss_with_ans, dtype=tf.float32)
        s_outputs = tf.concat(s_outputs, axis=2)
        s_states = tf.concat(s_states, axis=1)

        # Prepend an all-zero start-token column to the question ids.
        start_tokens = tf.zeros([N], dtype=tf.int32)
        self.inp_q_with_GO = tf.concat(
            [tf.expand_dims(start_tokens, axis=1), self.q], axis=1)
        # supervise if mode is train
        if config.mode == "train":
            emb_q = tf.nn.embedding_lookup(params=word_emb_mat,
                                           ids=self.inp_q_with_GO)
            #emb_q = tf.reshape(tf.tile(tf.expand_dims(emb_q, axis=1), [1, M, 1, 1]), (NM, JQ+1, dw))
            train_helper = seq2seq.TrainingHelper(emb_q, [JQ] * N)
        else:
            # Beam search needs the memory and state repeated per beam.
            s_outputs = seq2seq.tile_batch(s_outputs, multiplier=beam_width)
            s_states = seq2seq.tile_batch(s_states, multiplier=beam_width)

        cell = rnn.DropoutWrapper(rnn.GRUCell(num_units=qg_dim * 2),
                                  input_keep_prob=config.input_keep_prob)
        attention_mechanism = seq2seq.BahdanauAttention(num_units=qg_dim * 2,
                                                        memory=s_outputs)
        attn_cell = seq2seq.AttentionWrapper(cell,
                                             attention_mechanism,
                                             attention_layer_size=qg_dim * 2,
                                             output_attention=True,
                                             alignment_history=False)
        total_glove_vocab_size = 78878  #72686
        # Project the decoder output onto the combined vocabulary size.
        out_cell = rnn.OutputProjectionWrapper(attn_cell,
                                               VW + total_glove_vocab_size)
        if config.mode == "train":
            decoder_initial_states = out_cell.zero_state(
                batch_size=N, dtype=tf.float32).clone(cell_state=s_states)
            decoder = seq2seq.BasicDecoder(
                cell=out_cell,
                helper=train_helper,
                initial_state=decoder_initial_states)
        else:
            decoder_initial_states = out_cell.zero_state(
                batch_size=N * beam_width,
                dtype=tf.float32).clone(cell_state=s_states)
            decoder = seq2seq.BeamSearchDecoder(
                cell=out_cell,
                embedding=word_emb_mat,
                start_tokens=start_tokens,
                end_token=EOS_TOKEN,
                initial_state=decoder_initial_states,
                beam_width=beam_width,
                length_penalty_weight=0.0)
        outputs = seq2seq.dynamic_decode(decoder=decoder,
                                         maximum_iterations=JQ)
        if config.mode == "train":
            gen_q = outputs[0].sample_id
            gen_q_prob = outputs[0].rnn_output
            gen_q_states = outputs[1]
        else:
            # Only the beam at index 0 is kept.
            gen_q = outputs[0].predicted_ids[:, :, 0]
            # NOTE(review): outside train mode gen_q_prob holds the embeddings
            # of the predicted ids, not decoder outputs — confirm consumers
            # expect that.
            gen_q_prob = tf.nn.embedding_lookup(
                params=word_emb_mat, ids=outputs[0].predicted_ids[:, :, 0])
            gen_q_states = outputs[1]

        self.gen_q = gen_q
        self.gen_q_prob = gen_q_prob
        self.gen_q_states = gen_q_states
def predict_stock():
    """Train a small RNN on a synthetic time series and plot its predictions.

    The function builds a TF1 graph (placeholders ``X``/``y``, a ReLU
    ``BasicRNNCell`` and a one-unit linear read-out), trains it with Adam on
    random windows of ``time_series``, saves a checkpoint to
    ``./14_stock_predict_model``, restores it in a second session and plots
    either a free-running generated sequence or the prediction for one fixed
    window.  It takes no arguments and returns ``None``.
    """

    def time_series(t):
        """Return the synthetic signal evaluated at time(s) ``t``."""
        return t * np.sin(t) / 3 + 2 * np.sin(t * 5)

    def next_batch(batch_size, n_steps):
        """Sample random windows and return ``(inputs, targets)``.

        The targets are the inputs shifted one step ahead; both arrays have
        shape ``(batch_size, n_steps, 1)``.
        """
        t0 = np.random.rand(batch_size, 1) * (t_max - t_min - n_steps * resolution)
        ts = t0 + np.arange(0., n_steps + 1) * resolution
        ys = time_series(ts)
        return (ys[:, :-1].reshape(-1, n_steps, 1),
                ys[:, 1:].reshape(-1, n_steps, 1))

    t_min, t_max = 0, 30
    resolution = 0.1

    n_steps = 20
    n_outputs = 1
    n_neurons = 100
    n_inputs = 1
    # Toggle between projecting inside the cell and a single dense read-out.
    use_projection_wrapper = False

    X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
    y = tf.placeholder(tf.float32, [None, n_steps, n_outputs])

    base_cell = rnn.BasicRNNCell(num_units=n_neurons, activation=tf.nn.relu)
    if use_projection_wrapper:
        cell = rnn.OutputProjectionWrapper(base_cell, output_size=n_outputs)
        outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    else:
        rnn_outputs, _ = tf.nn.dynamic_rnn(base_cell, X, dtype=tf.float32)
        # Fold time into the batch axis so one dense layer serves every step,
        # then restore the (batch, n_steps, n_outputs) layout.
        stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons])
        stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
        outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])

    loss = tf.reduce_mean(tf.square(outputs - y))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    training_op = optimizer.minimize(loss)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    n_epochs = 1500
    batch_size = 50

    with tf.Session() as sess:
        init.run()
        for epoch in range(n_epochs):
            X_batch, y_batch = next_batch(batch_size, n_steps)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            if epoch % 100 == 0:
                mse_val = loss.eval(feed_dict={X: X_batch, y: y_batch})
                print('Epoch: {}, mse: {}'.format(epoch, mse_val))
        saver.save(sess, './14_stock_predict_model')

    with tf.Session() as sess:
        saver.restore(sess, './14_stock_predict_model')
        is_predict_sequence = True
        if is_predict_sequence:
            seq_len = 300
            # Preallocate seed + generated samples instead of growing the
            # array with np.append (which copies the whole array each step).
            seq = np.zeros(n_steps + seq_len, dtype=np.float32)
            for step in range(seq_len):
                # The window is always the most recent n_steps values.
                X_batch = seq[step:step + n_steps].reshape(1, n_steps, 1)
                y_pred = sess.run(outputs, feed_dict={X: X_batch})
                seq[step + n_steps] = y_pred[0, -1, 0]
            plt.plot(seq, 'b-')
            plt.xlabel('Time')
        else:
            t_instance = np.linspace(12.2,
                                     12.2 + resolution * (n_steps + 1),
                                     n_steps + 1)
            X_new = time_series(
                np.array(t_instance[:-1].reshape(-1, n_steps, n_inputs)))
            y_pred = sess.run(outputs, feed_dict={X: X_new})
            print(X_new.shape, y_pred.shape)
            plt.plot(t_instance[:-1], time_series(t_instance[:-1]), 'bo',
                     markersize=10, label='instance')
            plt.plot(t_instance[1:], time_series(t_instance[1:]), 'y*',
                     markersize=10, label='target')
            plt.plot(t_instance[1:], y_pred[0, :, 0], 'r.',
                     markersize=10, label='prediction')
            plt.legend()
    plt.show()
# Hyper-parameters and graph definition for a sequence-to-sequence regressor.
# One time step consumes `n_inputs` features and emits `n_outputs` values.
slice_size = 612
fft_size = slice_size // 2 + 1
steps_seconds = 2.0
n_steps = math.ceil(steps_seconds * prepare.samples_per_second / slice_size)

# presumably two values per FFT bin (e.g. real/imaginary) -- TODO confirm
n_inputs = 2 * fft_size
n_neurons = 20
n_outputs = 2 * fft_size

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs], name='X')
y = tf.placeholder(tf.float32, [None, n_steps, n_outputs], name='y')

# LSTM -> input dropout -> linear projection to `n_outputs`, built step by step.
# NOTE(review): the keep probability is a fixed constant, so dropout is applied
# whenever this graph is run.
lstm_cell = rnn.LSTMCell(
    num_units=n_neurons,
    initializer=tf.variance_scaling_initializer(),
    activation=tf.nn.elu,
)
regularized_cell = rnn.DropoutWrapper(lstm_cell, input_keep_prob=0.7)
cell = rnn.OutputProjectionWrapper(regularized_cell, output_size=n_outputs)

outputs, states = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)

# Mean squared error between projected outputs and targets, minimised by Adam.
loss = tf.reduce_mean(tf.square(outputs - y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

n_epochs = 2000
n_iterations = 150
def build_GRU(self):
    """Build the GRU encoder/decoder VAE graph and return the decoded tensor.

    Reads ``self.input``, ``self.category``, ``self.leng``, ``self.enc_size``,
    ``self.initia``, ``self.dropout``, ``self.input_size``,
    ``self.latent_size`` and ``self.batch_size``.

    Sets ``self.latent_h``, ``self.z_mean_c``, ``self.z_std_c``,
    ``self.samples_c``, ``self.sampled_z_c`` and ``self.out_cat``.

    Returns:
        The concatenation (last axis) of the 2-unit and 3-unit decoder
        outputs.
    """

    def _encoder_gru(cell_name):
        # One encoder layer: a GRU with output dropout.
        gru = tfn.GRUCell(self.enc_size, name=cell_name, activation=None,
                          kernel_initializer=self.initia)
        return tf.nn.rnn_cell.DropoutWrapper(
            gru, output_keep_prob=self.dropout)

    def _decoder_gru(n_out, **projection_kwargs):
        # One decoder: a 512-unit GRU with output dropout, projected to n_out.
        gru = tfn.GRUCell(512, kernel_initializer=self.initia)
        dropped = tf.nn.rnn_cell.DropoutWrapper(
            gru, output_keep_prob=self.dropout)
        return tfn.OutputProjectionWrapper(dropped, n_out,
                                           **projection_kwargs)

    with tf.variable_scope("vae_model", reuse=tf.AUTO_REUSE):
        x = self.input

        # Embed the category and repeat it along the time axis so it can be
        # concatenated to every step of the input.
        cat = tf.layers.dense(self.category, 16)
        cat_per_step = tf.tile(tf.expand_dims(cat, axis=1),
                               (1, self.leng, 1))
        print("pre", x)
        x_t = tf.concat([x, cat_per_step], axis=-1)
        print("post", x_t)

        # Two-layer bidirectional GRU encoder.
        cell_fw = tfn.MultiRNNCell(
            [_encoder_gru(n) for n in ("fow_cell0", "fow_cell1")])
        cell_bw = tfn.MultiRNNCell(
            [_encoder_gru(n) for n in ("bow_cell0", "bow_cell1")])
        _, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw,
            cell_bw,
            inputs=x_t,
            dtype=tf.float32,
            sequence_length=self.input_size,
            time_major=False,
            scope="encoder")

        # Final states of both directions and both layers, plus the category
        # embedding, are squeezed into the latent feature vector.
        encoder_summary = tf.concat(
            [state[0][0], state[1][0], state[0][1], state[1][1], cat],
            axis=-1,
            name="latent_concat")
        self.latent_h = tf.layers.dense(encoder_summary,
                                        self.latent_size,
                                        kernel_initializer=self.initia)
        self.z_mean_c = tf.layers.dense(self.latent_h,
                                        self.latent_size,
                                        kernel_initializer=self.initia,
                                        name="MEAN")
        self.z_std_c = tf.layers.dense(self.latent_h,
                                       self.latent_size,
                                       kernel_initializer=self.initia,
                                       name="STD")

        # Reparameterised sample: mean + exp(z_std_c / 2) * standard normal
        # noise, then squashed with tanh.
        self.samples_c = tf.random_normal(
            [self.batch_size, self.latent_size], 0.0, 1.0, dtype=tf.float32)
        raw_sample = (self.z_mean_c
                      + tf.exp(self.z_std_c / 2) * self.samples_c)
        self.sampled_z_c = tf.nn.tanh(raw_sample)

        conditioned = tf.concat([self.sampled_z_c, cat], axis=-1)
        latent_state = tf.layers.dense(conditioned, 512)

        with tf.variable_scope("dec", reuse=False):
            print("SAMPLED ", self.sampled_z_c)
            # The first decoder is driven by all-zero inputs; all information
            # comes from its initial state.
            res = tf.zeros_like(x)
            second = _decoder_gru(2, activation=tf.nn.tanh)
            print("RES", res)
            coord_outs, _ = tf.nn.dynamic_rnn(
                second,
                res,
                initial_state=latent_state,
                sequence_length=self.input_size,
                time_major=False,
                dtype=tf.float32,
                scope='RNN_cord')
            print("x", latent_state)

            # The second decoder reads the first decoder's outputs.
            state_outs, _ = tf.nn.dynamic_rnn(
                _decoder_gru(3),
                coord_outs,
                initial_state=latent_state,
                sequence_length=self.input_size,
                time_major=False,
                dtype=tf.float32,
                scope='RNN_stat')
            print("OUT : ", coord_outs)
            print("OUT : ", state_outs)

            coord_outs = tf.concat([coord_outs, state_outs], axis=-1)
            self.out_cat = tf.layers.dense(tf.layers.flatten(coord_outs), 7)
            return coord_outs
def build(self):
    """Build the LSTM encoder/decoder VAE graph and return the decoded tensor.

    Reads ``self.input``, ``self.category``, ``self.leng``, ``self.enc_size``,
    ``self.initia``, ``self.dropout``, ``self.input_size``,
    ``self.latent_size``, ``self.batch_size``, ``self.cell_enc`` and
    ``self.cell_dec``.

    Sets ``self.latent_h`` (LSTM encoder branch) or ``self.latent`` (other
    branch), ``self.z_mean_c``, ``self.z_std_c``, ``self.samples_c``,
    ``self.sampled_z_c`` and ``self.out_cat``.

    Returns:
        The 5-unit, tanh-activated decoder output sequence.
    """

    def _encoder_lstm(cell_name, **cell_kwargs):
        # One encoder layer: an LSTM with output dropout.
        lstm = tfn.LSTMCell(self.enc_size, name=cell_name,
                            initializer=self.initia, **cell_kwargs)
        return tf.nn.rnn_cell.DropoutWrapper(
            lstm, output_keep_prob=self.dropout)

    with tf.variable_scope("vae_model", reuse=tf.AUTO_REUSE):
        x = self.input
        cat = tf.layers.dense(self.category, 16)
        big_cat = tf.tile(tf.expand_dims(self.category, axis=1),
                          (1, self.leng, 1))
        print("pre", x)
        x_t = tf.concat([x, big_cat], axis=-1)
        print("post", x_t)
        # NOTE(review): x_t is built and printed but the encoder below is fed
        # `x`, not `x_t` -- confirm which input is intended.

        # Three-layer bidirectional LSTM encoder.
        cell_fw = tfn.MultiRNNCell([
            _encoder_lstm("fow_cell0"),
            _encoder_lstm("fow_cell1"),
            _encoder_lstm("fow_cell2", activation=tf.nn.tanh),
        ])
        cell_bw = tfn.MultiRNNCell([
            _encoder_lstm("bow_cell0"),
            _encoder_lstm("bow_cell1"),
            _encoder_lstm("bow_cell2", activation=tf.nn.tanh),
        ])
        _, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw,
            cell_bw,
            inputs=x,
            dtype=tf.float32,
            sequence_length=self.input_size,
            time_major=False,
            scope="encoder")

        if self.cell_enc == tfn.LSTMCell:
            fw_state, bw_state = state
            # Sum forward and backward cell states layer by layer.  The
            # original added the third forward state to itself, silently
            # dropping the third backward state.
            summed_c = [fw.c + bw.c for fw, bw in zip(fw_state, bw_state)]
            self.latent_h = tf.layers.dense(
                tf.concat(summed_c, axis=-1, name="latent_concat"),
                self.latent_size,
                kernel_initializer=self.initia)
        else:
            # NOTE(review): this branch sets `self.latent`, but the layers
            # below read `self.latent_h` -- confirm it is set elsewhere.
            self.latent = tf.concat([state[0], state[1]], axis=-1)
            print("ELSE LATENT ", self.latent)

        self.z_mean_c = tf.layers.dense(self.latent_h,
                                        self.latent_size,
                                        kernel_initializer=self.initia,
                                        name="MEAN")
        self.z_std_c = tf.layers.dense(self.latent_h,
                                       self.latent_size,
                                       kernel_initializer=self.initia,
                                       name="STD")
        mu_c = self.z_mean_c
        sigma_c = self.z_std_c

        # Reparameterisation: the noise must be standard normal.  Drawing it
        # from N(mu_c, sigma_c) and then shifting/scaling again applied the
        # posterior parameters twice; build_GRU already uses N(0, 1).
        self.samples_c = tf.random_normal(
            [self.batch_size, self.latent_size], 0.0, 1.0, dtype=tf.float32)
        self.sampled_z_c = mu_c + tf.exp(sigma_c / 2) * self.samples_c
        next_state = tf.concat([self.sampled_z_c, cat], axis=-1)

        with tf.variable_scope("dec", reuse=False):
            print("SAMPLED ", self.sampled_z_c)
            if self.cell_dec == tfn.LSTMCell:
                # Seed the first slot of the state tuple with the conditioned
                # sample and the second slot with zeros.
                latent_state = tfn.LSTMStateTuple(
                    next_state, tf.zeros_like(next_state))
            else:
                latent_state = self.sampled_z_c

            # The decoder is driven by all-zero inputs; all information comes
            # from its initial state.
            res = tf.zeros_like(x)
            second = tfn.OutputProjectionWrapper(
                tf.nn.rnn_cell.DropoutWrapper(
                    tf.nn.rnn_cell.LSTMCell(self.latent_size + 16,
                                            initializer=self.initia),
                    output_keep_prob=self.dropout),
                5,
                activation=tf.nn.tanh)
            print("RES", res)
            coord_outs, _ = tf.nn.dynamic_rnn(
                second,
                res,
                initial_state=latent_state,
                sequence_length=self.input_size,
                time_major=False,
                dtype=tf.float32,
                scope='RNN_cord')
            print("OUT : ", coord_outs)
            self.out_cat = tf.layers.dense(tf.layers.flatten(coord_outs), 17)
            return coord_outs
def embedding_attention_seq2seq(encoder_inputs,  # [T, batch_size]
                                decoder_inputs,  # [T, batch_size]
                                cell,
                                num_encoder_symbols,
                                num_decoder_symbols,
                                embedding_size,
                                num_heads=1,  # number of attention heads
                                output_projection=None,  # decoder projection
                                feed_previous=False,
                                dtype=None,
                                scope=None,
                                initial_state_attention=False,
                                loop_fn_factory=_extract_argmax_and_embed):
    """Embedding sequence-to-sequence model with attention.

    The encoder embeds ``encoder_inputs`` and runs ``cell`` over them; its
    per-step outputs are concatenated into the attention states consumed by
    ``embedding_attention_decoder``.

    :param encoder_inputs: encoder inputs, a list of int32 id tensors
    :param decoder_inputs: decoder inputs, a list of int32 id tensors
    :param cell: an RNNCell instance
    :param num_encoder_symbols: number of encoder symbols (vocabulary size)
    :param num_decoder_symbols: number of decoder symbols (vocabulary size)
    :param embedding_size: dimension of the word embeddings
    :param num_heads: number of attention heads; each head is one way of
        computing a weighted sum over the attention states
    :param output_projection: ``(W, B)`` pair used to project decoder outputs
        into vocabulary space; ``W`` has shape
        ``[output_size, num_decoder_symbols]`` and ``B`` has shape
        ``[num_decoder_symbols]``. When ``None``, ``cell`` is wrapped in an
        ``OutputProjectionWrapper`` instead. When given together with
        ``feed_previous=True``, the previous decoder output is multiplied by
        ``W`` and offset by ``B`` before being fed as the next input.
    :param feed_previous: bool or boolean tensor. If true, only the first
        decoder input (the "GO" symbol) is used and every later input is
        taken from the previous output; typically used at test time.
    :param dtype: dtype of the variable scope
    :param scope: variable scope; defaults to "embedding_attention_seq2seq"
    :param initial_state_attention: if ``False`` (default) the initial
        attention is zero; if ``True`` attention is initialised from the
        initial state and the attention states
    :param loop_fn_factory: forwarded unchanged to
        ``embedding_attention_decoder``
    :return: for a Python-bool ``feed_previous``, whatever
        ``embedding_attention_decoder`` returns; for a tensor
        ``feed_previous``, a pair ``(outputs, state)`` where ``outputs`` has
        one entry per decoder input
    """
    with variable_scope.variable_scope(
            scope or "embedding_attention_seq2seq", dtype=dtype) as scope:
        dtype = scope.dtype

        # Encoder: EmbeddingWrapper puts an embedding layer in front of the
        # cell, so the encoder can be fed word ids directly.
        encoder_cell = rnn.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        encoder_outputs, encoder_state = rnn.static_rnn(
            encoder_cell, encoder_inputs, dtype=dtype)  # T * [batch, size]

        # Concatenate the encoder outputs along a new time axis; these are
        # the states attention is computed over.
        top_states = [
            array_ops.reshape(e, [-1, 1, cell.output_size])
            for e in encoder_outputs
        ]  # T * [batch_size, 1, size]
        attention_states = array_ops.concat(top_states, 1)  # [batch, T, size]

        # Decoder: without an explicit projection, wrap the cell so that its
        # outputs already have vocabulary size.
        output_size = None
        if output_projection is None:
            cell = rnn.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols

        if isinstance(feed_previous, bool):
            return embedding_attention_decoder(
                decoder_inputs,
                encoder_state,
                attention_states,
                cell,
                num_decoder_symbols,
                embedding_size,
                num_heads=num_heads,
                output_size=output_size,
                output_projection=output_projection,
                feed_previous=feed_previous,
                initial_state_attention=initial_state_attention,
                loop_fn_factory=loop_fn_factory)

        # If feed_previous is a Tensor, we construct 2 graphs and use cond.
        def decoder(feed_previous_bool):
            # The first branch creates the variables, the second reuses them.
            reuse = None if feed_previous_bool else True
            # get_variable_scope is a function of the variable_scope module;
            # looking it up on the variable_scope class raised AttributeError.
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(), reuse=reuse):
                outputs, state = embedding_attention_decoder(
                    decoder_inputs,
                    encoder_state,
                    attention_states,
                    cell,
                    num_decoder_symbols,
                    embedding_size,
                    num_heads=num_heads,
                    output_size=output_size,
                    output_projection=output_projection,
                    feed_previous=feed_previous_bool,
                    update_embedding_for_previous=False,
                    initial_state_attention=initial_state_attention,
                    loop_fn_factory=loop_fn_factory)
                # cond needs flat lists, so flatten a nested state.
                state_list = [state]
                if nest.is_sequence(state):
                    state_list = nest.flatten(state)
                return outputs + state_list

        outputs_and_state = control_flow_ops.cond(feed_previous,
                                                  lambda: decoder(True),
                                                  lambda: decoder(False))
        outputs_len = len(decoder_inputs)  # Outputs length same as decoder inputs.
        state_list = outputs_and_state[outputs_len:]
        state = state_list[0]
        if nest.is_sequence(encoder_state):
            # Restore the nested structure that was flattened for cond.
            state = nest.pack_sequence_as(structure=encoder_state,
                                          flat_sequence=state_list)
        return outputs_and_state[:outputs_len], state
def one2many_rnn_seq2seq(encoder_inputs,
                         decoder_inputs_dict,
                         cell,
                         num_encoder_symbols,
                         num_decoder_symbols_dict,
                         embedding_size,
                         feed_previous=False,
                         dtype=None,
                         scope=None):
    """Run one shared embedding encoder followed by several named decoders.

    The encoder is built once from ``encoder_inputs``. For every
    ``name -> decoder_inputs`` entry of ``decoder_inputs_dict`` a decoder is
    built in its own ``"one2many_decoder_<name>"`` scope, projecting to
    ``num_decoder_symbols_dict[name]`` symbols and starting from the encoder
    state.

    ``feed_previous`` may be a Python bool or a boolean tensor; for a tensor
    both decoder variants are built and selected with ``cond``.

    Returns:
        ``(outputs_dict, state_dict)``, both keyed by decoder name.
    """
    per_decoder_outputs = {}
    per_decoder_state = {}

    with variable_scope.variable_scope(
            scope or "one2many_rnn_seq2seq", dtype=dtype) as scope:
        dtype = scope.dtype

        # Shared encoder.
        embedded_encoder = rnn.EmbeddingWrapper(
            cell,
            embedding_classes=num_encoder_symbols,
            embedding_size=embedding_size)
        _, encoder_state = rnn.static_rnn(
            embedded_encoder, encoder_inputs, dtype=dtype)

        # One decoder per entry.
        for name, decoder_inputs in decoder_inputs_dict.items():
            num_decoder_symbols = num_decoder_symbols_dict[name]

            with variable_scope.variable_scope(
                    "one2many_decoder_" + str(name)):
                decoder_cell = rnn.OutputProjectionWrapper(
                    cell, num_decoder_symbols)

                if not isinstance(feed_previous, bool):
                    # feed_previous is a Tensor: build both graphs, pick one
                    # at run time with cond.
                    def _decode_flat(use_previous):
                        """Decode with a fixed flag; return outputs + flat state."""
                        # pylint: disable=cell-var-from-loop
                        if use_previous:
                            reuse = None
                        else:
                            reuse = True
                        current_scope = variable_scope.get_variable_scope()
                        with variable_scope.variable_scope(current_scope,
                                                           reuse=reuse):
                            dec_outputs, dec_state = embedding_rnn_decoder(
                                decoder_inputs,
                                encoder_state,
                                decoder_cell,
                                num_decoder_symbols,
                                embedding_size,
                                feed_previous=use_previous)
                        # pylint: enable=cell-var-from-loop
                        if nest.is_sequence(dec_state):
                            flat_state = nest.flatten(dec_state)
                        else:
                            flat_state = [dec_state]
                        return dec_outputs + flat_state

                    flat_result = control_flow_ops.cond(
                        feed_previous,
                        lambda: _decode_flat(True),
                        lambda: _decode_flat(False))

                    # The first len(decoder_inputs) entries are outputs, the
                    # remainder is the flattened state.
                    n_outputs = len(decoder_inputs)
                    outputs = flat_result[:n_outputs]
                    flat_state = flat_result[n_outputs:]
                    if nest.is_sequence(encoder_state):
                        state = nest.pack_sequence_as(
                            structure=encoder_state,
                            flat_sequence=flat_state)
                    else:
                        state = flat_state[0]
                else:
                    outputs, state = embedding_rnn_decoder(
                        decoder_inputs,
                        encoder_state,
                        decoder_cell,
                        num_decoder_symbols,
                        embedding_size,
                        feed_previous=feed_previous)

            per_decoder_outputs[name] = outputs
            per_decoder_state[name] = state

    return per_decoder_outputs, per_decoder_state
def _lstmnet(
        features,  # This is batch_features from input_fn
        labels,  # This is batch_labels from input_fn
        mode,  # An instance of tf.estimator.ModeKeys
        params,
        is_test):
    """Build a GRU encoder/decoder autoencoder graph.

    Dropout is active only when ``mode`` is TRAIN and ``is_test`` is false;
    when ``is_test`` is true both variable scopes reuse existing variables.

    Returns:
        ``(decoded_Yr, encoded_V)``: the decoder's ``rnn_output`` and the
        final encoder state reshaped to two dimensions.
    """

    def _gru_stack(size_key, depth_key, keep_prob):
        # Stack of GRUBlockCells with "naive" input dropout on every layer.
        plain = [
            rnn.GRUBlockCell(params[size_key])
            for _ in range(params[depth_key])
        ]
        with_dropout = [
            rnn.DropoutWrapper(c, input_keep_prob=keep_prob) for c in plain
        ]
        return rnn.MultiRNNCell(with_dropout, state_is_tuple=False)

    training = mode == tf.estimator.ModeKeys.TRAIN and not is_test
    seq_len = params['sequence_length']

    with tf.variable_scope('EncoderNet') as enc_scope:
        if is_test:
            enc_scope.reuse_variables()
        pkeep = params['pkeep'] if training else 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        batch_dim = x.get_shape()[0]
        series_shape = [batch_dim, seq_len, params['dimension']]
        X = tf.reshape(x, shape=series_shape)
        X = tf.identity(X, name='X')
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]

        # The reshaped labels and the tiled sequence lengths are not consumed
        # below; they are still created so the graph stays the same.
        Labels = None
        if labels is not None:
            Labels = tf.reshape(labels, shape=series_shape)

        encoder_Hin = params['encoder_Hin']
        # encoder_Hin: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS ]

        seqlen = tf.reshape(tf.Variable(seq_len, name='seqlen'), shape=[1])
        seqdescr = tf.tile(seqlen, multiples=[batch_dim])
        # seqdescr: [ BATCH_SIZE ]

        inital_time_sample = params['decoder_inital_time_sample']
        # inital_time_sample: [ BATCH_SIZE, DIMENSION ]

        # Input projection keeps the encoder symmetric with the decoder's
        # output projection.  No dropout in the bottleneck layer.
        encoder_multicell = rnn.InputProjectionWrapper(
            _gru_stack('encoder_hidden_layer_size',
                       'encoder_hidden_layer_depth', pkeep),
            num_proj=params['bottleneck_size'],
            activation=None)

        encoded_Yr, encoded_H = tf.nn.dynamic_rnn(
            encoder_multicell,
            X,
            dtype=tf.float32,
            initial_state=encoder_Hin,
            scope='EncoderNet',
            parallel_iterations=params['parallel_iters'])
        # The identities only attach names to the tensors.
        encoded_H = tf.identity(encoded_H, name='encoded_H')
        encoded_Yr = tf.identity(encoded_Yr, name='endoded_Yr')

        # Last state of the sequence, flattened per example.
        encoded_V = tf.reshape(encoded_H, [batch_dim, -1])

    with tf.variable_scope('NetDecoder') as dec_scope:
        if is_test:
            dec_scope.reuse_variables()
        pkeep = params['pkeep'] if training else 1.0

        # The decoder starts from the encoder's final state.
        decoder_Hin = encoded_H

        decoder_multicell = _gru_stack('decoder_hidden_layer_size',
                                       'decoder_hidden_layer_depth', pkeep)
        # Output dropout, then a linear layer to get back to DIMENSION.
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)
        decoder_multicell = rnn.OutputProjectionWrapper(
            decoder_multicell, params['dimension'], activation=None)

        custom_Helper = create_fixed_len_numeric_training_helper(
            inital_time_sample, seq_len, X.dtype)
        decoder = seq2seq.BasicDecoder(cell=decoder_multicell,
                                       helper=custom_Helper,
                                       initial_state=decoder_Hin)
        decode_result, decoded_H, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder,
            output_time_major=False,
            impute_finished=False,
            maximum_iterations=None,
            parallel_iterations=params['parallel_iters'])

        decoded_Yr = decode_result.rnn_output
        print('decoded_Yr')
        print(decoded_Yr)
        # Pin the time dimension to the configured sequence length.
        static_shape = decoded_Yr.get_shape()
        decoded_Yr.set_shape([static_shape[0], seq_len, static_shape[2]])
        print(decoded_Yr)

        decoded_H = tf.identity(decoded_H, name='decoded_H')
        decoded_Yr = tf.identity(decoded_Yr, name='decoded_Yr')

    return decoded_Yr, encoded_V  # encoded_V is encoded_H reshaped
def lstm_sentence_embedding(features, labels, mode, params):
    """Model function that embeds sentences with one shared LSTM.

    :param features: dict of sentence tensors; ``features[QUERY_KEY]`` is
        always read, ``features[POS_RESP_KEY]`` and ``features[NEG_RESP_KEY]``
        are read in every mode except INFER
    :param labels: unused
    :param mode: one of the ``ModeKeys`` values
    :param params: dict providing ``'M'`` (loss margin),
        ``'input_keep_prob'``, ``'output_keep_prob'`` and, for training,
        ``'learning_rate'``
    :return: a ``ModelFnOps`` carrying the margin loss (non-INFER modes), the
        train op (TRAIN) and the ``'emb_vec'`` prediction (INFER)
    """
    print('CURRENT MODE: %s' % mode.upper())

    margin = params['M']
    input_keep_prob = params['input_keep_prob']
    output_keep_prob = params['output_keep_prob']
    n_lstm_units = 100  # number of hidden units

    # A single cell is shared by query, positive and negative responses.
    with tf.variable_scope("emb_cell"):
        cell = rnn.LSTMCell(num_units=n_lstm_units, activation=tf.nn.softmax)
        if mode == ModeKeys.TRAIN:
            cell = rnn.DropoutWrapper(cell=cell,
                                      input_keep_prob=input_keep_prob,
                                      output_keep_prob=output_keep_prob)
        projection_cell = rnn.OutputProjectionWrapper(
            cell=cell, output_size=lstm_emb_size, activation=tf.nn.softmax)

    def embed(sentences):
        """Return the projected output at the last time step."""
        step_outputs, _ = tf.nn.dynamic_rnn(cell=projection_cell,
                                            inputs=sentences,
                                            time_major=False,
                                            dtype=tf.float32)
        # (batch, time, dim) -> (batch, dim): keep only the final step and
        # treat it as the sentence embedding.
        return step_outputs[:, -1, :]

    def cosine(a, b):
        """Row-wise cosine similarity A.B / (norm(A) * norm(B))."""
        dot = tf.reduce_sum(tf.multiply(a, b), axis=1)
        norms = tf.norm(a, axis=1) * tf.norm(b, axis=1)
        return dot / norms

    # Every mode provides the query sentences.
    query = features[QUERY_KEY]

    loss = None
    if mode != ModeKeys.INFER:
        query_vec = embed(query)
        pos_vec = embed(features[POS_RESP_KEY])
        neg_vec = embed(features[NEG_RESP_KEY])

        sim_pos = cosine(query_vec, pos_vec)  # should be large
        sim_neg = cosine(query_vec, neg_vec)  # should be small

        # Hinge loss per (positive, negative) pair, averaged to a scalar.
        pair_losses = tf.maximum(0., margin - sim_pos + sim_neg)
        loss = tf.reduce_mean(pair_losses)

    train_op = None
    if mode == ModeKeys.TRAIN:
        train_op = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.contrib.framework.get_global_step(),
            optimizer=tf.train.AdamOptimizer,
            learning_rate=params['learning_rate'],
            summaries=[
                'learning_rate',
                'loss',
                "gradients",
                "gradient_norm",
            ])

    # At inference time the prediction is the embedding of the query.
    if mode == ModeKeys.INFER:
        predictions = {'emb_vec': embed(query)}
    else:
        predictions = {}

    return ModelFnOps(predictions=predictions,
                      loss=loss,
                      train_op=train_op,
                      eval_metric_ops=None,
                      mode=mode)
def _convlstmnet(
        features,  # This is batch_features from input_fn
        labels,  # This is batch_labels from input_fn
        mode,  # An instance of tf.estimator.ModeKeys
        params,
        is_test):
    """Build a convolutional encoder / GRU decoder autoencoder graph.

    The encoder applies three 2-D convolutions along the time axis, a dense
    layer with dropout and a fully connected bottleneck. The decoder tiles
    the bottleneck over ``params['sequence_length']`` steps and feeds it to a
    GRU stack whose outputs are projected to ``params['dimension']``.

    Dropout is active only when ``mode`` is TRAIN and ``is_test`` is false;
    when ``is_test`` is true both variable scopes reuse existing variables.

    Returns:
        ``(decoder_Yr, encoded_V)``: the decoder outputs and the bottleneck
        activations.
    """
    with tf.variable_scope('EncoderNet') as scope:
        if is_test:
            scope.reuse_variables()
        training = mode == tf.estimator.ModeKeys.TRAIN and not is_test
        pkeep = params['pkeep'] if training else 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        X = tf.reshape(x,
                       shape=[
                           x.get_shape()[0], params['sequence_length'],
                           params['dimension'], 1
                       ])
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION, 1 ]
        print(X)

        # Convolutional Layer 1
        conv1 = tf.layers.conv2d(inputs=X,
                                 filters=6,
                                 kernel_size=[5, 1],
                                 padding="same",
                                 activation=tf.nn.relu)
        # conv1: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION, 6 ]
        print(conv1)

        # Conv Layer 2 with some stride
        conv2 = tf.layers.conv2d(inputs=conv1,
                                 filters=10,
                                 kernel_size=[5, 1],
                                 padding="same",
                                 strides=(2, 1),
                                 activation=tf.nn.relu)
        # conv2: [ BATCH_SIZE, ceil(SEQUENCE_LENGTH / 2), DIMENSION, 10 ]
        print(conv2)

        # Conv Layer 3 with big filter size and stride
        conv3 = tf.layers.conv2d(inputs=conv2,
                                 filters=15,
                                 kernel_size=[8, 1],
                                 padding="same",
                                 strides=(4, 1),
                                 activation=tf.nn.relu)
        # conv3: [ BATCH_SIZE, ceil(ceil(SEQUENCE_LENGTH / 2) / 4), DIMENSION, 15 ]
        print(conv3)

        # Flatten. The per-example size is taken from conv3's static shape
        # rather than the former hard-coded 7 * DIMENSION * 15, which was only
        # right for sequence lengths that reduce to 7 time steps.
        flat_dim = 1
        for dim in conv3.get_shape().as_list()[1:]:
            flat_dim *= dim
        conv3_flat = tf.reshape(conv3, [conv3.get_shape()[0], flat_dim])

        dense = tf.layers.dense(inputs=conv3_flat,
                                units=128,
                                activation=tf.nn.relu)
        # tf.layers.dropout takes the fraction to DROP, whereas pkeep is the
        # fraction to keep; the flag also honours is_test like pkeep does.
        dropout = tf.layers.dropout(inputs=dense,
                                    rate=1.0 - pkeep,
                                    training=training)

        # Last encoder layer: map to the bottleneck representation.
        bottleneck = layers.fully_connected(dropout,
                                            params['bottleneck_size'],
                                            activation_fn=tf.nn.relu)
        encoded_V = bottleneck
        # bottleneck: [ BATCH_SIZE, BOTTLENECK_SIZE ]

    with tf.variable_scope('NetDecoder') as scope:
        if is_test:
            scope.reuse_variables()
        training = mode == tf.estimator.ModeKeys.TRAIN and not is_test
        pkeep = params['pkeep'] if training else 1.0

        decoder_Hin = params['decoder_Hin']
        # decoder_Hin: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]

        # Repeat the bottleneck at every time step as the decoder input.
        tiled_bottleneck = tf.tile(
            tf.expand_dims(bottleneck, axis=1),
            multiples=[1, params['sequence_length'], 1])
        # tiled_bottleneck: [ BATCH_SIZE, SEQUENCE_LENGTH, BOTTLENECK_SIZE ]

        decoder_cells = [
            rnn.GRUBlockCell(params['decoder_hidden_layer_size'])
            for _ in range(params['decoder_hidden_layer_depth'])
        ]
        # "naive dropout" implementation
        decoder_dropcells = [
            rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
            for cell in decoder_cells
        ]
        decoder_multicell = rnn.MultiRNNCell(decoder_dropcells,
                                             state_is_tuple=False)
        # dropout on the stack's outputs
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)
        # dense layer to adjust dimensions
        decoder_multicell = rnn.OutputProjectionWrapper(
            decoder_multicell, params['dimension'])

        decoder_Yr, decoder_H = tf.nn.dynamic_rnn(
            decoder_multicell,
            tiled_bottleneck,
            dtype=tf.float32,
            initial_state=decoder_Hin,
            scope='NetDecoder',
            parallel_iterations=params['parallel_iters'])
        # The identity only attaches a name to the final state.
        decoder_H = tf.identity(decoder_H, name='decoder_H')
        # decoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
        # decoder_H: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]

    return decoder_Yr, encoded_V
def _model(self):
    """Build the attention seq2seq graph with training and greedy decoders.

    A 4-layer GRU encoder reads the embedded source sequence; a 4-layer GRU
    decoder wrapped with Bahdanau attention and an output projection is
    then decoded twice with shared variables: once with teacher forcing
    (training) and once greedily (prediction).

    Reads ``self.num_words``, ``self.embedding_size``,
    ``self.max_sentence_length`` and ``self.word2index`` (keys ``'GO'`` and
    ``'EOS'``).

    Returns:
        A tuple ``(graph, loss, train_op, predicting_logits)``.
    """
    hidden_units = 128
    num_layers = 4

    def stacked_gru():
        # GRU is used in place of LSTM; the cells are stacked num_layers deep.
        gru_layers = []
        for _ in range(num_layers):
            gru_layers.append(rnn.GRUCell(hidden_units, activation=tf.tanh))
        return rnn.MultiRNNCell(gru_layers)

    graph = tf.Graph()
    with graph.as_default():
        # Word-vector table: zero-initialised and frozen (not trainable).
        zero_table = np.zeros(shape=[self.num_words, self.embedding_size],
                              dtype=np.float32)
        embedding = tf.Variable(zero_table, trainable=False, name='embedding')
        lr = tf.placeholder(tf.float32, [], name='learning_rate')

        # Source side: token ids, per-example lengths, embedded tokens.
        x_input = tf.placeholder(tf.int32, [None, None], name='x_input')
        x_sequence_length = tf.placeholder(tf.int32, [None], name='x_length')
        x_embedding = tf.nn.embedding_lookup(embedding, x_input)

        # Target side: token ids, per-example lengths, embedded tokens.
        y_input = tf.placeholder(tf.int32, [None, None], name='y_input')
        y_sequence_length = tf.placeholder(tf.int32, [None], name='y_length')
        y_embedding = tf.nn.embedding_lookup(embedding, y_input)

        # Batch size is fed explicitly (the original author also noted
        # tf.shape(x_input)[0] as an alternative).
        batch_size = tf.placeholder(tf.int32, [], name='batch_size')

        encoder_cell = stacked_gru()
        decoder_cell = stacked_gru()

        # Run the encoder.
        encoder_zero = encoder_cell.zero_state(batch_size, tf.float32)
        output, encoder_state = tf.nn.dynamic_rnn(
            cell=encoder_cell,
            inputs=x_embedding,
            initial_state=encoder_zero,
            sequence_length=x_sequence_length)

        # Attention over the encoder outputs, then project the decoder output.
        attention_mechanism = seq2seq.BahdanauAttention(
            hidden_units, output, x_sequence_length)
        attention_cell = seq2seq.AttentionWrapper(decoder_cell,
                                                  attention_mechanism)
        decoder_cell = rnn.OutputProjectionWrapper(attention_cell,
                                                   hidden_units,
                                                   activation=tf.tanh)

        # The decoder starts from its zero state with the cell state replaced
        # by the encoder's final state.
        decoder_zero = decoder_cell.zero_state(batch_size, tf.float32)
        encoder_state = decoder_zero.clone(cell_state=encoder_state)

        logit_init = tf.truncated_normal_initializer(mean=0.0, stddev=0.1)
        output_layer = tf.layers.Dense(self.num_words,
                                       kernel_initializer=logit_init)

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            # Sentence length plus the added <GO> and <EOS> tokens.
            decode_limit = self.max_sentence_length + 2

            # Training decoder (teacher forcing on the embedded targets).
            training_helper = seq2seq.TrainingHelper(
                inputs=y_embedding, sequence_length=y_sequence_length)
            training_decoder = seq2seq.BasicDecoder(
                decoder_cell, training_helper, encoder_state, output_layer)
            # With impute_finished=True, once a sequence has read <eos> no
            # further computation is done for it: its state is held unchanged
            # and its outputs are all zeros.
            training_output, _, _ = seq2seq.dynamic_decode(
                training_decoder,
                maximum_iterations=decode_limit,
                impute_finished=True)

            # Prediction decoder (greedy; shares variables via AUTO_REUSE).
            start_tokens = tf.fill([batch_size], self.word2index['GO'])
            predict_helper = seq2seq.GreedyEmbeddingHelper(
                embedding, start_tokens, self.word2index['EOS'])
            predict_decoder = seq2seq.BasicDecoder(
                decoder_cell, predict_helper, encoder_state, output_layer)
            predict_output, _, _ = seq2seq.dynamic_decode(
                predict_decoder,
                maximum_iterations=decode_limit,
                impute_finished=True)

        # Named handles for the logits, plus the loss mask over target steps.
        training_logits = tf.identity(training_output.rnn_output,
                                      name='training_logits')
        predicting_logits = tf.identity(predict_output.rnn_output,
                                        name='predicting')
        masks = tf.sequence_mask(y_sequence_length,
                                 dtype=tf.float32,
                                 name='mask')

        with tf.variable_scope('optimization'):
            loss = seq2seq.sequence_loss(training_logits, y_input, masks)
            optimizer = tf.train.AdamOptimizer(lr)
            # Clip every available gradient element-wise to [-5, 5].
            capped_gradients = []
            for grad, var in optimizer.compute_gradients(loss):
                if grad is None:
                    continue
                capped_gradients.append(
                    (tf.clip_by_value(grad, -5., 5.), var))
            train_op = optimizer.apply_gradients(capped_gradients)

    return graph, loss, train_op, predicting_logits
sess = tf.InteractiveSession() lstm_size = 300 str_len = 50 batch_size = 200 learning_rate = 0.001 x = tf.placeholder(tf.float32, [None, None, num_chars], name='x') y = tf.placeholder(tf.float32, [None, None, num_chars], name='y') num_cells = 2 ## lstm: cells = [rnn.BasicLSTMCell(lstm_size) for _ in range(num_cells)] multicell = rnn.MultiRNNCell(cells) projection = rnn.OutputProjectionWrapper(multicell, num_chars) # outputs for training: rnn_outputs, final_state = tf.nn.dynamic_rnn(projection, x, dtype=tf.float32) xe = tf.nn.softmax_cross_entropy_with_logits(logits=rnn_outputs, labels=y) total_loss = tf.reduce_mean(xe) train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss) # outputs for sequential text generation: seq_init = projection.zero_state(1, dtype=tf.float32) seq_len = tf.placeholder(dtype=tf.int32, name='seq_len') seq_output, seq_state = tf.nn.dynamic_rnn(projection, x, initial_state=seq_init,