def __init__(self, num_cells, input_dim, seq_length, name, activation=tf.nn.tanh, dynamic=False,
             bidirectional=False):
    """
    Create one LSTM layer and eagerly build its variables.

    Parameters
    ----------
    num_cells : int
        Number of units in the LSTM cell.
    input_dim : int
        Size of the feature axis of the input vectors, e.g. for an input of
        shape [batch_size, seq_length, num_features] this is num_features.
    seq_length : int
        Max length of the input sequences.
    name : object
        Layer name; converted with ``str()`` and used as the prefix of the
        generated parameter names.
    activation : tensorflow function
        Activation function handed to the LSTM cell.
    dynamic : boolean
        Selects dynamic vs. static RNN behaviour. A static RNN unrolls every
        time step into the graph; a dynamic RNN is built `in a while loop`
        and can consume variable-length sequences when the sequence lengths
        are supplied. An API for using dynamic RNNs is currently not provided.
        WARNING: this parameter plays no role when RNN layers are stacked.
    bidirectional : boolean
        Selects whether the layer is bidirectional.
        WARNING: this parameter plays no role when RNN layers are stacked.
    """
    # Plain configuration.
    self.name = str(name)
    self.num_cells = num_cells
    self.input_dim = input_dim
    self.seq_length = seq_length
    self.dynamic = dynamic
    self.bidirectional = bidirectional
    self.f = activation

    self.cells = LSTMCell(num_units=num_cells, activation=activation, dtype=tf.float32)
    # Decides whether the RNN is run as bidirectional or vanilla.
    self.cell_type = CellType.get_cell_type(bidirectional, dynamic)

    # Build the cell right away so that its variables exist and can be exposed below.
    self.cells.build(inputs_shape=[None, tf.Dimension(self.input_dim)])
    self.params = self.cells.variables

    # Give every variable a stable "<name>_<cells>_<dim>_<len>_<index>" key.
    self.param_common_name = self.name + f'_{num_cells}_{input_dim}_{seq_length}'
    named_params = {}
    for index, variable in enumerate(self.params):
        named_params[self.param_common_name + '_' + str(index)] = variable
    self.named_params_dict = named_params
def rnn_layers(x, seq_length, training, hidden_num=100, layer_num=3, class_n=5):
    """Run a stacked bidirectional LSTM and project every time step to class logits.

    Args:
        x: Input tensor passed to the stacked RNN as ``inputs``.
        seq_length: Per-example sequence lengths, passed as ``sequence_length``.
        training: Not used by this implementation (only the disabled
            BNLSTMCell variant consumed it); kept so callers do not break.
        hidden_num: Number of units of every forward/backward LSTM cell.
        layer_num: Number of stacked bidirectional layers.
        class_n: Number of output classes.

    Returns:
        Tensor of logits reshaped to [batch_size, max_time, class_n].
    """
    cells_fw = []
    cells_bw = []
    for _ in range(layer_num):
        cells_fw.append(LSTMCell(hidden_num))
        cells_bw.append(LSTMCell(hidden_num))

    with tf.variable_scope('BDLSTM_rnn') as scope:
        lasth, _, _ = stack_bidirectional_dynamic_rnn(
            cells_fw=cells_fw,
            cells_bw=cells_bw,
            inputs=x,
            sequence_length=seq_length,
            dtype=tf.float32,
            scope=scope)

    # lasth is [batch_size, max_time, hidden_num * 2]. Either leading dimension
    # may be statically unknown (None); tf.reshape cannot take None, so fall
    # back to the runtime shape for exactly those dimensions.
    batch_size, max_time = lasth.get_shape().as_list()[:2]
    if batch_size is None or max_time is None:
        runtime_shape = tf.shape(lasth)
        if batch_size is None:
            batch_size = runtime_shape[0]
        if max_time is None:
            max_time = runtime_shape[1]

    with tf.variable_scope('rnn_fnn_layer'):
        weight_out = tf.Variable(
            tf.truncated_normal([2, hidden_num], stddev=np.sqrt(2.0 / (2 * hidden_num))),
            name='weights')
        biases_out = tf.Variable(tf.zeros([hidden_num]), name='bias')
        weight_class = tf.Variable(
            tf.truncated_normal([hidden_num, class_n], stddev=np.sqrt(2.0 / hidden_num)),
            name='weights_class')
        bias_class = tf.Variable(tf.zeros([class_n]), name='bias_class')

        # Split the concatenated output into its two directions, then merge
        # them with a learned per-unit weighted sum over the direction axis.
        lasth_rs = tf.reshape(lasth, [batch_size, max_time, 2, hidden_num], name='lasth_rs')
        lasth_output = tf.nn.bias_add(
            tf.reduce_sum(tf.multiply(lasth_rs, weight_out), axis=2),
            biases_out,
            name='lasth_bias_add')

        # Flatten time into the batch axis for the dense classification layer.
        lasth_output_rs = tf.reshape(lasth_output, [batch_size * max_time, hidden_num],
                                     name='lasto_rs')
        logits = tf.reshape(
            tf.nn.bias_add(tf.matmul(lasth_output_rs, weight_class), bias_class),
            [batch_size, max_time, class_n],
            name="rnn_logits_rs")
    return logits
def discriminator_rnn(x, labels, df_dim, number_classes, kernel=(3, 3), strides=(2, 2), dilations=(1, 1),
                      pooling='avg', update_collection=None, act=tf.nn.relu, scope_name='Discriminator',
                      reuse=False):
    """Discriminator built from three stacked bidirectional LSTM layers.

    Args:
        x: Input tensor; it is squeezed and its last two axes are swapped
            before being fed to the RNN.
        labels, number_classes, kernel, strides, dilations, pooling, act:
            Not used by this implementation; kept for a uniform signature.
        df_dim: Number of units of the second and third LSTM layers.
        update_collection: Forwarded to ``ops.snlinear``.
        scope_name: Variable scope that holds all variables.
        reuse: When true, variables of the scope are reused.

    Returns:
        Tuple ``(output, state)``: the result of ``ops.snlinear`` with one
        output unit, and the concatenated forward/backward cell state (``c``)
        of the layer with index 2.
    """
    num_layers = 3
    num_nodes = [int(8 / 2), df_dim, df_dim]

    # NOTE(review): tf.squeeze drops *every* size-1 axis, including a batch
    # axis of size 1 - confirm callers never pass a single-example batch.
    x = tf.transpose(tf.squeeze(x), perm=[0, 2, 1])

    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()

        # Define LSTM cells
        enc_fw_cells = [LSTMCell(num_nodes[layer], name="fw_" + str(layer))
                        for layer in range(num_layers)]
        enc_bw_cells = [LSTMCell(num_nodes[layer], name="bw_" + str(layer))
                        for layer in range(num_layers)]

        # Connect LSTM cells bidirectionally and stack
        (all_states, fw_state, bw_state) = stack_bidirectional_dynamic_rnn(
            cells_fw=enc_fw_cells,
            cells_bw=enc_bw_cells,
            inputs=x,
            dtype=tf.float32)

        # get_shape is a method: it has to be called before as_list().
        # NOTE(review): the index used on axis 1 is the size of x's last axis -
        # confirm this is the intended time step.
        output = all_states[:, x.get_shape().as_list()[2]]
        output = ops.snlinear(output, 1, update_collection=update_collection, name='d_sn_linear')
        return output, tf.concat((fw_state[2].c, bw_state[2].c), 1)
def __init__(self):
    """Set up file locations, tokenizer keywords, the seq2seq model and the vocabularies.

    Resets the default TensorFlow graph, registers every ``Keyword`` content
    string with jieba, builds a ``dynamicSeq2seq`` model and reads the encoder
    and decoder vocabulary files (one token per line, line number = id).
    """
    print("tensorflow version: ", tf.__version__)
    tf.reset_default_graph()

    self.encoder_vec_file = os.path.join(BASE_DIR, 'dialog', 'preprocessing/enc.vec')
    self.decoder_vec_file = os.path.join(BASE_DIR, 'dialog', 'preprocessing/dec.vec')
    self.encoder_vocabulary = os.path.join(BASE_DIR, 'dialog', 'preprocessing/enc.vocab')
    self.decoder_vocabulary = os.path.join(BASE_DIR, 'dialog', 'preprocessing/dec.vocab')
    self.dictFile = os.path.join(BASE_DIR, 'dialog', 'word_dict.txt')

    self.batch_size = 1
    self.max_batches = 10000
    self.show_epoch = 100
    self.model_path = os.path.join(BASE_DIR, 'dialog', 'model/')

    # Custom business keywords. Each one is registered with jieba
    # individually; the dictionary file in self.dictFile is not loaded here.
    self.keyword = list(Keyword.objects.all().values_list("content", flat=True))
    for each in self.keyword:
        jieba.add_word(each)

    self.model = dynamicSeq2seq(encoder_cell=LSTMCell(40),
                                decoder_cell=LSTMCell(40),
                                encoder_vocab_size=600,
                                decoder_vocab_size=1600,
                                embedding_size=20,
                                attention=False,
                                bidirectional=False,
                                debug=False,
                                time_major=True)

    self.location = ["杭州", "重庆", "上海", "北京"]
    self.user_info = {"__username__": "yw", "__location__": "重庆"}
    self.robot_info = {"__robotname__": "Rr"}

    self.dec_vocab = {}
    self.enc_vocab = {}
    self.dec_vecToSeg = {}

    # Encoder side only needs token -> id.
    with open(self.encoder_vocabulary, "r") as enc_vocab_file:
        for index, word in enumerate(enc_vocab_file):
            self.enc_vocab[word.strip()] = index

    # Decoder side needs both directions: id -> token and token -> id.
    with open(self.decoder_vocabulary, "r") as dec_vocab_file:
        for index, word in enumerate(dec_vocab_file):
            token = word.strip()
            self.dec_vecToSeg[index] = token
            self.dec_vocab[token] = index
def biLSTM_layer_op(self):
    """Build the BiLSTM encoder together with the slot-tagging and intent heads.

    Sets ``self.logits`` (per-token tag scores), ``self.intent`` (argmax of
    the intent scores) and ``self.loss_intent`` (mean softmax cross-entropy
    against ``self.intent_targets``).
    """
    with tf.variable_scope("bi-lstm", reuse=tf.AUTO_REUSE):
        forward_cell = LSTMCell(self.hidden_dim)
        backward_cell = LSTMCell(self.hidden_dim)
        rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=self.word_embeddings,
            sequence_length=self.sequence_lengths,
            dtype=tf.float32)
        output_fw_seq, output_bw_seq = rnn_outputs
        encoder_fw_final_state, encoder_bw_final_state = rnn_states

        output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)
        output = tf.nn.dropout(output, self.dropout_pl)
        # Final hidden states of both directions feed the intent classifier.
        encoder_final_state_h = tf.concat(
            (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

    with tf.variable_scope("proj", reuse=tf.AUTO_REUSE):
        # Slot head: one score per tag.
        W = tf.get_variable(name="W",
                            shape=[2 * self.hidden_dim, self.num_tags],
                            initializer=tf.contrib.layers.xavier_initializer(),
                            dtype=tf.float32)
        b = tf.get_variable(name="b",
                            shape=[self.num_tags],
                            initializer=tf.zeros_initializer(),
                            dtype=tf.float32)
        # Intent head: one score per intent.
        w_intent = tf.get_variable(name="W_intent",
                                   shape=[2 * self.hidden_dim, self.intent_counts],
                                   initializer=tf.contrib.layers.xavier_initializer(),
                                   dtype=tf.float32)
        b_intent = tf.get_variable(name="b_intent",
                                   shape=[self.intent_counts],
                                   initializer=tf.zeros_initializer(),
                                   dtype=tf.float32)

        # --- slot ---
        # time_major is left at its default, so the RNN output is batch-major
        # and s[1] is the (padded) sequence length.
        s = tf.shape(output)
        flat_output = tf.reshape(output, [-1, 2 * self.hidden_dim])
        pred = tf.matmul(flat_output, W) + b
        self.logits = tf.reshape(pred, [-1, s[1], self.num_tags])

        # --- intent ---
        intent_logits = tf.add(tf.matmul(encoder_final_state_h, w_intent), b_intent)
        self.intent = tf.argmax(intent_logits, axis=1)

        # Intent classification loss.
        intent_one_hot = tf.one_hot(self.intent_targets, depth=self.intent_counts, dtype=tf.float32)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=intent_one_hot,
                                                                logits=intent_logits)
        self.loss_intent = tf.reduce_mean(cross_entropy)
def model(self):
    """Encode ``self.batch_embedded`` with a bidirectional LSTM and reduce it into ``self.output``.

    The concatenated outputs are averaged over axis 2 and then summed over
    axis 1; the result is stored in ``self.output``.
    """
    # Bidirectional recurrent layer (the cells are LSTM cells).
    forward_cell = LSTMCell(self.hidden_size)
    backward_cell = LSTMCell(self.hidden_size)
    rnn_outputs, _ = bi_rnn(forward_cell, backward_cell,
                            inputs=self.batch_embedded, dtype=tf.float32)
    tf.summary.histogram('RNN_outputs', rnn_outputs)

    # A (forward, backward) pair is merged along the feature axis.
    if isinstance(rnn_outputs, tuple):
        rnn_outputs = tf.concat(rnn_outputs, 2)
    print('rnn_outputs.shape:', rnn_outputs.shape)

    rnn_outputs = tf.reduce_mean(rnn_outputs, axis=2)
    print('rnn_outputs.shape:', rnn_outputs.shape)

    self.output = tf.reduce_sum(rnn_outputs, axis=1)
def add_bilstm(self):
    """Run a 256-unit bidirectional LSTM over ``self.bert_embeddings``.

    The forward and backward outputs are concatenated on the last axis,
    passed through dropout with ``self.keep_rate`` and stored in
    ``self.bilstm_embeddings``.
    """
    with tf.variable_scope("bi-lstm"):
        forward_cell = LSTMCell(256)
        backward_cell = LSTMCell(256)
        # No sequence_length is supplied to the RNN.
        directional_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=self.bert_embeddings,
            dtype=tf.float32)
        forward_out, backward_out = directional_outputs
        merged = tf.concat([forward_out, backward_out], axis=-1)
        self.bilstm_embeddings = tf.nn.dropout(merged, self.keep_rate)
def attention_layer(self, value, hiddenUnits, numLabels, name):
    """Encode ``value`` with an LSTM and pool the outputs with LSTM-produced weights.

    A first LSTM (``hiddenUnits`` units) encodes ``value``; a second LSTM with
    a single unit runs over those outputs to produce one weight per step. The
    weights are multiplied (transposed) with the encoder outputs and axis 1
    of the product is squeezed away.

    ``numLabels`` is not used by this implementation.
    """
    # TODO: finish this network and build a class
    with tf.name_scope("attention_layer"):
        with tf.variable_scope("rnn_attention_" + name, reuse=False):
            with tf.variable_scope("rnn_encoder_variable", reuse=False):
                encoder_cell = LSTMCell(num_units=hiddenUnits,
                                        initializer=tf.truncated_normal_initializer)
                outputs, lastStates = layer.dynamicRnnLayer(value, encoder_cell)

            with tf.variable_scope("rnn_attention_variable", reuse=False):
                # One unit -> one scalar per time step.
                attentionCell = LSTMCell(num_units=1,
                                         initializer=tf.truncated_normal_initializer)
                attentions, lastAttentionState = layer.dynamicRnnLayer(outputs, attentionCell)

            attentionEmbedding = tf.matmul(attentions, outputs, transpose_a=True)
            return tf.squeeze(attentionEmbedding, axis=1)
def _build_net(self):
    """Build the embedding -> BiLSTM -> dense classifier graph.

    Sets ``self.word_embeddings``, ``self.logits``, ``self.scores`` (max
    softmax probability per example) and ``self.predictions`` (argmax class).
    """
    # Word embedding
    embedding = tf.get_variable(
        "embedding", [Config.data.vocab_size, Config.model.embed_dim], dtype=tf.float32)
    self.word_embeddings = tf.nn.embedding_lookup(params=embedding,
                                                  ids=self.batch_x_input,
                                                  name="word_embeddings")

    cell1 = tf.contrib.rnn.DropoutWrapper(LSTMCell(Config.model.hidden_size),
                                          output_keep_prob=self.keep_prob)
    cell2 = tf.contrib.rnn.DropoutWrapper(LSTMCell(Config.model.hidden_size),
                                          output_keep_prob=self.keep_prob)

    with tf.variable_scope("bi-lstm"):
        _, (output_state_fw, output_state_bw) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell1,
            cell_bw=cell2,
            inputs=self.word_embeddings,
            sequence_length=self.sequence_lengths,
            dtype=tf.float32)

    # The final states are a (forward, backward) pair; each state consists of
    # (c, h) - memory cell and hidden state - so the concatenation has width
    # Config.model.hidden_size * 4.
    output = tf.concat([
        output_state_fw[0], output_state_fw[1],
        output_state_bw[0], output_state_bw[1]
    ], axis=1)
    output = tf.reshape(output, [-1, Config.model.hidden_size * 4])

    with slim.arg_scope(
            [slim.fully_connected],
            weights_regularizer=layers.l2_regularizer(0.0005),
            normalizer_fn=slim.batch_norm,
            normalizer_params={'is_training': self.is_training}):
        net = slim.fully_connected(output, 64, activation_fn=tf.nn.relu)
        net = slim.dropout(net, keep_prob=self.keep_prob, is_training=self.is_training)
        # The classification layer must consume the hidden layer (`net`).
        # Feeding `output` here would silently discard the 64-unit layer and
        # the dropout above.
        # NOTE: this changes the shape of the last layer's weights, so
        # checkpoints written before this fix cannot be restored into it.
        net = slim.fully_connected(net, 2, activation_fn=None)

    with tf.variable_scope("output"):
        self.logits = net
        # Named tensors, so that the values can be fetched directly from Java.
        self.scores = tf.reduce_max(tf.nn.softmax(self.logits), axis=1, name="scores")
        self.predictions = tf.argmax(self.logits, 1, name="predictions")
def positive_propagation(self):
    """Forward pass of the BiLSTM-CRF: returns the loss, the transition matrix and the logits.

    Returns:
        Tuple ``(loss, transition_params, logits)`` where ``loss`` is the
        negated mean CRF log-likelihood, ``transition_params`` is the CRF
        transition matrix returned by ``crf_log_likelihood`` and ``logits``
        are the per-token tag scores.
    """
    with tf.variable_scope('lstm-crf'):
        forward_cell = LSTMCell(self.hidden_dim, forget_bias=self.forget_bias)
        backward_cell = LSTMCell(self.hidden_dim, forget_bias=self.forget_bias)

        # self.input_x is usually [batch_size, sequence_length, dim_embedding];
        # both directional outputs are [batch_size, sequence_length, num_units].
        directional_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=self.input_x,
            sequence_length=self.sequence_lengths,
            dtype=tf.float32)
        output_fw_seq, output_bw_seq = directional_outputs

        # Merge both directions, then apply dropout against over-fitting.
        out_put = tf.nn.dropout(
            tf.concat([output_fw_seq, output_bw_seq], axis=-1),
            self.dropout_pl)

        # A linear layer after the RNN turns the last axis into num_tags,
        # which is the input format expected by crf_log_likelihood.
        with tf.variable_scope('proj'):
            W = tf.get_variable(
                name='W',
                shape=[2 * self.hidden_dim, self.num_tags],
                # Xavier initialisation keeps gradient magnitudes similar across layers.
                initializer=tf.contrib.layers.xavier_initializer(),
                dtype=tf.float32)
            b = tf.get_variable(name='b',
                                shape=[self.num_tags],
                                initializer=tf.zeros_initializer,
                                dtype=tf.float32)

            s = tf.shape(out_put)
            flat_output = tf.reshape(out_put, [-1, 2 * self.hidden_dim])
            pred = tf.matmul(flat_output, W) + b
            # s[1] is the (padded) maximum sentence length.
            logits = tf.reshape(pred, [-1, s[1], self.num_tags])

        # CRF loss.
        #   inputs:           unary potentials, i.e. the per-tag scores
        #   tag_indices:      gold labels as indices, [batch_size, max_seq_len]
        #   sequence_lengths: true length of every sequence, [batch_size]
        # The transition matrix is created by crf_log_likelihood itself.
        log_likelihood, transition_params = crf_log_likelihood(
            inputs=logits,
            tag_indices=self.labels,
            sequence_lengths=self.sequence_lengths)
        print(log_likelihood)
        loss = -tf.reduce_mean(log_likelihood)
        return loss, transition_params, logits
def _add_bidir_lstm(self, input, lstm_hidden_size, layer_num):
    """Run a bidirectional LSTM and return both outputs at step ``self.time_step - 1``.

    Args:
        input: RNN input, [batch, time, feature_dim] (time_major is left at
            its default of False).
        lstm_hidden_size: Number of units of each directional cell.
        layer_num: Only used to build the name scope of this layer.

    Returns:
        Concatenation (axis 1) of the forward and backward outputs taken at
        time index ``self.time_step - 1``.
    """
    with tf.name_scope("bidir_LSTM_layer_" + str(layer_num)):
        forward_cell = LSTMCell(lstm_hidden_size)
        backward_cell = LSTMCell(lstm_hidden_size)

        (forward_seq, backward_seq), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=input,
            dtype=tf.float32)

        # Make the outputs time-major so that the lookup selects a time step.
        step_index = self.time_step - 1
        fw_output = tf.nn.embedding_lookup(tf.transpose(forward_seq, [1, 0, 2]), step_index)
        bw_output = tf.nn.embedding_lookup(tf.transpose(backward_seq, [1, 0, 2]), step_index)

        return tf.concat((fw_output, bw_output), 1)
def __init__(self, config, scope, max_action_num):
    """Create the layers of the Q-network inside the variable scope ``scope``.

    Args:
        config: Configuration object; ``config.hidden_dim`` sets the LSTM size.
        scope: Name of the variable scope the layers are created in.
        max_action_num: Number of units of the output layer.
    """
    super(Qnetwork, self).__init__()
    self.scope = scope
    self.config = config
    self.max_action_num = max_action_num
    self.LSTM_dim = config.hidden_dim

    with tf.variable_scope(self.scope):
        # Initializer for the LSTM variables.
        self.initializer = tf.contrib.layers.xavier_initializer()
        self.cell = LSTMCell(self.LSTM_dim, initializer=self.initializer)
        self.fn_layer = tf.layers.Dense(30, activation=tf.nn.tanh, name='fn_layer')
        self.out_layer = tf.layers.Dense(max_action_num, name='out_layer')
def __call__(self, img, formula, dropout):
    """Decode an encoded image into a sequence of tokens.

    Args:
        img: encoded image (tf.Tensor), shape = (N, H, W, C),
            e.g. (N, H/2/2/2-2, W/2/2/2-2, 512)
        formula: (tf.placeholder), shape = (N, T)
        dropout: forwarded to the attention cell

    Returns:
        pred_train: (tf.Tensor), shape = (?, ?, vocab_size), logits of each class
        pred_test: (structure)
            - pred_test.logits, same as pred_train
            - pred_test.ids, shape = (?, config.max_length_formula); this is
              the one mainly used - the ids are directly the token ids

    Raises:
        ValueError: if ``config.decoding`` is neither "greedy" nor "beam_search".
    """
    dim_embeddings = self._config.attn_cell_config.get("dim_embeddings")
    dim_e = self._config.attn_cell_config["dim_e"]
    num_units = self._config.attn_cell_config["num_units"]

    with tf.variable_scope("Decoder"):
        embedding_table = tf.get_variable("embedding_table",
                                          shape=[self._n_tok, dim_embeddings],
                                          dtype=tf.float32,
                                          initializer=embedding_initializer())
        start_token = tf.get_variable("start_token",
                                      shape=[dim_embeddings],
                                      dtype=tf.float32,
                                      initializer=embedding_initializer())
        batch_size = tf.shape(img)[0]

        # training
        with tf.variable_scope("AttentionCell", reuse=False):
            embeddings = get_embeddings(formula, embedding_table, dim_embeddings,
                                        start_token, batch_size)  # (N, T, dim_embedding)
            attn_meca = AttentionMechanism(img, dim_e)
            recu_cell = LSTMCell(num_units)
            attn_cell = AttentionCell(recu_cell, attn_meca, dropout,
                                      self._config.attn_cell_config, self._n_tok)
            train_outputs, _ = tf.nn.dynamic_rnn(attn_cell, embeddings,
                                                 initial_state=attn_cell.initial_state())

        # decoding: same scope with reuse=True, so the training variables are shared
        with tf.variable_scope("AttentionCell", reuse=True):
            attn_meca = AttentionMechanism(img, dim_e, tiles=self._tiles)
            recu_cell = LSTMCell(num_units, reuse=True)
            attn_cell = AttentionCell(recu_cell, attn_meca, dropout,
                                      self._config.attn_cell_config, self._n_tok)

            if self._config.decoding == "greedy":
                decoder_cell = GreedyDecoderCell(embedding_table, attn_cell, batch_size,
                                                 start_token, self._id_end)
            elif self._config.decoding == "beam_search":
                decoder_cell = BeamSearchDecoderCell(embedding_table, attn_cell, batch_size,
                                                     start_token, self._id_end,
                                                     self._config.beam_size,
                                                     self._config.div_gamma,
                                                     self._config.div_prob)
            else:
                # Fail with a clear message instead of an unbound-variable error below.
                raise ValueError(
                    "Unsupported decoding mode: {!r} (expected 'greedy' or 'beam_search')".format(
                        self._config.decoding))

            test_outputs, _ = dynamic_decode(decoder_cell, self._config.max_length_formula + 1)

    return train_outputs, test_outputs
def bi_lstm_op(self):
    """Build the BiLSTM tagging network; sets ``self.logits`` and ``self.accuracy``."""
    with tf.variable_scope("bi-lstm"):
        # `trainable` controls whether the embeddings are updated during training.
        embedding_matrix = tf.Variable(self.embeddings,
                                       dtype=tf.float32,
                                       trainable=self.update_embedding,
                                       name="_word_embeddings")
        self.word_embeddings = tf.nn.embedding_lookup(params=embedding_matrix,
                                                      ids=self.word_ids,
                                                      name="word_embeddings")

        # Bidirectional LSTM layer.
        forward_cell = LSTMCell(self.config.hidden_size)
        backward_cell = LSTMCell(self.config.hidden_size)
        # Each directional output: (batch_size, time_step, hidden_size).
        directional_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=self.word_embeddings,
            sequence_length=self.sequence_lengths,
            dtype=tf.float32)
        output_fw_seq, output_bw_seq = directional_outputs

        # Concatenate forward and backward outputs into the final encoding.
        output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)

    with tf.variable_scope("proj"):
        W = tf.get_variable(
            name="W",
            shape=[2 * self.config.hidden_size, self.config.class_num],
            initializer=tf.contrib.layers.xavier_initializer(),
            dtype=tf.float32)
        b = tf.get_variable(name="b",
                            shape=[self.config.class_num],
                            initializer=tf.zeros_initializer(),
                            dtype=tf.float32)

        s = tf.shape(output)
        flat_output = tf.reshape(output, [-1, 2 * self.config.hidden_size])
        pred = tf.matmul(flat_output, W) + b
        self.logits = tf.reshape(pred, [-1, s[1], self.config.class_num])

    # Token-level accuracy over every position of the flattened batch.
    predicted_ids = tf.reshape(tf.argmax(self.logits, -1), [-1])
    gold_ids = tf.reshape(self.labels, [-1])
    correct_prediction = tf.equal(tf.cast(predicted_ids, tf.int32), gold_ids)
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def __biLSTM_layer_op(self):
    """Build the BiLSTM (optionally preceded by self-attention) and the projection; sets ``self.logits``.

    When ``self.flags == 'att'`` the word embeddings first go through a
    6-head attention layer; otherwise they are fed to the RNN directly. The
    RNN outputs are concatenated with ``self.Att_Conv`` before projection.
    """
    with tf.variable_scope("bi-lstm"):
        cell_fw = LSTMCell(self.hidden_layer_size)
        cell_bw = LSTMCell(self.hidden_layer_size)

        # Pick the RNN input once instead of duplicating the RNN call per branch.
        rnn_inputs = self.word_embeddings
        if self.flags == 'att':
            attention = Att(self.dropout_pl)
            rnn_inputs = attention.multiAttention_layer_op(
                queries=self.word_embeddings,
                keys=self.word_embeddings,
                values=self.word_embeddings,
                num_heads=6,
                scope="bilstm_attention",
                scope1='bilstm_att1')

        (output_fw_seq, output_bw_seq), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=rnn_inputs,
            sequence_length=self.sequence_lengths,
            dtype=tf.float32)

        output = tf.concat([output_fw_seq, output_bw_seq, self.Att_Conv], axis=-1)
        output = tf.nn.dropout(output, self.dropout_pl)

    with tf.variable_scope("proj"):
        # 128 extra features on top of the two RNN directions - presumably the
        # width of self.Att_Conv; verify against where Att_Conv is built.
        projection_width = 2 * self.hidden_layer_size + 128
        W = tf.get_variable(
            name="W",
            shape=[projection_width, self.output_size],
            initializer=tf.contrib.layers.xavier_initializer(),
            dtype=tf.float32)
        b = tf.get_variable(name="b",
                            shape=[self.output_size],
                            initializer=tf.zeros_initializer(),
                            dtype=tf.float32)

        s = tf.shape(output)
        flat_output = tf.reshape(output, [-1, projection_width])
        pred = tf.matmul(flat_output, W) + b
        self.logits = tf.reshape(pred, [-1, s[1], self.output_size])
def BiLSTM_layer(self, lstm_inputs, seq_lengths, layer_num=0, unit_num=None):
    """Apply one bidirectional LSTM layer followed by dropout.

    Args:
        lstm_inputs: Input tensor of the RNN.
        seq_lengths: Per-example sequence lengths.
        layer_num: Index used in the variable scope name ``bilstm_<layer_num>``.
        unit_num: Units per direction; defaults to ``self.lstm_hidden_dim``.

    Returns:
        Forward and backward outputs concatenated on the last axis, after
        dropout with ``self.dropout``.
    """
    if unit_num is None:
        unit_num = self.lstm_hidden_dim

    with tf.variable_scope("bilstm_" + str(layer_num), reuse=tf.AUTO_REUSE):
        cell_fw, cell_bw = LSTMCell(unit_num), LSTMCell(unit_num)
        (output_fw_seq, output_bw_seq), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=lstm_inputs,
            sequence_length=seq_lengths,
            dtype=tf.float32)
        output = tf.concat([output_fw_seq, output_bw_seq], axis=-1)
        output = tf.nn.dropout(output, self.dropout)
    return output
def _encoder(self, inputs):
    """Encode ``inputs`` with a bidirectional LSTM.

    Args:
        inputs: Input tensor of the RNN.

    Returns:
        Tuple ``(outputs, state)``. ``outputs`` is the concatenation of the
        forward and backward outputs (last axis) after dropout. ``state`` is a
        2-tuple holding the element-wise mean of the forward and backward
        final states: first the mean of component 0, then of component 1
        (``c`` and ``h`` for an LSTM state tuple).
    """
    with tf.variable_scope("encoder"):
        self.encoder_fw_cell = LSTMCell(self.hidden_dim)
        self.encoder_bw_cell = LSTMCell(self.hidden_dim)

        # The backward direction must get its own cell; passing the forward
        # cell twice would leave encoder_bw_cell unused.
        # NOTE: this adds separate backward variables, so checkpoints written
        # with the shared-cell graph no longer match.
        outputs, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=self.encoder_fw_cell,
            cell_bw=self.encoder_bw_cell,
            inputs=inputs,
            sequence_length=self.sequence_lengths,
            dtype=tf.float32)

        outputs = tf.concat(outputs, axis=-1)
        # Average the two directions so the state keeps the size of one cell.
        state = (tf.reduce_mean((state[0][0], state[1][0]), axis=0),
                 tf.reduce_mean((state[0][1], state[1][1]), axis=0))
        outputs = tf.nn.dropout(outputs, self.dropout_pl)
    return outputs, state
def bi_lstm(sequence_output, mask, lstm_layer):
    """Run ``lstm_layer`` stacked bidirectional LSTM layers over ``sequence_output``.

    Args:
        sequence_output: Input tensor of the RNN stack.
        mask: Tensor summed over axis 1 to obtain the per-example sequence
            lengths (presumably a 0/1 padding mask - confirm with the caller).
        lstm_layer: Number of stacked bidirectional layers.

    Returns:
        The output tensor of ``stack_bidirectional_dynamic_rnn``.
    """
    logging.info(
        "+++++++++++++++++++++++++++{} lstm layer++++++++++++++++++++++++++++++++"
        .format(lstm_layer))

    sequence_lengths = tf.reduce_sum(mask, axis=1)

    with tf.variable_scope("bi-lstm"):
        forward_cells = [LSTMCell(FLAGS.hidden_dim) for _ in range(lstm_layer)]
        backward_cells = [LSTMCell(FLAGS.hidden_dim) for _ in range(lstm_layer)]
        stacked = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
            cells_fw=forward_cells,
            cells_bw=backward_cells,
            inputs=sequence_output,
            sequence_length=sequence_lengths,
            dtype=tf.float32)
        # The call returns (outputs, forward states, backward states).
        output, _, _ = stacked
    return output
def biLSTM_layer(self):
    """Encode ``self.embeddings`` with a bidirectional LSTM; sets ``self.lstm_outputs``.

    The variable scope uses ``SharedModel.reuse`` as its ``reuse`` flag. The
    concatenated directional outputs go through dropout with
    ``self.dropout_pl`` before being stored.
    """
    with tf.variable_scope("bi-lstm", reuse=SharedModel.reuse):
        forward_cell = LSTMCell(self.hidden_size)
        backward_cell = LSTMCell(self.hidden_size)
        directional_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=self.embeddings,
            sequence_length=self.sequence_lengths,
            dtype=tf.float32)
        forward_out, backward_out = directional_outputs
        merged = tf.concat([forward_out, backward_out], axis=-1)
        self.lstm_outputs = tf.nn.dropout(merged, self.dropout_pl)
def _bilstm_layer(self, lstm_inputs, lstm_dim, lengths, name='BiLSTM_layer'):
    """Return the concatenated outputs of a bidirectional LSTM.

    Args:
        lstm_inputs: Input tensor of the RNN.
        lstm_dim: Number of units per direction.
        lengths: Per-example sequence lengths.
        name: Variable scope name.

    Returns:
        Forward and backward outputs concatenated on the last axis.
    """
    with tf.variable_scope(name):
        forward_cell = LSTMCell(lstm_dim)
        backward_cell = LSTMCell(lstm_dim)
        directional_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=lstm_inputs,
            dtype=tf.float32,
            sequence_length=lengths)
        forward_out, backward_out = directional_outputs
    return tf.concat([forward_out, backward_out], axis=-1)
def init_model(self):
    """Create the encoder, a named copy of its latent sample, and the decoder.

    Sets ``self.encoder``, ``self.z_sample`` and ``self.decoder``.
    """
    encoder_cell = LSTMCell(self.hparams.enc_units)
    self.encoder = Encoder.define(cell=encoder_cell, model=self)

    # Identity op, possibly needed so that z is not re-sampled (open question
    # carried over from the original author).
    self.z_sample = tf.identity(self.encoder.z_sample, name="z_sample_copy")

    decoder_cell = LSTMCell(self.hparams.dec_units)
    self.decoder = Decoder.define(
        cell=decoder_cell,
        model=self,
        encoder=self.encoder,
        z_sample=self.z_sample,
    )
def create_critic_network(self, Scope):
    """Build the two-level (lower/upper) critic network and return its placeholders and output.

    The lower network steps an LSTM cell over the embedded input and zeroes
    the carried state wherever ``action`` is 1. The upper network gathers the
    per-step outputs selected by ``action_pos`` and runs a bidirectional RNN
    over them, followed by either attention pooling or a first/last-step
    concatenation, dropout and a dense prediction layer.

    Args:
        Scope: Scope name. When its last character is 'e' the active word
            vectors are used, otherwise the target word vectors.

    Returns:
        Tuple ``(inputs, action, action_pos, lenth, lenth_up, out)``: the five
        placeholders followed by the prediction tensor.
    """
    inputs = tf.placeholder(shape=[1, self.max_lenth], dtype=tf.int32, name="inputs")
    action = tf.placeholder(shape=[1, self.max_lenth], dtype=tf.int32, name="action")
    action_pos = tf.placeholder(shape=[1, None], dtype=tf.int32, name="action_pos")
    lenth = tf.placeholder(shape=[1], dtype=tf.int32, name="lenth")
    lenth_up = tf.placeholder(shape=[1], dtype=tf.int32, name="lenth_up")

    # Lower network
    # print(...) with a single argument behaves the same on Python 2 and 3.
    if Scope[-1] == 'e':
        vec = tf.nn.embedding_lookup(self.wordvector, inputs)
        print("active")
    else:
        vec = tf.nn.embedding_lookup(self.target_wordvector, inputs)
        print("target")

    cell = LSTMCell(self.dim, initializer=self.init, state_is_tuple=False)
    self.state_size = cell.state_size
    actions = tf.to_float(action)
    h = cell.zero_state(1, tf.float32)
    embedding = []
    for step in range(self.max_lenth):
        # reuse=True: the variables of this scope must already exist.
        with tf.variable_scope("Lower/" + Scope, reuse=True):
            o, h = cell(vec[:, step, :], h)
        embedding.append(o[0])
        # Reset the carried state after every step whose action is 1.
        h = h * (1.0 - actions[0, step])

    # Upper network
    embedding = tf.stack(embedding)
    embedding = tf.gather(embedding, action_pos, name="Upper_input")
    with tf.variable_scope("Upper", reuse=True):
        out, _ = tf.nn.bidirectional_dynamic_rnn(cell, cell, embedding, lenth_up,
                                                 dtype=tf.float32, scope=Scope)

    if self.isAttention:
        out = tf.concat(out, 2)
        out = out[0, :, :]
        tmp = tflearn.fully_connected(out, self.dim, scope=Scope, name="att")
        tmp = tflearn.tanh(tmp)
        with tf.variable_scope(Scope):
            v_T = tf.get_variable("v_T", dtype=tf.float32, shape=[self.dim, 1], trainable=True)
        a = tflearn.softmax(tf.matmul(tmp, v_T))
        out = tf.reduce_sum(out * a, 0)
        out = tf.expand_dims(out, 0)
    else:
        # Last step of the forward output and first step of the backward output.
        out = tf.concat((out[0][:, -1, :], out[1][:, 0, :]), 1)

    out = tflearn.dropout(out, self.keep_prob)
    out = tflearn.fully_connected(out, self.grained, scope=Scope + "/pred", name="get_pred")
    return inputs, action, action_pos, lenth, lenth_up, out
def _encoder(self):
    """Encode the input tokens (plus an answer-position feature) with a bidirectional LSTM.

    The answer position is appended to every word embedding as one extra
    feature before encoding.

    Returns:
        ``LSTMStateTuple`` whose ``c`` and ``h`` are the concatenation
        (axis 1) of the forward and backward final states.
    """
    word_embeddings = self._get_embeddings(self.input_placeholder)
    expanded_answer_position = tf.expand_dims(self.answer_position, 2)
    word_embeddings_answer_position = tf.concat(
        (word_embeddings, expanded_answer_position), 2)

    # One cell per direction: handing the same cell object to both cell_fw and
    # cell_bw makes the two directions use a single cell instead of two
    # independent ones.
    # NOTE: this adds separate backward variables, so checkpoints written with
    # the shared-cell graph no longer match.
    encoder_fw_cell = LSTMCell(num_units=self.config.encoder_hidden_state_size)
    encoder_bw_cell = LSTMCell(num_units=self.config.encoder_hidden_state_size)

    ((encoder_fw_outputs, encoder_bw_outputs),
     (encoder_fw_final_state, encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=encoder_fw_cell,
        cell_bw=encoder_bw_cell,
        inputs=word_embeddings_answer_position,
        sequence_length=self.input_length_placeholder,
        dtype=tf.float32)

    # Built for parity with the original graph; it is not part of the return value.
    encoder_output = tf.concat([encoder_fw_outputs, encoder_bw_outputs], 2)

    encoder_final_state_c = tf.concat(
        (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
    encoder_final_state_h = tf.concat(
        (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
    encoder_final_state = LSTMStateTuple(c=encoder_final_state_c,
                                         h=encoder_final_state_h)
    return encoder_final_state
def BiLSTM(x, seqlen, weights, biases):
    """Bidirectional LSTM followed by a linear output layer.

    Args:
        x: Time-major input tensor (the RNN is called with ``time_major=True``).
        seqlen: Not used by this implementation (the ``sequence_length``
            argument of the RNN is disabled); kept for interface compatibility.
        weights: Mapping with key ``'out'`` holding the output weight matrix.
        biases: Mapping with key ``'out'`` holding the output bias.

    Returns:
        ``matmul(concat(forward, backward outputs), weights['out']) + biases['out']``.
    """
    def _make_cell():
        # Output dropout to reduce over-fitting.
        return tf.nn.rnn_cell.DropoutWrapper(LSTMCell(n_hidden), output_keep_prob=0.5)

    # One cell per direction: handing the same cell object to both cell_fw and
    # cell_bw makes the two directions use a single cell instead of two
    # independent ones.
    # NOTE: this adds separate backward variables, so checkpoints written with
    # the shared-cell graph no longer match.
    cell_fw = _make_cell()
    cell_bw = _make_cell()

    ((encoder_fw_outputs, encoder_bw_outputs),
     (encoder_fw_state, encoder_bw_state)) = (
        tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=x,
            time_major=True,
            dtype=tf.float32))

    encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)

    # The concatenated final state is not part of the return value. The ops
    # carry explicit names, so they are kept in the graph - presumably so they
    # can be looked up by name; verify before removing.
    if isinstance(encoder_fw_state, LSTMStateTuple):
        encoder_state_c = tf.concat((encoder_fw_state.c, encoder_bw_state.c), 1,
                                    name='bidirectional_concat_c')
        encoder_state_h = tf.concat((encoder_fw_state.h, encoder_bw_state.h), 1,
                                    name='bidirectional_concat_h')
        encoder_state = LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)
    elif isinstance(encoder_fw_state, tf.Tensor):
        encoder_state = tf.concat((encoder_fw_state, encoder_bw_state), 1,
                                  name='bidirectional_concat')

    return tf.matmul(encoder_outputs, weights['out']) + biases['out']
def create_Upper_LSTM_cell(self, Scope):
    """Build a single-step LSTM graph with placeholder state and input.

    Args:
        Scope: Variable scope in which the cell variables are created
            (``reuse=False``).

    Returns:
        Tuple ``(state_l, state_d, out)``: the placeholder for the cell state
        (shape [1, cell.state_size]), the placeholder for the step input
        (shape [1, self.dim]) and the new cell state after one step.
    """
    upper_cell = LSTMCell(self.dim, initializer=self.init, state_is_tuple=False)

    previous_state = tf.placeholder(tf.float32,
                                    shape=[1, upper_cell.state_size],
                                    name="cell_state_l")
    step_input = tf.placeholder(tf.float32,
                                shape=[1, self.dim],
                                name="cell_state_d")

    with tf.variable_scope(Scope, reuse=False):
        # The cell returns (output, new_state); only the new state is kept.
        step_result = upper_cell(step_input, previous_state)
    next_state = step_result[1]
    return previous_state, step_input, next_state
def __init__(self, mode, cell_type, num_hidden, embedding_seman_size, embedding_senti_size,
             vocab_size, max_seq_len, decode_type, mle_learning_rate, rl_learning_rate,
             softmax_temperature, grad_clip, scale_sentiment):
    """Store the hyper-parameters, pick the RNN cell factory and build the graph.

    ``self.sample`` is true only when ``decode_type`` equals
    ``constants.RANDOM``; ``self.keep_prob`` is 0.5 in ``constants.TRAIN``
    mode and 1.0 otherwise. After ``_check_args()`` the placeholders,
    variables and network are created, in that order.
    """
    # Model / data dimensions.
    self.mode = mode
    self.cell_type = cell_type
    self.vocab_size = vocab_size
    self.embedding_seman_size = embedding_seman_size
    self.embedding_senti_size = embedding_senti_size
    self.num_hidden = num_hidden
    self.max_seq_len = max_seq_len
    self.grad_clip = grad_clip

    # Decoding and regularisation switches derived from the arguments.
    self.sample = bool(decode_type == constants.RANDOM)
    if mode == constants.TRAIN:
        self.keep_prob = 0.5
    else:
        self.keep_prob = 1.0

    # Optimisation settings.
    self.MLE_learning_rate = mle_learning_rate
    self.RL_learning_rate = rl_learning_rate
    self.softmax_temperature = softmax_temperature
    self.scale_sentiment = scale_sentiment
    print('self.MLE_learning_rate', self.MLE_learning_rate)
    print('self.RL_learning_rate', self.RL_learning_rate)

    self._check_args()

    # Factory that maps a unit count to a dropout-wrapped cell.
    # self.keep_prob is read when the factory is called, not when it is defined.
    if self.cell_type == 'lstm':
        self.cell_fn = lambda x: DropoutWrapper(
            LSTMCell(x, state_is_tuple=True), output_keep_prob=self.keep_prob)
    elif self.cell_type == 'gru':
        self.cell_fn = lambda x: DropoutWrapper(
            GRUCell(x), output_keep_prob=self.keep_prob)

    self._create_placeholders()
    self._create_variable()
    self._create_network()
def single_layer_rnn(cell_num, x, sequence_len):
    """Run one LSTM layer over ``x`` and return its per-step outputs.

    Args:
        cell_num: Number of units of the LSTM cell.
        x: Input tensor of the RNN.
        sequence_len: Per-example sequence lengths.

    Returns:
        The outputs tensor of ``tf.nn.dynamic_rnn`` (the final state is dropped).
    """
    lstm_cell = LSTMCell(cell_num)
    rnn_result = tf.nn.dynamic_rnn(lstm_cell,
                                   x,
                                   sequence_length=sequence_len,
                                   dtype=tf.float32)
    return rnn_result[0]
def __init__(self, inputs, labels, num_hidden, cell_type, wtable, keep_prob=1.0,
             sequence_length=None, learning_rate=0.001):
    """Store the inputs, build the stacked recurrent cell and touch the graph attributes.

    Args:
        inputs: Model inputs; for ``cell_type == 'PLSTM'`` a pair ``(t, x)``.
        labels: Label tensor; its second dimension gives ``num_classes``.
        num_hidden: Iterable with the unit count of every stacked layer.
        cell_type: ``'PLSTM'`` or ``'LSTM'``.
        wtable: Stored as ``self.wtable``.
        keep_prob: Input keep probability of the dropout wrapper of each cell.
        sequence_length: Stored as ``self.sequence_length``.
        learning_rate: Stored as ``self.learning_rate``.

    Raises:
        ValueError: for any other ``cell_type``.
    """
    self.inputs = inputs
    self.labels = labels
    self.wtable = wtable
    self.sequence_length = sequence_length
    self.num_hidden = num_hidden
    self.num_classes = int(labels.get_shape()[1])

    if cell_type == 'PLSTM':
        assert len(inputs) == 2, "Inputs should be a tuple of (t, x)"
        layers_ = [
            DropoutWrapper(PhasedLSTMCell(n, use_peepholes=True), input_keep_prob=keep_prob)
            for n in num_hidden
        ]
        self.stacked_cell = MultiPRNNCell(layers_)
    elif cell_type == 'LSTM':
        layers_ = [
            DropoutWrapper(LSTMCell(n, use_peepholes=True), input_keep_prob=keep_prob)
            for n in num_hidden
        ]
        self.stacked_cell = MultiRNNCell(layers_)
    else:
        raise ValueError('Unit {} not implemented.'.format(cell_type))

    self.learning_rate = learning_rate

    # Bare attribute accesses - presumably lazily-built properties that
    # construct the graph on first access; confirm against the class body.
    self.logits
    self.optimize
    self.accuracy
def single_rnn_cell():
    """Return one dropout-wrapped recurrent cell.

    A GRU cell is used when ``self.cell_type == 'GRU'``, an LSTM cell
    otherwise; ``self`` comes from the enclosing scope.
    """
    if self.cell_type == 'GRU':
        core_cell = GRUCell(self.rnn_size)
    else:
        core_cell = LSTMCell(self.rnn_size)
    return DropoutWrapper(core_cell, output_keep_prob=self.keep_prob)
def encode(self, inputs):
    """Embed ``inputs``, normalise them and encode the sequence with an LSTM.

    Args:
        inputs: Input sequences, [batch size, sequence length, features]
            according to the original comments.

    Returns:
        ``self.encoder_output``: the output activations of the LSTM for every
        sequence position.
    """
    with tf.variable_scope("embedding"):
        # Width-1 convolution: a per-position linear embedding of the features.
        W_embed = tf.get_variable("weights",
                                  [1, self.input_dimension, self.input_embed],
                                  initializer=self.initializer)
        embedded_input = tf.nn.conv1d(inputs, W_embed, 1, "VALID", name="embedded_input")
        # Batch normalisation over the feature axis (the op is named 'layer_norm').
        embedded_input = tf.layers.batch_normalization(embedded_input,
                                                       axis=2,
                                                       training=self.is_training,
                                                       name='layer_norm',
                                                       reuse=None)

    with tf.variable_scope("dynamic_rnn"):
        # Encode the input sequence with a dropout-wrapped LSTM cell.
        recurrent_cell = DropoutWrapper(
            LSTMCell(self.input_embed, initializer=self.initializer),
            output_keep_prob=0.9)
        # dynamic_rnn returns the output activations and the last state;
        # only the activations are kept.
        rnn_result = tf.nn.dynamic_rnn(recurrent_cell, embedded_input, dtype=tf.float32)
        self.encoder_output = rnn_result[0]
    return self.encoder_output