def bidirectional_RNN(self, num_hidden, inputs):
    """Run a bidirectional GRU over ``inputs`` and join both directions.

    Args:
        num_hidden: Number of units in each of the two GRU cells.
        inputs: Tensor handed to ``tf.nn.bidirectional_dynamic_rnn`` with
            ``time_major=True``.

    Returns:
        The forward and backward output tensors concatenated along axis 2.
    """
    with tf.name_scope("bidirectional_RNN"):
        forward_cell = rnn.GRUCell(num_hidden)
        backward_cell = rnn.GRUCell(num_hidden)
        # Only the per-step outputs are used; the final states are discarded.
        directional_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=inputs,
            dtype=tf.float32,
            time_major=True,
        )
        forward_out, backward_out = directional_outputs
        return tf.concat((forward_out, backward_out), 2)
def BidirectionalGRUEncoder(self, inputs, name):
    """Encode ``inputs`` with a dropout-wrapped bidirectional GRU.

    Args:
        inputs: Input tensor; documented by the original author as
            ``[batch, max_time, embedding_size]``.
        name: Variable scope under which the encoder is built.

    Returns:
        Forward and backward outputs concatenated on axis 2; documented by
        the original author as ``[batch, max_time, 2 * hidden_size]``.
    """
    def make_cell():
        # GRU cell whose outputs are dropped with self.dropout_keep_prob.
        return rnn.DropoutWrapper(
            rnn.GRUCell(self.hidden_size),
            output_keep_prob=self.dropout_keep_prob)

    with tf.variable_scope(name):
        forward_cell = make_cell()
        backward_cell = make_cell()
        # Final states are returned by TensorFlow but not needed here.
        both_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=inputs,
            sequence_length=getSequenceRealLength(inputs),
            dtype=tf.float32)
        fw_outputs, bw_outputs = both_outputs
        return tf.concat((fw_outputs, bw_outputs), 2)
def _init_inter_review_encoder(self):
    """Build the review-level encoder and store it in ``self.review_outputs``.

    ``self.sent_outputs`` is regrouped per review, run through a
    bidirectional GRU with trainable initial states, and the result is
    passed to ``disan`` together with a review-level mask.
    """
    with tf.variable_scope('inter-review-encoder') as scope:
        review_inputs = tf.reshape(
            self.sent_outputs,
            [-1, self.max_review_length, 4 * self.emb_size])

        # Boolean view of self.docs, reduced over axis 3 and then axis 2.
        docs_as_bool = tf.cast(self.docs, tf.bool)
        sent_inputs_mask = tf.reduce_any(docs_as_bool,
                                         reduction_indices=[3])
        review_inputs_mask = tf.reduce_any(sent_inputs_mask,
                                           reduction_indices=[2])

        # Review-level GRU cells.
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        def tiled_initial_state(var_name):
            # One zero-initialised trainable row, repeated along the first
            # dimension of review_inputs.
            row = tf.get_variable(
                var_name,
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0))
            return tf.tile(row, multiples=[get_shape(review_inputs)[0], 1])

        init_state_fw = tiled_initial_state('init_state_fw')
        init_state_bw = tiled_initial_state('init_state_bw')

        rnn_outputs, _ = bidirectional_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=review_inputs,
            input_lengths=self.review_lengths,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            scope=scope)

        self.review_outputs = disan(
            rnn_outputs, review_inputs_mask, 'DiSAN', self.dropout_rate,
            self.is_training, 0., 'elu', None, 'reviews-encoding')
def build_cell(units, cell_type='lstm', num_layers=1):
    """Create one RNN cell, or a ``MultiRNNCell`` stack of identical cells.

    Args:
        units: Number of units for every cell.
        cell_type: ``"lstm"`` or ``"gru"``.
        num_layers: When greater than 1, that many single cells are stacked;
            otherwise a single cell is returned.

    Returns:
        The constructed cell.

    Raises:
        ValueError: If ``cell_type`` is not supported.
    """
    if num_layers > 1:
        stacked = []
        for _ in range(num_layers):
            stacked.append(build_cell(units, cell_type, 1))
        return rnn.MultiRNNCell(stacked)

    if cell_type == "lstm":
        return rnn.LSTMCell(units)
    if cell_type == "gru":
        return rnn.GRUCell(units)
    raise ValueError('Do not support %s' % cell_type)
def _witch_cell(self, dropout):
    """Build the RNN cell selected by ``self.params.cell_type``.

    Args:
        dropout: Value passed to ``rnn.DropoutWrapper`` as
            ``output_keep_prob``.

    Returns:
        A ``DropoutWrapper`` around an ``LSTMCell`` or ``GRUCell`` with
        ``self.params.hidden_size`` units.

    Raises:
        ValueError: If ``self.params.cell_type`` is neither ``'lstm'`` nor
            ``'gru'``.
    """
    cell_type = self.params.cell_type
    if cell_type == 'lstm':
        cell_tmp = rnn.LSTMCell(self.params.hidden_size)
    elif cell_type == 'gru':
        cell_tmp = rnn.GRUCell(self.params.hidden_size)
    else:
        # Fail with a clear message instead of handing None to
        # DropoutWrapper.
        raise ValueError('Unsupported cell_type: %r' % (cell_type,))
    return rnn.DropoutWrapper(cell_tmp, output_keep_prob=dropout)
def __init__(self, n_hidden, cell="GRU"):
    """Initialise the qa_rnn module by choosing its recurrent cell.

    Args:
        n_hidden: Number of hidden units.
        cell: Cell kind, matched case-insensitively: ``"GRU"``, ``"LSTM"``
            or ``"BASIC_RNN"``.

    Raises:
        Exception: If ``cell`` names none of the supported kinds.
    """
    # Only the requested cell is built; the documented basic RNN option is
    # selectable instead of falling into the error branch.
    kind = cell.upper() if isinstance(cell, str) else cell
    if kind == "GRU":
        self.rnn_cell = rnn.GRUCell(num_units=n_hidden)
    elif kind == "LSTM":
        self.rnn_cell = rnn.LSTMCell(num_units=n_hidden)
    elif kind == "BASIC_RNN":
        self.rnn_cell = rnn.BasicRNNCell(num_units=n_hidden)
    else:
        raise Exception("%s not supported." % (cell,))
def BidirectionalGRUEncoder(self, inputs, name):
    """Bidirectional GRU encoding layer.

    Encodes all characters of a segment, or all segments of a datagram.
    Shapes as documented by the original author:
        input:  [batch, max_time, embedding_size]
        output: [batch, max_time, 2 * hidden_size]

    :param inputs: tensor to encode
    :param name: name/variable scope (variables use ``tf.AUTO_REUSE``)
    :return: forward and backward outputs concatenated on axis 2
    """
    with tf.name_scope(name), tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        forward_cell = rnn.DropoutWrapper(
            rnn.GRUCell(self.hidden_size),
            output_keep_prob=self.dropout_keep_prob)
        backward_cell = rnn.DropoutWrapper(
            rnn.GRUCell(self.hidden_size),
            output_keep_prob=self.dropout_keep_prob)

        # The final states are returned by TensorFlow but are not used.
        direction_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=inputs,
            sequence_length=getSequenceLength(inputs),
            dtype=tf.float32,
        )
        fw_outputs, bw_outputs = direction_outputs
        # Join the two directions on the feature axis.
        return tf.concat((fw_outputs, bw_outputs), 2)
def call_cell(cell_type, nun_hid, dropout):
    """Build a dropout-wrapped RNN cell of the requested kind.

    Args:
        cell_type: ``"gru"``, ``"lstm"`` or ``"rnn"``.
        nun_hid: Number of hidden units.
        dropout: Dropout rate; the wrapper keeps outputs with probability
            ``1.0 - dropout``.

    Returns:
        The selected cell wrapped in ``tf.contrib.rnn.DropoutWrapper``.

    Raises:
        ValueError: If ``cell_type`` is not one of the supported kinds.
    """
    if cell_type == "gru":
        cell = rnn.GRUCell(nun_hid)
    elif cell_type == "lstm":
        cell = CustomLSTMCell(nun_hid, forget_bias=1.0)
    elif cell_type == "rnn":
        cell = rnn.BasicRNNCell(nun_hid)
    else:
        # Fail with a clear message instead of wrapping None in dropout.
        raise ValueError("Unsupported cell_type: %r" % (cell_type,))
    return tf.contrib.rnn.DropoutWrapper(cell,
                                         output_keep_prob=1.0 - dropout)
def input_module(self):
    """Encode raw texts into vector representation.

    Looks up embeddings for ``self.story``, flattens each fact into one
    vector, runs a GRU over the story and stores the per-step outputs in
    ``self.story_embedding``.
    """
    # Original author's note on the lookup result:
    # [batch_size, story_length, sequence_length, embed_size]
    story_embedding = tf.nn.embedding_lookup(self.embedding, self.story)
    story_embedding = tf.reshape(
        story_embedding,
        (self.batch_size, self.story_length,
         self.fact_len * self.embedding_size))
    cell = rnn.GRUCell(self.hidden_size)
    # No initial_state is passed, so dynamic_rnn starts from its default
    # state for dtype float32; the final state is not used.
    self.story_embedding, _ = tf.nn.dynamic_rnn(
        cell, story_embedding, dtype=tf.float32, scope="input_module")
def gru_encoder(self, input, cell_size, length, scope):
    """Encode ``input`` with a bidirectional GRU built under ``scope``.

    Args:
        input: Tensor passed to ``tf.nn.bidirectional_dynamic_rnn``.
        cell_size: Number of units for each direction's GRU cell.
        length: Passed as ``sequence_length``.
        scope: Enclosing variable scope.

    Returns:
        The two directions' outputs concatenated along the last axis.
    """
    with tf.variable_scope(scope):
        with tf.variable_scope("forward"):
            forward_cell = rnn.GRUCell(cell_size)
        with tf.variable_scope("backward"):
            backward_cell = rnn.GRUCell(cell_size)
        # The final states are not needed by callers of this method.
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=input,
            sequence_length=length,
            dtype=tf.float32)
        return tf.concat(outputs, -1)
def get_rnn(self, rnn_type):
    """Create an RNN cell according to ``rnn_type``.

    :param rnn_type: ``'lstm'`` selects an LSTM cell; any other value
        selects a GRU cell
    :return: the cell wrapped in ``DropoutWrapper`` with
        ``input_keep_prob=self.input_keep_prob``
    """
    cell_class = rnn.LSTMCell if rnn_type == 'lstm' else rnn.GRUCell
    base_cell = cell_class(num_units=self.config.hidden_dim)
    return rnn.DropoutWrapper(cell=base_cell,
                              input_keep_prob=self.input_keep_prob)
def encode_L_bidirection(seq, seq_len, dim=384, scope='encode_L_bi',
                         reuse=tf.AUTO_REUSE, cell_type='LSTM'):
    """Encode ``seq`` with a bidirectional RNN whose widths sum to ``dim``.

    The forward cell gets ``ceil(dim / 2)`` units and the backward cell
    ``floor(dim / 2)``.

    Args:
        seq: Input sequence tensor.
        seq_len: Passed as ``sequence_length``.
        dim: Combined number of units of both directions.
        scope: Variable scope name.
        reuse: Reuse flag for the variable scope.
        cell_type: ``'LSTM'`` or ``'GRU'``.

    Returns:
        ``(output, output_state)``: outputs and final states of the two
        directions, each concatenated on the last axis (GRU only).

    Raises:
        ValueError: If ``cell_type`` is unknown.
        RuntimeError: Always for ``'LSTM'``, after the RNN has been built.
    """
    with tf.variable_scope(scope, reuse=reuse) as var_scope:
        half = dim / 2.0
        fw_units = int(math.ceil(half))
        bw_units = int(math.floor(half))
        log.warning(var_scope.name)

        if cell_type == 'LSTM':
            cell1 = rnn.BasicLSTMCell(num_units=fw_units,
                                      state_is_tuple=True)
            cell2 = rnn.BasicLSTMCell(num_units=bw_units,
                                      state_is_tuple=True)
        elif cell_type == 'GRU':
            cell1 = rnn.GRUCell(num_units=fw_units)
            cell2 = rnn.GRUCell(num_units=bw_units)
        else:
            raise ValueError('Unknown cell_type')

        bi_outputs, encoder_state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell1,
            cell_bw=cell2,
            inputs=seq,
            sequence_length=seq_len,
            dtype=tf.float32)

        if cell_type == 'LSTM':
            raise RuntimeError('Check how LSTM works with bidirectional rnn')

        # Only 'GRU' reaches this point.
        output = tf.concat(bi_outputs, -1)
        output_state = tf.concat(encoder_state, -1)
        return output, output_state
def _init_sent_encoder(self):
    """Build the sentence-level encoder with visual aspect attention.

    Sets ``self.sentence_outputs`` (after dropout),
    ``self.sent_att_weights`` and ``self.img_att_weights``.
    """
    with tf.variable_scope('sentence') as scope:
        sentence_rnn_inputs = tf.reshape(
            self.word_outputs,
            [-1, self.max_num_sents, 2 * self.hidden_dim])

        # Sentence encoder cells.
        cell_fw = rnn.GRUCell(self.hidden_dim)
        cell_bw = rnn.GRUCell(self.hidden_dim)

        def tiled_initial_state(var_name):
            # One trainable row initialised to 1.0, repeated along the
            # first dimension of the RNN inputs.
            row = tf.get_variable(
                var_name,
                shape=[1, self.hidden_dim],
                initializer=tf.constant_initializer(1.0))
            return tf.tile(
                row, multiples=[get_shape(sentence_rnn_inputs)[0], 1])

        init_state_fw = tiled_initial_state('init_state_fw')
        init_state_bw = tiled_initial_state('init_state_bw')

        sentence_rnn_outputs, _ = bidirectional_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=sentence_rnn_inputs,
            input_lengths=self.document_lengths,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            scope=scope)

        attended = visual_aspect_attention(
            text_input=sentence_rnn_outputs,
            visual_input=self.images,
            att_dim=self.att_dim,
            sequence_lengths=self.document_lengths)
        (self.sentence_outputs,
         self.sent_att_weights,
         self.img_att_weights) = attended

        self.sentence_outputs = tf.nn.dropout(
            self.sentence_outputs, keep_prob=self.dropout_keep_prob)
def _rnn_cell(self, size=None, activation=None, dropout=None, residual=False):
    """Create a GRU cell with optional residual and dropout wrappers.

    Args:
        size: Number of units; a falsy value falls back to
            ``self._hidden_size``.
        activation: Passed through to ``rnn.GRUCell``.
        dropout: When not None, inputs are kept with probability
            ``1.0 - dropout``.
        residual: When truthy, the cell is wrapped in ``ResidualWrapper``
            before any dropout wrapper is applied.

    Returns:
        The (possibly wrapped) cell.
    """
    num_units = size or self._hidden_size
    cell = rnn.GRUCell(num_units, activation=activation)
    if residual:
        cell = rnn.ResidualWrapper(cell)
    if dropout is None:
        return cell
    return rnn.DropoutWrapper(cell, input_keep_prob=(1.0 - dropout))
def build_var(self):
    """Create the embedding, GRU cells, attention and dense variables.

    Everything is created under the variable scope ``self.name``; the
    results are stored on ``self`` (``embedding``, ``gru_fw_cell``,
    ``gru_bw_cell``, ``p_step``, ``a_geo``, ``weights``, ``biases``).
    """
    def initial_value(shape):
        # Initial value produced by the model's initializer for `shape`.
        return self.initializer(shape)

    with tf.variable_scope(self.name):
        with tf.variable_scope('embedding'):
            # The embedding starts from the node vectors in self.node_vec.
            self.embedding = tf.get_variable(
                'embedding',
                initializer=tf.constant(self.node_vec, dtype=tf.float32))

        with tf.variable_scope('BiGRU'):
            self.gru_fw_cell = rnn.GRUCell(self.n_hidden_gru)
            self.gru_bw_cell = rnn.GRUCell(self.n_hidden_gru)

        with tf.variable_scope('attention'):
            self.p_step = tf.get_variable(
                'p_step',
                initializer=initial_value([1, self.n_steps]),
                dtype=tf.float32)
            self.a_geo = tf.get_variable(
                'a_geo', initializer=initial_value([1]))

        with tf.variable_scope('dense'):
            weights = {}
            weights['dense1'] = tf.get_variable(
                'dense1_weight',
                initializer=initial_value(
                    [2 * self.n_hidden_gru, self.n_hidden_dense1]))
            weights['dense2'] = tf.get_variable(
                'dense2_weight',
                initializer=initial_value(
                    [self.n_hidden_dense1, self.n_hidden_dense2]))
            weights['out'] = tf.get_variable(
                'out_weight',
                initializer=initial_value([self.n_hidden_dense2, 1]))
            self.weights = weights

            biases = {}
            biases['dense1'] = tf.get_variable(
                'dense1_bias',
                initializer=initial_value([self.n_hidden_dense1]))
            biases['dense2'] = tf.get_variable(
                'dense2_bias',
                initializer=initial_value([self.n_hidden_dense2]))
            biases['out'] = tf.get_variable(
                'out_bias', initializer=initial_value([1]))
            self.biases = biases
def BidirectionalGRUEncoder(self, inputs, name):
    """Encode ``inputs`` with a bidirectional GRU using fixed 0.9 dropout.

    Both cells are wrapped with ``input_keep_prob=0.9`` and
    ``output_keep_prob=0.9`` and are created before the variable scope
    ``name`` is entered.

    Args:
        inputs: Input tensor; the original comments describe it as
            [batch_size, max_time, voc_size].
        name: Variable scope in which the RNN is run.

    Returns:
        Forward and backward outputs concatenated on axis 2; the original
        comments describe this as [batch_size, max_time, hidden_size * 2].
    """
    def wrapped_gru():
        # GRU cell with the hard-coded input/output keep probabilities.
        return tf.contrib.rnn.DropoutWrapper(
            rnn.GRUCell(self.hidden_size),
            input_keep_prob=0.9,
            output_keep_prob=0.9)

    forward_cell = wrapped_gru()
    backward_cell = wrapped_gru()

    with tf.variable_scope(name):
        # Per-direction outputs; the final states are ignored.
        per_direction, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=inputs,
            sequence_length=length(inputs),
            dtype=tf.float32)
        fw_outputs, bw_outputs = per_direction
        return tf.concat((fw_outputs, bw_outputs), 2)
def BidirectionalGRUEncoder(self, inputs, name):
    """Bidirectional GRU encoding layer.

    Encodes all word vectors of a sentence, or all sentence vectors of a
    document. According to the original author's notes, an attention layer
    applied afterwards weights these outputs into a single sentence or
    document vector.

    :param inputs: tensor to encode; described by the original author as
        [batch_size, max_time, voc_size] =
        [batch_size * sent_in_doc, word_in_sent, embedding_size]
    :param name: variable scope for the encoder
    :return: forward and backward outputs concatenated on axis 2; described
        by the original author as [batch_size, max_time, hidden_size * 2]
    """
    with tf.variable_scope(name):
        forward_cell = rnn.GRUCell(self.hidden_size)
        backward_cell = rnn.GRUCell(self.hidden_size)
        # Only the per-step outputs are used; the final states are dropped.
        per_direction, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=inputs,
            sequence_length=length(inputs),
            dtype=tf.float32)
        fw_outputs, bw_outputs = per_direction
        return tf.concat((fw_outputs, bw_outputs), 2)
def RNN(x, weights, biases):
    """Three-layer GRU stack followed by a linear output projection.

    The first two layers are GRU cells wrapped with output dropout
    (``keep_prob``); the last layer is a plain GRU cell without dropout.

    Args:
        x: Input tensor passed to ``tf.nn.dynamic_rnn``.
        weights: Mapping whose ``'out'`` entry is the projection matrix.
        biases: Mapping whose ``'out'`` entry is the projection bias.

    Returns:
        ``matmul(outputs, weights['out']) + biases['out']`` where
        ``outputs`` is the RNN output after ``cut_output``.
    """
    def dropout_gru():
        # A new cell object per layer: putting one instance into several
        # MultiRNNCell slots would make those layers share a single set of
        # weights instead of being independent layers.
        return rnn.DropoutWrapper(cell=rnn.GRUCell(n_hidden),
                                  output_keep_prob=keep_prob)

    # To add layers, add more dropout_gru() entries before the last cell.
    layers = rnn.MultiRNNCell(
        cells=[dropout_gru(), dropout_gru(), rnn.GRUCell(n_hidden)])

    # Actual (unpadded) sequence lengths, per the original comments.
    length = real_len(x)
    outputs, _ = tf.nn.dynamic_rnn(cell=layers,
                                   inputs=x,
                                   dtype=tf.float32,
                                   sequence_length=length)
    # NOTE(review): the original comments say cut_output keeps the last
    # valid time step of each sequence -- confirm against cut_output.
    outputs = cut_output(outputs, length)
    # Linear output layer.
    return tf.matmul(outputs, weights['out']) + biases['out']
def dyn_rnn(inputs):
    """Classify ``inputs`` with a stack of dropout-wrapped GRU cells.

    Builds ``stacked_layers`` GRU cells of size ``rnn_size``, wraps each
    with an output keep probability from ``drop(i, stacked_layers)``, runs
    them with ``tf.nn.dynamic_rnn`` and projects the last layer's final
    state to ``n_classes`` units.

    Args:
        inputs: Input tensor for the RNN.

    Returns:
        The dense-layer output computed from the last layer's final state.
    """
    gru_cells = []
    for _ in range(stacked_layers):
        gru_cells.append(rnn.GRUCell(rnn_size))

    wrapped_cells = []
    for layer_index, gru in enumerate(gru_cells):
        keep = drop(layer_index, stacked_layers)
        wrapped_cells.append(
            tf.contrib.rnn.DropoutWrapper(cell=gru, output_keep_prob=keep))

    stack = tf.nn.rnn_cell.MultiRNNCell(wrapped_cells, state_is_tuple=True)
    _, final_states = tf.nn.dynamic_rnn(stack,
                                        inputs,
                                        dtype=tf.float32,
                                        sequence_length=seql(inputs))
    return tf.layers.dense(inputs=final_states[-1], units=n_classes)
def model_fn(features, labels, mode, params):
    """Build the text model and return its estimator spec.

    Word embeddings are encoded by a bidirectional RNN whose cell kind is
    chosen by ``FLAGS.cell_type``; the encoding is reduced by attention or
    by a mean over axis -2, then passed to ``get_estimator_spec``.

    Args:
        features: Model features passed to ``get_word_embedding``.
        labels: Labels forwarded to ``get_estimator_spec``.
        mode: Mode forwarded to ``get_estimator_spec``.
        params: Mapping providing ``"vocab_size"``.

    Returns:
        Whatever ``get_estimator_spec`` returns.
    """
    word_embeddings = get_word_embedding(features, params["vocab_size"])

    cell_type = FLAGS.cell_type
    if cell_type == "lstm":
        cell_class = rnn.BasicLSTMCell
    elif cell_type == "gru":
        cell_class = rnn.GRUCell
    else:
        # Unknown cell kinds terminate the process.
        print("unknown cell type: %s" % FLAGS.cell_type)
        exit(-1)
    rnn_fw_cell = cell_class(FLAGS.rnn_dimension)
    rnn_bw_cell = cell_class(FLAGS.rnn_dimension)

    outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        rnn_fw_cell, rnn_bw_cell, word_embeddings, dtype=tf.float32)
    text_embedding = tf.concat(outputs, 2)

    if FLAGS.use_attention:
        hidden_layer = attention(text_embedding, FLAGS.attention_dimension)
    else:
        hidden_layer = tf.reduce_mean(text_embedding, axis=-2)
    return get_estimator_spec(hidden_layer, mode, labels)
def RNN(x, weights, biases, N_lag, N_units):
    """Stacked-GRU predictor over a fixed number of lagged steps.

    Args:
        x: Input tensor, unstacked into ``N_lag`` tensors along axis 1.
        weights: Mapping whose ``'out'`` entry is the output matrix.
        biases: Mapping whose ``'out'`` entry is the output bias.
        N_lag: Number of steps to unstack.
        N_units: Iterable with the number of units of each GRU layer.

    Returns:
        The tensor named ``"pred_LSTM"``: the last step's output times
        ``weights['out']`` plus ``biases['out']``.
    """
    steps = tf.unstack(x, N_lag, 1)

    # One GRU cell per entry of N_units, stacked into a single cell.
    layer_cells = []
    for n in N_units:
        layer_cells.append(rnn.GRUCell(num_units=n))
    stacked_cell = rnn.MultiRNNCell(layer_cells)

    # Generate the prediction from the output of the last step.
    outputs, _ = rnn.static_rnn(stacked_cell, steps, dtype=tf.float32)
    projected = tf.matmul(outputs[-1], weights['out'])
    return tf.add(projected, biases['out'], name="pred_LSTM")
def _witch_cell(self):
    """Build the RNN cell selected by ``self.cell_type``.

    Returns:
        A ``BasicLSTMCell`` or ``GRUCell`` with ``self.hidden_unit`` units,
        wrapped in ``DropoutWrapper`` when ``self.droupout_rate`` is not
        None.

    Raises:
        ValueError: If ``self.cell_type`` is neither ``'lstm'`` nor
            ``'gru'``.
    """
    if self.cell_type == 'lstm':
        cell_tmp = rnn.BasicLSTMCell(self.hidden_unit)
    elif self.cell_type == 'gru':
        cell_tmp = rnn.GRUCell(self.hidden_unit)
    else:
        # Fail with a clear message instead of returning or wrapping None.
        raise ValueError('Unsupported cell_type: %r' % (self.cell_type,))

    # Dropout is optional.
    # NOTE(review): the attribute is named as a rate but is passed as
    # output_keep_prob -- confirm which meaning callers intend.
    if self.droupout_rate is not None:
        cell_tmp = rnn.DropoutWrapper(cell_tmp,
                                      output_keep_prob=self.droupout_rate)
    return cell_tmp
def create_gru_unit(self, hidden_size):
    """Create a GRU cell with output dropout.

    :param hidden_size: number of units of the GRU cell
    :return: the GRU cell wrapped in ``DropoutWrapper`` with
        ``input_keep_prob=1.0`` and
        ``output_keep_prob=self.gru_output_keep_prob``
    """
    with tf.name_scope('create_gru_cell'):
        base_cell = rnn.GRUCell(hidden_size)
        return rnn.DropoutWrapper(
            cell=base_cell,
            input_keep_prob=1.0,
            output_keep_prob=self.gru_output_keep_prob)
def __init__(self, vocab_size):
    """Build the model graph, its session, training op and summaries.

    A private ``tf.Graph`` is created and everything is defined under the
    variable scope ``NAMESPACE``. Reads ``self._context_dim``,
    ``self._memory_dim`` and ``self._stack_dimension``, which this method
    does not set.

    Args:
        vocab_size: Size of the last axis of the input placeholder and of
            the target placeholder ``y_``.
    """
    self._vocab_size = vocab_size
    self.g = tf.Graph()
    with self.g.as_default():
        with tf.variable_scope(NAMESPACE):
            config = tf.ConfigProto(allow_soft_placement=True)
            self.sess = tf.Session(config=config)

            # Input variables
            self.sentence_vectors_fw = tf.placeholder(tf.float32, shape=(None, None, self._vocab_size), name='sentence_vectors_inp_fw')

            # Each slice of the input is projected with Wq and a ReLU, then
            # fed to a stack of GRU cells (only a forward RNN is built here).
            self.Wq = tf.Variable(tf.random_uniform([self._vocab_size, self._context_dim], -0.1, 0.1))
            self.internal_projection = lambda x: tf.nn.relu(tf.matmul(x, self.Wq))
            self.sentence_int_fw = tf.map_fn(self.internal_projection, self.sentence_vectors_fw)
            self.rnn_cell_fw = rnn.MultiRNNCell([rnn.GRUCell(self._memory_dim) for _ in range(self._stack_dimension)], state_is_tuple=True)
            with tf.variable_scope('fw'):
                output_fw, _ = tf.nn.dynamic_rnn(self.rnn_cell_fw, self.sentence_int_fw, time_major=True, dtype=tf.float32)
            # The RNN runs with time_major=True, so index -1 on the first
            # axis selects the output of the last time step.
            self.sentence_vector = output_fw[-1]

            # Final feedforward layers
            self.Ws1 = tf.Variable(tf.random_uniform([self._memory_dim, self._context_dim], -0.1, 0.1), name='Ws1')
            self.bs1 = tf.Variable(tf.random_uniform([self._context_dim], -0.1, 0.1), name='bs1')
            self.hidden = tf.nn.relu(tf.matmul(self.sentence_vector, self.Ws1) + self.bs1)
            # The output layer reuses the input projection Wq, transposed.
            self.outputs = tf.nn.softmax(tf.matmul(self.hidden, tf.transpose(self.Wq, (1, 0))))

            # Loss function and training
            self.y_ = tf.placeholder(tf.float32, shape=(None, self._vocab_size), name='y_')
            self.one = tf.ones_like(self.outputs)
            # TINY is added to the softmax output before taking the log.
            self.tiny = TINY * self.one
            self.cross_entropy = -tf.reduce_sum(self.y_ * tf.log(self.outputs + self.tiny))

            # Clipping the gradient: each gradient is clipped element-wise to
            # [-1, 1]; only variables whose name contains NAMESPACE are kept.
            optimizer = tf.train.AdamOptimizer(1e-3)
            gvs = optimizer.compute_gradients(self.cross_entropy)
            capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gvs if var.name.find(NAMESPACE) != -1]
            self.train_step = optimizer.apply_gradients(capped_gvs)
            self.sess.run(tf.global_variables_initializer())

            # Adding the summaries
            tf.summary.scalar('cross_entropy', self.cross_entropy)
            self.merged = tf.summary.merge_all()
            self.train_writer = tf.summary.FileWriter('./tf_train', self.sess.graph)
def create_cell(device):
    """Build one recurrent cell of kind ``rnn_type`` placed on a GPU.

    Args:
        device: GPU index, formatted into the device string
            ``"/gpu:{device}"``.

    Returns:
        The cell wrapped in ``rnn.DeviceWrapper`` and then in
        ``SwitchableDropoutWrapper`` (with ``is_train=self.is_training``).

    Raises:
        ValueError: If ``rnn_type`` is not "GRU", "LSTM", "RWA" or "RAN".
    """
    if rnn_type == "GRU":
        cell = rnn.GRUCell(rnn_size)
    elif rnn_type == "LSTM":
        # `reuse` is forwarded only when BasicLSTMCell's constructor has
        # that parameter in the installed TensorFlow.
        lstm_init = inspect.signature(tf.contrib.rnn.BasicLSTMCell.__init__)
        if 'reuse' in lstm_init.parameters:
            cell = rnn.LayerNormBasicLSTMCell(
                rnn_size, forget_bias=0.0,
                reuse=tf.get_variable_scope().reuse)
        else:
            cell = rnn.LayerNormBasicLSTMCell(rnn_size, forget_bias=0.0)
    elif rnn_type == "RWA":
        cell = RWACell(rnn_size)
    elif rnn_type == "RAN":
        cell = RANCell(rnn_size, normalize=self.is_training)
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("Unsupported rnn_type: %r" % (rnn_type,))

    placed_cell = rnn.DeviceWrapper(cell, device="/gpu:{}".format(device))
    return SwitchableDropoutWrapper(placed_cell, is_train=self.is_training)
def _init_word_encoder(self):
    """Build the word-level encoder with text attention.

    Sets ``self.word_outputs`` (after dropout) and
    ``self.word_att_weights``.
    """
    with tf.variable_scope('word') as scope:
        word_rnn_inputs = tf.reshape(
            self.embedded_inputs,
            [-1, self.max_num_words, self.emb_size])
        sentence_lengths = tf.reshape(self.sentence_lengths, [-1])

        # Word encoder cells.
        cell_fw = rnn.GRUCell(self.hidden_dim)
        cell_bw = rnn.GRUCell(self.hidden_dim)

        def tiled_initial_state(var_name):
            # One trainable row initialised to 1.0, repeated along the
            # first dimension of the RNN inputs.
            row = tf.get_variable(
                var_name,
                shape=[1, self.hidden_dim],
                initializer=tf.constant_initializer(1.0))
            return tf.tile(
                row, multiples=[get_shape(word_rnn_inputs)[0], 1])

        init_state_fw = tiled_initial_state('init_state_fw')
        init_state_bw = tiled_initial_state('init_state_bw')

        word_rnn_outputs, _ = bidirectional_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=word_rnn_inputs,
            input_lengths=sentence_lengths,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            scope=scope)

        attended = text_attention(
            inputs=word_rnn_outputs,
            att_dim=self.att_dim,
            sequence_lengths=sentence_lengths)
        self.word_outputs, self.word_att_weights = attended

        self.word_outputs = tf.nn.dropout(
            self.word_outputs, keep_prob=self.dropout_keep_prob)
def build_var(self):
    """Create the embedding, GRU cell, pooling and dense variables.

    Everything is created under the variable scope ``self.name``; the
    results are stored on ``self`` (``embedding``, ``gru_fw_cell``,
    ``time_weight``, ``weights``, ``biases``).
    """
    get_variable = tf.compat.v1.get_variable

    def initial_value(shape):
        # Initial value produced by the model's initializer for `shape`.
        return self.initializer(shape)

    with tf.variable_scope(self.name):
        with tf.variable_scope('embedding'):
            # The embedding uses the second initializer of the model.
            self.embedding = get_variable(
                'embedding',
                initializer=self.initializer2(
                    [self.n_nodes, self.embedding_size]),
                dtype=tf.float32)

        with tf.variable_scope('BiGRU'):
            # A single cell with twice the GRU width is created here.
            self.gru_fw_cell = rnn.GRUCell(2 * self.n_hidden_gru)

        with tf.variable_scope('SumPooling'):
            self.time_weight = get_variable(
                'time_weight',
                initializer=initial_value([self.n_time_interval]),
                dtype=tf.float32)

        with tf.variable_scope('dense'):
            weights = {}
            weights['dense1'] = get_variable(
                'dense1_weight',
                initializer=initial_value(
                    [2 * self.n_hidden_gru, self.n_hidden_dense1]))
            weights['dense2'] = get_variable(
                'dense2_weight',
                initializer=initial_value(
                    [self.n_hidden_dense1, self.n_hidden_dense2]))
            weights['out'] = get_variable(
                'out_weight',
                initializer=initial_value([self.n_hidden_dense2, 1]))
            self.weights = weights

            biases = {}
            biases['dense1'] = get_variable(
                'dense1_bias',
                initializer=initial_value([self.n_hidden_dense1]))
            biases['dense2'] = get_variable(
                'dense2_bias',
                initializer=initial_value([self.n_hidden_dense2]))
            biases['out'] = get_variable(
                'out_bias', initializer=initial_value([1]))
            self.biases = biases
def Demo_Encoder(s_h, per, seq_lengths, scope='Demo_Encoder', reuse=False):
    """Encode demonstrations with the recurrent encoder chosen by
    ``self.encoder_rnn_type``.

    Args:
        s_h: Reshaped to ``[-1, self.h, self.w, depth]`` before being passed
            to ``State_Encoder``.
        per: Reshaped to ``[-1, self.per_dim]`` before being passed to
            ``State_Encoder``.
        seq_lengths: Passed to the RNN as ``sequence_length``.
        scope: Variable scope name.
        reuse: Reuse flag for the variable scope and ``State_Encoder``; the
            scope name is logged only when it is falsy.

    Returns:
        ``(all_states, h, c)``: the per-step outputs (concatenated with the
        state features when
        ``self.concat_state_feature_direct_prediction`` is set) and the
        final hidden and cell states.

    Raises:
        ValueError: If ``self.encoder_rnn_type`` is unknown.
    """
    with tf.variable_scope(scope, reuse=reuse) as scope:
        if not reuse:
            log.warning(scope.name)

        state_features = tf.reshape(
            State_Encoder(tf.reshape(s_h, [-1, self.h, self.w, depth]),
                          tf.reshape(per, [-1, self.per_dim]),
                          self.batch_size * max_demo_len, reuse=reuse),
            [self.batch_size, max_demo_len, -1])

        if self.encoder_rnn_type == 'bilstm':
            fcell = rnn.BasicLSTMCell(
                num_units=math.ceil(self.num_lstm_cell_units),
                state_is_tuple=True)
            bcell = rnn.BasicLSTMCell(
                num_units=math.floor(self.num_lstm_cell_units),
                state_is_tuple=True)
            new_h, cell_state = tf.nn.bidirectional_dynamic_rnn(
                fcell, bcell, state_features,
                sequence_length=seq_lengths, dtype=tf.float32)
            # Both directions are summed, for the outputs and the states.
            new_h = tf.reduce_sum(tf.stack(new_h, axis=2), axis=2)
            cell_state = rnn.LSTMStateTuple(
                tf.reduce_sum(tf.stack(
                    [cs.c for cs in cell_state], axis=1), axis=1),
                tf.reduce_sum(tf.stack(
                    [cs.h for cs in cell_state], axis=1), axis=1))
        else:
            if self.encoder_rnn_type == 'lstm':
                cell = rnn.BasicLSTMCell(
                    num_units=self.num_lstm_cell_units,
                    state_is_tuple=True)
            elif self.encoder_rnn_type == 'rnn':
                cell = rnn.BasicRNNCell(num_units=self.num_lstm_cell_units)
            elif self.encoder_rnn_type == 'gru':
                cell = rnn.GRUCell(num_units=self.num_lstm_cell_units)
            else:
                raise ValueError('Unknown encoder rnn type')
            new_h, cell_state = tf.nn.dynamic_rnn(
                cell=cell, dtype=tf.float32,
                sequence_length=seq_lengths,
                inputs=state_features)

        if self.concat_state_feature_direct_prediction:
            all_states = tf.concat([new_h, state_features], axis=-1)
        else:
            all_states = new_h

        if isinstance(cell_state, rnn.LSTMStateTuple):
            final_h, final_c = cell_state.h, cell_state.c
        else:
            # 'rnn' and 'gru' cells return a single state tensor, which has
            # no .h/.c attributes.
            # NOTE(review): the state is returned as h and zeros as c so the
            # three-value contract holds -- confirm this is what the
            # consumers of c expect.
            final_h = cell_state
            final_c = tf.zeros_like(cell_state)
        return all_states, final_h, final_c
def _init_sent_encoder(self):
    """Build the sentence-level encoder and set ``self.sent_outputs``.

    Word encodings are regrouped per sentence, run through a bidirectional
    GRU with trainable initial states, reduced by ``attention`` and passed
    through dropout.
    """
    with tf.variable_scope('sent-encoder') as scope:
        sent_inputs = tf.reshape(
            self.word_outputs,
            [-1, self.max_sent_length, 2 * self.cell_dim])

        # Sentence encoder cells.
        cell_fw = rnn.GRUCell(self.cell_dim, name='cell_fw')
        cell_bw = rnn.GRUCell(self.cell_dim, name='cell_bw')

        def tiled_initial_state(var_name):
            # One zero-initialised trainable row, repeated along the first
            # dimension of sent_inputs.
            row = tf.get_variable(
                var_name,
                shape=[1, self.cell_dim],
                initializer=tf.constant_initializer(0))
            return tf.tile(row, multiples=[get_shape(sent_inputs)[0], 1])

        init_state_fw = tiled_initial_state('init_state_fw')
        init_state_bw = tiled_initial_state('init_state_bw')

        rnn_outputs, _ = bidirectional_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=sent_inputs,
            input_lengths=self.sent_lengths,
            initial_state_fw=init_state_fw,
            initial_state_bw=init_state_bw,
            scope=scope)

        # The attention weights are computed but not stored.
        sent_outputs, _ = attention(
            inputs=rnn_outputs,
            att_dim=self.att_dim,
            sequence_lengths=self.sent_lengths)

        self.sent_outputs = tf.layers.dropout(
            sent_outputs, self.dropout_rate, training=self.is_training)
def input_module(self):
    """Encode raw texts into vector representation.

    Looks up embeddings for ``self.story``, flattens each sentence into one
    vector, runs a GRU over the story and stores the per-step outputs in
    ``self.story_embedding``.
    """
    embedded_story = tf.nn.embedding_lookup(self.Embedding, self.story)
    flat_shape = (
        self.batch_size,
        self.story_length,
        self.sequence_length * self.embed_size,
    )
    embedded_story = tf.reshape(embedded_story, flat_shape)

    gru_cell = rnn.GRUCell(self.hidden_size)
    # The final state is returned by dynamic_rnn but is not used.
    step_outputs, _ = tf.nn.dynamic_rnn(
        gru_cell, embedded_story, dtype=tf.float32, scope='input_module')
    self.story_embedding = step_outputs