def build_cell(num_units,
               num_layers,
               is_train,
               cell_type,
               dropout=0.0,
               forget_bias=0.0,
               use_residual=False,
               dim_project=None):
    """Create a recurrent cell made of ``num_layers`` identical layers.

    Args:
        num_units: forwarded to ``single_cell`` for every layer.
        num_layers: number of layers to create.
        is_train: forwarded to ``single_cell``.
        cell_type: forwarded to ``single_cell``; also used as the TensorFlow
            name scope under which the cells are created.
        dropout: forwarded to ``single_cell``.
        forget_bias: forwarded to ``single_cell``.
        use_residual: when truthy, every layer except the first is wrapped
            in ``ResidualWrapper``.
        dim_project: forwarded to ``single_cell``.

    Returns:
        A ``MultiRNNCell`` over all layers when ``num_layers > 1``, otherwise
        the first (only) layer cell itself.
    """
    with tf.name_scope(cell_type):
        layers = []
        for _ in range(num_layers):
            layers.append(
                single_cell(num_units=num_units,
                            is_train=is_train,
                            cell_type=cell_type,
                            dropout=dropout,
                            forget_bias=forget_bias,
                            dim_project=dim_project))

        if use_residual:
            # The bottom layer stays unwrapped; only layers 1..N-1 get a
            # residual connection.
            layers[1:] = [ResidualWrapper(layer) for layer in layers[1:]]

        if num_layers > 1:
            return MultiRNNCell(layers)
        return layers[0]
def __init__(self, rnn_layers, seq_length, dynamic=False, bidirectional=False):
    """
    Stack the given RNN layers into one multi-layer recurrent block.

    Parameters
    ----------
    rnn_layers : list
        List of RNN layers to stack. Each layer must expose ``cells``,
        ``get_params()`` and ``get_params_dict()``.
    seq_length : int
        Max length of the input sequences.
    dynamic : boolean
        Selects between a dynamic and a static RNN. A static RNN is
        unrolled into a fixed graph and always walks through every time
        step of the sequence. A dynamic RNN is built `in a while loop`,
        so sequences of variable length can be passed, provided the list
        of sequence lengths is supplied as well.
        Currently API for using dynamic RNNs is not provided.
    bidirectional : boolean
        Influences whether the layer will be bidirectional.
    """
    self.rnn_layers = rnn_layers
    self.rnn_cells = [layer.cells for layer in rnn_layers]
    self.seq_length = seq_length
    self.dynamic = dynamic
    self.bidirectional = bidirectional
    self.stacked_cells = MultiRNNCell(cells=self.rnn_cells)
    self.cell_type = CellType.get_cell_type(bidirectional, dynamic)

    # Gather the parameters of every layer, both as a flat list and as a
    # name -> parameter mapping.
    self.params = []
    self.named_params_dict = {}
    for layer in rnn_layers:
        self.params.extend(layer.get_params())
        self.named_params_dict.update(layer.get_params_dict())
def decoder(self, decoder_inputs, enc_output, enc_states,
            target_sequence_length):
    """Build the decoder RNN inside the "decoder" variable scope.

    One cell is created per entry of ``self.hidden_layer_size``; the cell
    kind comes from ``self.hidden_layer_type[i]`` ("tanh", "lstm" or "gru";
    any other value contributes no cell) and its width from
    ``self.encoder_layer_size[i]``.

    Args:
        decoder_inputs: inputs fed to the bidirectional RNN.
        enc_output: encoder outputs, used as attention memory when
            ``self.attention`` is truthy.
        enc_states: pair whose items [0] and [1] are used as the initial
            forward / backward states when attention is disabled.
        target_sequence_length: passed as ``sequence_length`` of the RNN when
            attention is disabled, and as the third positional argument of
            ``BahdanauAttention`` when it is enabled.

    Returns:
        The first value returned by ``tf.nn.bidirectional_dynamic_rnn``.
    """
    with tf.variable_scope("decoder"):
        layer_cells = []
        for idx in xrange(len(self.hidden_layer_size)):
            kind = self.hidden_layer_type[idx]
            if kind == "tanh":
                layer_cells.append(
                    tf.contrib.rnn.BasicRNNCell(
                        num_units=self.encoder_layer_size[idx]))
            elif kind == "lstm":
                layer_cells.append(
                    tf.contrib.rnn.BasicLSTMCell(
                        num_units=self.encoder_layer_size[idx]))
            elif kind == "gru":
                layer_cells.append(
                    GRUCell(num_units=self.encoder_layer_size[idx]))
        stacked_cell = MultiRNNCell(layer_cells)

        # NOTE(review): the same cell object is handed in as both the forward
        # and the backward cell in either branch -- confirm this is intended.
        if self.attention:
            attention_size = decoder_inputs.get_shape().as_list()[-1]
            # NOTE(review): target_sequence_length lands in the
            # memory_sequence_length slot here -- confirm it really describes
            # enc_output.
            attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
                attention_size,
                enc_output,
                target_sequence_length,
                normalize=True,
                probability_fn=tf.nn.softmax)
            cell_with_attention = tf.contrib.seq2seq.AttentionWrapper(
                stacked_cell, attention_mechanism, attention_size)
            dec_output, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_with_attention,
                cell_bw=cell_with_attention,
                inputs=decoder_inputs,
                dtype=tf.float32)
        else:
            dec_output, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=stacked_cell,
                cell_bw=stacked_cell,
                inputs=decoder_inputs,
                initial_state_fw=enc_states[0],
                sequence_length=target_sequence_length,
                initial_state_bw=enc_states[1])
    return dec_output
class CRPolicy(tf.keras.Model):
    """Convolutional + recurrent network with a policy head and a value head.

    Observations go through three Conv2D/MaxPool2D stages, are flattened and
    projected to 128 features, pass through a two-layer GRU stack, and are
    then fed to two separate dense branches producing the policy output
    (``n_actions`` units) and the value output (1 unit).
    """

    def __init__(self, n_actions):
        super(CRPolicy, self).__init__()

        def hidden_init():
            # A fresh initializer per layer, exactly one call per use.
            return orthogonal(np.sqrt(2))

        # Recurrent core and its zero state for a batch of one.
        self.gru = MultiRNNCell(
            [GRUCell(128, kernel_initializer=hidden_init()) for _ in range(2)])
        self.s0 = self.gru.zero_state(batch_size=1, dtype=tf.float32)

        # Convolutional feature extractor.
        self.cv1 = Conv2D(32, 3, activation='relu',
                          kernel_initializer=hidden_init())
        self.mp1 = MaxPool2D()
        self.cv2 = Conv2D(32, 3, activation='relu',
                          kernel_initializer=hidden_init())
        self.mp2 = MaxPool2D()
        self.cv3 = Conv2D(32, 3, activation='relu',
                          kernel_initializer=hidden_init())
        self.mp3 = MaxPool2D()
        self.flatten = Flatten()

        # Dense layers: fc1 feeds the GRU, fc2/fc3 feed the two heads.
        self.fc1 = Dense(128, activation='relu',
                         kernel_initializer=hidden_init())
        self.fc2 = Dense(100, activation='relu',
                         kernel_initializer=hidden_init())
        self.fc3 = Dense(100, activation='relu',
                         kernel_initializer=hidden_init())
        self.pol = Dense(n_actions, kernel_initializer=orthogonal(0.01))
        self.val = Dense(1, kernel_initializer=orthogonal(1))

    def call(self, obs, state):
        """Run the network on ``obs`` starting from recurrent ``state``.

        Returns a tuple ``(policy_output, value_output, new_state)``.
        """
        features = tf.constant(obs, dtype=tf.float32)
        feed_forward = (self.cv1, self.mp1, self.cv2, self.mp2, self.cv3,
                        self.mp3, self.flatten, self.fc1)
        for layer in feed_forward:
            features = layer(features)

        # A leading axis of size 1 is added before the RNN, so the rows of
        # `features` are consumed along the second axis.
        sequence = tf.expand_dims(features, axis=0)
        rnn_out, next_state = dynamic_rnn(self.gru, sequence,
                                          initial_state=state)
        flat = tf.reshape(rnn_out, shape=[-1, 128])

        policy_hidden = self.fc2(flat)
        value_hidden = self.fc3(flat)
        return self.pol(policy_hidden), self.val(value_hidden), next_state
def __init__(self, state_size, num_layers, dropout_prob, base_cell):
    """Build the underlying cell from a tf.contrib.rnn class name.

    Args:
        state_size: number of units in each cell.
        num_layers: how many cells to include; a single layer is used
            directly, any other count is wrapped in a MultiRNNCell.
        dropout_prob: probability of a node being dropped. It is only
            stored on the instance here; no dropout wrapper is applied
            in this constructor.
        base_cell: (str) name of the underlying cell class to look up on
            tf.contrib.rnn (e.g. 'GRUCell').
    """
    self._state_size = state_size
    self._num_layers = num_layers
    self._dropout_prob = dropout_prob
    self._base_cell = base_cell

    def make_layer():
        """Resolve the cell class by name and instantiate one cell."""
        cell_cls = getattr(tf.contrib.rnn, base_cell)
        return cell_cls(num_units=state_size)

    if num_layers != 1:
        self._cell = MultiRNNCell([make_layer() for _ in range(num_layers)])
    else:
        self._cell = make_layer()
def build_rnn_layers(cell_type,
                     num_units_per_layer,
                     use_dropout,
                     dropout_probability,
                     mode,
                     dtype,
                     residual_connections=False,
                     highway_connections=False,
                     as_list=False):
    """Build one recurrent cell per entry of ``num_units_per_layer``.

    Every layer is created with ``_build_single_cell``. From the second
    layer on, a layer is wrapped in ``HighwayWrapper`` when
    ``highway_connections is True``, otherwise in ``ResidualWrapper`` when
    ``residual_connections is True`` (highway takes precedence). Both flags
    are compared by identity with ``True``.

    Returns:
        * the bare cell when exactly one layer was built;
        * a ``MultiRNNCell`` over the layers when ``as_list is False``;
        * the plain Python list of cells otherwise.
    """
    stacked = []
    for depth, units in enumerate(num_units_per_layer):
        layer_cell = _build_single_cell(
            cell_type=cell_type,
            num_units=units,
            use_dropout=use_dropout,
            dropout_probability=dropout_probability,
            mode=mode,
            dtype=dtype,
        )
        if depth > 0:
            # Skip connections never apply to the first layer.
            if highway_connections is True:
                layer_cell = HighwayWrapper(layer_cell)
            elif residual_connections is True:
                layer_cell = ResidualWrapper(layer_cell)
        stacked.append(layer_cell)

    if len(stacked) == 1:
        return stacked[0]
    if as_list is False:
        return MultiRNNCell(stacked)
    return stacked
def _net(self):
    """Build the two-source separation network on ``self.x_mixed``.

    A stack of ``self.n_layer`` GRU cells runs over ``self.x_mixed``; two
    ReLU dense layers turn the RNN output into per-source estimates, which
    are then normalised into masks and applied to the mixture.

    Returns:
        Tuple ``(y_tilde_src1, y_tilde_src2)`` of masked mixtures.
    """
    # Recurrent stack.
    gru_stack = MultiRNNCell(
        [GRUCell(self.hidden_size) for _ in range(self.n_layer)])
    output_rnn, _ = tf.nn.dynamic_rnn(gru_stack,
                                      self.x_mixed,
                                      dtype=tf.float32)
    n_features = shape(self.x_mixed)[2]

    def _source_estimate(layer_name):
        """Dense ReLU projection of the RNN output back to input width."""
        return tf.layers.dense(inputs=output_rnn,
                               units=n_features,
                               activation=tf.nn.relu,
                               name=layer_name)

    y_hat_src1 = _source_estimate('y_hat_src1')
    y_hat_src2 = _source_estimate('y_hat_src2')

    def _apply_mask(estimate):
        """Share of `estimate` in the summed estimates, times the mixture."""
        # eps keeps the denominator away from zero.
        total = y_hat_src1 + y_hat_src2 + np.finfo(float).eps
        return estimate / total * self.x_mixed

    # Time-frequency masking layer.
    y_tilde_src1 = _apply_mask(y_hat_src1)
    y_tilde_src2 = _apply_mask(y_hat_src2)
    return y_tilde_src1, y_tilde_src2
def training_decode(enc_outputs, seq_len, helper, out_dim):
    """Build the attention decoder used for training and run it.

    Args:
        enc_outputs: encoder outputs, used as the attention memory; its
            leading dimension sizes the decoder's zero state.
        seq_len: passed as ``memory_sequence_length`` of the attention.
        helper: seq2seq helper handed to ``BasicDecoder``.
        out_dim: size of the final output projection.

    Returns:
        Tuple ``(outputs, alignments)``: the first field of the decoder
        output, and the stacked alignment history of the first cell state
        transposed with permutation ``[1, 2, 0]``.
    """
    # Pre-net around a GRU cell, then Bahdanau attention on top of it.
    prenet_cell = DecoderPrenetWrapper(GRUCell(hp.embed_size),
                                       is_training=True,
                                       prenet_sizes=hp.embed_size,
                                       dropout_prob=hp.dropout)
    attention = BahdanauAttention(hp.embed_size,
                                  enc_outputs,
                                  normalize=True,
                                  memory_sequence_length=seq_len,
                                  probability_fn=tf.nn.softmax)
    attentive_cell = AttentionWrapper(prenet_cell,
                                      attention,
                                      alignment_history=True,
                                      output_attention=False)

    # Decoder stack: projected (output ++ attention) layer followed by two
    # residual GRU layers.
    stack_layers = [
        OutputProjectionWrapper(
            ConcatOutputAndAttentionWrapper(attentive_cell), hp.embed_size)
    ]
    stack_layers.extend(
        ResidualWrapper(GRUCell(hp.embed_size)) for _ in range(2))
    decoder_cell = MultiRNNCell(stack_layers, state_is_tuple=True)
    output_cell = OutputProjectionWrapper(decoder_cell, out_dim)

    batch = tf.shape(enc_outputs)[0]
    initial_state = output_cell.zero_state(batch_size=batch, dtype=tf.float32)
    decoder = BasicDecoder(cell=output_cell,
                           helper=helper,
                           initial_state=initial_state)

    decoded, last_state, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder=decoder, maximum_iterations=hp.max_len)
    outputs, _ = decoded

    # For the attention plot.
    history = last_state[0].alignment_history.stack()
    alignments = tf.transpose(history, [1, 2, 0])
    return outputs, alignments
def decoder_rnn(decoder_embedded_inputs, decoder_embedding_matrix,
                encoder_state, num_words, sequence_length, rnn_size,
                num_of_layers, sos_id, eos_id, keep_prob, batch_size):
    """Build the decoder and return its training and test predictions.

    Args:
        decoder_embedded_inputs: forwarded to ``decode_training_set``.
        decoder_embedding_matrix: forwarded to ``decode_test_set``.
        encoder_state: forwarded to both decode helpers.
        num_words: number of units of the fully connected output layer; also
            forwarded to ``decode_test_set``.
        sequence_length: forwarded to ``decode_training_set`` as is and to
            ``decode_test_set`` as ``sequence_length - 1``.
        rnn_size: number of units of each LSTM layer.
        num_of_layers: number of stacked LSTM layers.
        sos_id: forwarded to ``decode_test_set``.
        eos_id: forwarded to ``decode_test_set``.
        keep_prob: input keep probability of each layer's dropout wrapper;
            also forwarded to both decode helpers.
        batch_size: forwarded to both decode helpers.

    Returns:
        Tuple ``(training_prediction, test_predictions)``.
    """

    def _make_layer():
        """Create one independent dropout-wrapped LSTM layer."""
        return DropoutWrapper(BasicLSTMCell(rnn_size),
                              input_keep_prob=keep_prob)

    # Perform everything on the decoding scope.
    with tf.variable_scope("decoding") as decoding_scope:
        # Every layer needs its own cell object: repeating one instance
        # (``[cell] * n``) makes the layers share a single cell, which newer
        # TensorFlow 1.x releases reject or silently weight-tie.
        cell = MultiRNNCell([_make_layer() for _ in range(num_of_layers)])

        # Initializers for the output layer.
        weights = tf.truncated_normal_initializer(stddev=0.1)
        biases = tf.zeros_initializer()

        # Output layer: linear (no activation) projection onto num_words,
        # created under the decoding scope so both decoders share it.
        output_function = lambda x: tf.contrib.layers.fully_connected(
            x,
            num_words,
            None,
            scope=decoding_scope,
            weights_initializer=weights,
            biases_initializer=biases)

        training_prediction = decode_training_set(
            encoder_state, cell, decoder_embedded_inputs, sequence_length,
            decoding_scope, output_function, keep_prob, batch_size)

        # The test decoder reuses the variables created for training.
        decoding_scope.reuse_variables()
        test_predictions = decode_test_set(
            encoder_state, cell, decoder_embedding_matrix, sos_id, eos_id,
            sequence_length - 1, num_words, decoding_scope, output_function,
            keep_prob, batch_size)
    return training_prediction, test_predictions
def _dynamic_birnn(self, x, seq_len, batch_size, max_seq_len):
    """Bidirectional stacked-GRU classifier head.

    Forward and backward stacks (one GRU per entry of ``self.cell_hidden``)
    run over ``x``; their outputs are averaged element-wise and pooled over
    time, then passed through a 1000-unit dense layer with leaky ReLU and a
    final dense layer of ``self.n_class`` units.

    Pooling over time:
        * ``self.avg_output`` falsy: one row per sequence is gathered from
          the flattened outputs at ``i * max_seq_len + (seq_len[i] - 1)``.
        * otherwise: the outputs are summed along axis 1 and divided by
          ``seq_len``.

    Returns:
        The output of the final dense layer.
    """

    def _gru_stack():
        return MultiRNNCell([GRUCell(width) for width in self.cell_hidden])

    cell_fw = _gru_stack()
    cell_bw = _gru_stack()
    init_state_fw = cell_fw.zero_state(batch_size, dtype=tf.float32)
    init_state_bw = cell_bw.zero_state(batch_size, dtype=tf.float32)

    direction_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=x,
        initial_state_fw=init_state_fw,
        initial_state_bw=init_state_bw,
        sequence_length=seq_len)

    # The two directions are averaged (an earlier variant concatenated them
    # instead, doubling the feature width).
    merged = (direction_outputs[0] + direction_outputs[1]) / 2

    if self.avg_output:
        summed = tf.reduce_sum(merged, axis=1)
        pooled = tf.divide(summed, tf.cast(seq_len[:, None], tf.float32))
    else:
        index = tf.range(0, batch_size) * max_seq_len + (seq_len - 1)
        flat = tf.reshape(merged, [-1, self.cell_hidden[-1]])
        pooled = tf.gather(flat, index)

    hidden = tf.layers.dense(pooled, 1000)
    hidden = tf.nn.leaky_relu(hidden, 0.2)
    return tf.layers.dense(hidden, self.n_class)
def build_decoder(self,
                  encoder_output,
                  encoder_state,
                  triple_input,
                  decoder_input,
                  train_mode=True):
    """Build the attention decoder for training or for inference.

    The decoder cell is a ``MultiRNNCell`` of ``self.num_layers`` cells whose
    class follows ``self.cell_class`` ('GRU', 'LSTM', anything else falls
    back to ``RNNCell``). Everything else is created under the 'decoder'
    variable scope with ``reuse=tf.AUTO_REUSE``.

    Args:
        encoder_output: encoder outputs the attention is prepared from.
        encoder_state: encoder state handed to the decoder function.
        triple_input: second argument of ``self.transfer_matching``; only
            used when ``self.use_trans_select`` is truthy.
        decoder_input: decoder inputs (training mode only).
        train_mode: selects the training or the inference decoder.

    Returns:
        The output logits in training mode, otherwise the first value
        returned by ``dynamic_rnn_decoder`` for the inference decoder.
    """
    if self.cell_class == 'GRU':
        cell_cls = GRUCell
    elif self.cell_class == 'LSTM':
        cell_cls = LSTMCell
    else:
        cell_cls = RNNCell
    decoder_cell = MultiRNNCell(
        [cell_cls(self.num_units) for _ in range(self.num_layers)])

    with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE) as scope:
        # Optional extra attention context computed from the triples.
        if self.use_trans_select:
            kd_context = self.transfer_matching(encoder_output, triple_input)
        else:
            kd_context = None

        if train_mode:
            # Prepare attention.
            attention_keys, attention_values, attention_construct_fn \
                = prepare_attention(encoder_output, kd_context, 'bahdanau',
                                    self.num_units)
            decoder_fn_train = attention_decoder_train(
                encoder_state=encoder_state,
                attention_keys=attention_keys,
                attention_values=attention_values,
                attention_construct_fn=attention_construct_fn)
            # Train decoder.
            decoder_output, _, _ = dynamic_rnn_decoder(
                cell=decoder_cell,
                decoder_fn=decoder_fn_train,
                inputs=decoder_input,
                sequence_length=self.responses_length,
                scope=scope)
            output_fn = create_output_fn(vocab_size=self.vocab_size)
            return output_fn(decoder_output)

        attention_keys, attention_values, attention_construct_fn \
            = prepare_attention(encoder_output, kd_context, 'bahdanau',
                                self.num_units, reuse=tf.AUTO_REUSE)
        output_fn = create_output_fn(vocab_size=self.vocab_size)
        # Inference decoder.
        decoder_fn_inference = attention_decoder_inference(
            num_units=self.num_units,
            num_decoder_symbols=self.vocab_size,
            output_fn=output_fn,
            encoder_state=encoder_state,
            attention_keys=attention_keys,
            attention_values=attention_values,
            attention_construct_fn=attention_construct_fn,
            embeddings=self.word_embed,
            start_of_sequence_id=GO_ID,
            end_of_sequence_id=EOS_ID,
            maximum_length=self.max_length)
        # Get decoder output.
        decoder_distribution, _, _ = dynamic_rnn_decoder(
            cell=decoder_cell, decoder_fn=decoder_fn_inference, scope=scope)
        return decoder_distribution
def build_encoder(self, post_word_input, corr_responses_input):
    """Encode the post and append attention-derived context to the outputs.

    Steps:
      1. Run a ``MultiRNNCell`` of ``self.num_layers`` cells (class chosen by
         ``self.cell_class``) over ``post_word_input``.
      2. Score every encoder position against every element of
         ``corr_responses_input`` with an additive (tanh) attention, take the
         attention-weighted sum of encoder outputs per element, and average
         those sums over the elements of each example.
      3. Append that averaged context as one extra step along axis 1 of the
         encoder outputs.
      4. If ``self.use_trans_repr`` is truthy, also append a dense projection
         of ``self.trans_reprs`` along axis 1.

    Args:
        post_word_input: RNN inputs for the post.
            # assumes (batch, time, dim) embeddings -- TODO confirm
        corr_responses_input: reshaped here to
            ``[batch_size, -1, self.dim_emb]``.

    Returns:
        Tuple ``(encoder_output, encoder_state)``; ``encoder_output`` is the
        extended output sequence, ``encoder_state`` the RNN final state.

    NOTE(review): the nesting of statements under the ``variable_scope``
    blocks was reconstructed from a flattened source -- confirm against the
    original layout, since it affects variable names.
    """
    if self.cell_class == 'GRU':
        encoder_cell = MultiRNNCell(
            [GRUCell(self.num_units) for _ in range(self.num_layers)])
    elif self.cell_class == 'LSTM':
        encoder_cell = MultiRNNCell(
            [LSTMCell(self.num_units) for _ in range(self.num_layers)])
    else:
        encoder_cell = MultiRNNCell(
            [RNNCell(self.num_units) for _ in range(self.num_layers)])
    with tf.variable_scope('encoder', reuse=tf.AUTO_REUSE) as scope:
        encoder_output, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell,
            post_word_input,
            self.posts_length,
            dtype=tf.float32,
            scope=scope)
    # Dynamic batch size and encoder length are read from self.posts.
    batch_size, encoder_len = tf.shape(self.posts)[0], tf.shape(
        self.posts)[1]
    corr_response_input = tf.reshape(corr_responses_input,
                                     [batch_size, -1, self.dim_emb])
    # Number of response elements per example (size of axis 1 after reshape).
    corr_cum_len = tf.shape(corr_response_input)[1]
    with tf.variable_scope('mutual_attention', reuse=tf.AUTO_REUSE):
        # Project both sides to num_units.
        encoder_out_trans = tf.layers.dense(encoder_output,
                                            self.num_units,
                                            name='encoder_out_transform')
        corr_response_trans = tf.layers.dense(
            corr_response_input,
            self.num_units,
            name='corr_response_transform')
        # Repeat the projected encoder outputs once per response element,
        # then fold the (example, element) axes into a single leading axis.
        encoder_out_trans = tf.expand_dims(encoder_out_trans, axis=1)
        encoder_out_trans = tf.tile(encoder_out_trans,
                                    [1, corr_cum_len, 1, 1])
        encoder_out_trans = tf.reshape(encoder_out_trans,
                                       [-1, encoder_len, self.num_units])
        # One row per response element, with a singleton axis so it
        # broadcasts over the encoder positions.
        corr_response_trans = tf.reshape(corr_response_trans,
                                         [-1, self.num_units])
        corr_response_trans = tf.expand_dims(corr_response_trans, axis=1)
        # TODO: try bilinear attention
        v = tf.get_variable("attention_v", [self.num_units],
                            dtype=tf.float32)
        # Additive attention score per (element, encoder position).
        score = tf.reduce_sum(
            v * tf.tanh(encoder_out_trans + corr_response_trans), axis=2)
        alignments = tf.nn.softmax(score)
        # Same tiling for the raw encoder outputs, which are what gets
        # averaged with the attention weights.
        encoder_out_tiled = tf.expand_dims(encoder_output, axis=1)
        encoder_out_tiled = tf.tile(encoder_out_tiled,
                                    [1, corr_cum_len, 1, 1])
        encoder_out_tiled = tf.reshape(encoder_out_tiled,
                                       [-1, encoder_len, self.num_units])
        # Weighted sum over encoder positions, one context per element ...
        context_mutual = tf.reduce_sum(tf.expand_dims(alignments, 2) *
                                       encoder_out_tiled,
                                       axis=1)
        # ... regrouped per example and averaged over the elements.
        context_mutual = tf.reshape(context_mutual,
                                    [batch_size, -1, self.num_units])
        context_mutual = tf.reduce_mean(context_mutual, axis=1)
    # Append the context as one additional step of the encoder outputs.
    encoder_output = tf.concat(
        [encoder_output, tf.expand_dims(context_mutual, 1)], axis=1)
    if self.use_trans_repr:
        trans_output = tf.layers.dense(self.trans_reprs,
                                       self.num_units,
                                       name='trans_reprs_transform',
                                       reuse=tf.AUTO_REUSE)
        encoder_output = tf.concat([encoder_output, trans_output], axis=1)
    return encoder_output, encoder_state
c.act_factor = tf.placeholder(tf.float32, shape=(), name="act_factor") c.adapt = tf.placeholder(tf.float32, shape=(), name="adapt") c.tau_m = tf.placeholder(tf.float32, shape=(), name="tau_m") input = tf.placeholder(tf.float32, shape=(seq_size, batch_size, input_size), name="Input") sequence_length = tf.placeholder(shape=(batch_size, ), dtype=tf.int32) Finput = tf.placeholder(tf.float32, shape=(filter_len * input_size, layer_size), name="Finput") net = MultiRNNCell([ LCACell(input_size, layer_size, filter_len, c, tf.nn.relu, Finput=Finput), ]) state = tuple( tuple(( tf.placeholder(tf.float32, [batch_size, cell.layer_size], name="u"), tf.placeholder(tf.float32, [batch_size, cell.layer_size], name="a"), tf.placeholder(tf.float32, [batch_size, cell.layer_size], name="a_m"), tf.placeholder( tf.float32, [batch_size, cell.filter_len * cell.input_size, cell.layer_size], name="dF"), )) for cell in net._cells) get_zero_state = lambda: tuple( np.zeros((batch_size, ) + tuple(t.get_shape().as_list()[1:]))
def create_model(self):
    """
    Build the question-answering graph and store its tensors on ``self``.

    Original author's outline of the current model (may be out of date):
        rnn q & p
        p2q atten1 : <p_emb_bi|q_emb_bi>
        self atten2 : W*d_emb_bi, W*q_emb_bi
        new_d_emb_bi : softmax(atten1 + atten2) * d_emb_bi
        rnn(new_d_emb_bi)

    What the code below does:
      1. Creates placeholders for question/document token ids, their
         character ids, and the answer start/end targets.
      2. Optionally (``self.args.use_char_embedding``) builds character
         features with a conv + max-pool over the character embeddings.
      3. Encodes question and document with bidirectional stacked RNNs
         (``LSTMCell`` or ``GRUCell`` depending on ``self.args.use_lstm``).
      4. Combines document and question with ``context_query_attention``.
      5. Scans over the attended document steps with a memory cell,
         producing start/end scores at every step; the scores of the last
         step are kept.
      6. Defines the loss and the accuracy counters.

    Attributes set: ``d_real_len``, ``d_emb_bi``, ``attened_d``, ``m_state``,
    ``result_s``, ``result_e``, ``answer_s``, ``answer_e``, ``loss``,
    ``correct_prediction``, ``begin_acc``, ``end_acc``.

    NOTE(review): the nesting of statements under the ``variable_scope``
    blocks was reconstructed from a flattened source -- confirm against the
    original layout, since it affects variable names.

    :return: None
    """
    num_layers = self.args.num_layers
    hidden_size = self.args.hidden_size
    # The next two values are not used by the code visible in this function.
    char_hidden_size = self.args.char_hidden_size
    char_embedding_dim = self.args.char_embedding_dim
    # Cell class shared by both encoders and by the memory cell.
    cell = LSTMCell if self.args.use_lstm else GRUCell
    q_input = tf.placeholder(dtype=tf.int32,
                             shape=[None, self.q_len],
                             name='questions_bt')
    d_input = tf.placeholder(dtype=tf.int32,
                             shape=[None, self.d_len],
                             name='documents_bt')
    answer_s = tf.placeholder(dtype=tf.float32,
                              shape=[None, None],
                              name='answer_start')
    answer_e = tf.placeholder(dtype=tf.float32,
                              shape=[None, None],
                              name='answer_end')
    q_input_char = tf.placeholder(
        dtype=tf.int32,
        shape=[None, self.q_len, self.q_char_len],
        name='questions_bt_char')
    d_input_char = tf.placeholder(
        dtype=tf.int32,
        shape=[None, self.d_len, self.d_char_len],
        name='documents_bt_char')
    # Word embeddings are a variable initialised from self.embedding_matrix.
    init_embed = tf.constant(self.embedding_matrix, dtype=tf.float32)
    embedding_matrix = tf.get_variable(name='embdding_matrix',
                                       initializer=init_embed,
                                       dtype=tf.float32)
    # Number of non-zero token ids per row.
    # presumably id 0 is padding -- verify against the data pipeline
    q_real_len = tf.reduce_sum(tf.sign(tf.abs(q_input)), axis=1)
    d_real_len = tf.reduce_sum(tf.sign(tf.abs(d_input)), axis=1)
    # (Sequence masks for d and q were built here in an earlier version.)
    _EPSILON = 10e-8  # not used by the code visible in this function
    self.d_real_len = d_real_len
    batch_size = tf.shape(q_input)[0]
    if self.args.use_char_embedding:
        char_embedding = tf.get_variable(name='can_embdding_matrix',
                                         initializer=tf.constant(
                                             self.char_embedding_matrix,
                                             dtype=tf.float32),
                                         dtype=tf.float32,
                                         trainable=True)
        q_char_embed = tf.nn.embedding_lookup(char_embedding, q_input_char)
        d_char_embed = tf.nn.embedding_lookup(char_embedding, d_input_char)
        q_char_embed = tf.nn.dropout(q_char_embed,
                                     keep_prob=self.args.keep_prob)
        d_char_embed = tf.nn.dropout(d_char_embed,
                                     keep_prob=self.args.keep_prob)
        # (An earlier version encoded the characters with a bidirectional
        # char-RNN under a 'char_embedding' scope instead of the convolution
        # below.)
        with tf.variable_scope('char_conv', reuse=tf.AUTO_REUSE) as scp:
            # Move the word axis last so that it acts as the channel axis.
            q_char_embed = tf.transpose(
                q_char_embed,
                perm=[0, 2, 3, 1])  # [batch, height, width, channels]
            filter = tf.get_variable(
                'q_filter_w', shape=[5, 5, self.q_len, self.q_len]
            )  # [filter_height, filter_width, in_channels, out_channels]
            cnned_char = tf.nn.conv2d(
                q_char_embed,
                filter,
                strides=[1, 1, 1, 1],
                padding='VALID',
                use_cudnn_on_gpu=True,
                data_format="NHWC",
                name=None
            )  # [B, (char_len-filter_size/stride), (word_len-filter_size/stride), d_len]
            q_char_embed_out = tf.nn.max_pool(cnned_char,
                                              ksize=[1, 5, 5, 1],
                                              strides=[1, 1, 1, 1],
                                              padding='VALID',
                                              data_format="NHWC",
                                              name=None)
            # Flattened size of the two pooled spatial axes (static shape).
            char_out_size = q_char_embed_out.get_shape().as_list(
            )[1] * q_char_embed_out.get_shape().as_list()[2]
            # Back to one feature vector per word.
            q_char_embed_out = tf.reshape(
                tf.transpose(q_char_embed_out, perm=[0, 3, 1, 2]),
                shape=[batch_size, self.q_len, char_out_size])
            # Same pipeline for the document characters.
            d_char_embed = tf.transpose(
                d_char_embed,
                perm=[0, 2, 3, 1])  # [batch, height, width, channels]
            filter = tf.get_variable(
                'd_filter_w', shape=[5, 5, self.d_len, self.d_len]
            )  # [filter_height, filter_width, in_channels, out_channels]
            cnned_char = tf.nn.conv2d(
                d_char_embed,
                filter,
                strides=[1, 1, 1, 1],
                padding='VALID',
                use_cudnn_on_gpu=True,
                data_format="NHWC",
                name=None
            )  # [B, (char_len-filter_size/stride), (word_len-filter_size/stride), d_len]
            d_char_embed_out = tf.nn.max_pool(cnned_char,
                                              ksize=[1, 5, 5, 1],
                                              strides=[1, 1, 1, 1],
                                              padding='VALID',
                                              data_format="NHWC",
                                              name=None)
            char_out_size = d_char_embed_out.get_shape().as_list(
            )[1] * d_char_embed_out.get_shape().as_list()[2]
            d_char_embed_out = tf.reshape(
                tf.transpose(d_char_embed_out, perm=[0, 3, 1, 2]),
                shape=[batch_size, self.d_len, char_out_size])
            # NOTE(review): this second reshape targets the shape the tensor
            # already has -- looks redundant, confirm.
            d_char_embed_out = tf.reshape(
                d_char_embed_out,
                shape=[batch_size, self.d_len, char_out_size])
        # NOTE(review): tf.concat is called on a single tensor here rather
        # than on a list of tensors -- confirm the intent.
        d_char_out = tf.concat(d_char_embed_out, -1)
        q_char_out = tf.concat(q_char_embed_out, -1)
    with tf.variable_scope('q_encoder') as scp:
        q_embed = tf.nn.embedding_lookup(embedding_matrix, q_input)
        if self.args.use_char_embedding:
            q_embed = tf.concat([q_embed, q_char_out], -1)
        q_rnn_f = MultiRNNCell(cells=[
            DropoutWrapper(cell(hidden_size),
                           output_keep_prob=self.args.keep_prob)
            for _ in range(num_layers)
        ])
        q_rnn_b = MultiRNNCell(cells=[
            DropoutWrapper(cell(hidden_size),
                           output_keep_prob=self.args.keep_prob)
            for _ in range(num_layers)
        ])
        outputs, q_last_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=q_rnn_f,
            cell_bw=q_rnn_b,
            inputs=q_embed,
            sequence_length=q_real_len,
            initial_state_bw=None,
            dtype="float32",
            parallel_iterations=None,
            swap_memory=True,
            time_major=False,
            scope=None)
        # last_states -> (output_state_fw, output_state_bw)
        # Per-step question encoding: forward and backward outputs
        # concatenated on the last axis.
        q_emb_bi = tf.concat(outputs, axis=-1)
        if self.args.use_lstm:
            q_last_states_con = tf.concat(
                [q_last_states[0][-1][-1], q_last_states[1][-1][-1]],
                axis=-1)
        else:
            q_last_states_con = tf.concat(q_last_states, -1)
        logger("q_encoded_bf shape {}".format(q_emb_bi.get_shape()))
    with tf.variable_scope('d_encoder'):
        d_embed = tf.nn.embedding_lookup(embedding_matrix, d_input)
        if self.args.use_char_embedding:
            d_embed = tf.concat([d_embed, d_char_out], -1)
        d_rnn_f = MultiRNNCell(cells=[
            DropoutWrapper(cell(hidden_size),
                           output_keep_prob=self.args.keep_prob)
            for _ in range(num_layers)
        ])
        d_rnn_b = MultiRNNCell(cells=[
            DropoutWrapper(cell(hidden_size),
                           output_keep_prob=self.args.keep_prob)
            for _ in range(num_layers)
        ])
        # The document's final states (last_states) are not used below.
        d_rnn_out, last_states = tf.nn.bidirectional_dynamic_rnn(
            cell_bw=d_rnn_b,
            cell_fw=d_rnn_f,
            inputs=d_embed,
            sequence_length=d_real_len,
            swap_memory=True,
            dtype="float32",
        )
        d_emb_bi = tf.concat(d_rnn_out, axis=-1)
        self.d_emb_bi = d_emb_bi
        logger("d_encoded_bf shape {}".format(d_emb_bi.get_shape()))
    with tf.variable_scope('attention_dq'):
        atten_q2d, atten_d2q = context_query_attention(
            context=d_emb_bi,
            query=q_emb_bi,
            scope='context_query_att',
            reuse=None)
        # Document representation enriched with both attention results,
        # concatenated with the plain encoding on the last axis.
        attened_d = tf.concat([
            tf.add(d_emb_bi, atten_d2q),
            tf.add(d_emb_bi, atten_q2d), d_emb_bi
        ],
                              axis=-1)
        # (An earlier version computed a masked dot-product attention between
        # d_emb_bi and q_emb_bi here instead.)
    self.attened_d = attened_d
    q_emb_rl = q_last_states_con
    # Initial memory for the scan below.
    # NOTE(review): this indexing is applied whether or not use_lstm is set,
    # unlike q_last_states_con above -- confirm it is right for GRU states.
    memory = tf.concat(
        [q_last_states[0][-1][-1], q_last_states[1][-1][-1]], axis=-1)
    memory_cell = cell(hidden_size * 4)
    m_state = memory_cell.zero_state(batch_size=tf.shape(d_emb_bi)[0],
                                     dtype=tf.float32)
    # Candidate answers are scored against the document input embeddings.
    candi_embed = d_embed
    # Zero placeholders for the start/end scores carried through the scan.
    result_ss = tf.zeros(
        shape=[tf.shape(d_emb_bi)[0],
               tf.shape(d_emb_bi)[1]])
    result_ee = tf.zeros(
        shape=[tf.shape(d_emb_bi)[0],
               tf.shape(d_emb_bi)[1]])
    activ = 'tanh'

    def inference(hidden_d, memory, m_state, result_s, result_e):
        """One scan step: update the memory, then score start/end positions.

        ``hidden_d`` is the current step of the attended document; the other
        arguments are the values carried over from the previous step
        (``result_s`` / ``result_e`` are accepted but not read).
        Returns ``[memory, m_state, start_scores, end_scores]``.
        """
        # Note: x.get_shape()[0] (static shape) is not the same thing as
        # tf.shape(x)[0] (dynamic shape).
        x_context, m_state = memory_cell(
            tf.concat([memory, hidden_d], axis=-1), state=m_state)
        # Original note: "just use for gru cell, x = m_state".
        # Original note: "InternalError: Dst tensor is not initialized" is
        # caused by the GPU memory being full.
        # Update memory: use the question and the context to update.
        with tf.variable_scope('reinforce', reuse=tf.AUTO_REUSE) as scp:
            context_and_q = tf.concat([x_context, hidden_d, q_emb_rl],
                                      axis=-1)
            rl_w = tf.get_variable(name='w',
                                   shape=[
                                       context_and_q.get_shape()[-1],
                                       context_and_q.get_shape()[-1]
                                   ])
            if activ == 'tanh':
                rl_mul_context_q = tf.tanh(tf.matmul(context_and_q, rl_w))
            else:
                rl_mul_context_q = tf.nn.relu(
                    tf.matmul(context_and_q, rl_w))
            out = tf.nn.tanh(
                rl_mul_context_q
            )  # original note: "b * 1"; a bias should be used here when select_prob == 0
            memory_update_w = tf.get_variable(
                "memory_update_w",
                shape=[
                    context_and_q.get_shape()[-1],
                    memory.get_shape()[-1]
                ])
            # Gate the previous memory element-wise.
            memory = tf.multiply(
                tf.nn.tanh(tf.matmul(out, memory_update_w)), memory)
        # Inference: use the new memory to infer the answer.
        with tf.variable_scope('inference', reuse=tf.AUTO_REUSE) as scp:
            context = tf.nn.dropout(tf.concat([memory, hidden_d, q_emb_rl],
                                              -1),
                                    keep_prob=self.args.keep_prob)
            infer_bilinear_start = tf.get_variable(
                'infer_bilinear_start',
                shape=[
                    context.get_shape()[-1],
                    candi_embed.get_shape()[-1]
                ])
            # Bilinear score of every candidate position against the context.
            pre_anw = tf.squeeze(tf.einsum(
                'bij,bjk->bik', candi_embed,
                tf.expand_dims(tf.matmul(context, infer_bilinear_start),
                               -1)),
                                 axis=-1)
            pre_anw_pro_s = pre_anw
            infer_bilinear_end = tf.get_variable(
                'infer_bilinear_end',
                shape=[
                    context.get_shape()[-1],
                    candi_embed.get_shape()[-1]
                ])
            pre_anw = tf.squeeze(tf.einsum(
                'bij,bjk->bik', candi_embed,
                tf.expand_dims(tf.matmul(context, infer_bilinear_end), -1)),
                                 axis=-1)
            pre_anw_pro_e = pre_anw
            # (A masked softmax over these scores was applied here in an
            # earlier version; the raw scores are returned now.)
        logger("pre_anw_pro_s shape {}".format(pre_anw_pro_s.get_shape()))
        return [memory, m_state, pre_anw_pro_s, pre_anw_pro_e]

    # Scan over the steps of the attended document (the transpose swaps the
    # first two axes so that scan iterates over what was axis 1).
    _, self.m_state, result_s, result_e = tf.scan(
        fn=lambda pre, x: inference(tf.squeeze(x, 0), *pre),
        elems=[tf.transpose(attened_d, perm=[1, 0, 2])],
        initializer=[memory, m_state, result_ss, result_ee],
        name='scan',
        swap_memory=True)
    # Keep only the scores produced at the last scan step.
    self.result_s = result_s[-1]
    self.result_e = result_e[-1]
    self.answer_s = answer_s
    self.answer_e = answer_e
    # Cross-entropy for start and end; labels are the argmax of the targets.
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.result_s, labels=tf.argmax(answer_s, -1))
    losses += tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.result_e, labels=tf.argmax(answer_e, -1))
    self.loss = tf.reduce_mean(losses)
    # Original note: if the log-based loss is used, the mask must be 1.
    # (An earlier version computed the loss from log-probabilities directly.)
    # Number of examples for which both start and end argmax match.
    self.correct_prediction = tf.reduce_sum(
        tf.sign(
            tf.cast(tf.logical_and(
                tf.equal(
                    tf.argmax(self.answer_s, 1, output_type=tf.int32),
                    tf.argmax(self.result_s, -1, output_type=tf.int32)),
                tf.equal(
                    tf.argmax(self.answer_e, 1, output_type=tf.int32),
                    tf.argmax(self.result_e, -1, output_type=tf.int32))),
                    dtype='float')))
    # Number of examples with a matching start argmax.
    self.begin_acc = tf.reduce_sum(
        tf.sign(
            tf.cast(tf.equal(
                tf.argmax(self.answer_s, 1, output_type=tf.int32),
                tf.argmax(self.result_s, -1, output_type=tf.int32)),
                    dtype='float')))
    # Number of examples with a matching end argmax.
    self.end_acc = tf.reduce_sum(
        tf.sign(
            tf.cast(tf.equal(
                tf.argmax(self.answer_e, 1, output_type=tf.int32),
                tf.argmax(self.result_e, -1, output_type=tf.int32)),
                    dtype='float')))
def __init__(self, is_training, config, input_):
    """Build the Bayesian LSTM graph: forward pass, losses and train op.

    Args:
        is_training: forwarded to the Bayesian cells and to
            ``sample_posterior``; when falsy, construction stops after the
            forward pass and the data cost.
        config: hyper-parameter object (``X_dim``, ``hidden_size``,
            ``vocab_size``, ``num_layers``, ``batch_size``, prior settings,
            ``max_grad_norm``).
        input_: input object exposing ``batch_size``, ``num_steps``,
            ``input_data``, ``targets`` and ``epoch_size``.
    """
    self._is_training = is_training
    self._input = input_
    self.batch_size = input_.batch_size
    self.num_steps = input_.num_steps
    self._input_data = input_.input_data
    first_layer_input_dim = config.X_dim
    hidden_size = config.hidden_size
    vocab_size = config.vocab_size
    self._targets = input_.targets

    # Prior shared by every Bayesian layer.
    prior = Prior(config.prior_pi, config.log_sigma1, config.log_sigma2)

    inputs = input_.input_data

    # One Bayesian LSTM cell per layer; only the first layer sees the raw
    # input width, deeper layers consume the previous layer's hidden state.
    layer_cells = []
    for layer_idx in range(config.num_layers):
        if layer_idx == 0:
            layer_input_dim = first_layer_input_dim
        else:
            layer_input_dim = config.hidden_size
        layer_cells.append(
            BayesianLSTMCell(layer_input_dim,
                             config.hidden_size,
                             prior,
                             is_training,
                             forget_bias=0.0,
                             name="bbb_lstm_{}".format(layer_idx)))
    cell = MultiRNNCell(layer_cells, state_is_tuple=True)

    self._initial_state = cell.zero_state(config.batch_size, data_type())
    state = self._initial_state

    # Manually unrolled forward pass over the truncated mini-batch.
    step_outputs = []
    with tf.variable_scope("RNN"):
        for time_step in range(self.num_steps):
            if time_step > 0:
                # Every step after the first reuses the same variables.
                tf.get_variable_scope().reuse_variables()
            cell_output, state = cell(inputs[:, time_step, :], state)
            step_outputs.append(cell_output)
    output = tf.reshape(tf.concat(step_outputs, 1), [-1, hidden_size])

    # Softmax weights are sampled from the posterior as well.
    softmax_w = sample_posterior((hidden_size, vocab_size), "softmax_w",
                                 prior, is_training)
    softmax_b = sample_posterior((vocab_size, 1), "softmax_b", prior,
                                 is_training)

    logits = tf.nn.xw_plus_b(output, softmax_w, tf.squeeze(softmax_b))
    logits = tf.reshape(logits,
                        [self.batch_size, self.num_steps, vocab_size])
    self._output = tf.nn.softmax(logits)

    unit_weights = tf.ones([self.batch_size, self.num_steps],
                           dtype=data_type())
    loss = tf.contrib.seq2seq.sequence_loss(logits,
                                            input_.targets,
                                            unit_weights,
                                            average_across_timesteps=False,
                                            average_across_batch=False)

    # Data cost: summed loss divided by the batch size.
    self._cost = tf.reduce_sum(loss) / self.batch_size
    self._kl_loss = 0.
    self._final_state = state

    if not is_training:
        return

    # KL divergence, scaled by 1 / (B * C) where
    #   B = number of batches, aka the epoch size
    #   C = number of truncated sequences in a batch, aka batch_size
    B = self._input.epoch_size
    C = self.batch_size
    kl_loss = tf.add_n(tf.get_collection("KL_layers"), "kl_divergence")
    kl_factor = 1.0 / (B * C)
    self._kl_loss = kl_factor * kl_loss
    self._total_loss = self._cost + self._kl_loss

    # Plain SGD on globally clipped gradients; the learning rate is a
    # non-trainable variable updated through a placeholder.
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    raw_grads = tf.gradients(self._total_loss, tvars)
    grads, _ = tf.clip_by_global_norm(raw_grads, config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars),
        global_step=tf.contrib.framework.get_or_create_global_step())

    self._new_lr = tf.placeholder(data_type(),
                                  shape=[],
                                  name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
def bidirectional_GRU(inputs,
                      inputs_len,
                      cell=None,
                      cell_fn=tf.contrib.rnn.GRUCell,
                      units=Params.attn_size,
                      layers=1,
                      scope="Bidirectional_GRU",
                      output=0,
                      is_training=True,
                      reuse=None):
    '''
    Bidirectional recurrent neural network with GRU cells.

    Args:
        inputs:      rnn input of shape (batch_size, timestep, dim)
        inputs_len:  rnn input_len of shape (batch_size, ). It is only
                     reshaped here; it is currently NOT passed to the RNN
                     as sequence_length.
        cell:        optional pair (cell_fw, cell_bw) of rnn cells of type
                     RNN_Cell. When given, no cells are built and inputs
                     are used as they are.
        cell_fn:     cell constructor used when no cells are provided.
        units:       number of units of each built cell.
        layers:      number of stacked layers per direction when cells are
                     built here.
        scope:       variable scope name.
        output:      if 0, output returns rnn output for every timestep,
                     if 1, output returns concatenated state of backward
                     and forward rnn, reshaped to
                     (Params.batch_size, shapes[1], 2 * units) where shapes
                     is the static shape of the inputs as passed in.
        is_training: forwarded to apply_dropout.
        reuse:       forwarded to tf.variable_scope.

    Returns:
        The tensor described under `output`; None for any other value of
        `output`.
    '''
    with tf.variable_scope(scope, reuse=reuse):
        # Capture the static input shape before branching: the output == 1
        # reshape below needs it even when the caller supplies the cells
        # (previously that combination raised NameError).
        shapes = inputs.get_shape().as_list()
        if cell is not None:
            (cell_fw, cell_bw) = cell
        else:
            if len(shapes) > 3:
                # Fold the two leading axes together so the RNN sees a
                # rank-3 input.
                inputs = tf.reshape(inputs,
                                    (shapes[0] * shapes[1], shapes[2], -1))
                inputs_len = tf.reshape(inputs_len,
                                        (shapes[0] * shapes[1], ))

            # if no cells are provided, use standard GRU cell implementation
            if layers > 1:
                # Only the first layer sees the input width; deeper layers
                # consume the previous layer's `units`-wide output.
                cell_fw = MultiRNNCell([
                    apply_dropout(cell_fn(units),
                                  size=inputs.shape[-1] if i == 0 else units,
                                  is_training=is_training)
                    for i in range(layers)
                ])
                cell_bw = MultiRNNCell([
                    apply_dropout(cell_fn(units),
                                  size=inputs.shape[-1] if i == 0 else units,
                                  is_training=is_training)
                    for i in range(layers)
                ])
            else:
                cell_fw, cell_bw = [
                    apply_dropout(cell_fn(units),
                                  size=inputs.shape[-1],
                                  is_training=is_training) for _ in range(2)
                ]

        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw,
            cell_bw,
            inputs,
            # sequence_length = inputs_len,
            dtype=tf.float32)
        if output == 0:
            return tf.concat(outputs, 2)
        elif output == 1:
            return tf.reshape(tf.concat(states, 1),
                              (Params.batch_size, shapes[1], 2 * units))
def lstm_rnn_subgraph(self):
    """ Defines the forward pass through the decoder LSTM-RNN.

    Builds the decoder cell (a stack when `opt.dec_num_layers > 1`), drives it
    with `tf.nn.raw_rnn` through a custom loop function, and projects the
    collected outputs to logits.

    Returns:
        A tuple `(final_state, flat_rnn_outputs, projected_rnn_outputs, logits)`:
        the final raw_rnn state, the RNN outputs flattened to
        `[-1, opt.dec_hidden_dims]`, their projection through
        `projection_weights`/`projection_biases`, and the logits obtained by
        multiplying with the transposed encoder embedding table.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source; confirm in particular that the two input-feeding branches belong
    inside the `else` (time-step 1+) branch of `loop_fn`.
    """
    with tf.variable_scope('lstm_rnn', reuse=None), tf.device('/gpu:0'):
        # Same functionality as within the encoder
        def _lstm_cell(model_opt):
            """ Defines a basic LSTM cell to which various wrappers can be applied. """
            base_cell = BasicLSTMCell(model_opt.dec_hidden_dims,
                                      forget_bias=2.5,
                                      state_is_tuple=True)
            if model_opt.allow_dropout:
                base_cell = DropoutWrapper(
                    base_cell, output_keep_prob=self.rnn_keep_prob)
            return base_cell

        if self.opt.dec_num_layers > 1:
            cell = MultiRNNCell([
                _lstm_cell(self.opt) for _ in range(self.opt.dec_num_layers)
            ])
        else:
            cell = _lstm_cell(self.opt)

        # Obtain sequences decoded from the encoder's sentence representations
        # <PAD> slice output by the decoder after each generated batch sequence has ended in <EOS>
        pad_step_idx = tf.fill([self.batch_length], self.vocab.pad_id)
        pad_step_embeds = tf.nn.embedding_lookup(self.embedding_table,
                                                 pad_step_idx,
                                                 name='pad_step_embeds')
        # raw_rnn expects input to be served in form of a TensorArray
        inputs_ta = tf.TensorArray(size=self.batch_steps, dtype=self.float_type) \
            .unstack(tf.transpose(self.input_data, perm=[1, 0, 2]), name='rnn_input_array')
        # Initial decoder state set equal to the final state of the encoder
        initial_state = self.encoder.decoder_state
        # Initialize tensor for tracking sentence completion
        if self.eos_tracker is None:
            self.eos_tracker = tf.zeros([self.batch_length],
                                        dtype=self.int_type)

        # Define the raw_rnn loop which allows for greater control over the generated output, as compared
        # to dynamic_rnn()
        def loop_fn(time, cell_output, cell_state, loop_state):
            """ Defines the loop function utilized by the raw_rnn. """

            # Helper function for obtaining the output embeddings
            def _get_predictions():
                """ Projects the likeliest raw_rnn output predictions into the embedding space. """
                # Flatten RNN output to two dimensions
                flat_step_outputs = tf.reshape(
                    cell_output, [-1, self.opt.dec_hidden_dims])
                projected_step_outputs = tf.nn.xw_plus_b(
                    flat_step_outputs, self.projection_weights,
                    self.projection_biases)
                step_logits = tf.nn.xw_plus_b(
                    projected_step_outputs,
                    tf.transpose(self.embedding_table),
                    self.output_embedding_biases,
                    name='logits')
                # Isolate highest-probability predictions
                predicted_scores = tf.nn.softmax(step_logits, -1)
                idx_predictions = tf.cast(tf.argmax(predicted_scores,
                                                    axis=-1),
                                          dtype=self.int_type)
                # Embed predicted word indices
                embedded_predictions = tf.nn.embedding_lookup(
                    self.embedding_table, idx_predictions)
                return idx_predictions, embedded_predictions

            def _attend():
                """ Executes the decoding-with-attention mechanism utilizing global 'Luong' attention. """
                # Project encoder hidden states, 'memories', to match the dimensionality of the decoder,
                # i.e. target, hidden states
                memory_values = self.encoder.rnn_outputs
                flat_values = tf.reshape(
                    memory_values, [-1, tf.shape(memory_values)[-1]])
                flat_keys = tf.matmul(flat_values, self.memory_key_weights)
                memory_keys = tf.reshape(flat_keys, [
                    self.encoder.batch_length, self.encoder.batch_steps,
                    self.opt.dec_hidden_dims
                ])
                # Apply length to the memory keys so as to restrict attention to non-padded positions
                score_mask = tf.sequence_mask(
                    self.encoder.length_mask,
                    maxlen=tf.reduce_max(self.encoder.length_mask),
                    dtype=self.float_type)
                score_mask = tf.expand_dims(score_mask, -1)
                score_mask = tf.matmul(score_mask,
                                       tf.ones([
                                           self.encoder.batch_length,
                                           self.opt.dec_hidden_dims, 1
                                       ]),
                                       transpose_b=True)
                memory_keys = tf.where(tf.cast(score_mask, dtype=tf.bool),
                                       memory_keys,
                                       tf.zeros_like(memory_keys))
                # Obtain target query, i.e. the current decoder hidden state.
                # The state is a tuple of per-layer (c, h) tuples only when the
                # cell is a MultiRNNCell; for a single layer it is the (c, h)
                # tuple itself, so indexing it twice would slice a batch row
                # out of h instead of selecting h.
                if self.opt.dec_num_layers > 1:
                    top_layer_state = cell_state[-1]
                else:
                    top_layer_state = cell_state
                target_hidden_state = top_layer_state[-1]
                target_query = tf.expand_dims(target_hidden_state, 1)
                # Compute alignments globally, by attending to all encoder states at once
                score = tf.matmul(target_query, memory_keys, transpose_b=True)
                score = tf.squeeze(score, [1])
                alignments = tf.nn.softmax(score)
                # Compute the context vector by applying calculated alignments to encoder states
                expanded_alignments = tf.expand_dims(alignments, 1)
                context = tf.matmul(expanded_alignments, memory_values)
                context = tf.squeeze(context, [1])
                # Compute the attentional vector by combining encoder context with decoder query
                attention = tf.tanh(
                    tf.matmul(tf.concat([context, target_hidden_state], -1),
                              self.attention_weights))
                return attention

            # Initialize the loop function
            emit_output = cell_output  # no output is emitted during initialization
            next_loop_state = None
            # Check if to terminate the loop;
            # length slack denotes how much longer the output sequence is allowed to be than the input
            elements_finished = tf.greater_equal(
                time, self.length_mask + self.opt.length_slack)
            # Once stopping conditions are met for all batch elements, terminate loop
            finished = tf.reduce_all(elements_finished)

            if cell_output is None:  # i.e. during initialization only
                # Set initial values
                self.eos_tracker *= 0
                next_cell_state = initial_state
                next_input = inputs_ta.read(0)
            # At time-step 1+
            else:
                # Pass on the cell state
                next_cell_state = cell_state
                # Get predictions from previous time-step
                predicted_idx, predicted_embeds = _get_predictions()
                # Check if stopping conditions are met
                # 1. Check if all decoded batch items contain an <EOS> prediction
                self.eos_tracker += tf.cast(
                    tf.equal(predicted_idx, self.vocab.eos_id), self.int_type)
                # 2. Check if all decoded batch items are equal in length to corresponding encoder inputs
                boundary_reached = tf.greater_equal(time, self.length_mask)
                if not self.opt.is_train or not self.opt.use_reconstruction_objective:
                    # Extended stopping criterion during inference,
                    # as output length is allowed to exceed input length via the slack_length parameter
                    self.eos_tracker += tf.cast(
                        tf.equal(predicted_idx, self.vocab.eos_id),
                        self.int_type)
                    elements_finished = tf.logical_or(
                        tf.greater(self.eos_tracker, 0),
                        tf.greater_equal(
                            time,
                            (self.length_mask + self.opt.length_slack)))
                    finished = tf.reduce_all(elements_finished)
                # Scheduled sampling: the decoder's own prediction is fed back
                # when sampling_bias < flip or once every batch item has
                # reached its input length; otherwise the next target token
                # is read from the input array.
                flip = tf.random_uniform(shape=[], minval=0.0, maxval=1.0)
                input_tensor = tf.cond(
                    tf.logical_or(tf.less(self.sampling_bias, flip),
                                  tf.reduce_all(boundary_reached)),
                    lambda: predicted_embeds, lambda: inputs_ta.read(time))
                # If stopping conditions have been met, output a <PAD> slice, then terminate loop
                next_input = tf.cond(finished, lambda: pad_step_embeds,
                                     lambda: input_tensor)

                if self.opt.attentive_decoding:
                    # Input feeding: Combine attentive information with the input to the decoder at the
                    # subsequent time-step (either target tokens or predictions from the current time-step)
                    attentional_hidden_state = _attend()
                    next_input = tf.matmul(
                        tf.concat([next_input, attentional_hidden_state], -1),
                        self.dec_mixture_weights)

                if self.opt.attentive_encoding:  # Unused
                    next_input = tf.matmul(
                        tf.concat(
                            [next_input, self.encoder.sentence_encodings],
                            -1), self.enc_mixture_weights)

            return elements_finished, next_input, next_cell_state, emit_output, next_loop_state

        # Get RNN outputs
        rnn_outputs_tensor_array, final_state, _ = tf.nn.raw_rnn(
            cell, loop_fn)
        rnn_outputs = rnn_outputs_tensor_array.stack()
        rnn_outputs = tf.transpose(rnn_outputs, perm=[1, 0, 2])
        # The decoder cell emits dec_hidden_dims features (see _lstm_cell and
        # _get_predictions), so flatten with the decoder width; the previous
        # enc_hidden_dims only worked when both sizes happened to be equal.
        flat_rnn_outputs = tf.reshape(rnn_outputs,
                                      [-1, self.opt.dec_hidden_dims],
                                      name='reshaped_rnn_outputs')
        # Project RNN outputs into the embedding space, followed by the projection into vocabulary space
        projected_rnn_outputs = tf.nn.xw_plus_b(flat_rnn_outputs,
                                                self.projection_weights,
                                                self.projection_biases)
        logits = tf.nn.xw_plus_b(projected_rnn_outputs,
                                 tf.transpose(self.encoder.embedding_table),
                                 self.output_embedding_biases,
                                 name='logits')
    return final_state, flat_rnn_outputs, projected_rnn_outputs, logits
def create_model(self):
    """Build the attention-sum reader graph.

    Creates the input placeholders, encodes question and document with
    stacked bidirectional RNNs, scores every document position against the
    question encoding, sums the attention mass per candidate and stores the
    cross-entropy in `self.loss` and the number of correct predictions in
    `self.correct_prediction`.
    """
    #########################
    # Suffix legend used in tensor names:
    # b ... position of the example within the batch
    # t ... position of the word within the document/question
    # f ... features of the embedding vector or the encoded feature vector
    # i ... position of the word in candidates list
    #########################
    depth = self.args.num_layers
    width = self.args.hidden_size
    cell_cls = GRUCell
    if self.args.use_lstm:
        cell_cls = LSTMCell

    def _stacked_cell():
        # One direction of an encoder: `depth` fresh cells of `width` units.
        return MultiRNNCell(cells=[cell_cls(width) for _ in range(depth)])

    # model input
    questions_bt = tf.placeholder(dtype=tf.int32,
                                  shape=(None, self.q_len),
                                  name="questions_bt")
    documents_bt = tf.placeholder(dtype=tf.int32,
                                  shape=(None, self.d_len),
                                  name="documents_bt")
    candidates_bi = tf.placeholder(dtype=tf.int32,
                                   shape=(None, self.dataset.A_len),
                                   name="candidates_bi")
    y_true_bi = tf.placeholder(shape=(None, self.dataset.A_len),
                               dtype=tf.float32,
                               name="y_true_bi")

    # Sequence lengths are recovered by counting the non-zero ids per row.
    doc_lengths = tf.reduce_sum(tf.sign(tf.abs(documents_bt)), 1)
    query_lengths = tf.reduce_sum(tf.sign(tf.abs(questions_bt)), 1)
    doc_mask_bt = tf.sequence_mask(doc_lengths, self.d_len, dtype=tf.float32)

    embedding_seed = tf.constant(self.embedding_matrix,
                                 dtype=tf.float32,
                                 name="embedding_init")
    embedding = tf.get_variable(initializer=embedding_seed,
                                name="embedding_matrix",
                                dtype=tf.float32)

    with tf.variable_scope('q_encoder',
                           initializer=tf.orthogonal_initializer()):
        # encode question to fixed length of vector
        # embedded shape: (None, max_q_length, embedding_dim)
        query_embed_btf = tf.nn.embedding_lookup(embedding, questions_bt)
        logger("q_embed_btf shape {}".format(query_embed_btf.get_shape()))
        query_fw = _stacked_cell()
        query_bw = _stacked_cell()
        _, query_states = tf.nn.bidirectional_dynamic_rnn(
            cell_bw=query_bw,
            cell_fw=query_fw,
            dtype="float32",
            sequence_length=query_lengths,
            inputs=query_embed_btf,
            swap_memory=True)
        # q_encoder output shape: (None, hidden_size * 2)
        fw_final, bw_final = query_states[0], query_states[1]
        q_encoded_bf = tf.concat([fw_final[-1], bw_final[-1]], axis=-1)
        logger("q_encoded_bf shape {}".format(q_encoded_bf.get_shape()))

    with tf.variable_scope('d_encoder',
                           initializer=tf.orthogonal_initializer()):
        # encode each document(context) word to fixed length vector
        # embedded shape: (None, max_d_length, embedding_dim)
        doc_embed_btf = tf.nn.embedding_lookup(embedding, documents_bt)
        logger("d_embed_btf shape {}".format(doc_embed_btf.get_shape()))
        doc_fw = _stacked_cell()
        doc_bw = _stacked_cell()
        doc_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_bw=doc_bw,
            cell_fw=doc_fw,
            dtype="float32",
            sequence_length=doc_lengths,
            inputs=doc_embed_btf,
            swap_memory=True)
        # d_encoder output shape: (None, max_d_length, hidden_size * 2)
        d_encoded_btf = tf.concat(doc_outputs, axis=-1)
        logger("d_encoded_btf shape {}".format(d_encoded_btf.get_shape()))

    def _attention_scores(doc_btf, query_bf):
        """Dot product of the question encoding with every document position."""
        # (None, max_d_length, hidden_size * 2) & (None, hidden_size * 2, 1)
        query_column = tf.expand_dims(query_bf, -1)
        raw = tf.matmul(query_column,
                        doc_btf,
                        adjoint_a=True,
                        adjoint_b=True)
        return tf.reshape(raw, [-1, self.d_len])

    with tf.variable_scope('merge'):
        scores_bt = _attention_scores(d_encoded_btf, q_encoded_bf)
        masked_scores_bt = tf.multiply(scores_bt,
                                       doc_mask_bt,
                                       name="attention_mask")
        attention_bt = tf.nn.softmax(logits=masked_scores_bt,
                                     name="softmax_attention")

    # attention-sum process
    def _mass_of_word(word_id, doc_ids, doc_attention):
        # Total attention on the positions where the document holds word_id.
        positions = tf.where(tf.equal(doc_ids, word_id))
        return tf.reduce_sum(tf.gather(doc_attention, positions))

    # noinspection PyUnusedLocal
    def _mass_per_candidate(prev, cur):
        row_candidates, row_doc_ids, row_attention = cur
        return tf.scan(
            fn=lambda previous, x: _mass_of_word(x, row_doc_ids,
                                                 row_attention),
            elems=[row_candidates],
            initializer=tf.constant(0., dtype="float32"))

    def _mass_per_batch(candidate_ids_bi, doc_ids_bt, attention_probs_bt):
        return tf.scan(fn=_mass_per_candidate,
                       elems=[candidate_ids_bi, doc_ids_bt,
                              attention_probs_bt],
                       initializer=tf.Variable([0] * self.dataset.A_len,
                                               dtype="float32"))

    # output shape: (None, i) i = max_candidate_length = 10
    y_hat = _mass_per_batch(candidates_bi, documents_bt, attention_bt)

    # crossentropy, computed manually on the renormalised candidate masses
    normalised = y_hat / tf.reduce_sum(y_hat, axis=-1, keep_dims=True)
    epsilon = tf.convert_to_tensor(_EPSILON,
                                   normalised.dtype.base_dtype,
                                   name="epsilon")
    normalised = tf.clip_by_value(normalised, epsilon, 1. - epsilon)
    per_example = -tf.reduce_sum(y_true_bi * tf.log(normalised), axis=-1)
    self.loss = tf.reduce_mean(per_example)

    # correct prediction nums
    hits = tf.equal(tf.argmax(y_hat, 1), tf.argmax(y_true_bi, 1))
    self.correct_prediction = tf.reduce_sum(tf.sign(tf.cast(hits, "float")))
def __init__(self,
             inp,
             inp_mask,
             decode_time_steps,
             hyper_params=None,
             name='Tacotron'):
    """
    Build the computational graph.

    The graph consists of a character embedding, an encoder pre-net and CBHG
    encoder, an attention module, a decoder driven by `tf.while_loop`, and a
    CBHG post-net followed by a dense layer. The results are stored in
    `self.seq2seq_output`, `self.alpha_output` and `self.post_output`.

    NOTE(review): block nesting below was reconstructed from a
    whitespace-collapsed source; confirm the variable-scope boundaries
    (especially around 'prepare_decode' and 'decoder_rnn') against the
    original file, since they determine variable names.

    :param inp: input tensor fed to the embedding layer; axis 0 is read as
        the batch size and axis 1 as the number of input time steps
        (presumably integer character ids -- confirm against caller).
    :param inp_mask: passed as `sequence_length` to the encoder CBHG and the
        attention module.
    :param decode_time_steps: number of decoder loop iterations; each
        iteration emits `reduction_rate` output frames.
    :param hyper_params: hyper-parameter object; a default `HyperParams()`
        is created when None.
    :param name: name passed to the parent constructor and used as the
        outermost variable scope.
    """
    super(Tacotron, self).__init__(name)
    self.hyper_params = HyperParams(
    ) if hyper_params is None else hyper_params
    with tf.variable_scope(name):
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)
        batch_size = tf.shape(inp)[0]
        input_time_steps = tf.shape(inp)[1]
        reduc = self.hyper_params.reduction_rate
        output_time_steps = decode_time_steps * reduc

        ### Encoder [begin]
        with tf.variable_scope('character_embedding'):
            embed_inp = EmbeddingLayer(self.hyper_params.embed_class,
                                       self.hyper_params.embed_dim)(inp)
        with tf.variable_scope('encoder_pre_net'):
            # Two dense+dropout layers; dropout is built with training=False
            # here, unlike the decoder pre-net below.
            pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                embed_inp, 256, tf.nn.relu),
                                           training=False)
            pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                pre_ed_inp, 128, tf.nn.relu),
                                           training=False)
        encoder_output = modules.cbhg(pre_ed_inp,
                                      training=False,
                                      k=16,
                                      bank_filters=128,
                                      projection_filters=(128, 128),
                                      highway_layers=4,
                                      highway_units=128,
                                      bi_gru_units=128,
                                      sequence_length=inp_mask,
                                      name='encoder_cbhg',
                                      reuse=False)
        ### Encoder [end]

        ### Attention Module
        with tf.variable_scope('attention'):
            att_module = AttentionModule(256,
                                         encoder_output,
                                         sequence_length=inp_mask,
                                         time_major=False)

        ### Decoder [begin]
        att_cell = ZoneoutWrapper(sGRUCell(256), 0.1, False)
        dec_cell = MultiRNNCell(
            [ResidualWrapper(GRUCell(256)) for _ in range(2)])

        # Initial loop variables for the decoder while_loop.
        with tf.variable_scope('prepare_decode'):
            # prepare output alpha TensorArray
            # output_time_steps / reduc: one loop iteration per reduced step.
            reduced_time_steps = tf.div(output_time_steps, reduc)
            init_att_cell_state = att_cell.zero_state(
                batch_size, tf.float32)
            init_dec_cell_state = dec_cell.zero_state(
                batch_size, tf.float32)
            init_state_tup = tuple(
                [init_att_cell_state, init_dec_cell_state])
            init_output_ta = tf.TensorArray(size=reduced_time_steps,
                                            dtype=tf.float32)
            init_alpha_ta = tf.TensorArray(size=reduced_time_steps,
                                           dtype=tf.float32)
            # All-zero "go" frame fed as the first decoder input.
            go_array = tf.zeros(
                [batch_size, self.hyper_params.seq2seq_dim],
                dtype=tf.float32)
            init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
            init_time = tf.constant(0, dtype=tf.int32)

        # Loop while the step counter (first loop variable) is below the
        # number of reduced steps.
        cond = lambda x, *_: tf.less(x, reduced_time_steps)

        def body(this_time, old_output_ta, old_alpha_ta, old_state_tup,
                 last_context, last_output):
            # One decoder step: pre-net -> attention RNN -> attention ->
            # decoder RNN -> dense projection to `reduc` frames.
            with tf.variable_scope('decoder_pre_net'):
                dec_pre_ed_inp = last_output
                dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    dec_pre_ed_inp, 256, tf.nn.relu),
                                                   training=True)
                dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    dec_pre_ed_inp, 128, tf.nn.relu),
                                                   training=True)
            with tf.variable_scope('attention_rnn'):
                att_cell_inp = tf.concat([last_context, dec_pre_ed_inp],
                                         axis=-1)
                att_cell_out, att_cell_state = att_cell(
                    att_cell_inp, old_state_tup[0])
            with tf.variable_scope('attention'):
                # The attention RNN state is used as the attention query.
                query = att_cell_state
                context, alpha = att_module(query)
                new_alpha_ta = old_alpha_ta.write(this_time, alpha)
            with tf.variable_scope('decoder_rnn'):
                dec_input = tf.layers.dense(
                    tf.concat([att_cell_out, context], axis=-1), 256)
                dec_cell_out, dec_cell_state = dec_cell(
                    dec_input, old_state_tup[1])
                dense_out = tf.layers.dense(
                    dec_cell_out, self.hyper_params.seq2seq_dim * reduc)
                new_output_ta = old_output_ta.write(this_time, dense_out)
                # Only the last of the `reduc` frames is fed back as the
                # next step's input.
                new_output = dense_out[:, -self.hyper_params.seq2seq_dim:]
            new_state_tup = tuple([att_cell_state, dec_cell_state])
            return tf.add(
                this_time, 1
            ), new_output_ta, new_alpha_ta, new_state_tup, context, new_output

        # run loop
        _, seq2seq_output_ta, alpha_ta, *_ = tf.while_loop(
            cond, body, [
                init_time, init_output_ta, init_alpha_ta, init_state_tup,
                init_context, go_array
            ])

        with tf.variable_scope('reshape_decode'):
            # Stacked array is step-major; give it an explicit shape, move
            # the batch axis first, then unfold the `reduc` frames per step.
            seq2seq_output = tf.reshape(
                seq2seq_output_ta.stack(),
                shape=(reduced_time_steps, batch_size,
                       self.hyper_params.seq2seq_dim * reduc))
            seq2seq_output = tf.reshape(
                tf.transpose(seq2seq_output, perm=(1, 0, 2)),
                shape=(batch_size, output_time_steps,
                       self.hyper_params.seq2seq_dim))
            self.seq2seq_output = seq2seq_output
            alpha_output = tf.reshape(alpha_ta.stack(),
                                      shape=(reduced_time_steps, batch_size,
                                             input_time_steps))
            # Batch-major alignments with a trailing singleton axis.
            alpha_output = tf.expand_dims(
                tf.transpose(alpha_output, perm=(1, 0, 2)), -1)
            self.alpha_output = alpha_output
        ### Decoder [end]

        ### PostNet [begin]
        post_output = modules.cbhg(
            seq2seq_output,
            training=False,
            k=8,
            bank_filters=128,
            projection_filters=(256, self.hyper_params.seq2seq_dim),
            highway_layers=4,
            highway_units=128,
            bi_gru_units=128,
            sequence_length=None,
            name='decoder_cbhg',
            reuse=False)
        post_output = tf.layers.dense(post_output,
                                      self.hyper_params.post_dim,
                                      name='post_linear_transform')
        self.post_output = post_output
def build_decoder_cell(self, encoder_outputs, encoder_state):
    """
    Build the decoder cell.

    Wraps a stack of `self.depth` single cells in an attention wrapper that
    attends over the encoder outputs, and seeds its initial state with the
    encoder state.

    :param encoder_outputs: encoder outputs used as attention memory
    :param encoder_state: final encoder state used to initialise the decoder
    :return: tuple `(cell, decoder_initial_state)`
    """
    memory_length = self.encoder_inputs_length
    effective_batch = self.batch_size

    if self.bidirection:
        # Keep only the last `depth` entries of the encoder state.
        encoder_state = encoder_state[-self.depth:]

    if self.time_major:
        encoder_outputs = tf.transpose(encoder_outputs, (1, 0, 2))

    if self.use_beamsearch_decode:
        # Replicate every encoder-side tensor once per beam.
        width = self.beam_width
        encoder_outputs = seq2seq.tile_batch(encoder_outputs,
                                             multiplier=width)
        encoder_state = seq2seq.tile_batch(encoder_state, multiplier=width)
        memory_length = seq2seq.tile_batch(self.encoder_inputs_length,
                                           multiplier=width)
        effective_batch = effective_batch * width

    # Any attention type other than 'luong' falls back to Bahdanau.
    if self.attention_type.lower() == 'luong':
        mechanism_cls = LuongAttention
    else:
        mechanism_cls = BahdanauAttention
    self.attention_mechanism = mechanism_cls(
        num_units=self.hidden_size,
        memory=encoder_outputs,
        memory_sequence_length=memory_length)

    stacked = MultiRNNCell([
        self.build_single_cell(self.hidden_size,
                               use_residual=self.use_residual)
        for _ in range(self.depth)
    ])

    # Alignment history is recorded only outside training and without beam
    # search.
    keep_alignments = (self.mode != 'train'
                       and not self.use_beamsearch_decode)

    def cell_input_fn(inputs, attention):
        if self.use_residual:
            # Project the concatenation back to hidden_size.
            projection = layers.Dense(self.hidden_size,
                                      dtype=tf.float32,
                                      use_bias=False,
                                      name='attention_cell_input_fn')
            return projection(array_ops.concat([inputs, attention], -1))
        return array_ops.concat([inputs, attention], -1)

    attentive_cell = AttentionWrapper(
        cell=stacked,
        attention_mechanism=self.attention_mechanism,
        attention_layer_size=self.hidden_size,
        alignment_history=keep_alignments,
        cell_input_fn=cell_input_fn,
        name='Attention_Wrapper')

    # Start from the wrapper's zero state, then hand over the encoder state.
    zero_state = attentive_cell.zero_state(effective_batch, tf.float32)
    decoder_initial_state = zero_state.clone(cell_state=encoder_state)

    return attentive_cell, decoder_initial_state
def _build_network(self, dropout):
    """Build the stacked-LSTM classification graph.

    Creates the input/target placeholders, the embedding lookup, a stack of
    `config.num_layers` LSTM cells unrolled with `static_rnn`, and the softmax
    output layer. Results are stored on the instance: `_input_ids`,
    `_target_ids`, `_stack`, `_state`, `_probs` and `_loss`.

    Args:
        dropout: dropout rate; when greater than 0 every LSTM layer's output
            is wrapped with keep probability `1 - dropout`.
    """
    # Legend for tensor shapes below:
    #   B := batch size
    #   C := number of classes
    #   H := number of hidden units (aka layer size)
    #   S := sequence length

    # keep a reference to _config to make code below simpler
    config = self._config

    # Create size BxS input and target placeholder tensors
    # These will be filled in with actual values at session runtime
    data_dims = [self._batch_size, self._seq_len]
    self._input_ids = tf.placeholder(tf.int32, data_dims)
    self._target_ids = tf.placeholder(tf.int64, data_dims)

    # Create an embedding tensor to represent integer inputs into H dimensions
    # This must be done on the CPU, according to:
    # https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py#L143
    # (Ops and variables pinned to the CPU because of missing GPU implementation)
    with tf.device("/cpu:0"):
        # embeddings is a CxH tensor
        embeddings = tf.get_variable('embeddings',
                                     [config.num_classes, config.num_hidden])
        # embedded is a BxSxH tensor
        embedded = tf.nn.embedding_lookup(embeddings, self._input_ids)

    # sequences is a list of length S containing Bx1xH tensors
    sequences = tf.split(embedded, self._seq_len, 1)
    # perform a "squeeze" on each item in the sequence list
    # inputs is a list of length S containing BxH tensors
    inputs = [tf.squeeze(seq, [1]) for seq in sequences]

    def _make_layer():
        # A fresh cell per layer: repeating one cell object ([cell] * n)
        # makes every layer the same instance, which newer TF 1.x releases
        # reject or treat as weight sharing between layers.
        layer = BasicLSTMCell(config.num_hidden)
        if dropout > 0:
            keep_prob = 1 - dropout
            layer = DropoutWrapper(layer, output_keep_prob=keep_prob)
        return layer

    # create LSTM cells and stack them
    self._stack = MultiRNNCell(
        [_make_layer() for _ in range(config.num_layers)])
    self._state = self._stack.zero_state(self._batch_size, tf.float32)

    # Pump the inputs through the RNN layers
    # outputs is a list of length S containing BxH tensors
    outputs, self._state = static_rnn(self._stack,
                                      inputs,
                                      initial_state=self._state)

    # Softmax weight tensor is HxC
    W_soft = tf.get_variable('W_soft',
                             [config.num_hidden, config.num_classes])
    # Softmax bias tensor is Cx1
    b_soft = tf.get_variable('b_soft', [config.num_classes])

    # Reshape the output so that we can use it with the softmax weights and bias:
    # - concat makes list into a BxSH tensor,
    # - reshape converts the BxSH tensor into a BSxH tensor
    output = tf.reshape(tf.concat(outputs, 1), [-1, config.num_hidden])

    # logits is a (BSxH).(HxC) + 1xC = BSxC + 1xC = BSxC tensor
    logits = tf.nn.xw_plus_b(output, W_soft, b_soft)

    # probs is a BSxC tensor, with entry (i,j) containing the probability that batch i is class j
    self._probs = tf.nn.softmax(logits)

    # targets is a flat tensor of BS class ids
    targets = tf.reshape(self._target_ids,
                         [self._batch_size * self._seq_len])
    # cross_entropy holds one loss value per flattened position (BS values)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=targets)

    # loss is a scalar containing the mean of cross_entropy losses
    self._loss = tf.reduce_mean(cross_entropy)
def __init__(self, is_training, config, input_):
    """
    Read the hyperparameters and build the network architecture from them.

    The graph stacks `config.num_layers` Bayesian LSTM cells, unrolls them
    over `input_.num_steps` time steps and adds a dense softmax output layer
    whose weights are sampled from the variational posterior. When
    `is_training` is false the constructor returns after the forward pass
    and the data cost, so the KL term, the optimizer and the learning-rate
    ops are not added to the graph.

    Args:
        is_training: whether the training part of the graph is built; also
            forwarded to the Bayesian cells and to `VI.sample_posterior`.
        config: hyperparameter object. Fields read here: batch_size,
            Y_cardinality, X_dim, embedding, prior_pi, log_sigma1,
            log_sigma2, num_layers, hidden_size, max_grad_norm.
        input_: input object. Fields read here: num_steps, input_data,
            targets, epoch_size.
    """
    # Variable to know if the model is being used for training
    self._is_training = is_training
    self._input = input_

    # Setting the chains properties
    self.batch_size = config.batch_size
    self.num_steps = input_.num_steps
    self._input_data = input_.input_data
    input_data_ids = input_.input_data
    self._targets = input_.targets

    # Dimensionality of the output; for classification, the cardinality of Y.
    Y_cardinality = config.Y_cardinality

    # Construct prior
    prior = VI.Prior(config.prior_pi, config.log_sigma1, config.log_sigma2)

    ########################################################################
    #### Transform categorical values (words) into real-valued vectors ####
    ########################################################################
    # Both values are needed on every path below, so bind them once up front
    # instead of per branch; this guarantees `targets` exists when the
    # embedding path is taken.
    X_dim = config.X_dim
    targets = input_.targets
    if (config.embedding == True):
        # Discrete inputs: embed the ids into vectors of size X_dim.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding",
                                        [Y_cardinality, config.X_dim],
                                        dtype=VI.data_type())
            inputs = tf.nn.embedding_lookup(embedding, input_data_ids)
    else:
        # Inputs are used as given.
        inputs = input_data_ids

    # These are the chains in the batch, indexed below as
    # [chain, time step, feature].
    # TODO: all inputs are given when defining the model; it would be
    # preferable to preprocess beforehand and feed chains via placeholders.
    input_chains = inputs[:, :, :]

    print ("-----------------------------")
    print ("Input Batch X shape", inputs.shape)
    print ("Input Batch Y shape", targets.shape)
    print ("Input_size: %i"%X_dim)
    print ("Output_size: %i"%Y_cardinality)
    print ("Number of chains in a batch: %i"%self.batch_size)
    print ("Number of elements in a chain: %i"%self.num_steps)
    print ("Number of hidden state neurons LTSM: %i"%config.hidden_size)

    ########################################################################
    ############# Start building the architecture of the network ###########
    ########################################################################

    ################ Build and stack BBB LSTM cells ################
    cells = []
    for i in range(config.num_layers):
        # Only the first layer sees the raw input width; deeper layers
        # consume the hidden state of the layer below.
        if (i == 0):
            LSTM_input_size = X_dim
        else:
            LSTM_input_size = config.hidden_size
        cells.append(
            BLC.BayesianLSTMCell(LSTM_input_size,
                                 config.hidden_size,
                                 prior,
                                 is_training,
                                 forget_bias=0.0,
                                 name="bbb_lstm_{}".format(i)))

    # Stack the LSTM cells together. state_is_tuple=True because each LSTM
    # cell has two states, C_t and h_t.
    DeepLSTMRNN = MultiRNNCell(cells, state_is_tuple=True)

    ######## Propagate the chains in the batch from input to output ########
    # Zero initial state; it needs the batch size because all chains in the
    # batch are propagated in parallel.
    self._initial_state = DeepLSTMRNN.zero_state(config.batch_size,
                                                 VI.data_type())
    state = self._initial_state

    # Forward pass for the truncated mini-batch. hs_o collects, for every
    # time step, the output of the last LSTM layer so it can later be mapped
    # to the real output and used in the cost function.
    hs_o = []
    with tf.variable_scope("RNN"):
        for time_step in range(self.num_steps):
            if (time_step > 0):
                # Reuse the variables created at the first step rather than
                # creating a new set per time step.
                tf.get_variable_scope().reuse_variables()
            # Feed the time_step-th element of every chain at once.
            (cell_output, state) = DeepLSTMRNN(input_chains[:, time_step, :],
                                               state)
            hs_o.append(cell_output)
    print (["size output state LSTM", cell_output.shape])

    # Concatenate the per-step outputs along the feature axis and flatten to
    # [-1, hidden_size]: every row is multiplied by the same dense softmax
    # weights, so they can be processed in a single matmul.
    hs_o = tf.reshape(tf.concat(hs_o, 1), [-1, config.hidden_size])
    print (["Size of the Concatenated output state of the last LSTM for all chains in batch and time-steps in a batch", hs_o.shape])

    ################ Build the output layer ################
    # A dense layer mapping the hidden space of the last LSTM to one score
    # per discrete output value. Its weights and bias are sampled from the
    # variational posterior.
    softmax_w = VI.sample_posterior((config.hidden_size, Y_cardinality),
                                    "softmax_w", prior, is_training)
    softmax_b = VI.sample_posterior((Y_cardinality, 1), "softmax_b", prior,
                                    is_training)
    print ("Shape of the weights of the output Dense layer",softmax_w.shape)
    print ("Shape of the weights of the output Dense layer",softmax_b.shape)

    # Logits are the input to the softmax. The bias is sampled with a
    # trailing singleton axis, hence the squeeze.
    logits = tf.nn.xw_plus_b(hs_o, softmax_w, tf.squeeze(softmax_b))
    print ("Shape of logits after multiplication of ohs", logits.shape)
    # Reshape back to [chain, time step, output].
    logits = tf.reshape(logits,
                        [self.batch_size, self.num_steps, Y_cardinality])
    print ("Shape of logits after reshpaing", logits.shape)

    # Output of the batch: for each of the batch_size chains and each of its
    # num_steps elements, a probability for each of the Y_cardinality values.
    self._output = tf.nn.softmax(logits)

    ################ Setting the loss function ################
    # B = number of batches aka the epoch size
    # C = number of truncated sequences in a batch aka batch_size variable
    B = self._input.epoch_size
    C = self.batch_size

    loss = tf.contrib.seq2seq.sequence_loss(
        logits,
        targets,
        tf.ones([self.batch_size, self.num_steps], dtype=VI.data_type()),
        average_across_timesteps=False,
        average_across_batch=False)

    # Data cost: summed over time steps and chains, divided by batch size.
    self._cost = tf.reduce_sum(loss) / self.batch_size
    self._kl_loss = 0.
    self._final_state = state

    if not is_training:
        return

    # KL divergence: sum of the terms registered in the "KL_layers"
    # collection, scaled by 1 / (B * C).
    kl_loss = tf.add_n(tf.get_collection("KL_layers"), "kl_divergence")
    self._kl_loss = kl_loss /(B*C)

    # Final loss: data cost plus the KL divergence of the posterior.
    self._total_loss = self._cost + self._kl_loss

    ################ Setting the training algorithm ################
    # Gradients of the total loss w.r.t. all trainable variables are clipped
    # by global norm and applied with plain gradient descent; the learning
    # rate is a non-trainable variable updated through _lr_update.
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self._total_loss, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(
        zip(grads, tvars),
        global_step=tf.contrib.framework.get_or_create_global_step())

    self._new_lr = tf.placeholder(VI.data_type(),
                                  shape=[],
                                  name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
def create_model(self):
    """Build the TF1 graph of the question/document span-prediction model.

    The graph consists of:

    * input placeholders for word ids, character ids and the answer
      start / end targets;
    * an optional character CNN whose per-word features are concatenated
      to the word embeddings (``self.args.use_char_embedding``);
    * stacked bidirectional RNN encoders for the question and the document;
    * a document-to-question attention layer producing ``G_belta``;
    * two further bidirectional RNN layers and two masked softmax heads
      giving a distribution over document positions for the answer start
      and the answer end.

    Reads ``self.args`` (``num_layers``, ``hidden_size``, ``use_lstm``,
    ``keep_prob``, ``use_char_embedding``), ``self.q_len``, ``self.d_len``,
    ``self.q_char_len``, ``self.d_char_len``, ``self.embedding_matrix`` and
    ``self.char_embedding_matrix``.

    Sets ``self.result_s``, ``self.result_e`` (unclipped predictions),
    ``self.p1``, ``self.p2`` (clipped predictions), ``self.answer_s``,
    ``self.answer_e`` (target placeholders), ``self.loss``,
    ``self.correct_prediction``, ``self.begin_acc`` and ``self.end_acc``.
    """
    args = self.args
    num_layers = args.num_layers
    hidden_size = args.hidden_size
    keep_prob = args.keep_prob
    cell = LSTMCell if args.use_lstm else GRUCell

    def stacked_cell(depth):
        """Return ``depth`` dropout-wrapped recurrent cells as one MultiRNNCell."""
        return MultiRNNCell(cells=[
            DropoutWrapper(cell(hidden_size), output_keep_prob=keep_prob)
            for _ in range(depth)
        ])

    # ------------------------------------------------------------------ inputs
    q_input = tf.placeholder(dtype=tf.int32, shape=[None, self.q_len],
                             name='questions_bt')
    d_input = tf.placeholder(dtype=tf.int32, shape=[None, self.d_len],
                             name='documents_bt')
    answer_s = tf.placeholder(dtype=tf.float32, shape=[None, None],
                              name='answer_start')
    answer_e = tf.placeholder(dtype=tf.float32, shape=[None, None],
                              name='answer_end')
    q_input_char = tf.placeholder(
        dtype=tf.int32, shape=[None, self.q_len, self.q_char_len],
        name='questions_bt_char')
    d_input_char = tf.placeholder(
        dtype=tf.int32, shape=[None, self.d_len, self.d_char_len],
        name='documents_bt_char')

    init_embed = tf.constant(self.embedding_matrix, dtype=tf.float32)
    embedding_matrix = tf.get_variable(name='embdding_matrix',
                                       initializer=init_embed,
                                       dtype=tf.float32)

    # Sequence lengths are the number of non-zero token ids in each row.
    q_real_len = tf.reduce_sum(tf.sign(tf.abs(q_input)), axis=1)
    d_real_len = tf.reduce_sum(tf.sign(tf.abs(d_input)), axis=1)
    d_mask = tf.sequence_mask(dtype=tf.float32, maxlen=self.d_len,
                              lengths=d_real_len)
    # The question mask has to be derived from the *question* lengths; it is
    # used below to mask the attention over question positions.
    q_mask = tf.sequence_mask(dtype=tf.float32, maxlen=self.q_len,
                              lengths=q_real_len)
    _EPSILON = 10e-8
    batch_size = tf.shape(q_input)[0]

    # --------------------------------------------------- character-level CNN
    def char_cnn(char_embed, seq_len, filter_name):
        """Turn [B, words, chars, emb] char embeddings into [B, words, feat]."""
        # Words become the channel axis: [B, chars, emb, words] (NHWC).
        nhwc = tf.transpose(char_embed, perm=[0, 2, 3, 1])
        # [filter_height, filter_width, in_channels, out_channels]
        conv_filter = tf.get_variable(filter_name,
                                      shape=[5, 5, seq_len, seq_len])
        conv_out = tf.nn.conv2d(nhwc,
                                conv_filter,
                                strides=[1, 1, 1, 1],
                                padding='VALID',
                                use_cudnn_on_gpu=True,
                                data_format="NHWC",
                                name=None)
        pooled = tf.nn.max_pool(conv_out,
                                ksize=[1, 5, 5, 1],
                                strides=[1, 1, 1, 1],
                                padding='VALID',
                                data_format="NHWC",
                                name=None)
        pooled_shape = pooled.get_shape().as_list()
        feat_size = pooled_shape[1] * pooled_shape[2]
        # Back to one flattened feature vector per word.
        return tf.reshape(tf.transpose(pooled, perm=[0, 3, 1, 2]),
                          shape=[batch_size, seq_len, feat_size])

    if args.use_char_embedding:
        char_embedding = tf.get_variable(
            name='can_embdding_matrix',
            initializer=tf.constant(self.char_embedding_matrix,
                                    dtype=tf.float32),
            dtype=tf.float32,
            trainable=True)
        with tf.variable_scope('char_embedding', reuse=tf.AUTO_REUSE):
            q_char_embed = tf.nn.embedding_lookup(
                char_embedding, q_input_char)  # B * Q * C * emb
            d_char_embed = tf.nn.embedding_lookup(
                char_embedding, d_input_char)  # B * D * C * emb
            q_char_embed = tf.nn.dropout(q_char_embed, keep_prob=keep_prob)
            d_char_embed = tf.nn.dropout(d_char_embed, keep_prob=keep_prob)
        with tf.variable_scope('char_conv', reuse=tf.AUTO_REUSE):
            q_char_out = char_cnn(q_char_embed, self.q_len, 'q_filter_w')
            d_char_out = char_cnn(d_char_embed, self.d_len, 'd_filter_w')

    # ------------------------------------------------------------- encoders
    with tf.variable_scope('q_encoder'):
        q_embed = tf.nn.embedding_lookup(embedding_matrix, q_input)
        if args.use_char_embedding:
            q_embed = tf.concat([q_embed, q_char_out], -1)
        q_rnn_f = stacked_cell(num_layers)
        q_rnn_b = stacked_cell(num_layers)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=q_rnn_f,
            cell_bw=q_rnn_b,
            inputs=q_embed,
            sequence_length=q_real_len,
            initial_state_bw=None,
            dtype="float32",
            parallel_iterations=None,
            swap_memory=True,
            time_major=False,
            scope=None)
        # Concatenate forward and backward outputs along the feature axis.
        q_emb_bi = tf.concat(outputs, axis=-1)
        logger("q_encoded_bf shape {}".format(q_emb_bi.get_shape()))

    with tf.variable_scope('d_encoder'):
        d_embed = tf.nn.embedding_lookup(embedding_matrix, d_input)
        if args.use_char_embedding:
            d_embed = tf.concat([d_embed, d_char_out], -1)
        d_rnn_f = stacked_cell(num_layers)
        d_rnn_b = stacked_cell(num_layers)
        d_rnn_out, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_bw=d_rnn_b,
            cell_fw=d_rnn_f,
            inputs=d_embed,
            sequence_length=d_real_len,
            swap_memory=True,
            dtype="float32",
        )
        d_emb_bi = tf.concat(d_rnn_out, axis=-1)
        logger("d_encoded_bf shape {}".format(d_emb_bi.get_shape()))

    # ------------------------------------------ document/question attention
    with tf.variable_scope('ctq_att'):
        ctq_w = tf.get_variable(shape=[hidden_size * 6, 1], name='ctq_w')
        # Pair every document position with every question position:
        # both tensors become [B, D, Q, 2 * hidden].
        d_expanded = tf.tile(tf.expand_dims(d_emb_bi, 2),
                             [1, 1, self.q_len, 1])
        q_expanded = tf.tile(tf.expand_dims(q_emb_bi, 1),
                             [1, self.d_len, 1, 1])
        dq_dot = tf.concat(
            [d_expanded, q_expanded, d_expanded * q_expanded], axis=-1)
        # Similarity score per (document, question) pair: [B, D, Q].
        dq_dot = tf.squeeze(tf.tensordot(dq_dot, ctq_w,
                                         axes=((-1, ), (0, ))),
                            axis=-1)
        dq_dot_softmax = self.softmax_with_mask(
            logits=dq_dot,
            axis=2,
            mask=tf.tile(tf.expand_dims(q_mask, axis=1),
                         [1, self.d_len, 1]))
        # Attended question vector for every document position.
        U_hat = tf.einsum("bij,bjk->bik", dq_dot_softmax,
                          q_emb_bi)  # B * D * hidden*2
        max_atten = self.softmax_with_mask(tf.reduce_max(dq_dot, axis=-1),
                                           mask=d_mask,
                                           axis=-1)  # B * D
        # One attended document summary, repeated for every position.
        H_hat = tf.tile(
            tf.expand_dims(tf.reduce_sum(
                tf.multiply(tf.expand_dims(max_atten, axis=-1), d_emb_bi),
                1),
                           axis=1),
            [1, self.d_len, 1])  # B * D * hidden*2
        G_belta = tf.concat(
            [d_emb_bi, U_hat, d_emb_bi * U_hat, d_emb_bi * H_hat], axis=-1)

    # ------------------------------------------------------- modelling layer
    with tf.variable_scope('model_layer'):
        model_cell_f = stacked_cell(1)
        model_cell_b = stacked_cell(1)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=model_cell_f,
            cell_bw=model_cell_b,
            inputs=G_belta,
            sequence_length=d_real_len,
            swap_memory=True,
            dtype='float32')
        M = tf.concat(outputs, axis=-1)

    # ---------------------------------------------------------- output layer
    with tf.variable_scope('output_layer'):
        w_p_1 = tf.get_variable('w_p_1', shape=[hidden_size * 10, 1])
        out_cell_f = stacked_cell(1)
        out_cell_b = stacked_cell(1)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=out_cell_f,
            cell_bw=out_cell_b,
            inputs=M,
            sequence_length=d_real_len,
            dtype='float32')
        M_2 = tf.concat(outputs, axis=-1)
        w_p_2 = tf.get_variable('w_p_2', shape=[hidden_size * 10, 1])

        def span_head(features, weights):
            """Score every document position and normalise with the doc mask."""
            flat = tf.reshape(tf.concat([G_belta, features], -1),
                              [-1, hidden_size * 10])
            return self.softmax_with_mask(
                logits=tf.reshape(tf.matmul(flat, weights),
                                  [-1, self.d_len]),
                axis=-1,
                mask=d_mask)

        p1 = span_head(M, w_p_1)
        self.result_s = p1
        p2 = span_head(M_2, w_p_2)
        self.result_e = p2

    self.answer_s = answer_s
    self.answer_e = answer_e

    # Keep the predictions strictly inside (0, 1) so their log is finite.
    epsilon = tf.convert_to_tensor(_EPSILON,
                                   p1.dtype.base_dtype,
                                   name="epsilon")
    p1 = tf.clip_by_value(p1, epsilon, 1. - epsilon)
    p2 = tf.clip_by_value(p2, epsilon, 1. - epsilon)
    self.p1 = p1
    self.p2 = p2

    # ------------------------------------------------------------------ loss
    # sparse_softmax_cross_entropy_with_logits applies its own softmax, so it
    # must be fed log-probabilities, not the probabilities themselves;
    # softmax(log p) == p, which yields the intended -log p[label].
    # NOTE(review): assumes softmax_with_mask returns probabilities, as the
    # [epsilon, 1 - epsilon] clipping above implies.
    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.log(self.p1), labels=tf.argmax(self.answer_s, -1))
    losses += tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=tf.log(self.p2), labels=tf.argmax(self.answer_e, -1))
    self.loss = tf.reduce_mean(losses)

    # --------------------------------------------------------------- metrics
    # Counts (not ratios) of examples whose predicted start / end / both
    # positions match the targets.
    start_hit = tf.equal(
        tf.argmax(self.answer_s, 1, output_type=tf.int32),
        tf.argmax(self.result_s, -1, output_type=tf.int32))
    end_hit = tf.equal(
        tf.argmax(self.answer_e, 1, output_type=tf.int32),
        tf.argmax(self.result_e, -1, output_type=tf.int32))
    self.correct_prediction = tf.reduce_sum(
        tf.sign(tf.cast(tf.logical_and(start_hit, end_hit), dtype='float')))
    self.begin_acc = tf.reduce_sum(
        tf.sign(tf.cast(start_hit, dtype='float')))
    self.end_acc = tf.reduce_sum(
        tf.sign(tf.cast(end_hit, dtype='float')))
def initialize(self,
               inputs,
               input_lengths,
               mel_targets=None,
               linear_targets=None,
               pml_targets=None,
               gta=False,
               locked_alignments=None,
               logs_enabled=True):
    '''Initializes the model for inference.

    Sets "inputs", "input_lengths", "pml_outputs", "alignments" and
    "pml_targets" fields.

    Args:
      inputs: int32 Tensor with shape [N, T_in] where N is batch size, T_in is number of
        steps in the input time series, and values are character IDs
      input_lengths: int32 Tensor with shape [N] where N is batch size and values are the
        lengths of each sequence in inputs.
      mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch size, T_out is
        number of steps in the output time series, M is num_mels, and values are entries
        in the mel spectrogram. Not used by this method.
      linear_targets: float32 Tensor with shape [N, T_out, F] where N is batch_size, T_out
        is number of steps in the output time series, F is num_freq, and values are
        entries in the linear spectrogram. Not used by this method.
      pml_targets: float32 Tensor with shape [N, T_out, P] where N is batch_size, T_out is
        number of steps in the PML vocoder features trajectories, P is pml_dimension, and
        values are PML vocoder features. Only needed for training; the model is built in
        training mode whenever this is not None.
      gta: boolean flag that is set to True when ground truth alignment is required; the
        training (teacher-forcing) helper is then used even outside training.
      locked_alignments: when explicit attention alignment is required, the locked
        alignments are passed in this parameter. Accepted for interface compatibility;
        this method does not read it.
      logs_enabled: boolean flag that defaults to True, if False no construction logs
        are output.
    '''
    with tf.variable_scope('inference'):
        is_training = pml_targets is not None
        batch_size = tf.shape(inputs)[0]
        hp = self._hparams

        # Embeddings
        embedding_table = tf.get_variable(
            'embedding', [len(symbols), hp.embed_depth],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.5))
        embedded_inputs = tf.nn.embedding_lookup(
            embedding_table, inputs)  # [N, T_in, embed_depth=256]

        # Encoder
        prenet_outputs = prenet(
            embedded_inputs, is_training,
            hp.prenet_depths)  # [N, T_in, prenet_depths[-1]=128]
        encoder_outputs = encoder_cbhg(
            prenet_outputs, input_lengths, is_training,
            hp.encoder_depth)  # [N, T_in, encoder_depth=256]

        # Attention
        attention_cell = AttentionWrapper(
            GRUCell(hp.attention_depth),
            BahdanauAttention(hp.attention_depth, encoder_outputs),
            alignment_history=True,
            output_attention=False)  # [N, T_in, attention_depth=256]

        # Apply prenet before concatenation in AttentionWrapper.
        attention_cell = DecoderPrenetWrapper(attention_cell, is_training,
                                              hp.prenet_depths)

        # Concatenate attention context vector and RNN cell output into a
        # 2*attention_depth=512D vector.
        concat_cell = ConcatOutputAndAttentionWrapper(
            attention_cell)  # [N, T_in, 2*attention_depth=512]

        # Decoder (layers specified bottom to top):
        decoder_cell = MultiRNNCell(
            [
                OutputProjectionWrapper(concat_cell, hp.decoder_depth),
                ResidualWrapper(GRUCell(hp.decoder_depth)),
                ResidualWrapper(GRUCell(hp.decoder_depth))
            ],
            state_is_tuple=True)  # [N, T_in, decoder_depth=256]

        # Project onto r PML feature vectors (predict r outputs at each RNN step):
        output_cell = OutputProjectionWrapper(
            decoder_cell, hp.pml_dimension * hp.outputs_per_step)
        decoder_init_state = output_cell.zero_state(batch_size=batch_size,
                                                    dtype=tf.float32)

        # Teacher forcing when targets are available (or GTA is requested),
        # otherwise feed the decoder its own predictions.
        if is_training or gta:
            helper = TacoTrainingHelper(inputs, pml_targets,
                                        hp.pml_dimension,
                                        hp.outputs_per_step)
        else:
            helper = TacoTestHelper(batch_size, hp.pml_dimension,
                                    hp.outputs_per_step)

        (multi_decoder_outputs, _), final_decoder_state, _ = \
            tf.contrib.seq2seq.dynamic_decode(
                BasicDecoder(output_cell, helper, decoder_init_state),
                maximum_iterations=hp.max_iters)  # [N, T_out/r, P*r]

        # Reshape outputs to be one output per entry
        decoder_outputs = tf.reshape(
            multi_decoder_outputs,
            [batch_size, -1, hp.pml_dimension])  # [N, T_out, P]

        # Postnet: predicts a residual
        postnet_outputs = postnet(decoder_outputs,
                                  layers=hp.postnet_conv_layers,
                                  conv_width=hp.postnet_conv_width,
                                  channels=hp.postnet_conv_channels,
                                  is_training=is_training)
        pml_outputs = decoder_outputs + postnet_outputs

        # Grab alignments from the final decoder state; the first entry of the
        # MultiRNNCell state belongs to the attention-wrapped bottom layer.
        alignments = tf.transpose(
            final_decoder_state[0].alignment_history.stack(), [1, 2, 0])

        self.inputs = inputs
        self.input_lengths = input_lengths
        self.pml_outputs = pml_outputs
        self.alignments = alignments
        self.pml_targets = pml_targets

        # Honour the documented switch: stay silent when logs are disabled.
        if logs_enabled:
            log('Initialized Tacotron model. Dimensions: ')
            log(' embedding: %d' % embedded_inputs.shape[-1])
            log(' prenet out: %d' % prenet_outputs.shape[-1])
            log(' encoder out: %d' % encoder_outputs.shape[-1])
            log(' attention out: %d' % attention_cell.output_size)
            log(' concat attn & out: %d' % concat_cell.output_size)
            log(' decoder cell out: %d' % decoder_cell.output_size)
            log(' decoder out (%d frames): %d' %
                (hp.outputs_per_step, multi_decoder_outputs.shape[-1]))
            log(' decoder out (1 frame): %d' % pml_outputs.shape[-1])
def __init__(self, is_training, config, input_):
    """Build the graph of the Bayesian (Bayes-by-Backprop) LSTM model.

    Stacks ``config.num_layers`` ``BayesianLSTMCell`` layers, unrolls them
    over ``input_.num_steps`` time steps, adds a sampled softmax layer and
    stores the per-batch data cost in ``self._cost``.  When ``is_training``
    is true, the KL terms collected in the ``"KL_layers"`` collection are
    added and a gradient-descent training op with global-norm clipping is
    created.

    Parameters
    ----------
    is_training : bool
        Whether to build the KL term and the training ops.
    config : object
        Read for ``X_dim``, ``hidden_size``, ``vocab_size``, ``num_layers``,
        ``batch_size``, ``max_grad_norm`` and the prior parameters
        ``prior_pi``, ``log_sigma1``, ``log_sigma2``.
    input_ : object
        Read for ``batch_size``, ``num_steps``, ``input_data``, ``targets``
        and (in training) ``epoch_size``.
    """
    self._is_training = is_training
    self._input = input_
    self.batch_size = input_.batch_size
    self.num_steps = input_.num_steps
    self._input_data = input_.input_data
    self._targets = input_.targets

    input_dim = config.X_dim
    hidden_dim = config.hidden_size
    n_classes = config.vocab_size

    # Shared prior over all sampled weights.
    weight_prior = Prior(config.prior_pi, config.log_sigma1,
                         config.log_sigma2)

    batch_inputs = input_.input_data

    # Only the first layer sees the raw input width; deeper layers consume
    # the hidden state of the layer below.
    layers = [
        BayesianLSTMCell(input_dim if depth == 0 else hidden_dim,
                         hidden_dim,
                         weight_prior,
                         is_training,
                         forget_bias=0.0,
                         name="bbb_lstm_{}".format(depth))
        for depth in range(config.num_layers)
    ]
    stacked = MultiRNNCell(layers, state_is_tuple=True)

    self._initial_state = stacked.zero_state(config.batch_size, data_type())
    running_state = self._initial_state

    # Manually unrolled forward pass over the truncated mini-batch; the
    # variables created at step 0 are reused by every later step.
    step_outputs = []
    with tf.variable_scope("RNN"):
        for step in range(self.num_steps):
            if step != 0:
                tf.get_variable_scope().reuse_variables()
            step_out, running_state = stacked(batch_inputs[:, step, :],
                                              running_state)
            step_outputs.append(step_out)

    flat_hidden = tf.reshape(tf.concat(step_outputs, 1), [-1, hidden_dim])

    # Softmax layer with weights drawn from the variational posterior.
    softmax_w = sample_posterior((hidden_dim, n_classes), "softmax_w",
                                 weight_prior, is_training)
    softmax_b = sample_posterior((n_classes, 1), "softmax_b", weight_prior,
                                 is_training)

    flat_logits = tf.nn.xw_plus_b(flat_hidden, softmax_w,
                                  tf.squeeze(softmax_b))
    logits = tf.reshape(flat_logits,
                        [self.batch_size, self.num_steps, n_classes])
    self._output = tf.nn.softmax(logits)

    uniform_weights = tf.ones([self.batch_size, self.num_steps],
                              dtype=data_type())
    per_token_loss = tf.contrib.seq2seq.sequence_loss(
        logits,
        input_.targets,
        uniform_weights,
        average_across_timesteps=False,
        average_across_batch=False)

    # Data cost: summed over tokens, averaged over the batch.
    self._cost = tf.reduce_sum(per_token_loss) / self.batch_size
    self._kl_loss = 0.
    self._final_state = running_state

    # Evaluation graphs stop here: no KL term, no optimizer.
    if not is_training:
        return

    # KL divergence, scaled by 1 / (B * C) where
    #   B = number of batches, i.e. the epoch size
    #   C = number of truncated sequences in a batch, i.e. batch_size
    n_batches = self._input.epoch_size
    seqs_per_batch = self.batch_size
    kl_sum = tf.add_n(tf.get_collection("KL_layers"), "kl_divergence")
    kl_scale = 1.0 / (n_batches * seqs_per_batch)
    self._kl_loss = kl_scale * kl_sum

    self._total_loss = self._cost + self._kl_loss

    # Plain SGD on the clipped gradients; the learning rate is a
    # non-trainable variable updated through the placeholder below.
    self._lr = tf.Variable(0.0, trainable=False)
    trainable = tf.trainable_variables()
    clipped, _ = tf.clip_by_global_norm(
        tf.gradients(self._total_loss, trainable), config.max_grad_norm)
    sgd = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = sgd.apply_gradients(
        zip(clipped, trainable),
        global_step=tf.contrib.framework.get_or_create_global_step())

    self._new_lr = tf.placeholder(data_type(),
                                  shape=[],
                                  name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
def __init__(self, inp, inp_mask, decode_time_steps, ctr_flag, ctr_attention, hyper_params=None, name='Tacotron'):
    """
    Build the computational graph of the style-token Tacotron in inference mode.

    Every dropout / CBHG call in this constructor is built with
    ``training=False``.  The decoder is an explicit ``tf.while_loop`` that
    mixes a text attention context with a style-token attention context.

    :param inp: input tensor; its first two dimensions are read as batch size
        and number of input time steps.  Presumably integer character ids
        (it is fed to ``EmbeddingLayer``) -- TODO confirm.
    :param inp_mask: passed as ``sequence_length`` to the encoder CBHG, the
        ``post_text`` RNN and the text attention module; presumably the
        per-example input lengths despite the name -- TODO confirm.
    :param decode_time_steps: number of decoder loop iterations; each
        iteration emits ``reduction_rate`` output frames.
    :param ctr_flag: when equal to 1, the computed style attention is replaced
        by ``ctr_attention``.
    :param ctr_attention: style attention weights used when ``ctr_flag == 1``.
    :param hyper_params: hyper-parameter object; a default ``HyperParams()``
        is created when None.
    :param name: name passed to the base class and used as the outer
        variable scope.

    Attributes set: ``global_step``, ``single_style_token``, ``style_token``,
    ``seq2seq_output``, ``alpha_output``, ``alpha_output_style``,
    ``weight_ta``, ``weight_per_ta`` and ``post_output``.
    """
    super(Tacotron, self).__init__(name)
    self.hyper_params = HyperParams() if hyper_params is None else hyper_params
    with tf.variable_scope(name):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        batch_size = tf.shape(inp)[0]
        input_time_steps = tf.shape(inp)[1]
        reduc = self.hyper_params.reduction_rate
        output_time_steps = decode_time_steps * reduc

        ### Encoder [begin]
        with tf.variable_scope('character_embedding'):
            embed_inp = EmbeddingLayer(self.hyper_params.embed_class, self.hyper_params.embed_dim)(inp)
        with tf.variable_scope("changeToVarible"):
            # One learned bank of style tokens, shared by the whole batch.
            self.single_style_token = tf.get_variable('style_token', (1, self.hyper_params.styles_kind, self.hyper_params.style_dim), dtype=tf.float32)
            self.style_token = tf.tile(self.single_style_token, (batch_size, 1, 1))
        with tf.variable_scope('encoder_pre_net'):
            pre_ed_inp = tf.layers.dropout(tf.layers.dense(embed_inp, 256, tf.nn.relu), training=False)
            pre_ed_inp = tf.layers.dropout(tf.layers.dense(pre_ed_inp, 128, tf.nn.relu), training=False)
        encoder_output = modules.cbhg(pre_ed_inp, training=False, k=16, bank_filters=128,
                                      projection_filters=(128, 128), highway_layers=4, highway_units=128,
                                      bi_gru_units=128, sequence_length=inp_mask,
                                      name='encoder_cbhg', reuse=False)
        with tf.variable_scope('post_text'):
            # NOTE(review): `unkonwn_parallel_iterations` is not defined in this
            # method; it must exist at module level -- confirm (the spelling
            # suggests a typo).
            all_outputs, _ = tf.nn.dynamic_rnn(cell=GRUCell(256), inputs=encoder_output, sequence_length=inp_mask,
                                               dtype=encoder_output.dtype,
                                               parallel_iterations=unkonwn_parallel_iterations)
            # Go time-major and keep the output at the last time index as a
            # fixed-size summary of the text.
            all_outputs = tf.transpose(all_outputs, [1, 0, 2])
            static_encoder_output = all_outputs[-1]
        ### Encoder [end]

        ### Attention Module
        with tf.variable_scope('attention'):
            att_module = AttentionModule(256, encoder_output, sequence_length=inp_mask, time_major=False)
        with tf.variable_scope("attention_style"):
            att_module_style = AttentionModule(256, self.style_token, time_major=False)

        ### Decoder [begin]
        att_cell = GRUCell(256)
        dec_cell = MultiRNNCell([ResidualWrapper(GRUCell(256)) for _ in range(2)])
        # Initial loop state: zero RNN states, empty TensorArrays and zero contexts.
        with tf.variable_scope('prepare_decode'):
            # Number of loop iterations (equals decode_time_steps).
            reduced_time_steps = tf.div(output_time_steps, reduc)
            init_att_cell_state = att_cell.zero_state(batch_size, tf.float32)
            init_dec_cell_state = dec_cell.zero_state(batch_size, tf.float32)
            init_state_tup = tuple([init_att_cell_state, init_dec_cell_state])
            init_output_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
            init_alpha_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
            init_weight_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
            init_weight_per_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
            init_alpha_style_ta = tf.TensorArray(size=reduced_time_steps, dtype=tf.float32)
            # All-zero "go" frame fed to the decoder at the first step.
            go_array = tf.zeros([batch_size, self.hyper_params.seq2seq_dim], dtype=tf.float32)
            init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
            init_context_style = tf.zeros([batch_size, 256], dtype=tf.float32)
            init_time = tf.constant(0, dtype=tf.int32)

        # Loop while the step counter (first loop variable) is below the step count.
        cond = lambda x, *_: tf.less(x, reduced_time_steps)

        def body(this_time, old_output_ta, old_alpha_ta, old_alpha_style_ta, old_weight_ta, old_weight_per_ta,
                 old_state_tup, last_context, last_context_style, last_output):
            # One decoder step.  The order of the returned tuple must match the
            # order of the loop variables passed to tf.while_loop below.
            with tf.variable_scope('decoder_pre_net'):
                dec_pre_ed_inp = last_output
                dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(dec_pre_ed_inp, 256, tf.nn.relu), training=False)
                dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(dec_pre_ed_inp, 128, tf.nn.relu), training=False)
            with tf.variable_scope('attention_rnn'):
                # The attention RNN sees the previous text context and the pre-net output.
                att_cell_inp = tf.concat([last_context, dec_pre_ed_inp], axis=-1)
                att_cell_out, att_cell_state = att_cell(att_cell_inp, old_state_tup[0])
            with tf.variable_scope('attention'):
                # Text attention over the encoder output.
                query = att_cell_state[0]
                context, alpha = att_module(query)
                new_alpha_ta = old_alpha_ta.write(this_time, alpha)
            with tf.variable_scope("attention_style"):
                # Style attention over the style tokens, using the same query.
                query_style = att_cell_state[0]
                context_style, alpha_style = att_module_style(query_style)
                # Manual control: with ctr_flag == 1 the caller-supplied weights
                # replace the computed ones and the style context is recomputed
                # from them.
                alpha_style = tf.cond(tf.equal(ctr_flag, 1), lambda: ctr_attention, lambda: alpha_style)
                # tf.Print calls are debugging output emitted on every step.
                alpha_style = tf.Print(alpha_style, [alpha_style], message='alpha:', summarize=10)
                context_style = tf.cond(tf.equal(ctr_flag, 1),
                                        lambda: tf.reduce_sum(tf.expand_dims(alpha_style, axis=-1) * self.style_token, axis=1),
                                        lambda: context_style)
                context_style = tf.Print(context_style, [context_style], message='style:', summarize=10)
                new_alpha_style_ta = old_alpha_style_ta.write(this_time, alpha_style)
            with tf.variable_scope("weighting"):
                # Two mixing weights (text, style) predicted from the text summary
                # and the pre-net output; sigmoid followed by softmax.
                weight_input = tf.concat([static_encoder_output, dec_pre_ed_inp], axis=-1)
                weighting = tf.layers.dense(weight_input, 2, tf.nn.sigmoid)
                weighting = tf.nn.softmax(weighting)
                weight_text, weight_style = tf.split(weighting, [1, 1], -1)
                weight_style = tf.Print(weight_style, [weight_style], message='weight_style:')
                new_weight_ta = old_weight_ta.write(this_time, weight_text)
            with tf.variable_scope('decoder_rnn'):
                weighting_context = weight_text * context + weight_style * context_style
                # Mean share of the style term in the absolute magnitude of the mix.
                weight_per = tf.reduce_mean(tf.abs(weight_style * context_style) / (
                    tf.abs(weight_text * context) + tf.abs(weight_style * context_style)))
                new_weight_per_ta = old_weight_per_ta.write(this_time, weight_per)
                dec_input = tf.layers.dense(tf.concat([att_cell_out, weighting_context], axis=-1), 256)
                dec_cell_out, dec_cell_state = dec_cell(dec_input, old_state_tup[1])
                # One step predicts `reduc` frames at once.
                dense_out = tf.layers.dense(dec_cell_out, self.hyper_params.seq2seq_dim * reduc)
                new_output_ta = old_output_ta.write(this_time, dense_out)
                # Only the last predicted frame is fed back as the next input.
                new_output = dense_out[:, -self.hyper_params.seq2seq_dim:]
            new_state_tup = tuple([att_cell_state, dec_cell_state])
            return tf.add(this_time, 1), new_output_ta, new_alpha_ta, new_alpha_style_ta, new_weight_ta,\
                new_weight_per_ta, new_state_tup, context, context_style, new_output

        # run loop
        _, seq2seq_output_ta, alpha_ta, alpha_style_ta, weight_ta, weight_per_ta, *_ = tf.while_loop(cond, body, [init_time, init_output_ta, init_alpha_ta, init_alpha_style_ta, init_weight_ta, init_weight_per_ta, init_state_tup, init_context, init_context_style, go_array ])

        with tf.variable_scope('reshape_decode'):
            # Stacked TensorArrays are time-major; transpose to batch-major and
            # unfold the `reduc` frames of every step into the time axis.
            seq2seq_output = tf.reshape(seq2seq_output_ta.stack(), shape=(reduced_time_steps, batch_size, self.hyper_params.seq2seq_dim * reduc))
            seq2seq_output = tf.reshape(tf.transpose(seq2seq_output, perm=(1, 0, 2)), shape=(batch_size, output_time_steps, self.hyper_params.seq2seq_dim))
            self.seq2seq_output = seq2seq_output
            alpha_output = tf.reshape(alpha_ta.stack(), shape=(reduced_time_steps, batch_size, input_time_steps))
            alpha_output = tf.expand_dims(tf.transpose(alpha_output, perm=(1, 0, 2)), -1)
            self.alpha_output = alpha_output
            alpha_output_style = tf.reshape(alpha_style_ta.stack(), shape=(reduced_time_steps, batch_size, self.hyper_params.styles_kind))
            alpha_output_style = tf.expand_dims(tf.transpose(alpha_output_style, perm=(1, 0, 2)), -1)  # batch major
            self.alpha_output_style = alpha_output_style
            weight_ta = tf.reshape(weight_ta.stack(), shape=(reduced_time_steps, batch_size, 1))
            weight_ta = tf.transpose(weight_ta, perm=(1, 0, 2))
            self.weight_ta = weight_ta
            # One scalar per decoder step.
            weight_per_ta = tf.reshape(weight_per_ta.stack(), shape=(reduced_time_steps, 1))
            self.weight_per_ta = weight_per_ta
        ### Decoder [end]

        ### PostNet [begin]
        post_output = modules.cbhg(seq2seq_output, training=False, k=8, bank_filters=128,
                                   projection_filters=(256, self.hyper_params.seq2seq_dim),
                                   highway_layers=4, highway_units=128,
                                   bi_gru_units=128, sequence_length=None,
                                   name='decoder_cbhg', reuse=False)
        post_output = tf.layers.dense(post_output, self.hyper_params.post_dim, name='post_linear_transform')
        self.post_output = post_output
def add_model(self, inputs, type_layer):
    '''Construction of the recurrent encoder (LSTM or GRU cells).

    Builds ``self.config.nb_hidden_layers`` stacked cells of type
    ``self.config.type_cell``, optionally bidirectional, wraps them in
    dropout (``self.dropout_placeholder`` as output keep probability) and
    runs them over ``inputs`` with ``tf.nn.dynamic_rnn`` /
    ``tf.nn.bidirectional_dynamic_rnn``.

    Arguments:
        - inputs: input tensor fed to the RNN.
        - type_layer: should be 'Context' or 'Questions'. Selects the batch
          size and sequence-length placeholder; with 'Questions' the cells
          are created with ``reuse=True``.

    Returns:
        With ``config.output_type == "output"``: the RNN outputs gathered at
        time index ``config.len_questions - 1``.
        With ``config.output_type == "hs"``: the final hidden state (top
        layer only when several layers are stacked, ``.h`` only for LSTM),
        as a (forward, backward) pair when ``config.hidden_bidirectional``.

    Raises:
        NotImplementedError: ``config.type_cell`` is neither "LSTM" nor "GRU".
        ValueError: ``type_layer`` or ``config.output_type`` has an
            unsupported value.
    '''
    cfg = self.config
    with tf.variable_scope(
            'Hidden-Layers',
            initializer=tf.contrib.layers.xavier_initializer()):
        reuse = type_layer == "Questions"
        initializer = tf.random_uniform_initializer(-1, 1)

        if cfg.type_cell == "LSTM":

            def make_cell():
                """Create one LSTM layer."""
                return LSTMCell(cfg.hidden_size,
                                initializer=initializer,
                                reuse=reuse)
        elif cfg.type_cell == "GRU":

            def make_cell():
                """Create one GRU layer."""
                return GRUCell(cfg.hidden_size,
                               kernel_initializer=initializer,
                               reuse=reuse)
        else:
            raise NotImplementedError

        def make_stack():
            """Create the (possibly multi-layer) cell for one direction."""
            if cfg.nb_hidden_layers > 1:
                return MultiRNNCell(
                    [make_cell() for _ in range(cfg.nb_hidden_layers)])
            return make_cell()

        cell_fw = make_stack()
        if cfg.bidirectional:
            cell_bw = make_stack()

        if type_layer == "Context":
            batch_size = cfg.len_context
            sequence_length = self.context_len_placeholder
        elif type_layer == "Questions":
            batch_size = cfg.len_questions
            sequence_length = self.questions_len_placeholder
        else:
            # Fail here with a clear message instead of an UnboundLocalError
            # further down.
            raise ValueError(
                "type_layer should be 'Context' or 'Questions', got %r" %
                (type_layer, ))

        cell_fw = DropoutWrapper(cell_fw,
                                 output_keep_prob=self.dropout_placeholder)
        initial_state_fw = cell_fw.zero_state(batch_size, tf.float32)

        if cfg.bidirectional:
            cell_bw = DropoutWrapper(
                cell_bw, output_keep_prob=self.dropout_placeholder)
            initial_state_bw = cell_bw.zero_state(batch_size, tf.float32)
            outputs, hidden_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                inputs,
                initial_state_fw=initial_state_fw,
                initial_state_bw=initial_state_bw,
                sequence_length=sequence_length)
        else:
            outputs, hidden_states = tf.nn.dynamic_rnn(
                cell_fw,
                inputs,
                initial_state=initial_state_fw,
                sequence_length=sequence_length)

        if cfg.output_type == "output":
            # Go time-major, then pick a single time step.
            output = tf.transpose(outputs, [1, 0, 2])
            output = tf.gather(output, cfg.len_questions - 1)
        elif cfg.output_type == "hs":
            # NOTE(review): this branch is keyed on `hidden_bidirectional`
            # while the cells above are keyed on `bidirectional` -- confirm
            # the two settings are meant to be distinct.
            if cfg.hidden_bidirectional:
                output = (hidden_states[0], hidden_states[1])
                if cfg.nb_hidden_layers > 1:
                    # Keep only the top layer of each direction.
                    output = (output[0][-1], output[1][-1])
                if cfg.type_cell == "LSTM":
                    # LSTM states are (c, h) tuples; keep the hidden part.
                    output = (output[0].h, output[1].h)
            else:
                output = hidden_states
                if cfg.nb_hidden_layers > 1:
                    output = output[-1]
                if cfg.type_cell == "LSTM":
                    output = output.h
        else:
            raise ValueError(
                "config.output_type should be 'output' or 'hs', got %r" %
                (cfg.output_type, ))

    return output
def __init__(self, data_size, time_len, unit_size, num_layers, batch_size, learning_rate, feed_previous):
    '''
    Build the graph of a basic encoder-decoder seq2seq model whose training
    target is its own input sequence in reversed order.

    :param data_size: width of each per-step input vector (kept as ``input_size``)
    :param time_len: number of time steps; one encoder and one decoder
        placeholder is created per step
    :param unit_size: number of units in each LSTM layer of the model
    :param num_layers: number of LSTM layers in the model
    :param batch_size: the size of batches used during training
    :param learning_rate: initial value of the non-trainable ``lr`` variable
        handed to the Adam optimizer
    :param feed_previous: forwarded unchanged to ``advanced_rnn_seq2seq``
    '''
    self.input_size = data_size
    self.time_len = time_len
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate),
                                     trainable=False,
                                     name='lr')
    self.global_step = tf.Variable(0, trainable=False, name='global_step')

    def new_lstm():
        return BasicLSTMCell(unit_size)

    # One cell is always built first; a multi-layer stack then replaces it.
    cell = new_lstm()
    if num_layers > 1:
        stack = [new_lstm() for _ in range(num_layers)]
        cell = MultiRNNCell(stack)

    print('state size', cell.state_size)
    print('zero state size',
          cell.zero_state(self.batch_size, dtype=tf.float32))

    def step_placeholder(prefix, step):
        # One [batch_size, input_size] float placeholder named '<prefix><step>'.
        return tf.placeholder(shape=[self.batch_size, self.input_size],
                              name='{}{}'.format(prefix, step),
                              dtype=tf.float32)

    # Per-step placeholders for the encoder and decoder inputs.
    self.encoder_inputs = []
    self.decoder_inputs = []
    for step in range(self.time_len):
        self.encoder_inputs.append(step_placeholder('encoder', step))
        self.decoder_inputs.append(step_placeholder('decoder', step))

    # Reconstruction objective: the targets are the inputs in reverse order.
    reversed_inputs = list(reversed(self.encoder_inputs))

    decoded, _ = advanced_rnn_seq2seq(
        encoder_inputs=self.encoder_inputs,
        decoder_inputs=self.decoder_inputs,
        cell=cell,
        num_decoder_symbols=self.input_size,
        output_projection=None,
        feed_previous=feed_previous
    )

    # Stack the per-step lists along axis 1 before comparing them.
    targets = tf.stack(reversed_inputs, axis=1)
    self.outputs = tf.stack(decoded, axis=1)
    self.loss = tf.losses.mean_squared_error(targets, self.outputs)
    self.error_vector = tf.abs(self.outputs - targets)

    # Training op: Adam on the MSE loss, advancing global_step.
    adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    self.train_op = adam.minimize(self.loss, global_step=self.global_step)

    # Saver covering every global variable of the model.
    self.saver = tf.train.Saver(tf.global_variables())
# Decoder inputs: a zero tensor named "GO" (shaped like the first encoder
# input) followed by every label except the last one.
with tf.name_scope('decode_input'):
    decode_input = [tf.zeros_like(encode_input[0], dtype=int_type, name="GO")] + labels[:-1]

with tf.name_scope('dropout'):
    keep_prob = tf.placeholder("float", name='keep_prob')

# In[5]:

# One dropout-wrapped LSTM cell per layer, stacked into a single cell.
# NOTE(review): the wrappers read `keep_prob_val`, not the `keep_prob`
# placeholder created just above - confirm which one is meant to drive dropout.
cells = [
    DropoutWrapper(
        BasicLSTMCell(num_hidden), output_keep_prob=keep_prob_val
    ) for i in range(num_layers)
]
stacked_lstm = MultiRNNCell(cells)

# Build the seq2seq graph twice inside the same variable scope: the second
# call comes after scope.reuse_variables(), so it shares the first call's
# variables, and it passes feed_previous=True (presumably the test-time
# decoder, judging by the `_test` suffix).
with tf.variable_scope("decoders") as scope:
    decode_outputs, decode_state = seq2seq.embedding_attention_seq2seq(encode_input, decode_input, stacked_lstm, vocab_size, vocab_size, num_hidden, dtype=float_type)
    scope.reuse_variables()
    decode_outputs_test, decode_state_test = seq2seq.embedding_attention_seq2seq(encode_input, decode_input, stacked_lstm, vocab_size, vocab_size, num_hidden, dtype=float_type, feed_previous=True)

# In[6]:

# Sequence loss over the first set of decoder outputs, with a weight of one
# for every label position.
with tf.name_scope('loss'):
    loss_weights = [tf.ones_like(l, dtype=float_type) for l in labels]
    loss = seq2seq.sequence_loss(decode_outputs, labels, loss_weights, vocab_size)
def initialize(self, inputs, input_lengths, mel_targets=None, linear_targets=None):
    '''Builds the inference graph and records its tensors on the instance.

    Sets the "mel_outputs", "linear_outputs", and "alignments" fields, and
    also stores the arguments as "inputs", "input_lengths", "mel_targets" and
    "linear_targets". Decoding always goes through TacoTestHelper, whether or
    not targets are supplied; the targets only decide the `is_training` flag
    passed to the prenet and CBHG modules.

    Args:
      inputs: int32 Tensor with shape [N, T_in] where N is batch size, T_in is
        number of steps in the input time series, and values are character IDs
      input_lengths: int32 Tensor with shape [N] where N is batch size and
        values are the lengths of each sequence in inputs.
      mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch
        size, T_out is number of steps in the output time series, M is
        num_mels, and values are entries in the mel spectrogram. Only needed
        for training.
      linear_targets: float32 Tensor with shape [N, T_out, F] where N is
        batch_size, T_out is number of steps in the output time series, F is
        num_freq, and values are entries in the linear spectrogram. Only
        needed for training.
    '''
    with tf.variable_scope('inference') as scope:
        hp = self._hparams
        is_training = linear_targets is not None
        batch_size = tf.shape(inputs)[0]

        # Character embeddings. The table size is hard-coded to the length
        # the symbol list previously had.
        num_symbols = 149
        embedding_table = tf.get_variable(
            'embedding', [num_symbols, hp.embed_depth],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.5))
        embedded = tf.nn.embedding_lookup(
            embedding_table, inputs)  # [N, T_in, embed_depth=256]

        # Encoder: prenet followed by the CBHG module.
        prenet_out = prenet(
            embedded, is_training,
            hp.prenet_depths)  # [N, T_in, prenet_depths[-1]=128]
        encoder_outputs = encoder_cbhg(
            prenet_out, input_lengths, is_training,
            hp.encoder_depth)  # [N, T_in, encoder_depth=256]

        # Attention RNN over the encoder outputs.
        attn_rnn = GRUCell(hp.attention_depth)
        attn_mechanism = BahdanauAttention(hp.attention_depth,
                                           encoder_outputs)
        attn_cell = AttentionWrapper(
            attn_rnn,
            attn_mechanism,
            alignment_history=True,
            output_attention=False)  # [N, T_in, attention_depth=256]

        # The prenet is applied in front of the attention wrapper.
        prenet_attn_cell = DecoderPrenetWrapper(attn_cell, is_training,
                                                hp.prenet_depths)

        # Attention context vector concatenated with the RNN cell output.
        concat_cell = ConcatOutputAndAttentionWrapper(
            prenet_attn_cell)  # [N, T_in, 2*attention_depth=512]

        # Decoder stack, bottom to top: a projection of the concat cell, then
        # two residual GRU layers.
        decoder_layers = [
            OutputProjectionWrapper(concat_cell, hp.decoder_depth)
        ]
        decoder_layers += [
            ResidualWrapper(GRUCell(hp.decoder_depth)) for _ in range(2)
        ]
        decoder_cell = MultiRNNCell(
            decoder_layers,
            state_is_tuple=True)  # [N, T_in, decoder_depth=256]

        # Each RNN step emits outputs_per_step mel frames at once.
        output_cell = OutputProjectionWrapper(
            decoder_cell, hp.num_mels * hp.outputs_per_step)
        decoder_init_state = output_cell.zero_state(batch_size=batch_size,
                                                    dtype=tf.float32)

        helper = TacoTestHelper(batch_size, hp.num_mels,
                                hp.outputs_per_step)
        decoder = BasicDecoder(output_cell, helper, decoder_init_state)
        decode_result = tf.contrib.seq2seq.dynamic_decode(
            decoder, maximum_iterations=hp.max_iters)  # [N, T_out/r, M*r]
        (decoder_outputs, _), final_decoder_state, _ = decode_result

        # Unfold the grouped frames so there is one output per entry.
        mel_outputs = tf.reshape(
            decoder_outputs,
            [batch_size, -1, hp.num_mels])  # [N, T_out, M]

        # Post-processing CBHG, then a dense layer to linear frequencies.
        post_outputs = post_cbhg(
            mel_outputs, hp.num_mels, is_training,
            hp.postnet_depth)  # [N, T_out, postnet_depth=256]
        linear_outputs = tf.layers.dense(post_outputs,
                                         hp.num_freq)  # [N, T_out, F]

        # Alignment history is read from the first entry of the final state.
        alignment_stack = final_decoder_state[0].alignment_history.stack()
        alignments = tf.transpose(alignment_stack, [1, 2, 0])

        self.inputs = inputs
        self.input_lengths = input_lengths
        self.mel_outputs = mel_outputs
        self.linear_outputs = linear_outputs
        self.alignments = alignments
        self.mel_targets = mel_targets
        self.linear_targets = linear_targets
def initialize(self, inputs, input_lengths, mel_targets=None, linear_targets=None):
    '''Builds the model graph, records its tensors on the instance and logs dimensions.

    Sets the "mel_outputs", "linear_outputs", and "alignments" fields, and
    also stores the arguments as "inputs", "input_lengths", "mel_targets" and
    "linear_targets". The graph is in training mode (TacoTrainingHelper) when
    `linear_targets` is given and in test mode (TacoTestHelper) otherwise.

    Args:
      inputs: Tensor passed straight to the prenet, with no embedding lookup.
        Presumably float32 source-spectrogram frames of shape
        [N, T_in, num_src_freq] - TODO confirm; the previous docstring
        described int32 character IDs of shape [N, T_in].
      input_lengths: int32 Tensor with shape [N] where N is batch size and
        values are the lengths of each sequence in inputs.
      mel_targets: float32 Tensor with shape [N, T_out, M] where N is batch
        size, T_out is number of steps in the output time series, M is
        num_mels, and values are entries in the mel spectrogram. Only needed
        for training.
      linear_targets: float32 Tensor with shape [N, T_out, F] where N is
        batch_size, T_out is number of steps in the output time series, F is
        num_freq, and values are entries in the linear spectrogram. Only
        needed for training.
    '''
    with tf.variable_scope('inference') as scope:
        hp = self._hparams
        is_training = linear_targets is not None
        batch_size = tf.shape(inputs)[0]

        # Encoder. There is no embedding step in this variant: the first
        # prenet layer is sized from hparams.num_src_freq.
        prenet_sizes = [hp.num_src_freq, 128]
        prenet_out = prenet(inputs, is_training,
                            layer_sizes=prenet_sizes)  # [N, T_in, 128]
        enc_out = encoder_cbhg(prenet_out, input_lengths,
                               is_training)  # [N, T_in, 256]

        # Attention RNN; the decoder prenet wraps the GRU inside it.
        attn_rnn = DecoderPrenetWrapper(GRUCell(256), is_training)
        attn_mechanism = BahdanauAttention(256, enc_out)
        attn_cell = AttentionWrapper(
            attn_rnn,
            attn_mechanism,
            alignment_history=True,
            output_attention=False)  # [N, T_in, 256]

        # Attention context vector concatenated with the RNN cell output.
        concat_cell = ConcatOutputAndAttentionWrapper(
            attn_cell)  # [N, T_in, 512]

        # Decoder stack, bottom to top: a projection of the concat cell, then
        # two residual GRU layers.
        decoder_layers = [OutputProjectionWrapper(concat_cell, 256)]
        decoder_layers += [ResidualWrapper(GRUCell(256)) for _ in range(2)]
        decoder_cell = MultiRNNCell(decoder_layers,
                                    state_is_tuple=True)  # [N, T_in, 256]

        # Each RNN step emits outputs_per_step mel frames at once.
        output_cell = OutputProjectionWrapper(
            decoder_cell, hp.num_mels * hp.outputs_per_step)
        decoder_init_state = output_cell.zero_state(batch_size=batch_size,
                                                    dtype=tf.float32)

        helper = (
            TacoTrainingHelper(inputs, mel_targets, hp.num_mels,
                               hp.outputs_per_step)
            if is_training else
            TacoTestHelper(batch_size, hp.num_mels, hp.outputs_per_step)
        )

        decoder = BasicDecoder(output_cell, helper, decoder_init_state)
        decode_result = tf.contrib.seq2seq.dynamic_decode(
            decoder, maximum_iterations=hp.max_iters)  # [N, T_out/r, M*r]
        (dec_out, _), final_decoder_state, _ = decode_result

        # Unfold the grouped frames so there is one output per entry.
        mel_out = tf.reshape(
            dec_out, [batch_size, -1, hp.num_mels])  # [N, T_out, M]

        # Post-processing CBHG, then a dense layer to linear frequencies.
        post_out = post_cbhg(mel_out, hp.num_mels,
                             is_training)  # [N, T_out, 256]
        linear_out = tf.layers.dense(post_out, hp.num_freq)  # [N, T_out, F]

        # Alignment history is read from the first entry of the final state.
        alignment_stack = final_decoder_state[0].alignment_history.stack()
        alignments = tf.transpose(alignment_stack, [1, 2, 0])

        self.inputs = inputs
        self.input_lengths = input_lengths
        self.mel_outputs = mel_out
        self.linear_outputs = linear_out
        self.alignments = alignments
        self.mel_targets = mel_targets
        self.linear_targets = linear_targets

        # Report the size of every stage, one log line per entry.
        log('Initialized Tacotron model. Dimensions: ')
        dimension_report = (
            (' input: %d', (inputs.shape[-1],)),
            (' prenet out: %d', (prenet_out.shape[-1],)),
            (' encoder out: %d', (enc_out.shape[-1],)),
            (' attention out: %d', (attn_cell.output_size,)),
            (' concat attn & out: %d', (concat_cell.output_size,)),
            (' decoder cell out: %d', (decoder_cell.output_size,)),
            (' decoder out (%d frames): %d',
             (hp.outputs_per_step, dec_out.shape[-1])),
            (' decoder out (1 frame): %d', (mel_out.shape[-1],)),
            (' postnet out: %d', (post_out.shape[-1],)),
            (' linear out: %d', (linear_out.shape[-1],)),
        )
        for template, values in dimension_report:
            log(template % values)
return DropoutWrapper(make_rnn_cell(), input_keep_prob=keep_prob) X = tf.placeholder(tf.float32, [None, n_steps, n_inputs]) y = tf.placeholder(tf.float32, [None, n_steps, n_outputs]) # 现在在每个时间迭代,有一个大小为100的输出向量,但是实际上我们需要一个单独的输出值。 # 最简单的解决方案是将单元格包装在OutputProjectionWrapper中。 # cell = OutputProjectionWrapper(BasicRNNCell(num_units=n_neurous, activation=tf.nn.relu), output_size=n_outputs) # 用技巧提高速度 layers = [make_rnn_cell() for _ in range(n_layers)] if is_training: layers = [make_drop_cell() for _ in range(n_layers)] multi_layer_cell = MultiRNNCell(layers) rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32) stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurous]) stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None) outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) loss = tf.reduce_mean(tf.square(outputs - y)) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) training_op = optimizer.minimize(loss) init = tf.global_variables_initializer() X_data = np.linspace(0, 15, 101) ''' # 应用丢弃机制
def _LSTMCells(unit_list, act_fn_list):
    """Stack one LSTMCell per (unit count, activation) pair into a MultiRNNCell.

    The two lists are paired positionally with ``zip``, so surplus entries in
    the longer list are ignored.
    """
    layers = []
    for num_units, activation in zip(unit_list, act_fn_list):
        layers.append(LSTMCell(num_units, activation=activation))
    return MultiRNNCell(layers)