def zglobal_encoder(label_input, zsent_sample, seq_len, batch_size):
    """
    Pre-stochastic layer encoder for z1 (latent segment variable).

    Args:
        label_input (tf.Tensor): label sequence of shape (bs, T, F)
        zsent_sample (tf.Tensor): sentence-level latent sample of shape (bs, D)
        seq_len (tf.Tensor): sequence lengths of shape (bs,)
        batch_size: batch size used to build the initial RNN state
    Returns:
        final_state (tf.Tensor): concatenation of the final hidden states of the RNN
    """
    # Tile the sentence-level latent over the time axis and concatenate it
    # with the label inputs to form the encoder input.
    bs, T = tf.shape(label_input)[0], tf.shape(label_input)[1]
    zsent_sample = tf.tile(tf.expand_dims(zsent_sample, 1), (1, T, 1))
    encoder_input = tf.concat([label_input, zsent_sample], axis=-1)

    if params.base_cell == 'lstm':
        base_cell = tf.contrib.rnn.LSTMCell
    elif params.base_cell == 'rnn':
        base_cell = tf.contrib.rnn.RNNCell
    else:
        base_cell = tf.contrib.rnn.GRUCell

    cell = model.make_rnn_cell(
        [params.encoder_hidden for _ in range(params.decoder_rnn_layers)],
        base_cell=base_cell)
    initial = cell.zero_state(batch_size, dtype=tf.float64)

    if params.keep_rate < 1:
        encoder_input = tf.nn.dropout(encoder_input, params.keep_rate)

    outputs, final_state = tf.nn.dynamic_rnn(cell,
                                             inputs=encoder_input,
                                             sequence_length=seq_len,
                                             initial_state=initial,
                                             swap_memory=True,
                                             dtype=tf.float64,
                                             scope="zglobal_encoder_rnn")
    final_state = tf.concat(final_state[0], 1)
    return final_state
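# A minimal NumPy sketch (not part of the model) of the shape manipulation
# zglobal_encoder performs before the RNN: the per-sentence latent is tiled
# over the time axis and concatenated with the label sequence. All sizes
# below are hypothetical placeholders, not values from params.
def _zglobal_encoder_input_demo():
    import numpy as np
    bs, T, F, D = 2, 5, 4, 3                       # batch, time, label dim, latent dim
    label_input = np.zeros((bs, T, F))
    zsent_sample = np.ones((bs, D))
    # mirrors tf.tile(tf.expand_dims(zsent_sample, 1), (1, T, 1))
    z_tiled = np.tile(zsent_sample[:, None, :], (1, T, 1))
    encoder_input = np.concatenate([label_input, z_tiled], axis=-1)
    assert z_tiled.shape == (bs, T, D)
    assert encoder_input.shape == (bs, T, F + D)
    return encoder_input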
def q_net(x, seq_len, batch_size=params.batch_size):
    with zs.BayesianNet() as encoder:
        # construct the recurrent cell
        # cell = tf.nn.rnn_cell.BasicLSTMCell(params.cell_hidden_size)
        # cells = tf.nn.rnn_cell.MultiRNNCell([cell] * params.rnn_layers)
        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=params.base_cell)
        initial = cell.zero_state(batch_size, dtype=tf.float32)
        if params.keep_rate < 1:
            x = tf.nn.dropout(x, params.keep_rate)
        s_l = tf.shape(x)[1]
        # Highway network [Semeniuta et al.] applied to the input embeddings
        for i in range(params.highway_lc):
            with tf.variable_scope("hw_layer_enc{0}".format(i)) as scope:
                if i == 0:  # first (input) layer
                    x = tf.reshape(x, [-1, params.embed_size])
                    prev_y = tf.layers.dense(x, params.highway_ls, tf.nn.relu)
                elif i == params.highway_lc - 1:  # last (output) layer
                    encoder_input = tf.layers.dense(prev_y, params.embed_size)
                    encoder_input = tf.reshape(
                        encoder_input,
                        [params.batch_size, s_l, params.embed_size])
                else:  # hidden layers
                    prev_y = model.highway_network(prev_y, params.highway_ls)
        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=encoder_input,
                                                 sequence_length=seq_len,
                                                 initial_state=initial,
                                                 swap_memory=True,
                                                 dtype=tf.float32)
        final_state = tf.concat(final_state[0], 1)
        lz_mean = tf.layers.dense(inputs=final_state,
                                  units=params.latent_size,
                                  activation=None)
        lz_logstd = tf.layers.dense(inputs=final_state,
                                    units=params.latent_size,
                                    activation=None)
        # define the latent variable's StochasticTensor
        z = zs.Normal('z', lz_mean, lz_logstd, group_event_ndims=1)
        tf.summary.histogram('latent_space', z)
        return z
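# model.highway_network is defined elsewhere in the repo; as a rough reference,
# the sketch below shows the standard highway-layer formula the comment above
# alludes to, y = t * H(x) + (1 - t) * x, in plain NumPy. The weights, widths
# and gate bias here are hypothetical, not the repo's implementation.
def _highway_layer_demo():
    import numpy as np
    rng = np.random.RandomState(0)
    d = 4                                          # hypothetical layer width
    x = rng.randn(2, d)
    W_h, b_h = rng.randn(d, d), np.zeros(d)        # candidate transform H(x)
    W_t, b_t = rng.randn(d, d), np.full(d, -1.0)   # transform gate (negative bias)
    h = np.maximum(0.0, x @ W_h + b_h)             # ReLU candidate
    t = 1.0 / (1.0 + np.exp(-(x @ W_t + b_t)))     # sigmoid gate
    y = t * h + (1.0 - t) * x
    assert y.shape == x.shape
    return y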
def q_net(encoder_input, seq_len, batch_size):
    with zs.BayesianNet() as encoder:
        # construct the recurrent cell
        # cell = tf.nn.rnn_cell.BasicLSTMCell(params.cell_hidden_size)
        # cells = tf.nn.rnn_cell.MultiRNNCell([cell] * params.rnn_layers)
        if params.base_cell == 'lstm':
            base_cell = tf.contrib.rnn.LSTMCell
        else:
            base_cell = tf.contrib.rnn.GRUCell
        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=base_cell)
        initial = cell.zero_state(batch_size, dtype=tf.float32)
        if params.keep_rate < 1:
            encoder_input = tf.nn.dropout(encoder_input, params.keep_rate)
        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=encoder_input,
                                                 sequence_length=seq_len,
                                                 initial_state=initial,
                                                 swap_memory=True,
                                                 dtype=tf.float32)
        final_state = tf.concat(final_state[0], 1)
        if params.encode == 'hw':
            # Highway network [Semeniuta et al.] on top of the final RNN state
            for i in range(params.highway_lc):
                with tf.variable_scope("hw_layer_enc{0}".format(i)) as scope:
                    if i == 0:  # first (input) layer
                        prev_y = tf.layers.dense(final_state, params.highway_ls)
                    elif i == params.highway_lc - 1:  # last (output) layer
                        final_state = tf.layers.dense(prev_y,
                                                      params.latent_size * 2)
                    else:  # hidden layers
                        prev_y = model.highway_network(prev_y, params.highway_ls)
            lz_mean, lz_logstd = tf.split(final_state, 2, axis=1)
        elif params.encode == 'mlp':
            lz_mean = tf.layers.dense(inputs=final_state,
                                      units=params.latent_size)
            lz_logstd = tf.layers.dense(inputs=final_state,
                                        units=params.latent_size)
        # define the latent variable's StochasticTensor
        z = zs.Normal('z', lz_mean, lz_logstd, group_event_ndims=1)
        tf.summary.histogram('latent_space', z)
        return z
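# In the 'hw' branch above, the final highway layer outputs latent_size * 2
# units that are split into a mean and a log-std; zs.Normal then handles the
# reparameterized sampling. The NumPy sketch below reproduces that split and
# the corresponding draw, z = mean + exp(logstd) * eps, with hypothetical sizes.
def _latent_split_and_sample_demo():
    import numpy as np
    rng = np.random.RandomState(0)
    batch_size, latent_size = 2, 8                 # hypothetical sizes
    final_state = rng.randn(batch_size, latent_size * 2)
    lz_mean, lz_logstd = np.split(final_state, 2, axis=1)   # tf.split(..., 2, axis=1)
    eps = rng.randn(batch_size, latent_size)
    z = lz_mean + np.exp(lz_logstd) * eps
    assert z.shape == (batch_size, latent_size)
    return z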
def zsent_encoder(encoder_input, seq_len, batch_size):
    """
    Pre-stochastic layer encoder for z2 (latent sequence variable).

    Args:
        encoder_input (tf.Tensor): input sequence of shape (bs, T, F)
        seq_len (tf.Tensor): sequence lengths of shape (bs,)
        batch_size: batch size used to build the initial RNN state
    Returns:
        final_state (tf.Tensor): concatenation of the final hidden states of the RNN
    """
    # construct the recurrent cell
    # cell = tf.nn.rnn_cell.BasicLSTMCell(params.cell_hidden_size)
    # cells = tf.nn.rnn_cell.MultiRNNCell([cell] * params.rnn_layers)
    if params.base_cell == 'lstm':
        base_cell = tf.contrib.rnn.LSTMCell
    elif params.base_cell == 'rnn':
        base_cell = tf.contrib.rnn.RNNCell
    else:
        base_cell = tf.contrib.rnn.GRUCell

    cell = model.make_rnn_cell(
        [params.encoder_hidden for _ in range(params.decoder_rnn_layers)],
        base_cell=base_cell)
    initial = cell.zero_state(batch_size, dtype=tf.float64)

    if params.keep_rate < 1:
        encoder_input = tf.nn.dropout(encoder_input, params.keep_rate)

    outputs, final_state = tf.nn.dynamic_rnn(cell,
                                             inputs=encoder_input,
                                             sequence_length=seq_len,
                                             initial_state=initial,
                                             swap_memory=True,
                                             dtype=tf.float64,
                                             scope="zsent_encoder_rnn")
    final_state = tf.concat(final_state[0], 1)
    return final_state
    './PTB_DATA/data')
data, labels_arr, _, data_dict = data_.prepare_data(train_data_raw, params_c)
with tf.Graph().as_default() as graph:
    inputs = tf.placeholder(shape=[None, None], dtype=tf.int32)
    with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [data_dict.vocab_size, params['embed_size']],
            dtype=tf.float32)
        vect_inputs = tf.nn.embedding_lookup(embedding, inputs)
    # inputs = tf.unstack(inputs, num=num_steps, axis=1)
    keep_rate = tf.placeholder(tf.float32)
    if params['mode_train'] and params['keep_rate'] < 1:
        vect_inputs = tf.nn.dropout(vect_inputs, keep_rate)
    labels = tf.placeholder(shape=[None, None], dtype=tf.int64)
    cell = model.make_rnn_cell([params['num_hidden']] * params['num_layers'],
                               base_cell=tf.contrib.rnn.GRUCell)
    initial_state = tf.placeholder_with_default(
        input=cell.zero_state(tf.shape(vect_inputs)[0], dtype=tf.float32),
        shape=[None, None, params['num_hidden']])
    zs = cell.zero_state(params['batch_size'], dtype=tf.float32)
    length = tf.placeholder(shape=[None], dtype=tf.float32)
    ins = tf.reshape(initial_state, [-1, params['num_hidden']])
    # TODO: find a way to initialize a 2-layer network
    outputs, final_state = tf.nn.dynamic_rnn(cell,
                                             inputs=vect_inputs,
                                             sequence_length=length,
                                             initial_state=(ins, ) * params['num_layers'],
                                             swap_memory=False,
                                             dtype=tf.float32)
    fc_layer = tf.layers.dense(inputs=outputs,
                               units=data_dict.vocab_size,
                               activation=None)
    prnt = tf.Print(fc_layer, [tf.shape(final_state), tf.shape(zs)])
    # define optimization with learning-rate decay; the decay can be used with the SGD optimizer
    global_step = tf.Variable(0, trainable=False)
    # learning_rate = tf.train.exponential_decay(params['learning_rate'], global_step, 500, 0.96)
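# The commented-out call above corresponds to an exponential learning-rate
# schedule, lr * decay_rate ** (step / decay_steps). The small helper below
# evaluates that schedule in plain Python for illustration; the base rate is
# a hypothetical stand-in for params['learning_rate'].
def _exponential_decay_demo(step, base_lr=1e-3, decay_steps=500,
                            decay_rate=0.96, staircase=False):
    exponent = step // decay_steps if staircase else step / decay_steps
    return base_lr * decay_rate ** exponent
# _exponential_decay_demo(0) -> 0.001, _exponential_decay_demo(500) -> 0.00096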
def vae_lstm(observed, batch_size, d_seq_l, embed, d_inputs, vocab_size,
             gen_mode=False):
    with zs.BayesianNet(observed=observed) as decoder:
        # prior over the latent variable
        z_mean = tf.zeros([batch_size, params.latent_size])
        z = zs.Normal('z', mean=z_mean, std=0.1, group_event_ndims=0)
        tf.summary.histogram('z|x', z)
        # z: [batch_size, latent_size] -> [batch_size, seq_len, latent_size]
        with tf.device("/cpu:0"):
            dec_inps = tf.nn.embedding_lookup(embed, d_inputs)
        # turn off dropout for generation:
        if params.dec_keep_rate < 1 and not gen_mode:
            dec_inps = tf.nn.dropout(dec_inps, params.dec_keep_rate)
        max_sl = tf.shape(dec_inps)[1]
        # define cell
        if params.base_cell == 'lstm':
            base_cell = tf.contrib.rnn.LSTMCell
        else:
            # not working for now
            base_cell = tf.contrib.rnn.GRUCell
        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=base_cell)
        if params.decode == 'hw':
            # Highway network [Semeniuta et al.]: z -> decoder initial state
            for i in range(params.highway_lc):
                with tf.variable_scope("hw_layer_dec{0}".format(i)) as scope:
                    if i == 0:  # first (input) layer
                        prev_y = tf.layers.dense(z, params.decoder_hidden * 2)
                    elif i == params.highway_lc - 1:  # last (output) layer
                        z_dec = tf.layers.dense(prev_y, params.decoder_hidden * 2)
                    else:  # hidden layers
                        prev_y = model.highway_network(prev_y, params.highway_ls)
            inp_h, inp_c = tf.split(z_dec, 2, axis=1)
            initial_state = rnn_placeholders(
                (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        elif params.decode == 'concat':
            # tile z over time and concatenate it with the decoder inputs
            z_out = tf.reshape(
                tf.tile(tf.expand_dims(z, 1), (1, max_sl, 1)),
                [batch_size, -1, params.latent_size])
            dec_inps = tf.concat([dec_inps, z_out], 2)
            initial_state = rnn_placeholders(
                cell.zero_state(tf.shape(dec_inps)[0], tf.float32))
        elif params.decode == 'mlp':
            # z -> decoder initial state
            w1 = tf.get_variable('whl', [params.latent_size, params.highway_ls],
                                 tf.float32,
                                 initializer=tf.truncated_normal_initializer())
            b1 = tf.get_variable('bhl', [params.highway_ls], tf.float32,
                                 initializer=tf.ones_initializer())
            z_dec = tf.matmul(z, w1) + b1
            inp_h, inp_c = tf.split(
                tf.layers.dense(z_dec, params.decoder_hidden * 2), 2, axis=1)
            initial_state = rnn_placeholders(
                (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=dec_inps,
                                                 sequence_length=d_seq_l,
                                                 initial_state=initial_state,
                                                 swap_memory=True,
                                                 dtype=tf.float32)
        # define decoder output network
        if gen_mode:
            # only interested in the last output
            outputs = outputs[:, -1, :]
        outputs_r = tf.reshape(outputs, [-1, params.decoder_hidden])
        x_logits = tf.layers.dense(outputs_r, units=vocab_size, activation=None)
        if params.beam_search:
            sample = tf.nn.softmax(x_logits)
        else:
            sample = tf.multinomial(x_logits / params.temperature, 1)[0][0]
    return x_logits, (initial_state, final_state), sample
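# In generation mode the next word is drawn from tf.multinomial over
# x_logits / params.temperature. The sketch below shows, in NumPy, the
# distribution that scaling by a temperature induces; the logits and
# temperatures are hypothetical.
def _temperature_softmax_demo():
    import numpy as np
    logits = np.array([2.0, 1.0, 0.5, 0.1])        # hypothetical 4-word vocabulary
    def dist(temperature):
        scaled = logits / temperature
        scaled = scaled - scaled.max()              # numerical stability
        probs = np.exp(scaled)
        return probs / probs.sum()
    return dist(1.0), dist(0.5), dist(2.0)          # original, sharper, flatter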
def lstm_decoder_words(z_in, d_inputs, label_logits, d_seq_l, batch_size,
                       embed, vocab_size, gen_mode=False, zsent=None,
                       scope=None):
    with tf.variable_scope(scope, "decoder") as sc:
        with tf.device("/cpu:0"):
            dec_inps = tf.nn.embedding_lookup(embed, d_inputs)
        # turn off dropout for generation:
        if params.dec_keep_rate < 1 and not gen_mode:
            dec_inps = tf.nn.dropout(dec_inps, params.dec_keep_rate)
        # concatenate the (softmaxed) label logits with the word embeddings
        label_logits = tf.nn.softmax(label_logits)
        dep = int(label_logits.shape[1])
        bs, T = tf.shape(dec_inps)[0], tf.shape(dec_inps)[1]
        label_logits = tf.reshape(label_logits, [bs, T, dep])
        dec_inps = tf.concat([dec_inps, label_logits], axis=-1)
        max_sl = tf.shape(dec_inps)[1]
        # define cell
        if params.base_cell == 'lstm':
            base_cell = tf.contrib.rnn.LSTMCell
        elif params.base_cell == 'rnn':
            base_cell = tf.contrib.rnn.RNNCell
        else:
            # not working for now
            base_cell = tf.contrib.rnn.GRUCell
        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=base_cell)
        z = zsent if gen_mode else z_in
        if params.decode == 'hw':
            # Highway network [Semeniuta et al.]: z -> decoder initial state
            for i in range(params.highway_lc):
                with tf.variable_scope("hw_layer_dec{0}".format(i)) as scope:
                    z_dec = fully_connected(
                        z, params.decoder_hidden * 2,
                        activation_fn=tf.nn.sigmoid,
                        weights_initializer=xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        scope="decoder_inp_state")
            inp_h, inp_c = tf.split(z_dec, 2, axis=1)
            initial_state = rnn_placeholders(
                (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        elif params.decode == 'concat':
            z_out = tf.reshape(tf.tile(tf.expand_dims(z, 1), (1, max_sl, 1)),
                               [batch_size, -1, params.latent_size])
            dec_inps = tf.concat([dec_inps, z_out], 2)
            initial_state = rnn_placeholders(
                cell.zero_state(tf.shape(dec_inps)[0], tf.float64))
        elif params.decode == 'mlp':
            # z -> decoder initial state
            w1 = tf.get_variable('whl', [params.latent_size, params.highway_ls],
                                 tf.float64,
                                 initializer=tf.truncated_normal_initializer())
            b1 = tf.get_variable('bhl', [params.highway_ls], tf.float64,
                                 initializer=tf.ones_initializer())
            z_dec = tf.matmul(z, w1) + b1
            inp_h, inp_c = tf.split(
                tf.layers.dense(z_dec, params.decoder_hidden * 2), 2, axis=1)
            initial_state = rnn_placeholders(
                (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=dec_inps,
                                                 sequence_length=d_seq_l,
                                                 initial_state=initial_state,
                                                 swap_memory=True,
                                                 dtype=tf.float64)
        # define decoder output network
        if gen_mode:
            # only interested in the last output
            outputs = outputs[:, -1, :]
        outputs_r = tf.reshape(outputs, [-1, params.decoder_hidden])
        x_logits = tf.layers.dense(outputs_r, units=vocab_size, activation=None)
        if params.beam_search:
            sample = tf.nn.softmax(x_logits)
        else:
            sample = tf.multinomial(x_logits / params.temperature, 10)[0]
    return x_logits, (initial_state, final_state), sample
def vae_lstm(observed, batch_size, d_seq_l, embed, d_inputs, vocab_size,
             dropout_off=False):
    with zs.BayesianNet(observed=observed) as decoder:
        # prior over the latent variable
        z_mean = tf.zeros([batch_size, params.latent_size])
        z_logstd = tf.zeros([batch_size, params.latent_size])
        z = zs.Normal('z', mean=z_mean, logstd=z_logstd, group_event_ndims=0)
        tf.summary.histogram('z|x', z)
        # z: [batch_size, latent_size] -> [batch_size, seq_len, latent_size]
        with tf.device("/cpu:0"):
            dec_inps = tf.nn.embedding_lookup(embed, d_inputs)
        # turn off dropout for generation:
        if params.dec_keep_rate < 1 and not dropout_off:
            dec_inps = tf.nn.dropout(dec_inps, params.dec_keep_rate)
        max_sl = tf.shape(dec_inps)[1]
        # tile z over time and concatenate it with the decoder inputs
        z_out = tf.reshape(tf.tile(tf.expand_dims(z, 1), (1, max_sl, 1)),
                           [batch_size, -1, params.latent_size])
        c_inputs = tf.concat([dec_inps, z_out], 2)
        # z -> decoder initial state
        w1 = tf.get_variable('whl', [params.latent_size, params.highway_ls],
                             tf.float32,
                             initializer=tf.truncated_normal_initializer())
        b1 = tf.get_variable('bhl', [params.highway_ls], tf.float32,
                             initializer=tf.ones_initializer())
        z_dec = tf.nn.relu(tf.matmul(z, w1) + b1)
        inp_h = tf.layers.dense(z_dec, params.decoder_hidden)
        inp_c = tf.layers.dense(z_dec, params.decoder_hidden)
        cell = model.make_rnn_cell(
            [params.decoder_hidden for _ in range(params.decoder_rnn_layers)],
            base_cell=params.base_cell)
        initial_state = rnn_placeholders(
            (tf.contrib.rnn.LSTMStateTuple(inp_c, inp_h), ))
        for tensor in flatten(initial_state):
            tf.add_to_collection('rnn_decoder_state_input', tensor)
        outputs, final_state = tf.nn.dynamic_rnn(cell,
                                                 inputs=c_inputs,
                                                 sequence_length=d_seq_l,
                                                 initial_state=initial_state,
                                                 swap_memory=True,
                                                 dtype=tf.float32)
        for tensor in flatten(final_state):
            tf.add_to_collection('rnn_decoder_state_output', tensor)
        # define decoder output network
        outputs_r = tf.reshape(outputs, [-1, params.decoder_hidden])
        x_logits = tf.layers.dense(outputs_r, units=vocab_size, activation=None)
        # take the unnormalized log-probabilities of the last word in the sequence
        # and sample from a multinomial distribution
        if params.beam_search:
            logits_ = tf.reshape(
                x_logits,
                [tf.shape(outputs)[0], tf.shape(outputs)[1], vocab_size])[:, -1]
            top_k = tf.nn.top_k(logits_, params.beam_size)
            sample = top_k.indices
            norm_log_prob = tf.log(tf.nn.softmax(top_k.values))
            sample_gr = tf.multinomial(
                tf.reshape(
                    x_logits,
                    [tf.shape(outputs)[0], tf.shape(outputs)[1], vocab_size])[:, -1]
                / params.temperature, 1)[:, 0][:]
    return decoder, x_logits, initial_state, final_state, sample_gr, sample, norm_log_prob
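# When params.beam_search is set, the decoder keeps the top-k logits of the
# last timestep and converts them to normalized log-probabilities with
# tf.log(tf.nn.softmax(top_k.values)). The NumPy sketch below reproduces that
# computation over the kept candidates only; vocabulary and beam sizes are
# hypothetical.
def _beam_candidates_demo():
    import numpy as np
    rng = np.random.RandomState(1)
    vocab_size, beam_size = 6, 3
    last_logits = rng.randn(1, vocab_size)          # logits of the final timestep
    order = np.argsort(-last_logits, axis=1)[:, :beam_size]      # top-k indices
    values = np.take_along_axis(last_logits, order, axis=1)      # top-k values
    # log-softmax over the kept values
    norm_log_prob = values - np.log(np.exp(values).sum(axis=1, keepdims=True))
    return order, norm_log_prob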