def embedding_lookup(params, ids, name='embedding_lookup'):
    """Provides an N-dimensional version of tf.embedding_lookup.

    Ids are flattened to a 1d tensor before being passed to embedding_lookup,
    then unflattened to match the original ids shape plus a trailing dimension
    of the embedding size.

    Args:
      params: List of tensors of size D0 x D1 x ... x Dn-2 x Dn-1.
      ids: N-dimensional tensor of B0 x B1 x ... x Bn-2 x Bn-1.
        Must contain indices into params.
      name: Optional name for the op.

    Returns:
      A tensor of size B0 x B1 x ... x Bn-2 x Bn-1 x D1 x ... x Dn-2 x Dn-1
      containing the values from the params tensor(s) for the indices in ids.

    Raises:
      ValueError: if some parameters are invalid.
    """
    with ops.name_scope(name, 'embedding_lookup', [params, ids]):
        params = ops.convert_to_tensor(params)
        ids = ops.convert_to_tensor(ids)
        shape = array_ops_.shape(ids)
        ids_flat = array_ops_.reshape(
            ids, math_ops.reduce_prod(shape, keep_dims=True))
        embeds_flat = nn.embedding_lookup(params, ids_flat, name=name)
        embed_shape = array_ops_.concat([shape, [-1]], 0)
        embeds = array_ops_.reshape(embeds_flat, embed_shape)
        embeds.set_shape(ids.get_shape().concatenate(params.get_shape()[1:]))
        return embeds
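# Usage sketch (added for illustration; the shapes and values below are
# assumptions, not from the original source). With a [4, 3] table and [2, 2]
# ids, the function above returns a [2, 2, 3] tensor. Assumes TF1 session
# semantics and the module aliases used above (ops, array_ops_, math_ops, nn).
import numpy as np
import tensorflow as tf

table = tf.constant(np.arange(12, dtype=np.float32).reshape(4, 3))
nd_ids = tf.constant([[0, 2], [3, 1]])
embeds = embedding_lookup(table, nd_ids)  # shape: [2, 2, 3]
with tf.Session() as sess:
    print(sess.run(embeds))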
def network(x, y1, y2, la, lr):
    del x
    with variable_scope.variable_scope("vs", use_resource=True):
        w = variable_scope.get_variable(
            "w",
            shape=[200, 10],
            dtype=np.float32,
            initializer=init_ops.constant_initializer(2.))
        y = array_ops.reshape(w, [10, 200])
        g1 = nn.embedding_lookup(y, y1)
        g2 = nn.embedding_lookup(y, y2)
        g = array_ops.concat([g1, g2], axis=1)
        ce = losses.absolute_difference(labels=la, predictions=g)
        loss = math_ops.reduce_mean(ce)
        optimizer = gradient_descent.GradientDescentOptimizer(lr)
        train = optimizer.minimize(loss)
        return loss, train
def __init__(self, cfg, word_embd, max_ques_len, input_producer,
             generated=None):
    batch_size = cfg.batch_size
    vocab_size = len(word_embd)
    with tf.variable_scope('disc'):
        word_embd = tf.get_variable(
            'word_embd',
            shape=word_embd.shape,
            initializer=tf.constant_initializer(word_embd))
        if generated:
            self.ques = generated['ques']
            self.ques_len = generated['ques_len']
            # soft embedding_lookup: ques holds distributions over the
            # vocabulary, so embed by matrix product instead of a gather
            ques = tf.reshape(self.ques, [-1, vocab_size])
            ques = tf.matmul(ques, word_embd)
            ques = tf.reshape(ques, [batch_size, -1, cfg.embed_dim])
        else:
            self.ques = tf.placeholder(tf.int32, shape=[None, max_ques_len],
                                       name='question')
            self.ques_len = tf.placeholder(tf.int32, shape=[None],
                                           name='question_length')
            ques = embedding_lookup(word_embd, self.ques)
        self.answ = input_producer.answ_disc
        cell = GRUCell(cfg.hidden_size)
        _, state = dynamic_rnn(cell, ques, sequence_length=self.ques_len,
                               dtype=tf.float32)
        output_layer = Dense(vocab_size)
        logits = output_layer(state)
        labels = tf.one_hot(self.answ, vocab_size)
        self.pred = tf.argmax(logits, 1)
        loss = softmax_cross_entropy_with_logits(labels=labels, logits=logits)
        self.loss = tf.reduce_mean(loss)
def __init__(self, input_producer, embed_mat, config, is_train):
    x_enc = input_producer.x_enc
    x_dec = input_producer.x_dec
    y_dec = input_producer.y_dec
    len_enc = input_producer.len_enc
    len_dec = input_producer.len_dec
    self.answer = input_producer.answ_disc
    max_len = input_producer.seq_max_length
    vocab_num = input_producer.vocab_num
    config.update(max_len=max_len, vocab_num=vocab_num)

    self.kl_weight = tf.Variable(0.0, name="KL_weight")
    self.input_ids = y_dec
    modeler = CtrlVAEModelingHelper(config, embed_mat)

    with tf.variable_scope("CtrlVAE"):
        ### VAE ################################################################
        # encoder
        x_enc_onehot = tf.one_hot(x_enc, vocab_num)
        out_tuple = modeler.encoder(x_enc_onehot=x_enc_onehot,
                                    len_enc=len_enc)
        (vae_z, vae_mu, vae_logvar) = out_tuple

        # holistic representation
        with tf.device("/cpu:0"):
            vae_c = embedding_lookup(modeler.embed, self.answer)
        vae_c = tf.reshape(vae_c, [config.batch_size, -1])
        vae_represent = tf.concat([vae_z, vae_c], axis=1)

        # decoder
        x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
        out_tuple = modeler.decoder(initial_state=vae_represent,
                                    x_dec_onehot=x_dec_onehot,
                                    len_dec=len_dec,
                                    is_teacher_forcing=True)
        (vae_outputs, vae_state, vae_outputs_len) = out_tuple

        # final
        (self.vae_output, self.vae_sample) = vae_outputs

        ### Generator ##########################################################
        # random z and c from the prior
        self.gen_z = tf.random_normal(
            [config.batch_size, config.hidden_size])
        self.gen_c = vae_c
        gen_represent = tf.concat([self.gen_z, self.gen_c], axis=1)

        # generator (decoder)
        x_dec_onehot = tf.one_hot(x_dec, config.vocab_num)
        out_tuple = modeler.decoder(initial_state=gen_represent,
                                    x_dec_onehot=x_dec_onehot,
                                    len_dec=len_dec,
                                    is_teacher_forcing=True,
                                    reuse=True)
        (gen_outputs, gen_state, gen_outputs_len) = out_tuple

        # final; dividing the logits by ALMOST_ZERO is a near-zero softmax
        # temperature, approximating a one-hot argmax while keeping the
        # graph differentiable
        (self.gen_output, self.gen_sample) = gen_outputs
        gen_outputs_onehot = softmax(self.gen_output / ALMOST_ZERO)

        # discriminator (for c code)
        out_tuple = modeler.discriminator(inputs=gen_outputs_onehot,
                                          inputs_length=gen_outputs_len)
        (self.gen_c_output, self.gen_c_sample) = out_tuple

        # encoder again (for z code; acts as an additional discriminator)
        out_tuple = modeler.encoder(x_enc_onehot=gen_outputs_onehot,
                                    len_enc=gen_outputs_len,
                                    reuse=True)
        (gen_z, dis_mu, dis_logvar) = out_tuple

        ### Discriminator ######################################################
        # discriminator (for training)
        x_dis_onehot = tf.one_hot(x_enc, config.vocab_num)
        out_tuple = modeler.discriminator(inputs=x_dis_onehot,
                                          inputs_length=gen_outputs_len,
                                          reuse=True)
        (self.dis_outputs, self.dis_sample) = out_tuple

    ############################################################################
    # get all the variables in this scope
    self.vars = get_variables("CtrlVAE")
    self.enc_vars = get_variables("CtrlVAE/encoder")
    self.gen_vars = get_variables("CtrlVAE/decoder")
    self.dis_vars = get_variables("CtrlVAE/discriminator")
    self.vae_vars = self.enc_vars + self.gen_vars

    ############################################################################
    # compute AE loss (reconstruction)
    len_out = tf.reduce_max(vae_outputs_len)
    targets = y_dec[:, :len_out]
    weights = tf.sequence_mask(vae_outputs_len, dtype=tf.float32)
    softmax_loss = sequence_loss(logits=self.vae_output,
                                 targets=targets,
                                 weights=weights,
                                 average_across_timesteps=False,
                                 average_across_batch=False)  # NOTE: fix later!
    loss_sum = tf.reduce_sum(softmax_loss, axis=1)
    self.ae_loss = self.ae_loss_mean = tf.reduce_mean(loss_sum, axis=0)

    # compute KL loss (regularization)
    KL_term = 1 + vae_logvar - tf.pow(vae_mu, 2) - tf.exp(vae_logvar)
    self.kl_loss = -0.5 * tf.reduce_sum(KL_term, axis=1)
    self.kl_loss_mean = tf.reduce_mean(self.kl_loss)

    # VAE total loss
    self.vae_loss = self.ae_loss + self.kl_weight * self.kl_loss_mean

    ############################################################################
    # c code loss
    answer_labels = tf.one_hot(self.answer, config.vocab_num)
    c_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                               logits=self.gen_c_output)
    self.c_loss = tf.reduce_mean(c_loss)

    # z code loss
    mu_loss = mean_pairwise_squared_error(vae_mu, dis_mu)
    logvar_loss = mean_pairwise_squared_error(vae_logvar, dis_logvar)
    self.z_loss = (mu_loss + logvar_loss) / 2

    # generator total loss
    self.gen_loss = self.c_loss + self.z_loss

    ############################################################################
    # discriminator training loss
    dis_loss = softmax_cross_entropy_with_logits(labels=answer_labels,
                                                 logits=self.dis_outputs)
    self.dis_loss = tf.reduce_mean(dis_loss)

    ############################################################################
    # optimization
    lr = config.learning_rate
    self.vae_lr = tf.Variable(lr, trainable=False, name="vae_lr")
    self.gen_lr = tf.Variable(0.0, trainable=False, name="gen_lr")
    self.dis_lr = tf.Variable(lr, trainable=False, name="dis_lr")

    vae_optim = tf.train.AdamOptimizer(self.vae_lr)
    gen_optim = tf.train.AdamOptimizer(self.gen_lr)
    dis_optim = tf.train.AdamOptimizer(self.dis_lr)

    vae_grads = tf.gradients(self.vae_loss, self.vae_vars)
    gen_grads = tf.gradients(self.gen_loss, self.gen_vars)
    dis_grads = tf.gradients(self.dis_loss, self.dis_vars)

    vae_grads, _ = tf.clip_by_global_norm(vae_grads, config.max_grad_norm)
    gen_grads, _ = tf.clip_by_global_norm(gen_grads, config.max_grad_norm)
    dis_grads, _ = tf.clip_by_global_norm(dis_grads, config.max_grad_norm)

    self.global_step = get_or_create_global_step()
    self.vae_train = vae_optim.apply_gradients(
        zip(vae_grads, self.vae_vars))
    self.gen_train = gen_optim.apply_gradients(
        zip(gen_grads, self.gen_vars))
    self.dis_train = dis_optim.apply_gradients(
        zip(dis_grads, self.dis_vars), self.global_step)

    # learning_rate update
    self.new_gen_lr = tf.placeholder(tf.float32, shape=[],
                                     name="new_gen_lr")
    self.gen_lr_update = tf.assign(self.gen_lr, self.new_gen_lr)

    # KL weight update
    self.new_kl_weight = tf.placeholder(tf.float32, shape=[], name="new_kl")
    self.kl_weight_update = tf.assign(self.kl_weight, self.new_kl_weight)

    # summaries
    tf.summary.scalar("Loss/ae_mean", self.ae_loss_mean)
    tf.summary.scalar("Loss/kl_mean", self.kl_loss_mean)
    tf.summary.scalar("Loss/Total", self.ae_loss_mean + self.kl_loss_mean)
    tf.summary.scalar("Misc/kl_weight", self.kl_weight)
    tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(vae_mu))
    tf.summary.scalar("Misc/logvar_mean", tf.reduce_mean(vae_logvar))
    tf.summary.scalar("Misc/gen_lr", self.gen_lr)
    self.summary_op = tf.summary.merge_all()
def test_indexed_slice(self):
    inp = random_ops.random_uniform([3, 2])
    output = nn.embedding_lookup(inp, [0, 2])
    pfor_jacobian = gradients.jacobian(output, inp, use_pfor=True)
    while_jacobian = gradients.jacobian(output, inp, use_pfor=False)
    self.run_and_assert_equal(while_jacobian, pfor_jacobian)
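# Context sketch (an illustration, not part of the test above): the gradient
# of an embedding lookup with respect to the table arrives as a sparse
# tf.IndexedSlices rather than a dense tensor, which is the code path the
# pfor and while-loop Jacobians above must both handle. Assumes TF1 graph mode.
import tensorflow as tf

table = tf.get_variable("table", shape=[3, 2])
out = tf.nn.embedding_lookup(table, [0, 2])
grad, = tf.gradients(tf.reduce_sum(out), table)
print(type(grad))  # tf.IndexedSlices: only the looked-up rows get gradients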
def testEmbeddingLookupBatchSize2(self):
    ids = constant_op.constant([[1, 2, 3], [3, 4, 5]])
    paras = np.array([[10], [20], [80], [40], [50], [60]])
    emb_lookup_tf = nn.embedding_lookup(paras, ids)
    emb_lookup_ipu = kerasIPUEmbeddingLookup(paras, ids, name="emb_test_1")
    self.assertAllClose(emb_lookup_tf, emb_lookup_ipu)
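# Worked example for the test above (derived by hand from its inputs): looking
# up rows [[1, 2, 3], [3, 4, 5]] of the [6, 1] table gives a [2, 3, 1] result:
# [[[20], [80], [40]],
#  [[40], [50], [60]]]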
def __init__(self, input_producer, embed_mat, config, is_train):
    with tf.variable_scope("VAE") as var_scope:
        x_enc = input_producer.x_enc
        x_dec = input_producer.x_dec
        y_dec = input_producer.y_dec
        len_enc = input_producer.len_enc
        len_dec = input_producer.len_dec
        max_len = input_producer.seq_max_length
        vocab_num = input_producer.vocab_num

        batch_size = config.batch_size
        hidden_size = config.hidden_size
        embed_dim = config.embed_dim
        is_GRU = config.is_GRU
        is_argmax_sampling = config.is_argmax_sampling
        word_keep_prob = config.word_dropout_keep_prob
        max_grad_norm = config.max_grad_norm
        learning_rate = config.learning_rate

        self.KL_weight = tf.Variable(0.0, name="KL_weight")
        self.input_ids = y_dec

        def _lstm_cell():
            return BasicLSTMCell(num_units=hidden_size,
                                 forget_bias=1.0,
                                 state_is_tuple=True,
                                 reuse=tf.get_variable_scope().reuse)

        def _gru_cell():
            return GRUCell(num_units=hidden_size,
                           reuse=tf.get_variable_scope().reuse)

        cell = _gru_cell if is_GRU else _lstm_cell
        self.initial_state = cell().zero_state(batch_size, tf.float32)

        # encoder
        with tf.device("/cpu:0"):
            embed_init = (tf.constant_initializer(embed_mat)
                          if embed_mat is not None else None)
            embedding = tf.get_variable("embedding", [vocab_num, embed_dim],
                                        initializer=embed_init,
                                        trainable=True)
            in_enc = embedding_lookup(embedding, x_enc)

        with tf.variable_scope("encoder"):
            out_tuple = dynamic_rnn(cell=cell(),
                                    inputs=in_enc,
                                    sequence_length=len_enc,
                                    initial_state=self.initial_state)
            (_, encoder_hidden) = out_tuple

        # linear layers for mu and log(var)
        latent_dim = hidden_size  # may have to change this later
        W_mu = tf.get_variable("W_mu", [hidden_size, latent_dim])
        b_mu = tf.get_variable("b_mu", [latent_dim])
        W_logvar = tf.get_variable("W_logvar", [hidden_size, latent_dim])
        b_logvar = tf.get_variable("b_logvar", [latent_dim])

        mu = tf.matmul(encoder_hidden, W_mu) + b_mu
        logvar = tf.matmul(encoder_hidden, W_logvar) + b_logvar

        # sample epsilon
        epsilon = tf.random_normal(tf.shape(logvar), name='epsilon')

        # sample latent variable via the reparameterization trick
        stddev = tf.exp(0.5 * logvar)  # standard deviation
        self.z = mu + tf.multiply(stddev, epsilon)

        # decoder
        with tf.device("/cpu:0"):
            in_dec = embedding_lookup(embedding, x_dec)

        with tf.variable_scope("decoder"):
            helper = WordDropoutTrainingHelper(
                inputs=in_dec,
                sequence_length=len_dec,
                embedding=embedding,
                dropout_keep_prob=word_keep_prob,
                drop_token_id=UNK_ID,
                is_argmax_sampling=is_argmax_sampling)

            # projection layer
            output_layer = Dense(units=vocab_num,
                                 activation=None,
                                 use_bias=True,
                                 trainable=True)

            # decoder
            decoder = BasicDecoder(cell=cell(),
                                   helper=helper,
                                   initial_state=self.z,
                                   output_layer=output_layer)

            # dynamic_decode
            out_tuple = dynamic_decode(decoder=decoder,
                                       output_time_major=False,  # speed
                                       impute_finished=True)

        # get all the variables in this scope
        self.vars = tf.contrib.framework.get_variables(var_scope)

        # (outputs, state, sequence_length)
        (self.outputs, _, self.cell_outputs_len) = out_tuple

        # final: (cell_outputs, sample_ids)
        (self.cell_outputs, self.sampled_ids) = self.outputs

        # compute softmax loss (reconstruction)
        len_out = tf.reduce_max(len_dec)
        targets = y_dec[:, :len_out]
        weights = tf.sequence_mask(self.cell_outputs_len, dtype=tf.float32)
        softmax_loss = sequence_loss(logits=self.cell_outputs,
                                     targets=targets,
                                     weights=weights,
                                     average_across_timesteps=True,
                                     average_across_batch=True)
        self.AE_loss = self.AE_loss_mean = softmax_loss

        # compute KL loss (regularization)
        KL_term = 1 + logvar - tf.pow(mu, 2) - tf.exp(logvar)
        self.KL_loss = -0.5 * tf.reduce_sum(KL_term, axis=1)
        self.KL_loss_mean = tf.reduce_mean(self.KL_loss)

        # total loss
        self.loss = self.AE_loss + self.KL_weight * self.KL_loss_mean

        # optimization
        self.lr = tf.Variable(learning_rate, trainable=False, name="lr")
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, self.vars),
                                          max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.global_step = get_or_create_global_step()
        self.train_op = optimizer.apply_gradients(
            zip(grads, self.vars), global_step=self.global_step)

        # learning_rate update
        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_lr")
        self.lr_update = tf.assign(self.lr, self.new_lr)

        # KL weight update
        self.new_KL_weight = tf.placeholder(tf.float32, shape=[],
                                            name="new_kl")
        self.KL_weight_update = tf.assign(self.KL_weight, self.new_KL_weight)

        # summaries
        tf.summary.scalar("Loss/AE_mean", self.AE_loss_mean)
        tf.summary.scalar("Loss/KL_mean", self.KL_loss_mean)
        tf.summary.scalar("Loss/Total", self.AE_loss_mean + self.KL_loss_mean)
        tf.summary.scalar("Misc/KL_weight", self.KL_weight)
        tf.summary.scalar("Misc/mu_mean", tf.reduce_mean(mu))
        tf.summary.scalar("Misc/sigma_mean", tf.reduce_mean(stddev))
        tf.summary.scalar("Misc/learning_rate", self.lr)
        self.summary_op = tf.summary.merge_all()
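# Reference for the KL terms above (the standard VAE closed form, stated here
# as context rather than taken from the original source): for a posterior
# q = N(mu, diag(var)) and prior p = N(0, I),
#   KL(q || p) = -0.5 * sum_j (1 + log(var_j) - mu_j^2 - var_j)
# which is exactly the reduction over KL_term computed in both models above.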
def network(w, y):
    g = nn.embedding_lookup(w, y)
    return g
def __init__(self, sequence_length, num_classes, vocab_size, embedding_size,
             filter_sizes, num_filters, l2_reg_lambda=0.0,
             embedding_type='static'):
    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                  name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                  name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        if embedding_type == 'static':
            # use pretrained word2vec embeddings, saved earlier with np.save()
            ids = ops.convert_to_tensor(self.input_x)
            name = "embedding_lookup"
            w2v = np.load("w2v.model.en.npy")
            print("Loaded w2v....")
            params = tf.Variable(w2v)
            shape = array_ops_.shape(ids)
            # concatenate all the ids from all the sentences
            ids_flat = array_ops_.reshape(
                ids, math_ops.reduce_prod(shape, keep_dims=True))
            embeds_flat = nn.embedding_lookup(params, ids_flat, name=name)
            embed_shape = array_ops_.concat([shape, [-1]], 0)
            embeds = array_ops_.reshape(embeds_flat, embed_shape)
            embeds.set_shape(
                ids.get_shape().concatenate(params.get_shape()[1:]))
            self.embedded_chars = embeds
        else:
            W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
        self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                            name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(self.embedded_chars_expanded,
                                W,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(pooled_outputs, 3)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable(
            "W",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores,
                                                         labels=self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name="accuracy")
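# Hypothetical instantiation sketch (the class name TextCNN and every value
# below are assumptions, not from the original source):
cnn = TextCNN(sequence_length=56,
              num_classes=2,
              vocab_size=20000,
              embedding_size=128,
              filter_sizes=[3, 4, 5],
              num_filters=128,
              l2_reg_lambda=0.1,
              embedding_type='rand')  # any value other than 'static' trains a
                                      # randomly initialized embedding table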