def _build_graph(self, layer, previous_state):
    with layer_scope(self):
        if previous_state is None:
            input_batch = tf.shape(layer.tensor)[0]
            zero_state = tf.zeros([input_batch, self.n_units])
            self.previous_state = tx.TensorLayer(zero_state, self.n_units)

        if self.share_state_with is None:
            # determines the weight of the previous state
            # we could add the bias at the end, but this way we define a single bias for the r unit
            self.r_current_w = tx.Linear(layer, self.n_units, bias=True,
                                         weight_init=self.init, name="r_current_w")
            self.r_recurrent_w = tx.Linear(self.previous_state, self.n_units, bias=False,
                                           weight_init=self.recurrent_init, name="r_recurrent_w")

            self.u_current_w = tx.Linear(layer, self.n_units, bias=True,
                                         weight_init=self.init, name="u_current_w")
            self.u_recurrent_w = tx.Linear(self.previous_state, self.n_units, bias=False,
                                           weight_init=self.recurrent_init, name="u_recurrent_w")

            self.current_w = tx.Linear(layer, self.n_units, bias=True,
                                       weight_init=self.init, name="current_w")
            self.recurrent_w = tx.Linear(self.previous_state, self.n_units, bias=False,
                                         weight_init=self.recurrent_init, name="recurrent_w")

            # kernel_gate = tx.Activation()
            kernel_act = tx.Activation(self.current_w, self.activation)
            self.kernel = tx.Compose(self.current_w, kernel_act)
            self.recurrent_kernel = self.recurrent_w
        else:
            self.kernel = self.share_state_with.kernel.reuse_with(layer)
            self.recurrent_kernel = self.share_state_with.recurrent_kernel.reuse_with(
                self.previous_state)

        # reset gate: r = sigmoid(W_r x + U_r h_prev + b_r)
        r_state = tx.Add(self.r_current_w, self.r_recurrent_w)
        r_state = tx.Bias(r_state)
        r_gate = tx.Activation(r_state, fn=tx.sigmoid, name="r_gate")

        # """Gated recurrent unit (GRU) with n_units cells."""
        return self.kernel.tensor + self.recurrent_kernel.tensor
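# For reference while finishing the graph above: the standard GRU update that the
# r/u/current kernels are building toward, as a plain numpy sketch. All names here
# are illustrative assumptions, not part of the tensorx API.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x, h_prev, Wr, Ur, br, Wu, Uu, bu, Wc, Uc, bc):
    """GRU cell: reset gate r, update gate u, candidate state c."""
    r = sigmoid(x @ Wr + h_prev @ Ur + br)        # reset gate
    u = sigmoid(x @ Wu + h_prev @ Uu + bu)        # update gate
    c = np.tanh(x @ Wc + (r * h_prev) @ Uc + bc)  # candidate state
    return u * h_prev + (1.0 - u) * c             # new state

# toy shapes: batch 2, 3 inputs, 4 units
x, h = np.random.rand(2, 3), np.zeros((2, 4))
Wr, Wu, Wc = (np.random.rand(3, 4) for _ in range(3))
Ur, Uu, Uc = (np.random.rand(4, 4) for _ in range(3))
br, bu, bc = (np.zeros(4) for _ in range(3))
assert gru_step(x, h, Wr, Ur, br, Wu, Uu, bu, Wc, Uc, bc).shape == (2, 4)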
def _build_graph(self, layer, previous_state):
    with layer_scope(self):
        if previous_state is None:
            input_batch = tf.shape(layer.tensor)[0]
            zero_state = tf.zeros([input_batch, self.n_units])
            self.previous_state = tx.TensorLayer(zero_state, self.n_units)

        if self.share_state_with is None:
            kernel_linear = tx.Linear(layer, self.n_units, bias=True,
                                      weight_init=self.init, name="linear_kernel")
            kernel_act = tx.Activation(kernel_linear, self.activation)
            self.kernel = tx.Compose([kernel_linear, kernel_act])

            self.recurrent_kernel = tx.Linear(self.previous_state, self.n_units, bias=False,
                                              weight_init=self.recurrent_init,
                                              name="recurrent_kernel")
        else:
            self.kernel = self.share_state_with.kernel.reuse_with(layer)
            self.recurrent_kernel = self.share_state_with.recurrent_kernel.reuse_with(
                self.previous_state)

        # TODO this might be wrong, I might need to couple the activation: act(kernel + recurrent + bias)
        # TODO it is wrong https://github.com/tensorflow/tensorflow/blob/r1.8/tensorflow/python/ops/rnn_cell_impl.py
        # """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
        return self.kernel.tensor + self.recurrent_kernel.tensor
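# The TODO above is right: in the basic RNN the activation must wrap the whole
# pre-activation sum, not just the input kernel. A minimal numpy sketch of the
# coupled form (all names below are illustrative, not tensorx API):
import numpy as np

def basic_rnn_step(x, h_prev, W, U, b, act=np.tanh):
    """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
    return act(x @ W + h_prev @ U + b)

# toy shapes: batch 2, 3 inputs, 4 units
x, h = np.random.rand(2, 3), np.zeros((2, 4))
W, U, b = np.random.rand(3, 4), np.random.rand(4, 4), np.zeros(4)
assert basic_rnn_step(x, h, W, U, b).shape == (2, 4)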
import numpy as np
import tensorflow as tf
import tensorx as tx

from deepsign.models.nrp import RandomIndexTensor
from deepsign.rp.ri import Generator, RandomIndex

sess = tf.InteractiveSession()

vocab_size = 8
k = 6
s = 2
embed_dim = 3

generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = RandomIndexTensor.from_ri_list(ris, k, s)

ri_input = ri_tensor.gather([[0, 1, 0], [1, 2, 0]])
sp = ri_input.to_sparse_tensor()
sp = tx.TensorLayer(sp, k)
print(sp.tensor.eval())

embed = tx.Lookup(sp, seq_size=3, lookup_shape=[k, embed_dim])

tf.global_variables_initializer().run()
print(np.shape(embed.tensor.eval()))
batch_size = 2

generator = Generator(k, s)
print([vocab[w] for w in vocab.keys()])
ri_dict = {vocab[word]: generator.generate() for word in vocab.keys()}

tokens = [vocab[w] for w in tokens]
data_it = window_it(tokens, seq_size)
data_it = batch_it(data_it, batch_size)

vocab_tensor = [ri_dict[i] for i in range(len(vocab))]
sp_ri = deepsign.data.transform.ris_to_sp_tensor_value(vocab_tensor, dim=k)

inputs = tx.Input(n_units=2)
ri_inputs = tx.gather_sparse(sp_ri, inputs.tensor)
ri_inputs = tx.TensorLayer(ri_inputs, k)

embed = tx.Lookup(ri_inputs, seq_size, [k, embed_dim])

# logits: take the embeddings and get the features for all random indexes
ri_layer = tx.TensorLayer(sp_ri, n_units=k)
logits = tx.Linear(input_layer=ri_layer,
                   n_units=embed_dim,
                   shared_weights=embed.weights,
                   bias=True)

single_input = tx.Input(1)
ri_input = tx.TensorLayer(tx.gather_sparse(sp_ri, single_input.tensor), k)
logit = logits.reuse_with(ri_input)
import os

import numpy as np
import tensorflow as tf
import tensorx as tx
from tensorflow.contrib.compiler import xla

jit_scope = tf.contrib.compiler.jit.experimental_jit_scope

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

input_size = 10000
var_size = 500
batch_size = 20
seq_size = 30

inputs = tf.constant(np.random.randint(0, 10, size=[batch_size, seq_size]), name="inputs")
targets = tf.constant(np.random.randint(0, 10, size=[batch_size * seq_size]), name="targets")
targets = tf.one_hot(targets, input_size)
inputs = tx.TensorLayer(inputs)

with jit_scope():
    with tf.name_scope("scope1"):
        lookup = tx.Lookup(inputs, seq_size=seq_size,
                           lookup_shape=[input_size, var_size], name="lookup")
        seq = lookup.permute_batch_time()
        seq = tx.Reshape(seq, [-1, var_size], name="flatten")

        mul1 = tx.Linear(seq, input_size, name="test_logits")
        mul2 = tx.Linear(seq, n_units=input_size,
                         shared_weights=lookup.weights,
                         transpose_weights=True,
                         name="shared_embeddings")

    with tf.name_scope("scope2"):
        mul1 = mul1.reuse_with(seq)
# *************************************
generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = ris_to_sp_tensor_value(ris, k)
ri_tensor = tf.convert_to_tensor_or_sparse_tensor(ri_tensor)

# *************************************
# DUMMY INPUT DATA
# *************************************
# batch of word sequence indices
ctx_size = 3
input_data = np.array([[0, 1, 2], [0, 2, 2], [1, 3, 5], [3, 0, 2]])

input_labels = tf.constant(np.array([[3], [1], [10], [25]], dtype=np.int64))
input_labels = tx.TensorLayer(input_labels, n_units=1)

input_layer = tx.TensorLayer(input_data, n_units=3, dtype=tf.int64)
ri_layer = tx.TensorLayer(ri_tensor, k)
ri_inputs = tx.gather_sparse(ri_layer.tensor, input_layer.tensor)
ri_inputs = tx.TensorLayer(ri_inputs, k)

lookup = tx.Lookup(ri_inputs, ctx_size, [k, embed_size],
                   weight_init=tx.random_normal(0, 0.1), name="lookup")

feature_predict = tx.Linear(lookup, embed_size, bias=True)

all_embeddings = tx.Linear(ri_layer, embed_size,
                           shared_weights=lookup.weights,
                           bias=False)
                   lookup_shape=feature_shape)
# [batch x seq_size * feature_shape[1]]

# reshape to [batch x seq_size x feature_shape[1]]
lookup_to_seq = tf.reshape(lookup.tensor, [-1, seq_size, embed_dim])

# type of rnn cell
cell = tf.nn.rnn_cell.LSTMCell(num_units=n_hidden, state_is_tuple=True)
val, state = tf.nn.dynamic_rnn(cell, lookup_to_seq, dtype=tf.float32)
val = tf.transpose(val, [1, 0, 2])

# last = tf.gather(val, int(val.get_shape()[0]) - 1)
last = val[-1]

lstm_out = tx.TensorLayer(last, n_hidden)
logits = tx.Linear(lstm_out, vocab_size, bias=True)
out = tx.Activation(logits, tx.softmax)

labels = tx.dense_one_hot(loss_inputs.tensor, vocab_size)
loss = tf.reduce_mean(tx.categorical_cross_entropy(labels=labels, logits=logits.tensor))

# setup optimizer
optimizer = tx.AMSGrad(learning_rate=0.01)

model = tx.Model(run_inputs=in_layer, run_outputs=out,
                 train_inputs=in_layer, train_outputs=out,
                 train_in_loss=loss_inputs, train_out_loss=loss,
                 eval_out_score=loss, eval_in_score=loss_inputs)

print(model.feedable_train())
def __init__(self,
             ctx_size,
             vocab_size,
             k_dim,
             s_active,
             ri_tensor,
             embed_dim,
             h_dim,
             embed_init=tx.random_uniform(minval=-0.01, maxval=0.01),
             logit_init=tx.random_uniform(minval=-0.01, maxval=0.01),
             num_h=1,
             h_activation=tx.relu,
             h_init=tx.he_normal_init,
             use_dropout=False,
             embed_dropout=False,
             keep_prob=0.95,
             l2_loss=False,
             l2_loss_coef=1e-5,
             f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
             embed_share=True,
             logit_bias=False,
             use_nce=False,
             nce_samples=100,
             noise_level=0.1):
    run_inputs = tx.Input(ctx_size, dtype=tf.int32)
    loss_inputs = tx.Input(n_units=1, dtype=tf.int64)
    eval_inputs = loss_inputs

    if run_inputs.dtype != tf.int32 and run_inputs.dtype != tf.int64:
        raise TypeError("Invalid dtype for input: expected int32 or int64, got {}".format(run_inputs.dtype))

    if num_h < 0:
        raise ValueError("num hidden should be >= 0")

    # ===============================================
    # RUN GRAPH
    # ===============================================
    var_reg = []

    with tf.name_scope("run"):
        # RI ENCODING ===============================================
        # convert ids to ris: gather a set of random indexes based on the ids in a sequence
        # ri_layer = tx.TensorLayer(ri_tensor, n_units=k_dim)
        # ri_inputs = tx.gather_sparse(ri_layer.tensor, run_inputs.tensor)
        # ri_inputs = tx.TensorLayer(ri_inputs, n_units=k_dim)
        with tf.name_scope("ri_encode"):
            if isinstance(ri_tensor, RandomIndexTensor):
                ri_layer = tx.TensorLayer(ri_tensor.to_sparse_tensor(), k_dim,
                                          shape=[vocab_size, k_dim])
                ri_inputs = ri_tensor.gather(run_inputs.tensor)
                ri_inputs = ri_inputs.to_sparse_tensor()
                ri_inputs = tx.TensorLayer(ri_inputs, k_dim,
                                           shape=[ri_inputs.get_shape()[0], k_dim])
            # ri_tensor is a sparse tensor
            else:
                raise TypeError("please supply a RandomIndexTensor instead of a sparse Tensor")
                # ri_layer = tx.TensorLayer(ri_tensor, k_dim)
                # ri_inputs = tx.gather_sparse(ri_layer.tensor, run_inputs.tensor)
                # ri_inputs = tx.TensorLayer(ri_inputs, k_dim)

        feature_lookup = tx.Lookup(ri_inputs, ctx_size, [k_dim, embed_dim],
                                   embed_init, name="lookup")
        self.embeddings = feature_lookup
        var_reg.append(feature_lookup.weights)
        feature_lookup = feature_lookup.as_concat()
        # ===========================================================

        last_layer = feature_lookup
        h_layers = []
        for i in range(num_h):
            h_i = tx.Linear(last_layer, h_dim, h_init, bias=True,
                            name="h_{i}_linear".format(i=i))
            h_a = tx.Activation(h_i, h_activation)
            h = tx.Compose(h_i, h_a, name="h_{i}".format(i=i))
            h_layers.append(h)
            last_layer = h
            var_reg.append(h_i.weights)

        self.h_layers = h_layers

        # feature prediction for Energy-Based Model
        f_prediction = tx.Linear(last_layer, embed_dim, f_init, bias=True,
                                 name="f_predict")
        var_reg.append(f_prediction.weights)

        # RI DECODING ===============================================
        # shared embeddings
        if embed_share:
            shared_weights = feature_lookup.weights
            logit_init = None
            # ri_dense = tx.ToDense(ri_layer)
            all_embeddings = tx.Linear(ri_layer, embed_dim, logit_init, shared_weights,
                                       name="all_features", bias=False)

            # dot product of f_predicted . all_embeddings, with bias for each target word
            run_logits = tx.Linear(f_prediction, vocab_size,
                                   shared_weights=all_embeddings.tensor,
                                   transpose_weights=True,
                                   bias=logit_bias,
                                   name="logits")
        else:
            run_logits = tx.Linear(f_prediction, vocab_size, bias=logit_bias, name="logits")

        if not embed_share:
            var_reg.append(run_logits.weights)
        # ===========================================================

        embed_prob = tx.Activation(run_logits, tx.softmax, name="run_output")

    # ===============================================
    # TRAIN GRAPH
    # ===============================================
    with tf.name_scope("train"):
        if use_dropout and embed_dropout:
            feature_lookup = feature_lookup.reuse_with(ri_inputs)
            last_layer = tx.Dropout(feature_lookup, probability=keep_prob)
        else:
            last_layer = feature_lookup

        # add dropout between each layer
        for layer in h_layers:
            h = layer.reuse_with(last_layer)
            if use_dropout:
                h = tx.Dropout(h, probability=keep_prob)
            last_layer = h

        f_prediction = f_prediction.reuse_with(last_layer)

        train_logits = run_logits.reuse_with(f_prediction, name="train_logits")
        train_embed_prob = tx.Activation(train_logits, tx.softmax, name="train_output")

        if use_nce:
            # labels
            labels = loss_inputs.tensor

            # convert labels to random indices
            def labels_to_ri(x):
                random_index_tensor = ri_tensor.gather(x)
                sp_features = random_index_tensor.to_sparse_tensor()
                return sp_features

            model_prediction = f_prediction.tensor
            train_loss = tx.sparse_cnce_loss(label_features=labels,
                                             model_prediction=model_prediction,
                                             weights=feature_lookup.weights,
                                             noise_ratio=noise_level,
                                             num_samples=nce_samples,
                                             labels_to_sparse_features=labels_to_ri)
        else:
            one_hot = tx.dense_one_hot(column_indices=loss_inputs.tensor,
                                       num_cols=vocab_size)
            train_loss = tx.categorical_cross_entropy(one_hot, train_logits.tensor)
            train_loss = tf.reduce_mean(train_loss)

        if l2_loss:
            losses = [tf.nn.l2_loss(var) for var in var_reg]
            train_loss = train_loss + l2_loss_coef * tf.add_n(losses)

    # ===============================================
    # EVAL GRAPH
    # ===============================================
    with tf.name_scope("eval"):
        one_hot = tx.dense_one_hot(column_indices=eval_inputs.tensor,
                                   num_cols=vocab_size)
        eval_loss = tx.categorical_cross_entropy(one_hot, run_logits.tensor)
        eval_loss = tf.reduce_mean(eval_loss)

    # BUILD MODEL
    super().__init__(run_inputs=run_inputs, run_outputs=embed_prob,
                     train_inputs=run_inputs, train_outputs=train_embed_prob,
                     eval_inputs=run_inputs, eval_outputs=embed_prob,
                     train_out_loss=train_loss, train_in_loss=loss_inputs,
                     eval_out_score=eval_loss, eval_in_score=eval_inputs)
out2 = out2.stack()

""" ******************************************************************************************** """

ta_output = tf.TensorArray(dtype=tf.float32, size=seq_size,
                           tensor_array_name="output_tensors")

# I can't accumulate objects inside a while loop, so I can't use the following in graph mode:
#   cells = []
#   cells.append(tx.RNNCell(x0, n_units=H, previous_cell=None))
#   # use cell[0]
# also, the states would be wrong, so I must use a TensorArray to pass the states

x0 = ta_input.read(0)
x0 = tx.TensorLayer(x0)
cell = tx.RNNCell(x0, M)
ta_output = ta_output.write(0, cell.tensor)

init_vars = (1, ta_output, cell.state)
cond_rnn = lambda i, *_: tf.less(i, seq_size)

print("creating rnn body")


def rnn_unroll(i, y, state):
    xt = ta_input.read(i)
    xt = tx.TensorLayer(xt)
    c = cell.reuse_with(xt, previous_state=state)

    y = y.write(i, c.tensor)
    return i + 1, y, c.state
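# A hedged sketch of how rnn_unroll would be driven by tf.while_loop, assuming
# the variables above (cond_rnn, init_vars, seq_size) are in scope; the state is
# threaded through the loop variables precisely because Python lists cannot
# accumulate values across iterations in graph mode:
_, ta_output, last_state = tf.while_loop(cond=cond_rnn,
                                         body=rnn_unroll,
                                         loop_vars=init_vars,
                                         name="rnn_unroll")
out = ta_output.stack()  # [seq_size, batch, n_units]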
                                          unique=True,
                                          range_max=vocab_size,
                                          seed=None)
sampled, true_expected_count, sampled_expected_count = (tf.stop_gradient(s)
                                                        for s in sampled_values)
sampled = tf.cast(sampled, tf.int64)

all_ids = tf.concat([labels_flat, sampled], 0)
all_ris = tx.gather_sparse(ri_tensor, all_ids)

# retrieve the true weights and the logits of the sampled weights
# weights shape is [num_classes, dim]
ri_layer = tx.TensorLayer(ri_tensor, k)
l = tx.Linear(ri_layer, embed_size, weight_init=tx.random_normal(0, 1), bias=True)
weights = l.weights

sp_values = all_ris
sp_indices = tx.sparse_indices(sp_values)

all_w = tf.nn.embedding_lookup_sparse(weights, sp_indices, sp_values, combiner="sum")

tf.global_variables_initializer().run()

print("labels flat: ", labels_flat.eval())
print("all labels: ", all_ids.eval())
print("ri_tensor \n", all_ris.eval())
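# What embedding_lookup_sparse with combiner="sum" computes, in plain numpy:
# for each row of the sparse input, sum the embedding rows selected by the
# indices, weighted by the corresponding values (toy data, illustrative only).
import numpy as np

np_weights = np.random.rand(6, 4)        # [k, embed_size]
np_indices = [[0, 2], [1, 5]]            # active dimensions per example
np_values = [[1.0, -1.0], [1.0, 1.0]]    # random-index signs
combined = np.stack([sum(v * np_weights[i] for i, v in zip(idx, val))
                     for idx, val in zip(np_indices, np_values)])
assert combined.shape == (2, 4)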
"""
W: width of the image
C: number of channels of the image (e.g. 3 for RGB, 1 for grayscale...)

since we're processing vector representations:

    N == batch_size == 2
    H == 1 (we're working with vectors)
    W == input_dim == 2
    C == channels == 1

NWC: channels are last; we only use one
"""
x = tf.reshape(x_concat, [batch_size, seq_size, input_dim])
x_layer = tx.TensorLayer(x, input_dim)
print(x.eval())
print(x_layer.tensor)

filters = tf.get_variable("filters",
                          shape=filter_shape,
                          dtype=tf.float32,
                          initializer=tf.initializers.random_uniform(-1, 1))
filters = tf.ones(filter_shape)  # overrides the variable above with constant filters

c_layer = tx.Conv1D(x_layer, num_filters, kernel_size, shared_filters=filters)
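# Sanity check on the NWC layout described in the docstring above, using
# tf.nn.conv1d directly with the same toy shapes (illustrative only; assumes an
# active session, as in the surrounding script):
x_nwc = tf.reshape(tf.range(4, dtype=tf.float32), [2, 2, 1])  # [N=2, W=2, C=1]
f_nwc = tf.ones([2, 1, 3])  # conv1d filters: [kernel_width, in_channels, out_channels]
y_nwc = tf.nn.conv1d(x_nwc, f_nwc, stride=1, padding="SAME")
print(y_nwc.eval().shape)  # (2, 2, 3): batch, width, out_channels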
def __init__(self,
             ctx_size,
             vocab_size,
             k_dim,
             ri_tensor: RandomIndexTensor,
             embed_dim,
             embed_init=tx.random_uniform(minval=-0.01, maxval=0.01),
             x_to_f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
             logit_init=tx.random_uniform(minval=-0.01, maxval=0.01),
             embed_share=True,
             logit_bias=False,
             use_gate=True,
             use_hidden=False,
             h_dim=100,
             h_activation=tx.elu,
             h_init=tx.he_normal_init(),
             h_to_f_init=tx.random_uniform(minval=-0.01, maxval=0.01),
             use_dropout=True,
             embed_dropout=False,
             keep_prob=0.95,
             l2_loss=False,
             l2_loss_coef=1e-5):

    # GRAPH INPUTS
    run_inputs = tx.Input(ctx_size, dtype=tf.int32, name="input")
    loss_inputs = tx.Input(n_units=1, dtype=tf.int32, name="target")
    eval_inputs = loss_inputs

    # RUN GRAPH =====================================================
    var_reg = []
    with tf.name_scope("run"):
        # RI ENCODING ===============================================
        # convert ids to ris: gather a set of random indexes based on the ids in a sequence
        # ri_layer = tx.TensorLayer(ri_tensor, n_units=k_dim)
        # ri_inputs = tx.gather_sparse(ri_layer.tensor, run_inputs.tensor)
        with tf.name_scope("ri_encode"):
            # used to compute logits
            if isinstance(ri_tensor, RandomIndexTensor):
                ri_layer = tx.TensorLayer(ri_tensor.to_sparse_tensor(), k_dim)
                ri_inputs = ri_tensor.gather(run_inputs.tensor)
                ri_inputs = ri_inputs.to_sparse_tensor()
                ri_inputs = tx.TensorLayer(ri_inputs, k_dim)
            else:
                ri_layer = tx.TensorLayer(ri_tensor, k_dim)
                ri_inputs = tx.gather_sparse(ri_layer.tensor, run_inputs.tensor)
                ri_inputs = tx.TensorLayer(ri_inputs, k_dim)

        # use those sparse indexes to lookup a set of features based on the ri values
        feature_lookup = tx.Lookup(ri_inputs, ctx_size, [k_dim, embed_dim],
                                   embed_init, name="lookup")
        var_reg.append(feature_lookup.weights)
        feature_lookup = feature_lookup.as_concat()
        # ===========================================================

        if use_gate or use_hidden:
            hl = tx.Linear(feature_lookup, h_dim, h_init, bias=True, name="h_linear")
            ha = tx.Activation(hl, h_activation, name="h_activation")
            h = tx.Compose(hl, ha, name="hidden")
            var_reg.append(hl.weights)

        features = feature_lookup
        if use_gate:
            features = tx.Gate(features, ctx_size, gate_input=h)
            gate = features
            var_reg.append(features.gate_weights)

        x_to_f = tx.Linear(features, embed_dim, x_to_f_init, bias=True, name="x_to_f")
        var_reg.append(x_to_f.weights)
        f_prediction = x_to_f

        if use_hidden:
            h_to_f = tx.Linear(h, embed_dim, h_to_f_init, bias=True, name="h_to_f")
            var_reg.append(h_to_f.weights)
            f_prediction = tx.Add(x_to_f, h_to_f, name="f_predicted")

        # RI DECODING ===============================================
        shared_weights = feature_lookup.weights if embed_share else None
        logit_init = logit_init if not embed_share else None
        # embedding feature vectors for all words: shape [vocab_size, embed_dim]
        # later, for NCE, we don't need to get all the features
        all_embeddings = tx.Linear(ri_layer, embed_dim, logit_init, shared_weights,
                                   name="logits", bias=False)

        # dot product of f_predicted . all_embeddings, with bias for each target word
        run_logits = tx.Linear(f_prediction,
                               n_units=vocab_size,
                               shared_weights=all_embeddings.tensor,
                               transpose_weights=True,
                               bias=logit_bias)
        if not embed_share:
            var_reg.append(all_embeddings.weights)
        # ===========================================================

        run_embed_prob = tx.Activation(run_logits, tx.softmax)

    # TRAIN GRAPH ===================================================
    with tf.name_scope("train"):
        if use_dropout and embed_dropout:
            feature_lookup = feature_lookup.reuse_with(ri_inputs)
            features = tx.Dropout(feature_lookup, probability=keep_prob)
        else:
            features = feature_lookup

        if use_gate or use_hidden:
            if use_dropout:
                h = h.reuse_with(features)
                h = tx.Dropout(h, probability=keep_prob)

            if use_gate:
                features = gate.reuse_with(features, gate_input=h)

            f_prediction = x_to_f.reuse_with(features)

            if use_hidden:
                h_to_f = h_to_f.reuse_with(h)
                if use_dropout:
                    h_to_f = tx.Dropout(h_to_f, probability=keep_prob)
                f_prediction = tx.Add(f_prediction, h_to_f)
        else:
            f_prediction = f_prediction.reuse_with(features)

        # we already define all_embeddings, from which these logits are computed, so this should be ok
        train_logits = run_logits.reuse_with(f_prediction)

        train_embed_prob = tx.Activation(train_logits, tx.softmax, name="train_output")

        one_hot = tx.dense_one_hot(column_indices=loss_inputs.tensor, num_cols=vocab_size)
        train_loss = tx.categorical_cross_entropy(one_hot, train_logits.tensor)

        train_loss = tf.reduce_mean(train_loss)

        if l2_loss:
            losses = [tf.nn.l2_loss(var) for var in var_reg]
            train_loss = train_loss + l2_loss_coef * tf.add_n(losses)

    # EVAL GRAPH ===============================================
    with tf.name_scope("eval"):
        one_hot = tx.dense_one_hot(column_indices=eval_inputs.tensor, num_cols=vocab_size)
        eval_loss = tx.categorical_cross_entropy(one_hot, run_logits.tensor)
        eval_loss = tf.reduce_mean(eval_loss)

    # SETUP MODEL CONTAINER ====================================
    super().__init__(run_inputs=run_inputs, run_outputs=run_embed_prob,
                     train_inputs=run_inputs, train_outputs=train_embed_prob,
                     eval_inputs=run_inputs, eval_outputs=run_embed_prob,
                     train_out_loss=train_loss, train_in_loss=loss_inputs,
                     eval_out_score=eval_loss, eval_in_score=eval_inputs)
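# The weight-tying trick behind run_logits (transpose_weights=True), in plain
# numpy: each word is scored by the dot product between the predicted feature
# vector and that word's embedding (toy shapes, illustrative only).
import numpy as np

np_embeddings = np.random.rand(10, 4)        # [vocab_size, embed_dim]
np_f_predict = np.random.rand(2, 4)          # [batch, embed_dim]
np_logits = np_f_predict @ np_embeddings.T   # [batch, vocab_size]
assert np_logits.shape == (2, 10)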
embed_size = 4

generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = RandomIndexTensor.from_ri_list(ris, k, s)

sp_values = ri_tensor.gather(flat_labels).to_sparse_tensor()
sp_indices = tx.sparse_indices(sp_values)

print(sp_values.get_shape())
print(tensor_util.constant_value_as_shape(sp_values.dense_shape))
print(tensor_util.constant_value(sp_values.dense_shape))
print(sp_values.dense_shape[-1].eval())
print(tf.shape(sp_values).eval())

lookup = tx.Lookup(tx.TensorLayer(sp_values), seq_size=1, lookup_shape=[k, embed_size])
linear = tx.Linear(tx.TensorLayer(sp_values), n_units=k, shared_weights=lookup.weights)

w = embedding_lookup_sparse(params=lookup.weights,
                            sp_ids=sp_indices,
                            sp_weights=sp_values,
                            combiner="sum",
                            partition_strategy="mod")

tf.global_variables_initializer().run()
from tensorflow.python.framework import dtypes, ops
from tensorflow.python.ops import array_ops, math_ops, variables
from tensorflow.python.ops.embedding_ops import embedding_lookup_sparse
from tensorflow.python.ops.nn_impl import _sum_rows


def _sampled_logits_from_parametric_noise(ri_tensors,
                                          k_dim,
                                          weights,
                                          labels,
                                          inputs,
                                          input_dim,
                                          num_true=1,
                                          partition_strategy="mod",
                                          name=None):
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "compute_sampled_logits", weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # true_ris
        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)
        true_w = embedding_lookup_sparse(params=weights,
                                         sp_ids=tx.sparse_indices(true_ris),
                                         sp_weights=true_ris,
                                         combiner="sum",
                                         partition_strategy=partition_strategy)

        label_layer = tx.TensorLayer(true_w, input_dim)
        noise_fn = tx.FC(label_layer, 512, activation=tx.relu)
        noise_fn_sp = tx.ToSparse(noise_fn)
        noise_ris = tx.Linear(noise_fn_sp, k_dim, weight_init=tx.glorot_uniform(), bias=True)
        noise_ris_sp = tx.ToSparse(noise_ris)

        noise_w = embedding_lookup_sparse(params=weights,
                                          sp_ids=tx.sparse_indices(noise_ris_sp.tensor),
                                          sp_weights=noise_ris_sp.tensor,
                                          combiner="sum",
                                          partition_strategy=partition_strategy)
        noise_logits = math_ops.matmul(inputs, noise_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        true_w_e = array_ops.reshape(true_w, new_true_w_shape)

        row_wise_dots = math_ops.multiply(array_ops.expand_dims(inputs, 1), true_w_e)

        # we want the row-wise dot plus biases, which yields a
        # [batch_size, num_true] tensor of true_logits
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        # construct output logits and labels; the true labels/logits start at col 0
        out_logits = array_ops.concat([true_logits, noise_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(noise_logits)
        ], 1)

        # out_logits = out_logits * math_ops.exp(partition_const)
        # out_logits = out_logits / (partition_const + 1)

    return out_logits, out_labels
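# The logits/labels layout built above, checked in isolation with numpy
# (toy values, num_true=1): true logits occupy the first num_true columns and
# each example's label row sums to 1.0.
import numpy as np

np_batch, np_num_true, np_num_sampled = 2, 1, 3
np_true_logits = np.random.rand(np_batch, np_num_true)
np_noise_logits = np.random.rand(np_batch, np_num_sampled)
np_out_logits = np.concatenate([np_true_logits, np_noise_logits], axis=1)
np_out_labels = np.concatenate([np.ones_like(np_true_logits) / np_num_true,
                                np.zeros_like(np_noise_logits)], axis=1)
assert np_out_logits.shape == (np_batch, np_num_true + np_num_sampled)
assert np.allclose(np_out_labels.sum(axis=1), 1.0)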
import os

import numpy as np
import tensorflow as tf
import tensorx as tx

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

n_features = 3
embed_size = 4
cell_units = 2
seq_size = 3
batch_size = 2

inputs = tx.TensorLayer(np.random.random([batch_size, seq_size]),
                        n_units=seq_size,
                        dtype=tf.int32)
lookup = tx.Lookup(inputs, seq_size=seq_size, lookup_shape=[n_features, embed_size])
seq = lookup.permute_batch_time()

# first step of a sequence
t1 = seq[0]

ks_cell = tf.keras.layers.LSTMCell(units=cell_units)
tf_cell = tf.nn.rnn_cell.LSTMCell(num_units=cell_units, state_is_tuple=True)
tx_cell = tx.LSTMCell(t1, n_units=cell_units)

kernel_w = [
    tx_cell.w_i.weights,
    tx_cell.w_c.weights,
    tx_cell.w_f.weights,
    tx_cell.w_o.weights
]
gate = tx.Linear(h, 2, bias=True)
gate = tx.Activation(gate, tx.sigmoid)

# lookup might output a sequence format with [batch, seq_size, m_dim]
# lookup_out = lookup.tensor
lookup_out = tf.reshape(lookup.tensor, [-1, seq_size, m_dim])  # reshape works anyway
gated_out = tf.reshape(lookup_out, [-1, seq_size, m_dim]) * tf.expand_dims(gate.tensor, -1)
# gated_out = tf.reshape(gated_out, [-1, seq_size * m_dim])
# gated_out = tf.reshape(gated_out, [-1, lookup.n_units])
gated_out = tf.reshape(gated_out, tf.shape(lookup.tensor))
gated_out = tx.TensorLayer(gated_out, lookup.n_units)

# END GATING MECHANISM

y = tx.Linear(gated_out, m_dim, bias=True)

ss.run(tf.global_variables_initializer())

lookup_out = lookup.tensor.eval({inputs.placeholder: w})
assert np.shape(lookup_out) == (3, 2 * m_dim)
print(np.shape(lookup_out))

gated_out = gated_out.tensor.eval({inputs.placeholder: w})
print(np.shape(gated_out))

gate_values = gate.tensor.eval({inputs.placeholder: w})
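# The gating broadcast above, checked with numpy (illustrative shapes): one
# gate value per sequence step is expanded via expand_dims and broadcast
# across the m_dim features of that step.
import numpy as np

np_batch, np_seq, np_m = 3, 2, 4
np_lookup = np.random.rand(np_batch, np_seq, np_m)
np_gate = np.random.rand(np_batch, np_seq)
np_gated = np_lookup * np_gate[:, :, np.newaxis]
assert np_gated.shape == (np_batch, np_seq, np_m)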