def draw(seed):
  with self.cached_session():
    true_classes = constant_op.constant(
        [[1, 2], [0, 4], [3, 3]], dtype=dtypes.int64)
    sampled, _, _ = candidate_sampling_ops.log_uniform_candidate_sampler(
        true_classes, self.NUM_TRUE, self.NUM_SAMPLED, True, 5, seed=seed)
    return self.evaluate(sampled)
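# A minimal usage sketch for `draw` (hypothetical; assumes it is defined
# inside a tf.test.TestCase method whose class provides NUM_TRUE and
# NUM_SAMPLED): fixing the seed makes the sampler deterministic, so equal
# seeds must reproduce the same candidates.
self.assertAllEqual(draw(seed=17), draw(seed=17))
# With seed=0 the op chooses its own seed, so two draws may differ.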
def _compute_sampled_logits(outfile, weights, biases, inputs, labels,
                            num_sampled, num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
  if not isinstance(weights, list):
    weights = [weights]
  with ops.name_scope(name, "compute_sampled_logits",
                      weights + [biases, inputs, labels]):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    labels_flat = array_ops.reshape(labels, [-1])

    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    sampled, true_expected_count, sampled_expected_count = sampled_values

    all_ids = array_ops.concat(0, [labels_flat, sampled])
    # NOTE: `outfile` is accepted for compatibility with the caller but is
    # not used; embedding_lookup takes (params, ids).
    all_w = embedding_ops.embedding_lookup(
        weights, all_ids, partition_strategy=partition_strategy)
    all_b = embedding_ops.embedding_lookup(biases, all_ids)

    true_w = array_ops.slice(
        all_w, [0, 0], array_ops.pack([array_ops.shape(labels_flat)[0], -1]))
    true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat))
    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim])
    row_wise_dots = math_ops.mul(
        array_ops.expand_dims(inputs, 1),
        array_ops.reshape(true_w, new_true_w_shape))
    dots_as_matrix = array_ops.reshape(row_wise_dots,
                                       array_ops.concat(0, [[-1], dim]))
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits += true_b

    sampled_w = array_ops.slice(
        all_w, array_ops.pack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])
    sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1])
    sampled_logits = math_ops.matmul(
        inputs, sampled_w, transpose_b=True) + sampled_b

    if remove_accidental_hits:
      acc_hits = candidate_sampling_ops.compute_accidental_hits(
          labels, sampled, num_true=num_true)
      acc_indices, acc_ids, acc_weights = acc_hits
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(
          math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      sampled_logits_shape = array_ops.concat(
          0, [array_ops.shape(labels)[:1],
              array_ops.expand_dims(num_sampled, 0)])
      if sampled_logits.dtype != acc_weights.dtype:
        acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
      sampled_logits += sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights,
          default_value=0.0, validate_indices=False)

    if subtract_log_q:
      true_logits -= math_ops.log(true_expected_count)
      sampled_logits -= math_ops.log(sampled_expected_count)

    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    out_labels = array_ops.concat(
        1, [array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)])

    return out_logits, out_labels
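# The snippet above calls `_sum_rows`, which is defined elsewhere in
# TensorFlow's nn implementation. For reference, the upstream helper is
# essentially the following:
def _sum_rows(x):
  """Returns a vector summing up each row of the matrix x."""
  # Implemented as a matmul with a ones vector rather than reduce_sum(x, 1);
  # the TensorFlow source notes this was faster because reduce_sum was a
  # composite op.
  cols = array_ops.shape(x)[1]
  ones_shape = array_ops.pack([cols, 1])  # array_ops.stack in later versions
  ones = array_ops.ones(ones_shape, x.dtype)
  return array_ops.reshape(math_ops.matmul(x, ones), [-1])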
import math

import tensorflow as tf
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import candidate_sampling_ops
from tensorflow.python.ops import embedding_ops
from tensorflow.python.ops import math_ops

vocab_size = 5000
embd_size = 100
batch_size = 1

# Randomly initialize a context feature of batch size 1.
inputs = tf.Variable(
    tf.truncated_normal([batch_size, embd_size],
                        stddev=1.0 / math.sqrt(embd_size)))
labels = tf.Variable(tf.constant([[23]], dtype=tf.int64))
sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
    true_classes=labels,
    num_true=1,
    num_sampled=5,
    unique=True,
    range_max=vocab_size,
    seed=None)
sampled_id, true_expected_count, sampled_expected_count = sampled_values
labels_flat = array_ops.reshape(labels, [-1])
all_ids = array_ops.concat([labels_flat, sampled_id], 0)
weights = tf.Variable(
    tf.truncated_normal([vocab_size, embd_size],
                        stddev=1.0 / math.sqrt(embd_size)))
biases = tf.Variable(tf.zeros([vocab_size]))
all_w = embedding_ops.embedding_lookup(weights, all_ids)
all_b = embedding_ops.embedding_lookup(biases, all_ids)
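# A sketch of how this fragment could continue (assumptions: TF 1.x API and
# num_true=1), splitting the combined lookups back into true and sampled
# halves, mirroring the `_compute_sampled_logits` variants below:
true_w = array_ops.slice(
    all_w, [0, 0], array_ops.stack([array_ops.shape(labels_flat)[0], -1]))
true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat))
sampled_w = array_ops.slice(
    all_w, array_ops.stack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])
sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1])

# With num_true=1 the row-wise dot product is a single sum per example:
# true_logits is [batch_size, 1], sampled_logits is [batch_size, num_sampled].
true_logits = array_ops.reshape(
    math_ops.reduce_sum(math_ops.multiply(inputs, true_w), 1), [-1, 1])
true_logits += array_ops.reshape(true_b, [-1, 1])
sampled_logits = math_ops.matmul(
    inputs, sampled_w, transpose_b=True) + sampled_b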
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
  """Helper function for nce_loss and sampled_softmax_loss functions.

  Computes sampled output training logits and labels suitable for
  implementing e.g. noise-contrastive estimation (see nce_loss) or sampled
  softmax (see sampled_softmax_loss).

  Note: In the case where num_true > 1, we assign to each target class
  the target probability 1 / num_true so that the target probabilities
  sum to 1 per-example.

  Args:
    weights: A `Tensor` of shape `[num_classes, dim]`, or a list of `Tensor`
        objects whose concatenation along dimension 0 has shape
        `[num_classes, dim]`.  The (possibly-partitioned) class embeddings.
    biases: A `Tensor` of shape `[num_classes]`.  The class biases.
    inputs: A `Tensor` of shape `[batch_size, dim]`.  The forward
        activations of the input network.
    labels: A `Tensor` of type `int64` and shape `[batch_size, num_true]`.
        The target classes.  Note that this format differs from the
        `labels` argument of `nn.softmax_cross_entropy_with_logits`.
    num_sampled: An `int`.  The number of classes to randomly sample per
        batch.
    num_classes: An `int`.  The number of possible classes.
    num_true: An `int`.  The number of target classes per training example.
    sampled_values: a tuple of (`sampled_candidates`, `true_expected_count`,
        `sampled_expected_count`) returned by a `*_candidate_sampler`
        function.  (if None, we default to `log_uniform_candidate_sampler`)
    subtract_log_q: A `bool`.  Whether to subtract the log expected count of
        the labels in the sample to get the logits of the true labels.
        Default is True.  Turn off for Negative Sampling.
    remove_accidental_hits: A `bool`.  Whether to remove "accidental hits"
        where a sampled class equals one of the target classes.  Default is
        False.
    partition_strategy: A string specifying the partitioning strategy,
        relevant if `len(weights) > 1`.  Currently `"div"` and `"mod"` are
        supported.  Default is `"mod"`.  See `tf.nn.embedding_lookup` for
        more details.
    name: A name for the operation (optional).

  Returns:
    out_logits, out_labels: `Tensor` objects each with shape
        `[batch_size, num_true + num_sampled]`, for passing to either
        `nn.sigmoid_cross_entropy_with_logits` (NCE) or
        `nn.softmax_cross_entropy_with_logits` (sampled softmax).
  """
  if not isinstance(weights, list):
    weights = [weights]
  with ops.op_scope(weights + [biases, inputs, labels], name,
                    "compute_sampled_logits"):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    labels_flat = array_ops.reshape(labels, [-1])

    # Sample the negative labels.
    #   sampled shape: [num_sampled] tensor
    #   true_expected_count shape = [batch_size, 1] tensor
    #   sampled_expected_count shape = [num_sampled] tensor
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # labels_flat is a [batch_size * num_true] tensor
    # sampled is a [num_sampled] int tensor
    all_ids = array_ops.concat(0, [labels_flat, sampled])

    # weights shape is [num_classes, dim]
    all_w = embedding_ops.embedding_lookup(
        weights, all_ids, partition_strategy=partition_strategy)
    all_b = embedding_ops.embedding_lookup(biases, all_ids)
    # true_w shape is [batch_size * num_true, dim]
    # true_b is a [batch_size * num_true] tensor
    true_w = array_ops.slice(
        all_w, [0, 0], array_ops.pack([array_ops.shape(labels_flat)[0], -1]))
    true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat))

    # inputs shape is [batch_size, dim]
    # true_w shape is [batch_size * num_true, dim]
    # row_wise_dots is [batch_size, num_true, dim]
    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim])
    row_wise_dots = math_ops.mul(
        array_ops.expand_dims(inputs, 1),
        array_ops.reshape(true_w, new_true_w_shape))
    # We want the row-wise dot plus biases which yields a
    # [batch_size, num_true] tensor of true_logits.
    dots_as_matrix = array_ops.reshape(row_wise_dots,
                                       array_ops.concat(0, [[-1], dim]))
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits += true_b

    # Lookup weights and biases for sampled labels.
    #   sampled_w shape is [num_sampled, dim]
    #   sampled_b is a [num_sampled] float tensor
    sampled_w = array_ops.slice(
        all_w, array_ops.pack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])
    sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1])

    # inputs has shape [batch_size, dim]
    # sampled_w has shape [num_sampled, dim]
    # sampled_b has shape [num_sampled]
    # Apply X*W'+B, which yields [batch_size, num_sampled]
    sampled_logits = math_ops.matmul(
        inputs, sampled_w, transpose_b=True) + sampled_b

    if remove_accidental_hits:
      acc_hits = candidate_sampling_ops.compute_accidental_hits(
          labels, sampled, num_true=num_true)
      acc_indices, acc_ids, acc_weights = acc_hits

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(
          math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # Create sampled_logits_shape = [batch_size, num_sampled]
      sampled_logits_shape = array_ops.concat(
          0, [array_ops.shape(labels)[:1],
              array_ops.expand_dims(num_sampled, 0)])
      if sampled_logits.dtype != acc_weights.dtype:
        acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
      sampled_logits += sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights,
          default_value=0.0, validate_indices=False)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits -= math_ops.log(true_expected_count)
      sampled_logits -= math_ops.log(sampled_expected_count)

    # Construct output logits and labels. The true labels/logits start at
    # col 0.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
    # of ones. We then divide by num_true to ensure the per-example labels
    # sum to 1.0, i.e. form a proper probability distribution.
    out_labels = array_ops.concat(
        1, [array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)])

  return out_logits, out_labels
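# A sketch of how an nce_loss-style caller consumes the pair returned above
# (same-era API assumed: sigmoid_cross_entropy_with_logits took positional
# (logits, targets); nn_ops import and `_sum_rows` as in the library):
logits, labels_out = _compute_sampled_logits(
    weights, biases, inputs, labels, num_sampled, num_classes,
    num_true=num_true, remove_accidental_hits=True)
sampled_losses = nn_ops.sigmoid_cross_entropy_with_logits(logits, labels_out)
# sampled_losses is [batch_size, num_true + num_sampled]; summing each row
# gives one NCE loss value per example.
per_example_loss = _sum_rows(sampled_losses)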
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            name=None):
  """Helper function for nce_loss and sampled_softmax_loss functions.

  Computes sampled output training logits and labels suitable for
  implementing e.g. noise-contrastive estimation (see nce_loss) or sampled
  softmax (see sampled_softmax_loss).

  Note: In the case where num_true > 1, we assign to each target class
  the target probability 1 / num_true so that the target probabilities
  sum to 1 per-example.

  Args:
    weights: tensor of label embeddings with shape = [num_classes, dim]
    biases: tensor of num_classes label biases
    inputs: tensor with shape = [batch_size, dim] corresponding to forward
        activations of the input network
    labels: int tensor with shape [batch_size, num_true]
    num_sampled: number of label classes to sample per batch
    num_classes: number of possible label classes in the data (e.g. vocab
        size)
    num_true: number of target classes per example (default: 1)
    sampled_values: a tuple of (sampled_candidates, true_expected_count,
        sampled_expected_count) returned by a *CandidateSampler function to
        use (if None, we default to LogUniformCandidateSampler)
    subtract_log_q: subtract the log expected count of the labels in the
        sample to get the logits of the true labels (default: True)
        Turn off for Negative Sampling.
    remove_accidental_hits: whether to remove "accidental hits" where a
        sampled label equals the true labels (bool, default: False)
    name: name for this op

  Returns:
    out_logits, out_labels: tensors with shape
        [batch_size, num_true + num_sampled] for passing to either
        SigmoidCrossEntropyWithLogits (NCE) or
        SoftmaxCrossEntropyWithLogits (sampled softmax).
  """
  with ops.op_scope([weights, biases, inputs, labels], name,
                    "compute_sampled_logits"):
    if labels.dtype != types.int64:
      labels = math_ops.cast(labels, types.int64)
    labels_flat = array_ops.reshape(labels, [-1])

    # Sample the negative labels.
    #   sampled shape: num_sampled vector
    #   true_expected_count shape = [batch_size, 1]
    #   sampled_expected_count shape = num_sampled vector
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # weights shape is [num_classes, dim]
    # labels_flat is a [batch_size * num_true] vector
    # true_w shape is [batch_size * num_true, dim]
    # true_b is a [batch_size * num_true] vector
    true_w = embedding_ops.embedding_lookup(weights, labels_flat)
    true_b = embedding_ops.embedding_lookup(biases, labels_flat)

    # inputs shape is [batch_size, dim]
    # true_w shape is [batch_size * num_true, dim]
    # row_wise_dots is [batch_size, num_true, dim]
    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim])
    row_wise_dots = math_ops.mul(
        array_ops.expand_dims(inputs, 1),
        array_ops.reshape(true_w, new_true_w_shape))
    # We want the row-wise dot plus biases which yields a
    # [batch_size, num_true] tensor of true_logits.
    dots_as_matrix = array_ops.reshape(row_wise_dots,
                                       array_ops.concat(0, [[-1], dim]))
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits += true_b

    # Lookup weights and biases for sampled labels.
    #   sampled is a num_sampled int vector
    #   sampled_w shape is [num_sampled, dim]
    #   sampled_b is a num_sampled float vector
    sampled_w = embedding_ops.embedding_lookup(weights, sampled)
    sampled_b = embedding_ops.embedding_lookup(biases, sampled)

    # inputs has shape [batch_size, dim]
    # sampled_w has shape [num_sampled, dim]
    # sampled_b has shape [num_sampled]
    # Apply X*W'+B, which yields [batch_size, num_sampled]
    sampled_logits = math_ops.matmul(
        inputs, sampled_w, transpose_b=True) + sampled_b

    if remove_accidental_hits:
      acc_hits = candidate_sampling_ops.compute_accidental_hits(
          labels, sampled, num_true=num_true)
      acc_indices, acc_ids, acc_weights = acc_hits

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(
          math_ops.cast(acc_ids, types.int32), [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # Create sampled_logits_shape = [batch_size, num_sampled]
      sampled_logits_shape = array_ops.concat(0, [
          array_ops.shape(labels)[:1],
          array_ops.expand_dims(num_sampled, 0)
      ])
      sampled_logits += sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights, 0.0)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits -= math_ops.log(true_expected_count)
      sampled_logits -= math_ops.log(sampled_expected_count)

    # Construct output logits and labels. The true labels/logits start at
    # col 0.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
    # of ones. We then divide by num_true to ensure the per-example labels
    # sum to 1.0, i.e. form a proper probability distribution.
    out_labels = array_ops.concat(1, [
        array_ops.ones_like(true_logits) / num_true,
        array_ops.zeros_like(sampled_logits)
    ])

  return out_logits, out_labels
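# The sampled-softmax counterpart (a sketch; same-era API assumed, where
# softmax_cross_entropy_with_logits took positional (logits, labels)). The
# log-Q correction is required for sampled softmax (see the docstring
# above), so subtract_log_q stays on here:
logits, labels_out = _compute_sampled_logits(
    weights, biases, inputs, labels, num_sampled, num_classes,
    num_true=num_true, subtract_log_q=True, remove_accidental_hits=True)
# One loss value per example over the [num_true + num_sampled] columns.
per_example_loss = nn_ops.softmax_cross_entropy_with_logits(logits, labels_out)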
def build(self):
  embed = tf.nn.embedding_lookup(self.embeddings, self.context)
  norm = tf.sqrt(tf.reduce_sum(tf.square(embed), 2, keepdims=False))
  normalized_embed = embed / tf.tile(
      tf.expand_dims(norm, 2),
      multiples=[1, 1, self.config["embedding_size"]])
  normalized_embed_ket = tf.expand_dims(normalized_embed, -1)
  normalized_embed_bra = tf.transpose(normalized_embed_ket, perm=[0, 1, 3, 2])
  outer_product = tf.matmul(normalized_embed_ket, normalized_embed_bra)
  softmax_norm = tf.nn.softmax(norm, 1)
  expand_norm = tf.expand_dims(tf.expand_dims(softmax_norm, 2), 3)
  density_matrix = tf.reduce_sum(outer_product * expand_norm,
                                 1)  # [None, dim, dim]

  with tf.name_scope('loss'):
    target_expand = tf.expand_dims(
        math_ops.cast(self.target, dtypes.int64), 1)
    # target_flat = array_ops.reshape(target_expand, [-1])
    sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
        true_classes=target_expand,
        num_true=self.config["num_true"],
        num_sampled=self.config["num_sampled"],
        unique=True,
        range_max=self.config["vocabulary_size"])
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = (
        array_ops.stop_gradient(s) for s in sampled_values)
    sampled = math_ops.cast(sampled, dtypes.int64)  # [64]
    # all_ids = array_ops.concat([target, sampled], 0)

    sampled_embedding = tf.nn.embedding_lookup(self.embeddings, sampled)
    sampled_embedding_ket = tf.expand_dims(sampled_embedding, 2)
    sampled_embedding_bra = tf.transpose(sampled_embedding_ket,
                                         perm=[0, 2, 1])
    sampled_embedding_outer_product = tf.matmul(
        sampled_embedding_ket,
        sampled_embedding_bra)  # [num_sampled, dim, dim]

    density_matrix_reshape = tf.reshape(density_matrix, [
        -1, self.config["embedding_size"] * self.config["embedding_size"]
    ])
    sampled_embedding_outer_product_reshape_transpose = tf.transpose(
        tf.reshape(sampled_embedding_outer_product, [
            -1, self.config["embedding_size"] * self.config["embedding_size"]
        ]),
        perm=[1, 0])
    negative_sample_prob = tf.matmul(
        density_matrix_reshape,
        sampled_embedding_outer_product_reshape_transpose)  # [None, 64]

    true_embedding = tf.nn.embedding_lookup(self.embeddings, self.target)
    true_embedding_ket = tf.expand_dims(true_embedding, 2)
    true_embedding_bra = tf.transpose(true_embedding_ket, perm=[0, 2, 1])
    true_embedding_outer_product = tf.matmul(
        true_embedding_ket, true_embedding_bra)  # [None, dim, dim]
    true_prob = tf.trace(
        tf.matmul(true_embedding_outer_product, density_matrix))
    true_prob_expand = tf.expand_dims(true_prob, 1)  # [None, 1]

    out_labels = array_ops.concat([
        array_ops.ones_like(true_prob_expand) / self.config["num_true"],
        array_ops.zeros_like(negative_sample_prob)
    ], 1)
    out_logits = array_ops.concat(
        [true_prob_expand, negative_sample_prob], 1)  # [None, 1+num_sampled]
    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=out_labels,
                                                logits=out_logits))
  tf.summary.scalar('loss', self.loss)

  with tf.name_scope('optimizer'):
    self.optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(
        self.loss)

  # Compute the cosine similarity between minibatch examples and all
  # embeddings.
  valid_embeddings = tf.nn.embedding_lookup(
      self.global_normalized_embedding, self.valid_dataset)
  self.similarity = tf.matmul(valid_embeddings,
                              self.global_normalized_embedding,
                              transpose_b=True)

  # Merge all summaries.
  self.merged = tf.summary.merge_all()
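# What the loss above computes, in plain NumPy (a sketch; names are
# illustrative only): the score of a candidate word w against the mixture
# density matrix rho is trace(rho @ |w><w|), which equals w^T rho w.
import numpy as np

dim = 4
v = np.random.randn(2, dim)
v /= np.linalg.norm(v, axis=1, keepdims=True)  # normalized context vectors
p = np.array([0.3, 0.7])                       # mixture weights, sum to 1
# rho = sum_i p_i |v_i><v_i| is symmetric, PSD, with unit trace.
rho = p[0] * np.outer(v[0], v[0]) + p[1] * np.outer(v[1], v[1])
w = np.random.randn(dim)                       # a candidate embedding
score = np.trace(rho @ np.outer(w, w))
assert np.isclose(score, w @ rho @ w)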
def _compute_sampled_logits(self, weights, biases, labels, inputs,
                            num_sampled, num_classes, transmissibility,
                            num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None,
                            seed=None):
  if isinstance(weights, variables.PartitionedVariable):
    weights = list(weights)
  if not isinstance(weights, list):
    weights = [weights]

  with ops.name_scope(name, "compute_sampled_logits",
                      weights + [biases, inputs, labels]):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    if labels.shape.ndims == 1:
      labels = array_ops.expand_dims(labels, -1)
    labels_flat = array_ops.reshape(labels, [-1])

    # Sample the negative labels.
    #   sampled shape: [num_sampled] tensor
    #   true_expected_count shape = [batch_size, 1] tensor
    #   sampled_expected_count shape = [num_sampled] tensor
    # The num_sampled negatives are drawn over the whole vocabulary
    # (range_max).
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes,
          seed=seed)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = (
        array_ops.stop_gradient(s) for s in sampled_values)
    # pylint: enable=unpacking-non-sequence
    sampled = math_ops.cast(sampled, dtypes.int64)

    # labels_flat is a [batch_size * num_true] tensor
    # sampled is a [num_sampled] int tensor
    all_ids = array_ops.concat([labels_flat, sampled], 0)

    # Retrieve the true weights and the logits of the sampled weights.
    # weights shape is [num_classes, dim]
    # 128 similar node pairs plus 5 dissimilar nodes (i.e. 128*5 dissimilar
    # node pairs).
    all_w = embedding_ops.embedding_lookup(
        weights, all_ids, partition_strategy=partition_strategy)

    # true_w shape is [batch_size * num_true, dim] -> 128 * 100
    true_w = array_ops.slice(
        all_w, [0, 0], array_ops.stack([array_ops.shape(labels_flat)[0], -1]))
    # 5 * 100
    sampled_w = array_ops.slice(
        all_w, array_ops.stack([array_ops.shape(labels_flat)[0], 0]),
        [-1, -1])

    # inputs has shape [batch_size, dim]
    # sampled_w has shape [num_sampled, dim]
    # Apply X*W', which yields [batch_size, num_sampled]
    # Each of the 128 input nodes is compared against the 5 dissimilar
    # nodes, giving a 128 * 5 matrix of pairwise node similarities.
    sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

    # Retrieve the true and sampled biases, compute the true logits, and
    # add the biases to the true and sampled logits.
    all_b = embedding_ops.embedding_lookup(
        biases, all_ids, partition_strategy=partition_strategy)
    # true_b is a [batch_size * num_true] tensor
    # sampled_b is a [num_sampled] float tensor
    true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat))
    sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1])

    # inputs shape is [batch_size, dim]
    # true_w shape is [batch_size * num_true, dim]
    # row_wise_dots is [batch_size, num_true, dim]
    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
    row_wise_dots = math_ops.multiply(
        array_ops.expand_dims(inputs, 1),
        array_ops.reshape(true_w, new_true_w_shape))
    # We want the row-wise dot plus biases which yields a
    # [batch_size, num_true] tensor of true_logits.
    dots_as_matrix = array_ops.reshape(row_wise_dots,
                                       array_ops.concat([[-1], dim], 0))
    true_logits = array_ops.reshape(self._sum_rows(dots_as_matrix),
                                    [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    # Similar node pairs yield a 128*1 comparison result; dissimilar pairs
    # yield 128*5.
    true_logits += true_b
    sampled_logits += sampled_b

    if remove_accidental_hits:
      acc_hits = candidate_sampling_ops.compute_accidental_hits(
          labels, sampled, num_true=num_true)
      acc_indices, acc_ids, acc_weights = acc_hits

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(
          math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
      sparse_indices = array_ops.concat(
          [acc_indices_2d, acc_ids_2d_int32], 1, "sparse_indices")
      # Create sampled_logits_shape = [batch_size, num_sampled]
      sampled_logits_shape = array_ops.concat(
          [array_ops.shape(labels)[:1],
           array_ops.expand_dims(num_sampled, 0)], 0)
      if sampled_logits.dtype != acc_weights.dtype:
        acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
      sampled_logits += sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights,
          default_value=0.0, validate_indices=False)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits -= math_ops.log(true_expected_count)
      sampled_logits -= math_ops.log(sampled_expected_count)

    # Construct output logits and labels. The true labels/logits start at
    # col 0.
    out_logits = array_ops.concat([true_logits, sampled_logits], 1)
    # true_logits is a float tensor, ones_like(true_logits) is a float
    # tensor of ones. We then divide by num_true to ensure the per-example
    # labels sum to 1.0, i.e. form a proper probability distribution.
    out_labels = array_ops.concat(
        [
            transmissibility,
            # array_ops.ones_like(true_logits) / num_true,
            # array_ops.zeros_like(sampled_logits)
        ],
        1)

  return out_logits, out_labels
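# `transmissibility` replaces the hard 1/num_true targets above; it must
# have shape [batch_size, num_true + num_sampled]. A hypothetical example of
# soft targets that still form a per-row distribution (the sizes and the
# 0.9/0.1 split are assumptions, not the author's choice):
batch_size, num_true, num_sampled = 128, 1, 5  # assumed sizes
transmissibility = array_ops.concat(
    [array_ops.fill([batch_size, num_true], 0.9 / num_true),
     array_ops.fill([batch_size, num_sampled], 0.1 / num_sampled)], 1)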
def _compute_sampled_logits(embedding, biases, labels, inputs, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
  if not isinstance(labels, list):
    labels = [labels]
  if not isinstance(num_classes, list):
    num_classes = [num_classes]

  with ops.name_scope(name, "compute_sampled_logits",
                      [embedding, biases, inputs, labels]):
    for i in range(len(labels)):
      if labels[i].dtype != dtypes.int64:
        labels[i] = math_ops.cast(labels[i], dtypes.int64)
    labels_flat = [array_ops.reshape(label, [-1]) for label in labels]

    if sampled_values is None:
      sampled_values = [
          candidate_sampling_ops.log_uniform_candidate_sampler(
              true_classes=true_classes,
              num_true=num_true,
              num_sampled=num_sampled,
              unique=True,
              range_max=range_max)
          for true_classes, range_max in zip(labels, num_classes)
      ]

    true_w = embedding(*labels_flat)
    true_logits = _compute_true_logits(inputs, true_w, num_true)

    sampled_w = embedding(*[sample[0] for sample in sampled_values])
    sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

    true_b = biases(*labels_flat)
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits += true_b

    sampled_b = biases(*[sample[0] for sample in sampled_values])
    sampled_logits += sampled_b

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits -= math_ops.add_n(
          [math_ops.log(sample[1]) for sample in sampled_values])
      sampled_logits -= math_ops.add_n(
          [math_ops.log(sample[2]) for sample in sampled_values])

    # Construct output logits and labels. The true labels/logits start at
    # col 0.
    out_logits = array_ops.concat([true_logits, sampled_logits], 1)
    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
    # of ones. We then divide by num_true to ensure the per-example labels
    # sum to 1.0, i.e. form a proper probability distribution.
    out_labels = array_ops.concat([
        array_ops.ones_like(true_logits) / num_true,
        array_ops.zeros_like(sampled_logits)
    ], 1)

  return out_logits, out_labels
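# In this variant, `embedding` and `biases` are callables rather than
# tensors, and `_compute_true_logits` is defined elsewhere. Hypothetical
# sketches matching the call sites above (assumptions, not the original
# author's code):
def _compute_true_logits(inputs, true_w, num_true):
  # inputs: [batch_size, dim]; true_w: [batch_size * num_true, dim].
  dim = array_ops.shape(true_w)[1:2]
  row_wise_dots = math_ops.multiply(
      array_ops.expand_dims(inputs, 1),
      array_ops.reshape(true_w, array_ops.concat([[-1, num_true], dim], 0)))
  dots_as_matrix = array_ops.reshape(row_wise_dots,
                                     array_ops.concat([[-1], dim], 0))
  return array_ops.reshape(
      math_ops.reduce_sum(dots_as_matrix, 1), [-1, num_true])

def make_lookup(table):
  # One plausible reading of embedding(*labels_flat): embed each id tensor
  # in `table` and sum the per-field results.
  def lookup(*id_tensors):
    return math_ops.add_n(
        [embedding_ops.embedding_lookup(table, ids) for ids in id_tensors])
  return lookup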
def build(self, weights=None):
  phase_part, amplitude_part = self.getEmbedding(self.context)
  real, imag = self.transform(amplitude_part, phase_part)
  norm = tf.sqrt(
      tf.reduce_sum(tf.square(amplitude_part), 2, keepdims=False))
  softmax_norm = tf.nn.softmax(norm, 1)
  density_real, density_imag = self.outer_product(real, imag)
  # if weights == None:
  #   density_real_mixture = tf.reduce_mean(density_real, axis=1)
  #   density_imag_mixture = tf.reduce_mean(density_imag, axis=1)
  # else:
  expand_norm = tf.expand_dims(tf.expand_dims(softmax_norm, 2), 3)
  density_real_mixture, density_imag_mixture = [
      tf.reduce_sum(item * expand_norm, 1)
      for item in (density_real, density_imag)
  ]

  with tf.name_scope('loss'):
    target_expand = tf.expand_dims(
        math_ops.cast(self.target, dtypes.int64), 1)
    # target_flat = array_ops.reshape(target_expand, [-1])
    sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
        true_classes=target_expand,
        num_true=self.config["num_true"],
        num_sampled=self.config["num_sampled"],
        unique=True,
        range_max=self.config["vocabulary_size"])
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = (
        array_ops.stop_gradient(s) for s in sampled_values)
    sampled = math_ops.cast(sampled, dtypes.int64)
    # all_ids = array_ops.concat([target, sampled], 0)

    sampled_real_embedding, sampled_imag_embedding = self.getEmbedding(
        sampled)
    sampled_amplitude_embedding, sampled_phase_embedding = self.transform(
        sampled_real_embedding, sampled_imag_embedding)
    sampled_real_outer, sampled_imag_outer = self.outer_product(
        sampled_amplitude_embedding, sampled_phase_embedding)
    negative_sample_prob = self.inner_product(
        [density_real_mixture, density_imag_mixture],
        [sampled_real_outer, sampled_imag_outer])

    # Projection (Dirac) multiplication for the true target.
    true_amplitude_embedding, true_phase_embedding = self.getEmbedding(
        self.target)
    true_real_embedding, true_imag_embedding = self.transform(
        true_amplitude_embedding, true_phase_embedding)
    true_real_outer, true_imag_outer = self.outer_product(
        true_real_embedding, true_imag_embedding)
    true_prob = tf.trace(
        tf.matmul(density_real_mixture, true_real_outer)) + tf.trace(
            tf.matmul(density_imag_mixture, true_imag_outer))
    # Projection-wise multiplication.
    true_prob_expand = tf.expand_dims(true_prob, 1)

    out_labels = array_ops.concat([
        array_ops.ones_like(true_prob_expand) / self.config["num_true"],
        array_ops.zeros_like(negative_sample_prob)
    ], 1)
    out_logits = array_ops.concat(
        [true_prob_expand, negative_sample_prob], 1)
    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=out_labels,
                                                logits=out_logits))
  tf.summary.scalar('loss', self.loss)

  with tf.name_scope('optimizer'):
    self.optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(
        self.loss)

  # Compute the cosine similarity between minibatch examples and all
  # embeddings.
  valid_embeddings = tf.nn.embedding_lookup(
      self.global_normalized_embedding, self.valid_dataset)
  self.similarity = tf.matmul(valid_embeddings,
                              self.global_normalized_embedding,
                              transpose_b=True)

  # Merge all summaries.
  self.merged = tf.summary.merge_all()
def _compute_sampled_logits(weights, biases, inputs, labels, num_sampled,
                            num_classes, num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None):
  if not isinstance(weights, list):
    weights = [weights]
  with ops.name_scope(name, "compute_sampled_logits",
                      weights + [biases, inputs, labels]):
    if labels.dtype != dtypes.int64:
      labels = math_ops.cast(labels, dtypes.int64)
    labels_flat = array_ops.reshape(labels, [-1])

    # Sample the negative labels.
    #   sampled shape: [num_sampled] tensor
    #   true_expected_count shape = [batch_size, 1] tensor
    #   sampled_expected_count shape = [num_sampled] tensor
    if sampled_values is None:
      sampled_values = candidate_sampling_ops.log_uniform_candidate_sampler(
          true_classes=labels,
          num_true=num_true,
          num_sampled=num_sampled,
          unique=True,
          range_max=num_classes)
    # NOTE: pylint cannot tell that 'sampled_values' is a sequence
    # pylint: disable=unpacking-non-sequence
    sampled, true_expected_count, sampled_expected_count = sampled_values
    # pylint: enable=unpacking-non-sequence

    # labels_flat is a [batch_size * num_true] tensor
    # sampled is a [num_sampled] int tensor
    all_ids = array_ops.concat(0, [labels_flat, sampled])

    # weights shape is [num_classes, dim]
    all_w = embedding_ops.embedding_lookup(
        weights, all_ids, partition_strategy=partition_strategy)
    all_b = embedding_ops.embedding_lookup(biases, all_ids)
    # true_w shape is [batch_size * num_true, dim]
    # true_b is a [batch_size * num_true] tensor
    true_w = array_ops.slice(
        all_w, [0, 0],
        array_ops.pack([array_ops.shape(labels_flat)[0], -1]))  # 128*128
    true_b = array_ops.slice(all_b, [0], array_ops.shape(labels_flat))

    # inputs shape is [batch_size, dim]
    # true_w shape is [batch_size * num_true, dim]
    # row_wise_dots is [batch_size, num_true, dim]
    dim = array_ops.shape(true_w)[1:2]
    new_true_w_shape = array_ops.concat(0, [[-1, num_true], dim])
    row_wise_dots = math_ops.mul(
        array_ops.expand_dims(inputs, 1),             # 128*1*128
        array_ops.reshape(true_w, new_true_w_shape))  # 128*1*128
    # We want the row-wise dot plus biases which yields a
    # [batch_size, num_true] tensor of true_logits.
    dots_as_matrix = array_ops.reshape(row_wise_dots,
                                       array_ops.concat(0, [[-1], dim]))
    true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])
    true_b = array_ops.reshape(true_b, [-1, num_true])
    true_logits += true_b

    # Lookup weights and biases for sampled labels.
    #   sampled_w shape is [num_sampled, dim]
    #   sampled_b is a [num_sampled] float tensor
    sampled_w = array_ops.slice(
        all_w, array_ops.pack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])
    sampled_b = array_ops.slice(all_b, array_ops.shape(labels_flat), [-1])

    # inputs has shape [batch_size, dim]
    # sampled_w has shape [num_sampled, dim]
    # sampled_b has shape [num_sampled]
    # Apply X*W'+B, which yields [batch_size, num_sampled]
    sampled_logits = math_ops.matmul(
        inputs, sampled_w, transpose_b=True) + sampled_b

    if remove_accidental_hits:
      acc_hits = candidate_sampling_ops.compute_accidental_hits(
          labels, sampled, num_true=num_true)
      acc_indices, acc_ids, acc_weights = acc_hits

      # This is how SparseToDense expects the indices.
      acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
      acc_ids_2d_int32 = array_ops.reshape(
          math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
      sparse_indices = array_ops.concat(
          1, [acc_indices_2d, acc_ids_2d_int32], "sparse_indices")
      # Create sampled_logits_shape = [batch_size, num_sampled]
      sampled_logits_shape = array_ops.concat(0, [
          array_ops.shape(labels)[:1],
          array_ops.expand_dims(num_sampled, 0)
      ])
      if sampled_logits.dtype != acc_weights.dtype:
        acc_weights = math_ops.cast(acc_weights, sampled_logits.dtype)
      sampled_logits += sparse_ops.sparse_to_dense(
          sparse_indices, sampled_logits_shape, acc_weights,
          default_value=0.0, validate_indices=False)

    if subtract_log_q:
      # Subtract log of Q(l), prior probability that l appears in sampled.
      true_logits -= math_ops.log(true_expected_count)
      sampled_logits -= math_ops.log(sampled_expected_count)

    # Construct output logits and labels. The true labels/logits start at
    # col 0.
    out_logits = array_ops.concat(1, [true_logits, sampled_logits])
    # true_logits is a float tensor, ones_like(true_logits) is a float tensor
    # of ones. We then divide by num_true to ensure the per-example labels
    # sum to 1.0, i.e. form a proper probability distribution.
    out_labels = array_ops.concat(1, [
        array_ops.ones_like(true_logits) / num_true,
        array_ops.zeros_like(sampled_logits)
    ])

  return out_logits, out_labels
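# Negative-sampling usage (a sketch): word2vec-style training turns off the
# log-Q correction, as the docstrings above note ("Turn off for Negative
# Sampling"), and treats each output column as an independent binary
# decision via a sigmoid loss:
logits, labels_out = _compute_sampled_logits(
    weights, biases, inputs, labels, num_sampled, num_classes,
    num_true=1, subtract_log_q=False, remove_accidental_hits=False)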