Example No. 1
import tensorflow as tf
import tensorx as tx  # tx is assumed to be the tensorx package providing to_sparse / sparse_indices


def test_to_sparse():
    c = [[1, 0], [2, 3]]

    sparse_tensor = tx.to_sparse(c)

    # reference: dense shape and (row, col) indices of the non-zero entries
    dense_shape = tf.shape(c, out_type=tf.int64)
    indices = tf.where(tf.not_equal(c, 0))

    # reference: column index of each non-zero entry in the flattened matrix
    flat_values = tf.reshape(c, [-1])
    flat_indices = tf.where(tf.not_equal(flat_values, 0))
    flat_indices = tf.squeeze(flat_indices)
    flat_indices = tf.math.mod(flat_indices, dense_shape[1])

    values = tf.gather_nd(c, indices)

    # tx.sparse_indices: SparseTensor whose values are the column indices of the non-zeros
    sp_indices = tx.sparse_indices(sparse_tensor)

    assert tx.tensor_equal(sparse_tensor.indices, indices)
    assert tx.tensor_equal(sp_indices.values, flat_indices)
    assert tx.tensor_equal(sparse_tensor.values, values)
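For reference, a minimal sketch of the sparse structure the assertions above expect for c = [[1, 0], [2, 3]] (not part of the original test; assumes TF 1.x):

# Expected structure of tx.to_sparse(c):
#   indices     -> [[0, 0], [1, 0], [1, 1]]   (row, col) of each non-zero entry
#   values      -> [1, 2, 3]
#   dense_shape -> [2, 2]
# and sp_indices.values -> [0, 0, 1], the column index of each non-zero entry
expected = tf.SparseTensor(indices=[[0, 0], [1, 0], [1, 1]],
                           values=[1, 2, 3],
                           dense_shape=[2, 2])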
Example No. 2
from tensorflow.python.framework import tensor_util

# Generator and RandomIndexTensor are the random-index utilities used throughout
# these examples; their imports are omitted in the original snippet.
labels = [[0, 1], [2, 3]]

flat_labels = tf.reshape(labels, [-1])

vocab_size = 1000  # number of classes / features
k = 100            # dimensionality of each random index vector
s = 2              # number of active (non-zero) entries per random index
embed_size = 4

# one random index per vocabulary entry, packed into a RandomIndexTensor
generator = Generator(k, s)
ris = [generator.generate() for _ in range(vocab_size)]
ri_tensor = RandomIndexTensor.from_ri_list(ris, k, s)

sp_values = ri_tensor.gather(flat_labels).to_sparse_tensor()
sp_indices = tx.sparse_indices(sp_values)

# different ways of inspecting the static / dynamic shape of the sparse batch
print(sp_values.get_shape())
print(tensor_util.constant_value_as_shape(sp_values.dense_shape))
print(tensor_util.constant_value(sp_values.dense_shape))
print(sp_values.dense_shape[-1].eval())
print(tf.shape(sp_values).eval())

# lookup layer over the sparse random-index batch
lookup = tx.Lookup(tx.TensorLayer(sp_values),
                   seq_size=1,
                   lookup_shape=[k, embed_size])

# linear layer reusing the lookup weights
linear = tx.Linear(tx.TensorLayer(sp_values),
                   n_units=k,
                   shared_weights=lookup.weights)
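The .eval() calls above assume a default session; a sketch of evaluating the layer outputs explicitly, assuming (as in the later examples) that layers expose their output through .tensor:

# Sketch only: evaluate the layer outputs inside a TF 1.x session.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(tf.shape(lookup.tensor)))  # dynamic output shape of the Lookup layer
    print(sess.run(tf.shape(linear.tensor)))  # dynamic output shape of the Linear layer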
Example No. 3
print("adaptive sample: ", tf.shape(noise_logits.tensor).eval())

print("[noise sample shape] {}".format(tf.shape(sampled).eval()))

# flatten the batch of labels and gather the corresponding random index rows
labels_flat = array_ops.reshape(input_labels.tensor, [-1])

true_ris = tx.gather_sparse(sp_tensor=ri_tensor, ids=labels_flat)
noise_ris = tx.gather_sparse(sp_tensor=ri_tensor, ids=sampled)

print("----")
print("[true_ri shape] {}".format(tf.shape(true_ris).eval()))
print("[noise_ri shape] {}".format(tf.shape(noise_ris).eval()))
print("----")

# weight vectors for the true and the sampled (noise) classes, summed over the
# active entries of each random index row
true_w = embedding_lookup_sparse(params=lookup.weights,
                                 sp_ids=tx.sparse_indices(true_ris),
                                 sp_weights=true_ris,
                                 combiner="sum",
                                 partition_strategy="mod")

noise_w = embedding_lookup_sparse(params=lookup.weights,
                                  sp_ids=tx.sparse_indices(noise_ris),
                                  sp_weights=noise_ris,
                                  combiner="sum",
                                  partition_strategy="mod")

print("[true_w shape] {}".format(tf.shape(true_w).eval()))
print("[noise_w shape] {}".format(tf.shape(noise_w).eval()))
print("----")
# *************************************
#   LOGITS
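The snippet breaks off at the LOGITS marker; a sketch of the step that typically follows (mirroring Examples No. 5 and 6, assuming one true class per example and that inputs holds the [batch_size, dim] input batch; these names are placeholders):

# Sketch only: logits from the gathered weight rows (cf. Example No. 6).
true_logits = tf.reduce_sum(tf.multiply(inputs, true_w), axis=-1, keepdims=True)  # [batch_size, 1]
sampled_logits = tf.matmul(inputs, noise_w, transpose_b=True)                     # [batch_size, num_sampled]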
Example No. 4
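This example compares several ways of multiplying a dense matrix by the transpose of a sparse one; it assumes A, B (dense) and C (a SparseTensor, taken here to be the sparse form of B) are defined elsewhere. A minimal sketch of such definitions, so the comparison below can run under TF 1.x:

import tensorflow as tf
import tensorx as tx  # assumed alias, consistent with the other examples

# Placeholder inputs: B is dense, C is its SparseTensor counterpart.
A = tf.constant([[1., 2.], [3., 4.]])
B = tf.constant([[1., 0.], [0., 2.]])
C = tf.SparseTensor(indices=[[0, 0], [1, 1]], values=[1., 2.], dense_shape=[2, 2])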
# r1: reference result, the dense product A @ B^T
r1 = tf.matmul(A, B, transpose_b=True)

# rs: sparse-dense product, computed as transpose(C @ A^T)
rs = tf.sparse_tensor_dense_matmul(C, A, adjoint_b=True)
rs = tf.transpose(rs)

# r2: dense matmul with the second operand flagged as sparse in content
r2 = tf.sparse_matmul(A, B, transpose_b=True, b_is_sparse=True)

sess = tf.Session()

# r3: densify C explicitly, then dense matmul with the sparse hint
Cd = tf.sparse_tensor_to_dense(C)
r3 = tf.matmul(A, Cd, b_is_sparse=True, transpose_b=True)

# r4: C @ A^T reconstructed with embedding_lookup_sparse, using the sparse indices
# of C as ids into the rows of A^T and its values as weights
# Ct = tf.sparse_transpose(C)
Ct = C
Ci = tx.sparse_indices(Ct)
r4 = tf.nn.embedding_lookup_sparse(tf.transpose(A),
                                   sp_ids=Ci,
                                   sp_weights=Ct,
                                   combiner="sum")

print(sess.run(r1))
print("=" * 40)
print(sess.run(rs))
print("=" * 40)
print(sess.run(r2))
print("=" * 40)
print(sess.run(r3))
print("=" * 40)
print(sess.run(r4))
Example No. 5
def _compute_random_ri_sampled_logits(ri_tensors,
                                      k_dim,
                                      s_active,
                                      weights,
                                      labels,
                                      inputs,
                                      num_sampled,
                                      num_true=1,
                                      subtract_log_q=True,
                                      partition_strategy="mod",
                                      name=None,
                                      seed=None):
    """ Random Random Index Sampled Logits with negative sampling

    https://arxiv.org/pdf/1410.8251.pdf

    Computes the sampled logits from the space of all possible random indexes.
    Since any random index is possible, we sample, not from the existing random indexes
    but from the space of possible random indexes so that the model learns which combinations
    of bases are NOT the ones used to predict a given feature.

    Args:
        ri_tensors:
        k_dim:
        s_active:
        weights:
        labels:
        inputs:
        num_sampled:
        sampled_values:
        num_true:
        subtract_log_q:
        remove_accidental_hits:
        partition_strategy:
        name:
        seed:

    Returns:

    """
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "random_ri_sampled_logits",
                        weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)
        sampled_ris, expected_true_ris, expected_sampled_ris = sample_ri(k_dim, s_active, num_sampled, true_ris)

        all_ris = sparse_ops.sparse_concat(axis=0, sp_inputs=[true_ris, sampled_ris])

        sp_values = all_ris
        sp_indices = tx.sparse_indices(sp_values)

        # retrieve the combined weight row for each true and sampled random index

        # weights shape is [k_dim, dim]
        all_w = embedding_lookup_sparse(
            weights, sp_indices, sp_values, combiner="sum", partition_strategy=partition_strategy)

        # true_w shape is [batch_size * num_true, dim]
        true_w = array_ops.slice(all_w, [0, 0],
                                 array_ops.stack(
                                     [array_ops.shape(labels_flat)[0], -1]))

        sampled_w = array_ops.slice(
            all_w, array_ops.stack([array_ops.shape(labels_flat)[0], 0]), [-1, -1])
        # inputs has shape [batch_size, dim]
        # sampled_w has shape [num_sampled, dim]
        # Apply X*W', which yields [batch_size, num_sampled]
        sampled_logits = math_ops.matmul(inputs, sampled_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        row_wise_dots = math_ops.multiply(
            array_ops.expand_dims(inputs, 1),
            array_ops.reshape(true_w, new_true_w_shape))
        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that label appears in sampled.
            true_logits -= math_ops.log(expected_true_ris)
            sampled_logits -= math_ops.log(expected_sampled_ris)

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, sampled_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(sampled_logits)
        ], 1)

        return out_logits, out_labels
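A sketch of how the returned pair is typically consumed, in the style of TensorFlow's NCE loss (all names below are placeholders, not part of the original code; ri_sp_tensor stands for the SparseTensor of class random indexes):

# Hypothetical usage: NCE-style binary loss over the sampled logits.
out_logits, out_labels = _compute_random_ri_sampled_logits(
    ri_tensors=ri_sp_tensor, k_dim=k, s_active=s, weights=lookup.weights,
    labels=labels, inputs=inputs, num_sampled=10)
loss = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=out_labels, logits=out_logits),
    axis=-1)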
Example No. 6
def _compute_sampled_logits(ri_tensors,
                            weights,
                            bias,
                            labels,
                            partition_const,
                            inputs,
                            num_sampled,
                            num_classes,
                            num_true=1,
                            sampled_values=None,
                            subtract_log_q=True,
                            remove_accidental_hits=False,
                            partition_strategy="mod",
                            name=None,
                            seed=None):
    """ Computes sampled (true + noise) logits and labels, with the class weight
    vectors assembled from their random index rows. """
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "compute_sampled_logits",
                        weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # Sample the negative labels.
        #   sampled shape: [num_sampled] tensor
        #   true_expected_count shape = [batch_size, 1] tensor
        #   sampled_expected_count shape = [num_sampled] tensor
        if sampled_values is None:
            sampled_values = candidate_sampling_ops.uniform_candidate_sampler(
                true_classes=labels,
                num_true=num_true,
                num_sampled=num_sampled,
                unique=True,
                range_max=num_classes,
                seed=seed)
        # NOTE: pylint cannot tell that 'sampled_values' is a sequence
        # pylint: disable=unpacking-non-sequence
        sampled, true_expected_count, sampled_expected_count = (
            array_ops.stop_gradient(s) for s in sampled_values)
        # pylint: enable=unpacking-non-sequence
        sampled = math_ops.cast(sampled, dtypes.int64)

        # labels_flat is a [batch_size * num_true] tensor
        # sampled is a [num_sampled] int tensor

        # gather the random index rows for the true and the sampled (noise) classes
        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)
        sampled_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=sampled)

        true_w = embedding_lookup_sparse(params=weights,
                                         sp_ids=tx.sparse_indices(true_ris),
                                         sp_weights=true_ris,
                                         combiner="sum",
                                         partition_strategy=partition_strategy)

        noise_w = embedding_lookup_sparse(params=weights,
                                          sp_ids=tx.sparse_indices(sampled_ris),
                                          sp_weights=sampled_ris,
                                          combiner="sum",
                                          partition_strategy=partition_strategy)

        if bias is not None:
            sampled_b = embedding_lookup_sparse(
                params=bias,
                sp_ids=tx.sparse_indices(sampled_ris),
                sp_weights=sampled_ris,
                combiner="sum",
                partition_strategy=partition_strategy)

            true_b = embedding_lookup_sparse(
                params=bias,
                sp_ids=tx.sparse_indices(true_ris),
                sp_weights=true_ris,
                combiner="sum",
                partition_strategy=partition_strategy)

        noise_logits = math_ops.matmul(inputs, noise_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        true_w_e = array_ops.reshape(true_w, new_true_w_shape)

        row_wise_dots = math_ops.multiply(array_ops.expand_dims(inputs, 1),
                                          true_w_e)
        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        if bias is not None:
            true_b = array_ops.reshape(true_b, [-1, num_true])
            true_logits += true_b
            noise_logits += sampled_b

        # TODO  need to review how to do this Z
        # true_logits = true_logits * math_ops.exp(partition_const)

        if remove_accidental_hits:
            acc_hits = candidate_sampling_ops.compute_accidental_hits(
                labels, sampled, num_true=num_true)
            acc_indices, acc_ids, acc_weights = acc_hits

            # This is how SparseToDense expects the indices.
            acc_indices_2d = array_ops.reshape(acc_indices, [-1, 1])
            acc_ids_2d_int32 = array_ops.reshape(
                math_ops.cast(acc_ids, dtypes.int32), [-1, 1])
            sparse_indices = array_ops.concat([acc_indices_2d, acc_ids_2d_int32], 1,
                                              "sparse_indices")
            # Create sampled_logits_shape = [batch_size, num_sampled]
            sampled_logits_shape = array_ops.concat(
                [array_ops.shape(labels)[:1],
                 array_ops.expand_dims(num_sampled, 0)], 0)
            if noise_logits.dtype != acc_weights.dtype:
                acc_weights = math_ops.cast(acc_weights, noise_logits.dtype)
            noise_logits += sparse_ops.sparse_to_dense(
                sparse_indices,
                sampled_logits_shape,
                acc_weights,
                default_value=0.0,
                validate_indices=False)

        if subtract_log_q:
            # Subtract log of Q(l), prior probability that l appears in sampled.
            true_logits -= math_ops.log(true_expected_count)
            noise_logits -= math_ops.log(sampled_expected_count)

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, noise_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(noise_logits)
        ], 1)

        # out_logits = math_ops.div(out_logits,math_ops.exp(partition_const))
        # out_logits = out_logits / (partition_const + 1)
        return out_logits, out_labels
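Unlike the previous example, this version also accepts sampled_values, bias and remove_accidental_hits; a sketch of calling it with an explicit uniform candidate sampler (all names are placeholders; labels is an int64 [batch_size, 1] tensor of class ids):

# Hypothetical: supply an explicit candidate sampler instead of the default one.
sampled_values = tf.nn.uniform_candidate_sampler(
    true_classes=labels, num_true=1, num_sampled=num_sampled,
    unique=True, range_max=num_classes, seed=42)

logits, targets = _compute_sampled_logits(
    ri_tensors=ri_sp_tensor, weights=lookup.weights, bias=None, labels=labels,
    partition_const=None, inputs=inputs, num_sampled=num_sampled,
    num_classes=num_classes, sampled_values=sampled_values,
    remove_accidental_hits=True)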
Example No. 7
def _sampled_logits_from_parametric_noise(ri_tensors,
                                          k_dim,
                                          weights,
                                          labels,
                                          inputs,
                                          input_dim,
                                          num_true=1,
                                          partition_strategy="mod",
                                          name=None):
    """ Sampled logits where the noise samples are produced by a parametric (FC) generator. """
    if isinstance(weights, variables.PartitionedVariable):
        weights = list(weights)
    if not isinstance(weights, list):
        weights = [weights]

    with ops.name_scope(name, "compute_sampled_logits",
                        weights + [inputs, labels]):
        if labels.dtype != dtypes.int64:
            labels = math_ops.cast(labels, dtypes.int64)
        labels_flat = array_ops.reshape(labels, [-1])

        # true_ris
        true_ris = tx.gather_sparse(sp_tensor=ri_tensors, ids=labels_flat)

        true_w = embedding_lookup_sparse(params=weights,
                                         sp_ids=tx.sparse_indices(true_ris),
                                         sp_weights=true_ris,
                                         combiner="sum",
                                         partition_strategy=partition_strategy)

        # parametric noise: map the true class weight vectors through a small FC
        # network and project them back to a sparse k_dim random-index-like code
        label_layer = tx.TensorLayer(true_w, input_dim)
        noise_fn = tx.FC(label_layer, 512, activation=tx.relu)
        noise_fn_sp = tx.ToSparse(noise_fn)
        noise_ris = tx.Linear(noise_fn_sp, k_dim, weight_init=tx.glorot_uniform(), bias=True)
        noise_ris_sp = tx.ToSparse(noise_ris)

        noise_w = embedding_lookup_sparse(params=weights,
                                          sp_ids=tx.sparse_indices(noise_ris_sp.tensor),
                                          sp_weights=noise_ris_sp.tensor,
                                          combiner="sum",
                                          partition_strategy=partition_strategy)

        noise_logits = math_ops.matmul(inputs, noise_w, transpose_b=True)

        dim = array_ops.shape(true_w)[1:2]
        new_true_w_shape = array_ops.concat([[-1, num_true], dim], 0)
        true_w_e = array_ops.reshape(true_w, new_true_w_shape)

        row_wise_dots = math_ops.multiply(array_ops.expand_dims(inputs, 1),
                                          true_w_e)
        # We want the row-wise dot plus biases which yields a
        # [batch_size, num_true] tensor of true_logits.
        dots_as_matrix = array_ops.reshape(row_wise_dots,
                                           array_ops.concat([[-1], dim], 0))
        true_logits = array_ops.reshape(_sum_rows(dots_as_matrix), [-1, num_true])

        # Construct output logits and labels. The true labels/logits start at col 0.
        out_logits = array_ops.concat([true_logits, noise_logits], 1)

        # true_logits is a float tensor, ones_like(true_logits) is a float
        # tensor of ones. We then divide by num_true to ensure the per-example
        # labels sum to 1.0, i.e. form a proper probability distribution.
        out_labels = array_ops.concat([
            array_ops.ones_like(true_logits) / num_true,
            array_ops.zeros_like(noise_logits)
        ], 1)

        # out_logits = out_logits * math_ops.exp(partition_const)
        # out_logits = out_logits / (partition_const + 1)
        return out_logits, out_labels
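Because the FC noise generator sits between the labels and the output logits, its variables are trained together with the shared weights; a sketch of such a training step, with out_logits/out_labels taken from a call to the function above and the optimizer chosen only for illustration:

# Hypothetical: one optimizer step updates both the model weights and the noise generator.
loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=out_labels, logits=out_logits))
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)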