Python sparse_feature_cross示例，astronet.contrib.layers.python.ops.sparse_feature_cross_op.sparse_feature_cross Python示例

示例#1

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_dense(self):
     """Tests crossing when every input is a dense tensor.

     Two 2x2 dense string constants are crossed; each batch row is expected
     to contain all four pairwise combinations joined with `_X_`.
     """
     first_column = constant_op.constant(
         [['batch1-FC1-F1', 'batch1-FC1-F2'],
          ['batch2-FC1-F1', 'batch2-FC1-F2']], dtypes.string)
     second_column = constant_op.constant(
         [['batch1-FC2-F1', 'batch1-FC2-F2'],
          ['batch2-FC2-F1', 'batch2-FC2-F2']], dtypes.string)
     crossed = sparse_feature_cross_op.sparse_feature_cross(
         [first_column, second_column])

     batch1_crosses = [
         'batch1-FC1-F1_X_batch1-FC2-F1',
         'batch1-FC1-F1_X_batch1-FC2-F2',
         'batch1-FC1-F2_X_batch1-FC2-F1',
         'batch1-FC1-F2_X_batch1-FC2-F2',
     ]
     batch2_crosses = [
         'batch2-FC1-F1_X_batch2-FC2-F1',
         'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1',
         'batch2-FC1-F2_X_batch2-FC2-F2',
     ]
     expected = self._sparse_tensor([batch1_crosses, batch2_crosses])
     with self.cached_session() as sess:
         self._assert_sparse_tensor_equals(expected, sess.run(crossed))

示例#2

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

    def test_one_column_empty(self):
        """Tests crossing three columns when the middle one is empty.

        The crossed tensor should be empty.
        """
        columns = [
            self._sparse_tensor([['batch1-FC1-F1', 'batch1-FC1-F2']]),
            self._sparse_tensor([], 1),
            self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']]),
        ]
        crossed = sparse_feature_cross_op.sparse_feature_cross(columns)
        with self.cached_session() as sess:
            result = sess.run(crossed)
            self._assert_sparse_tensor_empty(result)

示例#3

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

    def test_all_columns_empty(self):
        """Tests crossing three columns that are all empty.

        The crossed tensor should be empty.
        """
        # Three independent empty sparse columns, built in order.
        empty_columns = [self._sparse_tensor([]) for _ in range(3)]
        crossed = sparse_feature_cross_op.sparse_feature_cross(empty_columns)
        with self.cached_session() as sess:
            result = sess.run(crossed)
            self._assert_sparse_tensor_empty(result)

示例#4

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_hashed_output_v1_has_collision(self):
     """Tests that the old fingerprint concatenation produces collisions."""
     # 359 and 1024 + 359 share the same last 10 bits, so with 1024 buckets
     # every cross in the first row collides with its second-row counterpart.
     colliding_ids = constant_op.constant([[359], [359 + 1024]])
     digit_rows = constant_op.constant([list(range(10)) for _ in range(2)])
     hashed_cross = sparse_feature_cross_op.sparse_feature_cross(
         [digit_rows, colliding_ids], hashed_output=True, num_buckets=1024)
     dense_cross = sparse_ops.sparse_tensor_to_dense(hashed_cross)
     with session.Session():
         rows = dense_cross.eval()
         rows_match = numpy.equal(rows[0], rows[1])
         self.assertTrue(rows_match.all())

示例#5

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_hashed_output_zero_bucket(self):
     """Tests hashed output when no `num_buckets` argument is passed.

     Three single-value columns are crossed with `hashed_output=True` and the
     result is compared against one pinned hash value.
     """
     columns = [
         self._sparse_tensor([['batch1-FC1-F1']]),
         self._sparse_tensor([['batch1-FC2-F1']]),
         self._sparse_tensor([['batch1-FC3-F1']]),
     ]
     hashed_cross = sparse_feature_cross_op.sparse_feature_cross(
         columns, hashed_output=True)
     # The literal hash is pinned so unintentional hashing changes are caught.
     expected = self._sparse_tensor([[3735511728867393167]])
     with self.cached_session() as sess:
         self._assert_sparse_tensor_equals(expected, sess.run(hashed_cross))

示例#6

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_hashed_output_zero_bucket_v2(self):
     """Tests hashed output with the default hash key and no `num_buckets`.

     Same inputs as the non-v2 variant, but `hash_key` is set to
     `layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY`; the result is compared
     against one pinned hash value.
     """
     columns = [
         self._sparse_tensor([['batch1-FC1-F1']]),
         self._sparse_tensor([['batch1-FC2-F1']]),
         self._sparse_tensor([['batch1-FC3-F1']]),
     ]
     hashed_cross = sparse_feature_cross_op.sparse_feature_cross(
         columns,
         hashed_output=True,
         hash_key=layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
     # The literal hash is pinned so unintentional hashing changes are caught.
     expected = self._sparse_tensor([[1971693436396284976]])
     with self.cached_session() as sess:
         self._assert_sparse_tensor_equals(expected, sess.run(hashed_cross))

示例#7

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_integer_mixed_string_sparse(self):
     """Tests crossing an integer sparse column with a string sparse column."""
     int_column = self._sparse_tensor([[11], [333, 55555]])
     str_column = self._sparse_tensor(
         [['batch1-FC2-F1'], ['batch2-FC2-F1', 'batch2-FC2-F2']])
     crossed = sparse_feature_cross_op.sparse_feature_cross(
         [int_column, str_column])

     # Integer values appear in the crossed strings as their decimal text.
     batch1_crosses = ['11_X_batch1-FC2-F1']
     batch2_crosses = [
         '333_X_batch2-FC2-F1',
         '333_X_batch2-FC2-F2',
         '55555_X_batch2-FC2-F1',
         '55555_X_batch2-FC2-F2',
     ]
     expected = self._sparse_tensor([batch1_crosses, batch2_crosses])
     with self.cached_session() as sess:
         self._assert_sparse_tensor_equals(expected, sess.run(crossed))

示例#8

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_hashed_output_v2_has_no_collision(self):
     """Tests that the new fingerprint concatenation avoids collisions."""
     # 359 and 1024 + 359 share the same last 10 bits, but with the hash key
     # supplied none of the crosses in the two rows should collide.
     colliding_ids = constant_op.constant([[359], [359 + 1024]])
     digit_rows = constant_op.constant([list(range(10)) for _ in range(2)])
     hashed_cross = sparse_feature_cross_op.sparse_feature_cross(
         [digit_rows, colliding_ids],
         hashed_output=True,
         num_buckets=1024,
         hash_key=layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY)
     dense_cross = sparse_ops.sparse_tensor_to_dense(hashed_cross)
     with session.Session():
         rows = dense_cross.eval()
         rows_differ = numpy.not_equal(rows[0], rows[1])
         self.assertTrue(rows_differ.all())

示例#9

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_integer_sparse_input(self):
     """Tests crossing an integer sparse column with a dense string column."""
     sparse_ints = self._sparse_tensor([[11], [333, 5555]])
     dense_strings = constant_op.constant(
         [['batch1-FC2-F1', 'batch1-FC2-F2'],
          ['batch2-FC2-F1', 'batch2-FC2-F2']], dtypes.string)
     crossed = sparse_feature_cross_op.sparse_feature_cross(
         [sparse_ints, dense_strings])

     batch1_crosses = ['11_X_batch1-FC2-F1', '11_X_batch1-FC2-F2']
     batch2_crosses = [
         '333_X_batch2-FC2-F1',
         '333_X_batch2-FC2-F2',
         '5555_X_batch2-FC2-F1',
         '5555_X_batch2-FC2-F2',
     ]
     expected = self._sparse_tensor([batch1_crosses, batch2_crosses])
     with self.cached_session() as sess:
         self._assert_sparse_tensor_equals(expected, sess.run(crossed))

示例#10

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_hashed_3x1x2(self):
     """Tests a 3x1x2 cross with hashed output into 1000 buckets.

     Checks the number and indices of the produced values, that each value
     lies in [0, 1000), and that all six values are distinct.
     """
     columns = [
         self._sparse_tensor(
             [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']]),
         self._sparse_tensor([['batch1-FC2-F1']]),
         self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']]),
     ]
     hashed_cross = sparse_feature_cross_op.sparse_feature_cross(
         columns, hashed_output=True, num_buckets=1000)
     with self.cached_session() as sess:
         result = sess.run(hashed_cross)
         self.assertEqual(6, len(result.values))
         self.assertAllEqual([[0, col] for col in range(6)], result.indices)
         # Every hashed id must fall inside the bucket range.
         self.assertTrue(all(0 <= value < 1000 for value in result.values))
         # No two crosses may share a hashed id.
         distinct_count = len(set(result.values))
         self.assertTrue(len(result.values) == distinct_count)

示例#11

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

    def test_some_columns_empty(self):
        """Tests when more than one column is empty for a batch row.

        The cross for the corresponding batch should be empty. Only the
        second column supplies values for a second row here.
        """
        # NOTE(review): the trailing `2` passed to `_sparse_tensor` looks like
        # a batch size -- confirm against the helper's definition.
        first_column = self._sparse_tensor(
            [['batch1-FC1-F1', 'batch1-FC1-F2']], 2)
        second_column = self._sparse_tensor(
            [['batch1-FC2-F1'], ['batch2-FC2-F1']], 2)
        third_column = self._sparse_tensor(
            [['batch1-FC3-F1', 'batch1-FC3-F2']], 2)
        crossed = sparse_feature_cross_op.sparse_feature_cross(
            [first_column, second_column, third_column])

        batch1_crosses = [
            'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1',
            'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2',
            'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
            'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2',
        ]
        expected = self._sparse_tensor([batch1_crosses], 2)
        with self.cached_session() as sess:
            self._assert_sparse_tensor_equals(expected, sess.run(crossed))

示例#12

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_simple(self):
     """Tests crossing two sparse string columns over two batch rows.

     Row one holds a single value per column; row two holds two values per
     column and is expected to yield all four combinations.
     """
     first_column = self._sparse_tensor(
         [['batch1-FC1-F1'], ['batch2-FC1-F1', 'batch2-FC1-F2']])
     second_column = self._sparse_tensor(
         [['batch1-FC2-F1'], ['batch2-FC2-F1', 'batch2-FC2-F2']])
     crossed = sparse_feature_cross_op.sparse_feature_cross(
         [first_column, second_column])

     batch1_crosses = ['batch1-FC1-F1_X_batch1-FC2-F1']
     batch2_crosses = [
         'batch2-FC1-F1_X_batch2-FC2-F1',
         'batch2-FC1-F1_X_batch2-FC2-F2',
         'batch2-FC1-F2_X_batch2-FC2-F1',
         'batch2-FC1-F2_X_batch2-FC2-F2',
     ]
     expected = self._sparse_tensor([batch1_crosses, batch2_crosses])
     with self.cached_session() as sess:
         self._assert_sparse_tensor_equals(expected, sess.run(crossed))

示例#13

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

 def test_permutation_3x1x2(self):
     """Tests a 3x1x2 cross with string output.

     Columns with three, one and two values are crossed; the six expected
     combinations are listed with the first column varying slowest.
     """
     three_values = self._sparse_tensor(
         [['batch1-FC1-F1', 'batch1-FC1-F2', 'batch1-FC1-F3']])
     one_value = self._sparse_tensor([['batch1-FC2-F1']])
     two_values = self._sparse_tensor([['batch1-FC3-F1', 'batch1-FC3-F2']])
     crossed = sparse_feature_cross_op.sparse_feature_cross(
         [three_values, one_value, two_values])

     expected_row = [
         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F2',
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F2_X_batch1-FC2-F1_X_batch1-FC3-F2',
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F1',
         'batch1-FC1-F3_X_batch1-FC2-F1_X_batch1-FC3-F2',
     ]
     expected = self._sparse_tensor([expected_row])
     with self.cached_session() as sess:
         self._assert_sparse_tensor_equals(expected, sess.run(crossed))

示例#14

0

显示文件

文件： sparse_feature_cross_op_test.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

    def test_large_batch(self):
        """Tests a 3x1x2 cross over a large batch to force multithreading.

        Each of the 5000 batch rows carries its own index in the feature
        names, so every row has six distinct expected crosses.
        """
        batch_size = 5000
        rows = range(batch_size)

        # Per-row inputs: three, one and two values respectively.
        col1 = [[
            'batch%d-FC1-F1' % b,
            'batch%d-FC1-F2' % b,
            'batch%d-FC1-F3' % b,
        ] for b in rows]
        col2 = [['batch%d-FC2-F1' % b] for b in rows]
        col3 = [['batch%d-FC3-F1' % b, 'batch%d-FC3-F2' % b] for b in rows]

        crossed = sparse_feature_cross_op.sparse_feature_cross([
            self._sparse_tensor(col1),
            self._sparse_tensor(col2),
            self._sparse_tensor(col3),
        ])

        # Per-row expected output: all six combinations, first column
        # varying slowest.
        col_out = [[
            'batch%d-FC1-F1_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
            'batch%d-FC1-F1_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b),
            'batch%d-FC1-F2_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
            'batch%d-FC1-F2_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b),
            'batch%d-FC1-F3_X_batch%d-FC2-F1_X_batch%d-FC3-F1' % (b, b, b),
            'batch%d-FC1-F3_X_batch%d-FC2-F1_X_batch%d-FC3-F2' % (b, b, b),
        ] for b in rows]

        expected = self._sparse_tensor(col_out)
        with self.cached_session() as sess:
            self._assert_sparse_tensor_equals(expected, sess.run(crossed))

示例#15

0

显示文件

文件： embedding_ops.py 项目： abhishyantkhare/Astronet-Triage-TF2-upgraded

def _sampled_scattered_embedding_lookup(params,
                                        values,
                                        dimension=None,
                                        sampled_candidates=None,
                                        hash_key=None,
                                        name=None):
  """Embeds every entry of `values` by hashing into a shared weight pool.

  When `sampled_candidates` is supplied only those embedding dimensions are
  looked up; otherwise all `dimension` components are produced.

  Component i of the embedding of a value v is the weight whose index is a
  fingerprint of the pair (v, i). This is the "feature hashing" idea used for
  model compression in http://arxiv.org/pdf/1504.04788.pdf

  Feature hashing removes the need for a pre-determined vocabulary, which
  simplifies the surrounding process, and lets a fixed amount of memory hold
  embeddings for possibly trillions of features.

  Compared with out-of-vocabulary shared "hash buckets", the embedding of each
  token is extremely likely to be unique rather than shared between
  probably-colliding tokens. The cost is one hash per scalar of the embedding
  instead of one hash per token.

  A `params` list is treated as a partition of the embedding parameters. All
  tensors in the list should have the same length, except that the first ones
  may hold one extra element; e.g. 10 parameters split over 4 tensors have
  lengths `[3, 3, 2, 2]`.

  Args:
    params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`. Each
      tensor must be of rank 1 with fully-defined shape.
    values: `Tensor` of values to be embedded with shape `[d0, ..., dn]`.
    dimension: Embedding dimension. The user must specify either `dimension` or
      `sampled_candidates`.
    sampled_candidates: An optional `Tensor` of slice indices to keep along the
      final dimension with shape `[d0, ..., dn, N]`. If given, `dimension` is
      ignored. If `None`, looks up all candidates.
    hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
      function to combine the crosses fingerprints on SparseFeatureCrossOp
      (optional).
    name: An optional name for this op.

  Returns:
    A `Tensor` with shape `[d0, ..., dn, dimension]`.
    If `sampled_candidates` is given, the output shape is `[d0, ..., dn, N]`

  Raises:
    ValueError: if neither `dimension` nor `sampled_candidates` is given, if
      dimension is not positive, or if the partition size is invalid.
  """
  # Normalize `params` to a plain Python list of tensors.
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)
  elif not isinstance(params, list):
    params = [params]

  with ops.name_scope(name, "scattered_embedding_lookup",
                      params + [dimension, values]):
    # Remember the caller's shape, then flatten the values to one column.
    original_shape = array_ops.shape(values)
    values = array_ops.reshape(values, [-1, 1])

    if sampled_candidates is not None:
      # The embedding width is the size of the candidates' last axis.
      candidate_dims = array_ops.shape(sampled_candidates)
      last_axis = math_ops.subtract(array_ops.rank(sampled_candidates), 1)
      dimension = candidate_dims[last_axis]

      candidates_shape = array_ops.shape(sampled_candidates)
      dimension_as_vector = array_ops.reshape(dimension, shape=[1])
      required_shape = array_ops.concat(
          [original_shape, dimension_as_vector], 0)
      shapes_agree = math_ops.reduce_all(
          math_ops.equal(candidates_shape, required_shape))
      shape_check = control_flow_ops.Assert(shapes_agree, [
          "The shape of sampled_candidates: ", candidates_shape,
          " does not match the shape of values: ", original_shape
      ])
      with ops.control_dependencies([shape_check]):
        # Flatten sampled_candidates, same way as values are flattened.
        sampled_candidates = array_ops.reshape(sampled_candidates,
                                               [-1, dimension])
    else:
      if dimension is None:
        raise ValueError(
            "You must specify either dimension or sampled_candidates.")
      if dimension <= 0:
        raise ValueError("Dimension must be >0. Given is %d" % dimension)
      # No sampling requested: every flattened value gets all indices
      # 0..dimension-1 as its candidates.
      every_index = array_ops.expand_dims(math_ops.range(0, dimension), 0)
      sampled_candidates = array_ops.tile(every_index, array_ops.shape(values))

    # Collect the statically known length of each partition.
    partition_sizes = []
    for partition in params:
      partition_shape = partition.get_shape()
      partition_shape.assert_has_rank(1)
      partition_shape.assert_is_fully_defined()
      partition_sizes.append(tensor_shape.dimension_value(partition_shape[0]))
    num_partitions = len(params)
    num_params = sum(partition_sizes)  # Total number of parameters.

    # Verify each partition has the size required by the layout described in
    # the docstring (leading partitions may hold one extra element).
    for index, actual_size in enumerate(partition_sizes):
      expected_size = (num_params - index - 1) // num_partitions + 1
      if actual_size != expected_size:
        raise ValueError("Tensor %d in params has size %d, expected %d." %
                         (index, actual_size, expected_size))

    # With two values v1 and v2 and 3 dimensions, we will cross
    # [[0, 1, 2], [0, 1, 2]] with [[v1], [v2]].
    hashed_ids = sparse_feature_cross_op.sparse_feature_cross(
        [sampled_candidates, values],
        hashed_output=True,
        num_buckets=num_params,
        hash_key=hash_key)
    hashed_ids = sparse_ops.sparse_tensor_to_dense(hashed_ids)

    # No need to validate the indices since we have checked the params
    # dimensions and we know the largest id.
    looked_up = embedding_ops.embedding_lookup(
        params, hashed_ids, partition_strategy="div")

    # Restore the caller's leading dimensions and append the embedding axis.
    output_shape = array_ops.concat([original_shape, [dimension]], 0)
    return array_ops.reshape(looked_up, output_shape)