def testStringToOneHashBucketLegacyHash(self):
    """With a single bucket, every input string must map to id 0."""
    with self.cached_session():
      strings_in = array_ops.placeholder(dtypes.string)
      bucketed = string_ops.string_to_hash_bucket(strings_in, 1)
      bucket_ids = bucketed.eval(feed_dict={strings_in: ['a', 'b', 'c']})
      # Only one bucket exists, so all ids collapse to 0.
      self.assertAllEqual([0, 0, 0], bucket_ids)
# Example #2
# 0
 def replica_fn(elem):
     """Combine the numeric feature with a hashed string feature."""
     # Typical preprocessing: map each string into one of 10 hash buckets.
     bucket_ids = string_to_hash_bucket(elem['str'], 10)
     # Trim the dense hashed tensor to the width of the ragged int feature.
     trimmed = bucket_ids[:, :elem['size'][0]]
     # Mix both features into one numeric result.
     return elem['int'] * 10 + trimmed
# Example #3
# 0
 def insert_transformed_feature(self, columns_to_tensors):
   """Converts the sparse string column into hashed integer ids."""
   source = columns_to_tensors[self.name]
   # Hash the raw string values into `bucket_size` integer ids.
   ids = string_ops.string_to_hash_bucket(
       source.values, self.bucket_size, name=self.name + "_lookup")
   # Re-wrap the ids with the original sparse layout; only values change.
   columns_to_tensors[self] = ops.SparseTensor(source.indices, ids,
                                               source.shape)
 def insert_transformed_feature(self, columns_to_tensors):
   """Maps the sparse string column's values to hashed bucket ids."""
   column = columns_to_tensors[self.name]
   hashed_ids = string_ops.string_to_hash_bucket(
       column.values,
       self.bucket_size,
       name=self.name + "_lookup")
   # Indices and shape are carried over unchanged from the source column.
   columns_to_tensors[self] = ops.SparseTensor(column.indices, hashed_ids,
                                               column.shape)
  def testStringToHashBucketsLegacyHash(self):
    """Known strings must land in their precomputed legacy-hash buckets."""
    with self.cached_session():
      strings_in = array_ops.placeholder(dtypes.string)
      bucketed = string_ops.string_to_hash_bucket(strings_in, 10)
      bucket_ids = bucketed.eval(feed_dict={strings_in: ['a', 'b', 'c']})

      # Expected ids via the legacy hash:
      #   Hash64('a') = 2996632905371535868  -> mod 10 -> 8
      #   Hash64('b') = 5795986006276551370  -> mod 10 -> 0
      #   Hash64('c') = 14899841994519054197 -> mod 10 -> 7
      self.assertAllEqual([8, 0, 7], bucket_ids)
# Example #6
# 0
    def run_dataset_implementation(self, batch_size):
        """Benchmarks hashing batches drawn from a tf.data pipeline.

        Returns the average wall-clock seconds per batch, averaged over
        several repeats of the whole pipeline.
        """
        num_repeats = 5
        num_batches = 5
        durations = []
        for _ in range(num_repeats):
            ds = dataset_ops.Dataset.from_generator(
                word_gen, dtypes.string, tensor_shape.TensorShape([]))
            ds = (ds.shuffle(batch_size * 100)
                  .batch(batch_size)
                  .take(num_batches)
                  .prefetch(num_batches))
            begin = time.time()
            # Benchmarked code begins here.
            for batch in ds:
                _ = string_ops.string_to_hash_bucket(batch, num_buckets=2)
            # Benchmarked code ends here.
            durations.append(time.time() - begin)

        # Mean duration per repeat divided by batches per repeat.
        return np.mean(durations) / num_batches
# Example #7
# 0
 def replica_fn(elem):
     """Returns the hashed string feature scaled by 1000."""
     # Typical preprocessing: string feature -> one of 10 hash buckets.
     bucket_ids = string_to_hash_bucket(elem['str'], 10)
     return 1000 * bucket_ids