Пример #1
0
 def testToTensorflowTensorsRaisesUnknownKeyError(self):
     """Expects a ValueError when a spec names a feature absent from the values."""
     with self.assertRaisesRegex(ValueError, '.* not found in .*'):
         # The values only carry 'feature_1' ...
         provided_features = {
             'feature_1': np.array([1, 2, 3], dtype=np.float32)
         }
         # ... while the specs ask for a key that is not among them.
         requested_specs = {
             'missing_feature': tf.TensorSpec([1], dtype=tf.float32)
         }
         util.to_tensorflow_tensors({'features': provided_features},
                                    {'features': requested_specs})
Пример #2
0
 def testToTensorflowTensorsRaisesIncompatibleSpecError(self):
     """Expects a ValueError when a value does not match its declared spec."""
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias, which
     # is no longer available in recent Python versions of unittest.
     with self.assertRaisesRegex(ValueError,
                                 '.* is not compatible with .*'):
         util.to_tensorflow_tensors(
             {
                 'features': {
                     # int64 values with three elements ...
                     'feature_1': np.array([1, 2, 3], dtype=np.int64)
                 }
             }, {
                 'features': {
                     # ... checked against a float32 spec of shape [1].
                     'feature_1': tf.TensorSpec([1], dtype=tf.float32)
                 }
             })
Пример #3
0
 def testToFromTensorValues(self):
     """Round-trips dense, sparse, ragged and var-len values through tensors."""
     dense_feature = np.array([1, 2, 3])
     sparse_feature = types.SparseTensorValue(
         values=np.array([0.5, -1., 0.5, -1.]),
         indices=np.array([[0, 3, 1], [0, 20, 0], [1, 3, 1], [1, 20, 0]]),
         dense_shape=np.array([2, 100, 3]))
     ragged_feature = types.RaggedTensorValue(
         values=np.array([3, 1, 4, 1, 5, 9, 2, 7, 1, 8, 8, 2, 1]),
         nested_row_splits=[
             np.array([0, 3, 6]),
             np.array([0, 2, 3, 4, 5, 5, 8]),
             np.array([0, 2, 3, 3, 6, 9, 10, 11, 13])
         ])
     varlen_feature = types.VarLenTensorValue(
         values=np.array([1, 2, 3]),
         indices=np.array([[0, 0], [0, 1], [1, 0]]),
         dense_shape=np.array([2, 2]))

     tensor_values = {
         'features': {
             'feature_1': dense_feature,
             'feature_2': sparse_feature,
             'feature_3': ragged_feature,
             'feature_4': varlen_feature
         },
         'labels': np.array([1])
     }

     # Convert to tensors and back; the result should match the input values.
     as_tensors = util.to_tensorflow_tensors(tensor_values)
     actual = util.to_tensor_values(as_tensors)
     self.assertAllClose(actual, tensor_values)
Пример #4
0
def get_inputs(
    features: types.DictOfTensorValue,
    input_specs: types.DictOfTypeSpec,
) -> Optional[types.TensorTypeMaybeMultiLevelDict]:
  """Builds the input tensors selected by `input_specs` from `features`.

  Args:
    features: Dict of feature tensors.
    input_specs: Input specs keyed by input name.

  Returns:
    Input tensors keyed by input name. None is returned when there is a single
    string-typed input whose name cannot be found among the features, or when
    filtering the features by input name yields nothing.
  """
  input_names = list(input_specs.keys())

  # Avoid getting the tensors if we appear to be feeding serialized examples to
  # the callable: exactly one input, of string dtype, that is not a feature.
  if len(input_specs) == 1:
    only_spec = next(iter(input_specs.values()))
    if (only_spec and only_spec.dtype == tf.string and
        find_input_name_in_features(set(features.keys()),
                                    input_names[0]) is None):
      return None

  # If filtering is not successful (i.e. None is returned) fallback to feeding
  # serialized examples.
  selected = filter_by_input_names(features, input_names)
  if not selected:
    return None
  return util.to_tensorflow_tensors(selected, input_specs)
Пример #5
0
 def testToFromTensorValuesWithSpecs(self):
     """Round-trips values through tensors using specs that select a subset.

     Entries that have no spec ('ignored_feature' and 'ignored') are expected
     to be absent from the round-tripped result.
     """
     sparse_feature = types.SparseTensorValue(
         values=np.array([0.5, -1., 0.5, -1.], dtype=np.float32),
         indices=np.array([[0, 3, 1], [0, 20, 0], [1, 3, 1], [1, 20, 0]]),
         dense_shape=np.array([2, 100, 3]))
     row_splits = [
         np.array([0, 3, 6]),
         np.array([0, 2, 3, 4, 5, 5, 8]),
         np.array([0, 2, 3, 3, 6, 9, 10, 11, 13])
     ]
     ragged_feature = types.RaggedTensorValue(
         values=np.array([3, 1, 4, 1, 5, 9, 2, 7, 1, 8, 8, 2, 1],
                         dtype=np.float32),
         nested_row_splits=row_splits)

     input_features = {
         'feature_1': np.array([1, 2, 3], dtype=np.float32),
         'feature_2': sparse_feature,
         'feature_3': ragged_feature,
         'ignored_feature': np.array([1, 2, 3])
     }
     tensor_values = {
         'features': input_features,
         'labels': np.array([1], dtype=np.float32),
         'ignored': np.array([2])
     }

     feature_specs = {
         'feature_1': tf.TensorSpec([3], dtype=tf.float32),
         'feature_2': tf.SparseTensorSpec(shape=[2, 100, 3],
                                          dtype=tf.float32),
         'feature_3': tf.RaggedTensorSpec(shape=[2, None, None, None],
                                          dtype=tf.float32)
     }
     specs = {
         'features': feature_specs,
         'labels': tf.TensorSpec([1], dtype=tf.float32)
     }

     as_tensors = util.to_tensorflow_tensors(tensor_values, specs)
     actual = util.to_tensor_values(as_tensors)

     # Only the entries covered by the specs should come back.
     expected_features = {
         'feature_1': np.array([1, 2, 3], dtype=np.float32),
         'feature_2': sparse_feature,
         'feature_3': ragged_feature
     }
     expected = {
         'features': expected_features,
         'labels': np.array([1], dtype=np.float32)
     }
     self.assertAllClose(actual, expected)
Пример #6
0
 def testInferTensorSpecs(self):
     """Checks the specs inferred from tensors built out of tensor values."""
     sparse_feature = types.SparseTensorValue(
         values=np.array([0.5, -1., 0.5, -1.], dtype=np.float32),
         indices=np.array([[0, 3, 1], [0, 20, 0], [1, 3, 1], [1, 20, 0]]),
         dense_shape=np.array([2, 100, 3]))
     row_splits = [
         np.array([0, 3, 6]),
         np.array([0, 2, 3, 4, 5, 5, 8]),
         np.array([0, 2, 3, 3, 6, 9, 10, 11, 13])
     ]
     ragged_feature = types.RaggedTensorValue(
         values=np.array([3, 1, 4, 1, 5, 9, 2, 7, 1, 8, 8, 2, 1],
                         dtype=np.float32),
         nested_row_splits=row_splits)

     tensor_values = {
         'features': {
             'feature_1': np.array([1, 2, 3], dtype=np.float32),
             'feature_2': sparse_feature,
             'feature_3': ragged_feature,
         },
         'labels': np.array([1], dtype=np.float32),
     }

     # The expected specs leave the leading dimension unspecified (None).
     expected_feature_specs = {
         'feature_1': tf.TensorSpec([None], dtype=tf.float32),
         'feature_2': tf.SparseTensorSpec(shape=[None, 100, 3],
                                          dtype=tf.float32),
         'feature_3': tf.RaggedTensorSpec(shape=[None, None, None, None],
                                          dtype=tf.float32)
     }
     expected_specs = {
         'features': expected_feature_specs,
         'labels': tf.TensorSpec([None], dtype=tf.float32)
     }

     as_tensors = util.to_tensorflow_tensors(tensor_values)
     got_specs = util.infer_tensor_specs(as_tensors)
     self.assertDictEqual(expected_specs, got_specs)
  def process(self, batched_extract: types.Extracts) -> List[types.Extracts]:
    """Computes SQL-derived slice keys for every row of a batched extract.

    The features (merged with transformed features when present) are converted
    to a record batch, each cached query is executed against it, and the
    per-row results are stored under `constants.SLICE_KEY_TYPES_KEY`.

    Args:
      batched_extract: Extracts holding `constants.FEATURES_KEY` and optionally
        `constants.TRANSFORMED_FEATURES_KEY`.

    Returns:
      A single-element list containing a shallow copy of `batched_extract` with
      `constants.SLICE_KEY_TYPES_KEY` set to the computed slice keys.
    """
    features = batched_extract[constants.FEATURES_KEY]
    # Slice on transformed features if available.
    if (constants.TRANSFORMED_FEATURES_KEY in batched_extract and
        batched_extract[constants.TRANSFORMED_FEATURES_KEY] is not None):
      transformed_features = batched_extract[constants.TRANSFORMED_FEATURES_KEY]
      # Merge into a shallow copy so that the features mapping held by the
      # incoming extract is not mutated as a side effect of slicing.
      features = copy.copy(features)
      # If only one model, the output is stored without keying on model name.
      if not self._eval_config or len(self._eval_config.model_specs) == 1:
        features.update(transformed_features)
      else:
        # Models listed earlier have precedence in feature lookup.
        for spec in reversed(self._eval_config.model_specs):
          if spec.name in transformed_features:
            features.update(transformed_features[spec.name])

    tensors = util.to_tensorflow_tensors(features)
    tensor_specs = util.infer_tensor_specs(tensors)

    if _TF_MAJOR_VERSION < 2:
      # TODO(b/228456048): TFX-BSL doesn't support passing tensorflow tensors
      # for non-sparse/ragged values in TF 1.x (i.e. it only accepts np.ndarray
      # for dense) so we need to convert dense tensors to numpy.
      sess = tf.compat.v1.Session()

      def _convert_dense_to_numpy(values):  # pylint: disable=invalid-name
        # Recursively replaces tf.Tensor leaves with their evaluated values,
        # updating the mapping in place; other leaves are left untouched.
        if isinstance(values, Mapping):
          for k, v in values.items():
            if isinstance(v, Mapping):
              values[k] = _convert_dense_to_numpy(v)
            elif isinstance(v, tf.Tensor):
              values[k] = v.eval(session=sess)
        return values

      try:
        tensors = _convert_dense_to_numpy(tensors)
      finally:
        # Release the session's resources; a new one is created on each call.
        sess.close()

    converter = tensor_to_arrow.TensorsToRecordBatchConverter(tensor_specs)
    record_batch = converter.convert(tensors)
    sql_slice_keys = [[] for _ in range(record_batch.num_rows)]

    for query in self._cached_queries(record_batch.schema):
      # Example of result with batch size = 3:
      # result = [[[('feature', 'value_1')]],
      #           [[('feature', 'value_2')]],
      #           []
      #          ]
      result = query.Execute(record_batch)
      for row_index, row_result in enumerate(result):
        sql_slice_keys[row_index].extend([tuple(s) for s in row_result])

    # convert sql_slice_keys into a VarLenTensorValue where each row has dtype
    # object.
    dense_rows = [
        slicer_lib.slice_keys_to_numpy_array(row_slice_keys)
        for row_slice_keys in sql_slice_keys
    ]
    varlen_sql_slice_keys = types.VarLenTensorValue.from_dense_rows(dense_rows)

    # Make a shallow copy, so we don't mutate the original.
    batched_extract_copy = copy.copy(batched_extract)
    batched_extract_copy[constants.SLICE_KEY_TYPES_KEY] = varlen_sql_slice_keys

    # Record the current number of entries in the query cache.
    self._sql_slicer_num_record_batch_schemas.update(
        self._cached_queries.cache_info().currsize)

    return [batched_extract_copy]