Пример #1
0
  def _build(self, y_pred, y_true):
    """One-time setup of metric objects.

    Matches `self._metrics` and `self._weighted_metrics` against the structure
    of `y_pred`, converts their entries through `self._get_metric_objects`,
    flattens them up to the outputs, then assigns metric names and creates
    the ordered metric list.

    Args:
      y_pred: Predictions; their structure is the reference the metric
        collections are broadcast and conformed to.
      y_true: Targets, handed to `self._get_metric_objects` together with
        `y_pred`.
    """
    super(MetricsContainer, self)._build(y_pred)

    # Unweighted metrics: broadcast first, then conform to the outputs.
    self._metrics = self._maybe_broadcast_to_outputs(y_pred, self._metrics)
    self._metrics = self._conform_to_outputs(y_pred, self._metrics)

    # Weighted metrics go through the same two steps.
    self._weighted_metrics = self._maybe_broadcast_to_outputs(
        y_pred, self._weighted_metrics)
    self._weighted_metrics = self._conform_to_outputs(
        y_pred, self._weighted_metrics)

    # Standardize on tuple since `tf.data` turns lists into `Tensor`s.
    outputs = nest.list_to_tuple(y_pred)
    targets = nest.list_to_tuple(y_true)
    self._metrics = nest.list_to_tuple(self._metrics)
    self._weighted_metrics = nest.list_to_tuple(self._weighted_metrics)

    # Turn every entry into a `Metric` object; the targets and outputs are
    # supplied so the conversion can disambiguate based on output properties.
    self._metrics = nest.map_structure_up_to(
        outputs, self._get_metric_objects, self._metrics, targets, outputs)
    self._weighted_metrics = nest.map_structure_up_to(
        outputs, self._get_metric_objects, self._weighted_metrics, targets,
        outputs)

    # Flatten both collections up to the structure of the outputs.
    self._metrics = nest.flatten_up_to(
        outputs, self._metrics, check_types=False)
    self._weighted_metrics = nest.flatten_up_to(
        outputs, self._weighted_metrics, check_types=False)

    # The naming and ordering steps assume the flattening above has happened.
    self._set_metric_names()
    self._create_ordered_metrics()
    self._built = True
Пример #2
0
def _process_tensorlike(inputs):
    """Process tensor-like inputs.

    This function:

    (1) Converts `NumPy` arrays to `Tensor`s (floating-point arrays are
        converted with the backend's `floatx` dtype).
    (2) Converts `Scipy` sparse matrices to `SparseTensor`s.
    (3) Converts `list`s to `tuple`s (for `tf.data` support).

    Args:
      inputs: Structure of `Tensor`s, `NumPy` arrays, or tensor-like.

    Returns:
      Structure of `Tensor`s or tensor-like.
    """
    def _to_tensorlike(value):
        # NumPy arrays become tensors; only floating arrays get an explicit
        # dtype, every other array keeps the dtype chosen by the conversion.
        if isinstance(value, np.ndarray):
            is_floating = issubclass(value.dtype.type, np.floating)
            target_dtype = backend.floatx() if is_floating else None
            return ops.convert_to_tensor_v2_with_dispatch(
                value, dtype=target_dtype)
        # `scipy_sparse` is checked for truthiness before it is used.
        if scipy_sparse and scipy_sparse.issparse(value):
            return _scipy_sparse_to_sparse_tensor(value)
        # Anything else passes through untouched.
        return value

    converted = nest.map_structure(_to_tensorlike, inputs)
    return nest.list_to_tuple(converted)
Пример #3
0
    def build(self, y_pred, y_true):
        """One-time setup of metric objects.

        Matches `self._metrics` and `self._weighted_metrics` against the
        structure of `y_pred`, converts their entries through
        `self._get_metric_objects`, flattens them up to the outputs, then
        (unless the container was loaded from a serialized model) assigns
        metric names, and finally creates the ordered metric list.

        Args:
          y_pred: Predictions; their structure is the reference the metric
            collections are broadcast and conformed to.
          y_true: Targets, handed to `self._get_metric_objects` together
            with `y_pred`.
        """
        super(MetricsContainer, self).build(y_pred)

        # Unweighted metrics: broadcast first, then conform to the outputs.
        self._metrics = self._maybe_broadcast_to_outputs(
            y_pred, self._metrics)
        self._metrics = self._conform_to_outputs(y_pred, self._metrics)

        # Weighted metrics go through the same two steps.
        self._weighted_metrics = self._maybe_broadcast_to_outputs(
            y_pred, self._weighted_metrics)
        self._weighted_metrics = self._conform_to_outputs(
            y_pred, self._weighted_metrics)

        # Standardize on tuple since `tf.data` turns lists into `Tensor`s.
        outputs = nest.list_to_tuple(y_pred)
        targets = nest.list_to_tuple(y_true)
        self._metrics = nest.list_to_tuple(self._metrics)
        self._weighted_metrics = nest.list_to_tuple(self._weighted_metrics)

        # Turn every entry into a `Metric` object; targets and outputs are
        # supplied so the conversion can disambiguate on output properties.
        self._metrics = nest.map_structure_up_to(
            outputs, self._get_metric_objects, self._metrics, targets,
            outputs)
        self._weighted_metrics = nest.map_structure_up_to(
            outputs, self._get_metric_objects, self._weighted_metrics,
            targets, outputs)

        # Flatten both collections up to the structure of the outputs.
        self._metrics = nest.flatten_up_to(
            outputs, self._metrics, check_types=False)
        self._weighted_metrics = nest.flatten_up_to(
            outputs, self._weighted_metrics, check_types=False)

        # The steps below assume the flattening above has happened.
        #
        # When loading a model that has already been serialized, the
        # pre-processing metric renaming must not be applied a second time.
        if not self._from_serialized:
            self._set_metric_names()
        self._create_ordered_metrics()
        self._built = True
Пример #4
0
    def _standardize_batch(self, data):
        """Standardizes a batch output by a generator.

        The batch is unpacked into `(x, y, sample_weight)` and packed again,
        its lists are converted to tuples, and every floating-point NumPy
        array in it is re-created with the backend's `floatx` dtype.

        Args:
          data: A batch as produced by the generator.

        Returns:
          The standardized batch.
        """
        def _as_floatx(value):
            # Only floating-point NumPy arrays are converted; everything
            # else is returned as-is.
            if not isinstance(value, np.ndarray):
                return value
            if not issubclass(value.dtype.type, np.floating):
                return value
            return np.array(value, dtype=backend.floatx())

        # Unpack/pack round trip removes `None`s.
        x, y, sample_weight = unpack_x_y_sample_weight(data)
        repacked = pack_x_y_sample_weight(x, y, sample_weight)

        as_tuples = nest.list_to_tuple(repacked)
        return nest.map_structure(_as_floatx, as_tuples)
Пример #5
0
        def wrapped_generator():
            """Remove Nones and lists before invoking Dataset.from_generator.

            Every element produced by `generator_fn` is adjusted according
            to the flags of the enclosing scope, in this order: wrapped in a
            1-tuple, lists converted to tuples, truncated to
            `elements_to_keep` entries, and its third entry replaced by the
            first result of `training_utils.handle_partial_sample_weights`.
            """
            for element in generator_fn():
                if wrap_in_tuple:
                    element = (element, )

                if must_extract_lists:
                    element = nest.list_to_tuple(element)

                if must_prune_nones:
                    element = element[:elements_to_keep]

                if partial_sample_weight:
                    # Only the first of the three returned values is used.
                    filled_weights, _, _ = (
                        training_utils.handle_partial_sample_weights(
                            element[1],
                            element[2],
                            sample_weight_modes,
                            check_all_flat=False))
                    element = element[:2] + (filled_weights, )

                yield element
Пример #6
0
    def _make_bridging_callable(generator_fn, wrap_in_tuple, peek,
                                elements_to_keep, partial_sample_weight,
                                sample_weight_modes):
        """Optional compatibility layer between user's data and Dataset.

        Args:
          generator_fn: Zero-argument callable returning an iterable of
            batches.
          wrap_in_tuple: Whether each batch must be wrapped in a 1-tuple.
          peek: A sample batch used to decide which transformations apply.
          elements_to_keep: Number of leading batch entries to retain.
          partial_sample_weight: Whether the sample weights of each batch
            must be passed through
            `training_utils.handle_partial_sample_weights`.
          sample_weight_modes: Forwarded to
            `training_utils.handle_partial_sample_weights`.

        Returns:
          `generator_fn` itself when no transformation is required,
          otherwise a generator function wrapping it.
        """
        must_prune_nones = (elements_to_keep != len(peek))

        # Lists need extracting when converting them to tuples changes the
        # structure, which `assert_same_structure` reports as a TypeError.
        try:
            nest.assert_same_structure(peek, nest.list_to_tuple(peek))
        except TypeError:
            must_extract_lists = True
        else:
            must_extract_lists = False

        # No additional transformations are needed.
        needs_bridging = any((wrap_in_tuple, must_extract_lists,
                              must_prune_nones, partial_sample_weight))
        if not needs_bridging:
            return generator_fn

        def wrapped_generator():
            """Remove Nones and lists before invoking Dataset.from_generator."""
            for element in generator_fn():
                if wrap_in_tuple:
                    element = (element, )

                if must_extract_lists:
                    element = nest.list_to_tuple(element)

                if must_prune_nones:
                    element = element[:elements_to_keep]

                if partial_sample_weight:
                    # Only the first of the three returned values is used.
                    filled_weights, _, _ = (
                        training_utils.handle_partial_sample_weights(
                            element[1],
                            element[2],
                            sample_weight_modes,
                            check_all_flat=False))
                    element = element[:2] + (filled_weights, )

                yield element

        return wrapped_generator
Пример #7
0
  def adapt(self, data, reset_state=True):
    """Adapt the state of the layers of the preprocessing stage to the data.

    The input is split into one dataset per stage input. Nodes are then
    visited by depth; every non-input node whose inputs are available has its
    layer adapted (when the layer defines `adapt`) and is then mapped over
    its input dataset to produce the datasets for its outputs.

    Args:
      data: A batched Dataset object, a NumPy array, an EagerTensor, or a list,
        dict or nested structure of Numpy Arrays or EagerTensors. The elements
        of Dataset object need to conform with inputs of the stage. The first
        dimension of NumPy arrays or EagerTensors are understood to be batch
        dimension. Data to be iterated over to adapt the state of the layers in
        this preprocessing stage.
      reset_state: Whether this call to `adapt` should reset the state of the
        layers in this preprocessing stage.

    Raises:
      ValueError: If `data` is not a Dataset and, once flattened, contains an
        element that is neither a NumPy array nor an EagerTensor; or if
        `data` is a Dataset reported as infinite.

    Examples:

    >>> # For a stage with dict input
    >>> inputs = {'x2': tf.keras.Input(shape=(5,)),
    ...           'x1': tf.keras.Input(shape=(1,))}
    >>> outputs = [inputs['x1'], inputs['x2']]
    >>> stage = FunctionalPreprocessingStage(inputs, outputs)
    >>> ds = tf.data.Dataset.from_tensor_slices({'x1': tf.ones((4,5)),
    ...                                          'x2': tf.ones((4,1))})
    >>> sorted(ds.element_spec.items()) # Check element_spec
    [('x1', TensorSpec(shape=(5,), dtype=tf.float32, name=None)),
     ('x2', TensorSpec(shape=(1,), dtype=tf.float32, name=None))]
    >>> stage.adapt(ds)
    >>> data_np = {'x1': np.ones((4, 5)), 'x2': np.ones((4, 1))}
    >>> stage.adapt(data_np)

    """
    if not isinstance(data, dataset_ops.Dataset):
      # Non-Dataset input: flatten it, validate every element, and build one
      # dataset per element.
      data = self._flatten_to_reference_inputs(data)
      if any([
          not isinstance(datum, (np.ndarray, ops.EagerTensor)) for datum in data
      ]):
        # NOTE(review): `data` was reassigned above, so `type(data)` reports
        # the type of the flattened value rather than the caller's argument.
        raise ValueError(
            '`adapt()` requires a batched Dataset, a list of EagerTensors '
            'or Numpy arrays as input, got {}'.format(type(data)))
      # Each element is sliced along its first dimension and re-batched with
      # a batch size of 1.
      ds_input = [
          dataset_ops.Dataset.from_tensor_slices(x).batch(1) for x in data
      ]

    if isinstance(data, dataset_ops.Dataset):
      # Validate the datasets to try and ensure we haven't been passed one with
      # infinite size. That would cause an infinite loop here.
      if tf_utils.dataset_is_infinite(data):
        raise ValueError(
            'The dataset passed to `adapt()` has an infinite number of '
            'elements. Please use dataset.take(...) to make the number '
            'of elements finite.')
      # Unzip dataset object to a list of single input dataset.
      ds_input = _unzip_dataset(data)

    # Dictionary mapping reference tensors to datasets
    # Keys are the stringified `id()` of each tensor; each value is a list
    # holding the same dataset once per entry in the tensor usage count.
    ds_dict = {}
    tensor_usage_count = self._tensor_usage_count
    for x, y in zip(self.inputs, ds_input):
      x_id = str(id(x))
      ds_dict[x_id] = [y] * tensor_usage_count[x_id]

    # Depths are visited from the largest key to the smallest.
    nodes_by_depth = self._nodes_by_depth
    depth_keys = sorted(nodes_by_depth.keys(), reverse=True)

    def build_map_fn(node, args, kwargs):
      # Returns the function mapped over the dataset `args`. When the
      # dataset's element_spec is not a tuple, the received arguments are
      # unpacked into the layer call; otherwise they are passed to the layer
      # as a single tuple. The layer result is always flattened.
      if not isinstance(args.element_spec, tuple):

        def map_fn(*x):
          return nest.flatten(node.layer(*x, **kwargs))
      else:

        def map_fn(*x):
          return nest.flatten(node.layer(x, **kwargs))

      return map_fn

    for depth in depth_keys:
      for node in nodes_by_depth[depth]:
        # Input node
        if node.is_input:
          continue

        # Node with input not computed yet
        if any(t_id not in ds_dict for t_id in node.flat_input_ids):
          continue

        # Combine the node's input datasets into a single zipped dataset.
        args, kwargs = node.map_arguments(ds_dict)
        args = dataset_ops.Dataset.zip(nest.list_to_tuple(*args))

        # Layers exposing `adapt` are adapted on their input dataset before
        # that dataset is mapped through them.
        if hasattr(node.layer, 'adapt'):
          node.layer.adapt(args, reset_state=reset_state)

        map_fn = build_map_fn(node, args, kwargs)
        outputs = args.map(map_fn)
        outputs = _unzip_dataset(outputs)

        # Update ds_dict.
        for x_id, y in zip(node.flat_output_ids, outputs):
          ds_dict[x_id] = [y] * tensor_usage_count[x_id]
Пример #8
0
    def _canonicalize_peek(self, peek, sample_weight_modes):
        """Map the peeked batch into a regular form.

        This function serves two purposes. First, it determines if per-batch
        transformations are needed. Second, it extracts the structure to be
        used by Dataset.from_generator.

        As a side effect, `self._first_batch_size` is set from the first
        dimension of the first flattened element of the canonicalized peek.

        Args:
          peek: The first batch of the user's data
          sample_weight_modes: Optional structure indicating how to handle
            sample weights. If it is a string, it will be mapped to match
            the target structure.

        Returns:
          A tuple `(peek, wrap_in_tuple, elements_to_keep,
          partial_sample_weight, sample_weight_modes, nested_shape,
          nested_dtypes)`: an updated peek and various inspection results.

        Raises:
          ValueError: If the peek does not have 1, 2 or 3 elements, or if
            sample weights are found without targets.
          IndexError: If the batch size cannot be inferred from the peek.
        """
        # A bare (non-tuple) batch is treated as a 1-tuple holding the input.
        wrap_in_tuple = not isinstance(peek, tuple)
        if wrap_in_tuple:
            peek = (peek, )

        num_fields = len(peek)
        if num_fields not in (1, 2, 3):
            raise ValueError(
                "Output of generator should be a tuple of 1 or 2 or 3 elements: "
                "(input,) or (input, target) or (input, target, sample_weights). "
                "Received {}".format(peek))

        # Pad the missing trailing fields with `None`.
        padded = tuple(peek) + (None, ) * (3 - num_fields)
        x_peek, y_peek, sample_weights_peek = padded

        any_sample_weight = False
        partial_sample_weight = False

        # Broadcast against the sample weights when given, else the targets.
        if sample_weights_peek is not None:
            broadcast_target = sample_weights_peek
        else:
            broadcast_target = y_peek
        sample_weight_modes = broadcast_sample_weight_modes(
            broadcast_target, sample_weight_modes)

        if num_fields == 3:
            weight_info = training_utils.handle_partial_sample_weights(
                y_peek,
                sample_weights_peek,
                sample_weight_modes,
                check_all_flat=True)
            (sample_weights_peek, any_sample_weight,
             partial_sample_weight) = weight_info
            peek = (x_peek, y_peek, sample_weights_peek)

        # Users often return None for fields which are not used. For instance:
        # (x, y, None) to indicate no sample weights.
        targets_missing = num_fields >= 2 and y_peek is None
        if targets_missing:
            if any_sample_weight:
                raise ValueError("Found sample weights, but not targets.")
            elements_to_keep = 1
        elif num_fields == 3 and not any_sample_weight:
            elements_to_keep = 2
        else:
            elements_to_keep = num_fields

        def unknown_dims(t):
            # One `None` per dimension of `t`.
            return tuple(None for _ in t.shape)

        def dtype_of(t):
            return t.dtype

        def as_inspectable(t):
            # Values with a truthy `shape` and a truthy `dtype` are used
            # directly; anything else is turned into a `floatx` NumPy array.
            shape = getattr(t, "shape", None)
            if shape and getattr(t, "dtype", None):
                return t
            return np.array(t, dtype=backend.floatx())

        kept_fields = peek[:elements_to_keep]
        canonicalized_peek = nest.list_to_tuple(
            nest.map_structure(as_inspectable, kept_fields))
        nested_dtypes = nest.map_structure(dtype_of, canonicalized_peek)
        nested_shape = nest.map_structure(unknown_dims, canonicalized_peek)

        try:
            leading = nest.flatten(canonicalized_peek)[0]
            self._first_batch_size = int(leading.shape[0])
        except IndexError:
            raise IndexError(
                "Could not infer batch size from: {}".format(peek))

        return (peek, wrap_in_tuple, elements_to_keep, partial_sample_weight,
                sample_weight_modes, nested_shape, nested_dtypes)