def _convert_to_binary_classification_predictions(predictions):
  """Converts a `Tensor` into a set of binary classification predictions.

  This function checks that the given `Tensor` is floating-point, and that it is
  trivially convertible to rank-1, i.e. has only one "nontrivial" dimension
  (e.g. the shapes [1000] and [1, 1, None, 1] are allowed, but [None, 1, None]
  and [50, 10] are not). If it satisfies these conditions, then it is reshaped
  to be rank-1 (if necessary) and returned.

  Args:
    predictions: a floating-point `Tensor` of predictions that is trivially
      convertible to rank-1.

  Returns:
    The predictions `Tensor`, reshaped to be rank-1, if necessary.

  Raises:
    TypeError: if "predictions" is not a floating-point `Tensor`.
    ValueError: if "predictions" is not trivially convertible to rank-1.
  """
  if not tf.is_tensor(predictions):
    raise TypeError("predictions must be a Tensor")
  if not predictions.dtype.is_floating:
    raise TypeError("predictions must be floating-point")

  return helpers.convert_to_1d_tensor(predictions, name="predictions")
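For illustration, a minimal sketch (not part of the library) of how this helper behaves on a few shapes; it assumes the function and `tf` are in scope:

# A shape with exactly one nontrivial dimension is flattened to rank-1.
predictions = tf.constant([[[0.1], [0.9], [0.4]]])  # shape (1, 3, 1)
flat = _convert_to_binary_classification_predictions(predictions)  # shape (3,)

# A shape with two nontrivial dimensions is rejected.
try:
  _convert_to_binary_classification_predictions(
      tf.constant([[0.1, 0.9], [0.4, 0.2]]))  # shape (2, 2)
except ValueError:
  pass

# A non-floating-point input is rejected before any shape check.
try:
  _convert_to_binary_classification_predictions(tf.constant([1, 2]))
except TypeError:
  pass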
Example #2
    def test_convert_to_1d_tensor(self):
        """Tests the "convert_to_1d_tensor" function."""
        self.assertFalse(tf.executing_eagerly())

        # Trying to make a rank-1 Tensor from a 0d Tensor should succeed.
        expected = [2.7]
        actual = helpers.convert_to_1d_tensor(2.7)
        with self.session() as session:
            self.assertAllClose(expected,
                                session.run(actual),
                                rtol=0,
                                atol=1e-6)

        # Trying to make a rank-1 Tensor from a rank-1 Tensor should succeed.
        expected = [-6.3, 1.0, 5.1]
        actual = helpers.convert_to_1d_tensor(expected)
        with self.session() as session:
            self.assertAllClose(expected,
                                session.run(actual),
                                rtol=0,
                                atol=1e-6)

        # Trying to make a rank-1 Tensor from a shape-(1,2,1) Tensor should succeed
        # (only one of the dimensions is nontrivial).
        expected = [3.6, -1.7]
        actual = helpers.convert_to_1d_tensor([[[3.6], [-1.7]]])
        with self.session() as session:
            self.assertAllClose(expected,
                                session.run(actual),
                                rtol=0,
                                atol=1e-6)

        # Trying to make a rank-1 Tensor from a shape-(1,None,1) Tensor should
        # succeed (only one of the dimensions is nontrivial).
        expected = [0.2, -2.4, 0.0]
        placeholder = tf.compat.v1.placeholder(tf.float32, shape=(1, None, 1))
        actual = helpers.convert_to_1d_tensor(placeholder)
        with self.session() as session:
            self.assertAllClose(
                expected,
                session.run(actual,
                            feed_dict={placeholder: [[[0.2], [-2.4], [0.0]]]}),
                rtol=0,
                atol=1e-6)

        # Trying to make a rank-1 Tensor from a rank-2 Tensor should fail.
        with self.assertRaises(ValueError):
            _ = helpers.convert_to_1d_tensor([[1, 2], [3, 4]])

        # Trying to make a rank-1 Tensor from a shape-(None,2) Tensor should fail.
        placeholder = tf.compat.v1.placeholder(tf.float32, shape=(None, 2))
        with self.assertRaises(ValueError):
            _ = helpers.convert_to_1d_tensor(placeholder)
            def update_ops_fn(running_averages_variable, structure_memoizer,
                              value_memoizer):
                """Updates the running sums before each call to the train_op."""
                weights, denominator_predicate = denominator
                weights = helpers.convert_to_1d_tensor(
                    weights(structure_memoizer, value_memoizer), name="weights")
                dtype = weights.dtype.base_dtype
                if not dtype.is_floating:
                    raise TypeError("weights must be floating-point")

                update_ops = []
                update_ops.append(
                    tf.debugging.assert_non_negative(
                        weights, message="weights must be non-negative"))

                denominator_weights = weights * tf.cast(
                    denominator_predicate.tensor(structure_memoizer,
                                                 value_memoizer),
                    dtype=dtype)

                # We take convex combinations (with parameter running_proportion) to
                # make sure that both running_average_sum and running_average_count
                # are divided by the number of minibatches, as explained below.
                running_proportion = 1.0 / (tf.maximum(
                    tf.cast(structure_memoizer[defaults.GLOBAL_STEP_KEY],
                            dtype=running_dtype), 0.0) + 1.0)
                running_average_sum = (
                    running_averages_variable[0] * (1.0 - running_proportion) +
                    tf.cast(tf.reduce_sum(denominator_weights),
                            dtype=running_dtype) * running_proportion)
                running_average_count = (
                    running_averages_variable[1] * (1.0 - running_proportion) +
                    tf.cast(tf.size(denominator_weights),
                            dtype=running_dtype) * running_proportion)

                update_ops.append(
                    running_averages_variable.assign(
                        [running_average_sum, running_average_count]))

                return update_ops
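To see why the convex combination with running_proportion = 1 / (step + 1) divides both running sums by the number of minibatches, here is a small pure-Python check (an illustration, not library code):

# After processing t minibatches, the running value equals the mean of the
# per-minibatch sums, i.e. the total divided by the number of minibatches.
batch_sums = [4.0, 2.0, 6.0]
running = 1.0  # the initial value is overwritten at step 0, where proportion == 1
for step, batch_sum in enumerate(batch_sums):
    proportion = 1.0 / (step + 1.0)
    running = running * (1.0 - proportion) + batch_sum * proportion
assert abs(running - sum(batch_sums) / len(batch_sums)) < 1e-12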
        def value_fn(weights_value, predicate_value):
            """Returns the numerator `Tensor`.

            Args:
              weights_value: `Tensor` of example weights.
              predicate_value: indicator `Tensor` representing whether each
                example should be included in the ratio's numerator.

            Returns:
              A `Tensor` containing the element-wise product of the two
              arguments.

            Raises:
              TypeError: if "weights" is not floating-point.
              ValueError: if "weights" cannot be converted to a rank-1 `Tensor`.
            """
            value = helpers.convert_to_1d_tensor(weights_value, name="weights")
            dtype = value.dtype.base_dtype
            if not dtype.is_floating:
                raise TypeError("weights must be floating-point")
            value *= tf.cast(predicate_value, dtype=dtype)
            return value
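As a hypothetical usage of value_fn (values are illustrative, and `tf` is assumed to be in scope), masking the weights by the indicator zeroes out the examples excluded from the numerator:

weights_value = tf.constant([0.5, 1.0, 2.0])
predicate_value = tf.constant([1.0, 0.0, 1.0])
# value_fn(weights_value, predicate_value) evaluates to [0.5, 0.0, 2.0].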
    def ratio(cls, weights, numerator_predicate, denominator_predicate):
        """Creates a new `_RatioWeights` representing the weights for a ratio.

        This method is used to create the weights for a single ratio, for which
        "numerator_predicate" indicates which examples should be included in
        the numerator of the ratio, and "denominator_predicate" indicates which
        should be included in the denominator.

        The numerator and denominator predicates can be arbitrary indicator
        `Tensor`s, but the numerator subset will be intersected with the
        denominator's before the rate is calculated.

        Args:
          weights: `Tensor` of example weights.
          numerator_predicate: boolean indicator `Tensor` representing whether
            each example should be included in the ratio's numerator.
          denominator_predicate: boolean indicator `Tensor` representing
            whether each example should be included in the ratio's denominator.

        Returns:
          A new `_RatioWeights` representing the ratio.

        Raises:
          TypeError: if "weights" is not floating-point.
          ValueError: if "weights" cannot be converted to a rank-1 `Tensor`.
        """
        key = (weights, denominator_predicate)

        value = helpers.convert_to_1d_tensor(weights, name="weights")
        dtype = value.dtype.base_dtype
        if not dtype.is_floating:
            raise TypeError("weights must be floating-point")
        # Notice that we force the set of examples included in the numerator to be a
        # subset of those in the denominator. It'll actually work fine either way,
        # but this should be closer to what users expect.
        value *= tf.cast(
            (numerator_predicate & denominator_predicate).predicate,
            dtype=dtype)

        return _RatioWeights(dtype, {key: value})
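A plain-Python illustration (not the library's `Predicate` type) of the intersection performed above: examples marked for the numerator but absent from the denominator are dropped before the rate is computed:

numerator_mask = [True, True, False]
denominator_mask = [False, True, True]
effective_numerator = [n and d for n, d in zip(numerator_mask, denominator_mask)]
# effective_numerator == [False, True, False]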
Example #6
  def convert_and_clip_fn(arg):
    """Converts the given object to a rank-1 float32 `Tensor` in [0,1]."""
    return tf.clip_by_value(
        tf.cast(
            helpers.convert_to_1d_tensor(arg, "predicate"), dtype=tf.float32),
        0.0, 1.0)
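A short sketch of calling the helper above (values are illustrative); out-of-range entries are clipped into [0, 1]:

clipped = convert_and_clip_fn([[-0.5], [0.25], [1.5]])  # one nontrivial dimension
# `clipped` evaluates to the float32 rank-1 Tensor [0.0, 0.25, 1.0].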
        def evaluate_denominator(self, denominator):
            """Evaluates the denominator portion of a ratio.

            Recall that a `_RatioWeights` object is responsible for computing:
              ratio_weights[j] = weights[j] 1{j in numerator_subset}
                  / (mean_i weights[i] 1{i in denominator_subset})
            This method returns (an approximation of) the denominator portion
            of this ratio. The numerator is calculated in the
            `_RatioWeights.evaluate` method.

            The implementation is complicated by the fact that, although the
            denominators of our ratios should evaluate to "the average weight
            of the examples included in the ratio's denominator", we don't have
            access to the entire dataset (instead, we will typically just get a
            sequence of minibatches). Hence, we can't compute the average
            weight across the entire dataset directly. Instead, we keep running
            sums of the total weight of examples included in the denominator,
            and the number of examples seen, and update them before each
            minibatch (in the set of `Operation`s returned by this method).

            Args:
              denominator: (`Tensor`, `Predicate`) pair, the first being the
                example weights, and the second the predicate indicating which
                examples are included in the denominator.

            Returns:
              A (`Tensor`, set, set) tuple containing the (approximate)
              denominator, a set of `Operation`s that should be executed before
              each training step (to update the internal state upon which the
              denominator depends), and a set of `Operation`s that can be
              executed to re-initialize this state.

            Raises:
              TypeError: if "weights" is not floating-point.
              ValueError: if "weights" cannot be converted to a rank-1
                `Tensor`.
            """
            if denominator not in self._denominators:
                weights, denominator_predicate = denominator
                weights = helpers.convert_to_1d_tensor(weights, name="weights")
                dtype = weights.dtype.base_dtype
                if not dtype.is_floating:
                    raise TypeError("weights must be floating-point")

                pre_train_ops = set()
                pre_train_ops.add(
                    tf.assert_non_negative(
                        weights, message="weights must be non-negative"))

                denominator_weights = weights * tf.cast(
                    denominator_predicate.predicate, dtype=dtype)

                # The running_average_sum variable will contain the sum of the weights
                # included in the denominator that we've seen so far, divided by the
                # number of minibatches that we've seen so far. Similarly,
                # running_average_count will contain the average size of the minibatches
                # we've seen so far. Their ratio will therefore be the sum of the
                # weights included in the denominator, divided by the number of examples
                # we've seen so far. The reason for dividing both quantities by the
                # number of minibatches is to prevent them from growing without bound
                # during training.
                #
                # We use double precision arithmetic for the running sums because we
                # don't want numerical errors to ruin our estimates if we perform a very
                # large number of iterations.
                running_dtype = tf.float64
                running_average_sum = tf.Variable(1.0,
                                                  trainable=False,
                                                  dtype=running_dtype,
                                                  name="running_average_sum")
                running_average_count = tf.Variable(
                    1.0,
                    trainable=False,
                    dtype=running_dtype,
                    name="running_average_count")

                # To restart the denominator calculations, we set the two running
                # averages to their initial values.
                restart_ops = set([
                    tf.assign(running_average_sum, 1.0),
                    tf.assign(running_average_count, 1.0)
                ])

                # We take convex combinations (with parameter running_proportion) to
                # make sure that both running_average_sum and running_average_count are
                # divided by the number of minibatches, as explained above.
                running_proportion = 1.0 / (
                    tf.maximum(tf.cast(self._global_step, dtype=running_dtype),
                               0.0) + 1.0)
                pre_train_ops.add(
                    tf.assign(
                        running_average_sum,
                        running_average_sum * (1.0 - running_proportion) +
                        tf.cast(tf.reduce_sum(denominator_weights),
                                dtype=running_dtype) * running_proportion))
                pre_train_ops.add(
                    tf.assign(
                        running_average_count,
                        running_average_count * (1.0 - running_proportion) +
                        tf.cast(tf.size(denominator_weights),
                                dtype=running_dtype) * running_proportion))

                # This code calculates max(denominator_lower_bound, running_average_sum
                # / running_average_count) safely, even when running_average_count is
                # zero (including when running_average_sum is also zero, in which case
                # the result will be denominator_lower_bound). We use a tf.cond to make
                # sure that we only perform the division if we know that it will result
                # in a quantity larger than denominator_lower_bound.
                running_denominator_lower_bound = tf.cast(
                    self._denominator_lower_bound, dtype=running_dtype)
                average_denominator_weight = tf.cond(
                    running_average_count * running_denominator_lower_bound
                    < running_average_sum,
                    true_fn=lambda: running_average_sum / running_average_count,
                    false_fn=lambda: running_denominator_lower_bound)

                self._denominators[denominator] = (average_denominator_weight,
                                                   pre_train_ops, restart_ops)

            return self._denominators[denominator]
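The guarded division in the tf.cond above can be summarized by this pure-Python equivalent (an illustration, not library code):

def lower_bounded_ratio(s, c, lower_bound):
    # Divide only when the quotient is guaranteed to exceed the bound; this
    # also covers c == 0 with s == 0, where the bound itself is returned.
    return s / c if c * lower_bound < s else lower_bound

assert lower_bounded_ratio(6.0, 2.0, 1.0) == 3.0  # max(1.0, 6.0 / 2.0)
assert lower_bounded_ratio(0.0, 0.0, 1.0) == 1.0  # no division by zero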