def upper_bound(expressions):
    """Builds an `Expression` that upper-bounds all of the given expressions.

    A trainable slack variable is introduced, along with one constraint per
    element of `expressions` requiring that element to be at most the slack
    variable. The returned `Expression` stands for the slack variable.

    As long as the result is minimized or upper-bounded, it can be thought of
    as the `max` of its arguments. The result must *never* be maximized or
    lower-bounded, since that would only push the slack variable upward
    without influencing the given expressions; the returned object is wrapped
    so that its lower-bound side is an `InvalidExpression` carrying an
    explanatory message.

    Args:
      expressions: list of `Expression`s, the quantities to upper-bound.

    Returns:
      An `Expression` representing an upper bound on the given expressions.

    Raises:
      ValueError: if the expressions list is empty.
      TypeError: if the expressions list contains a non-`Expression`.
    """
    if not expressions:
        raise ValueError(
            "upper_bound cannot be given an empty expression list")
    if any(not isinstance(candidate, expression.Expression)
           for candidate in expressions):
        raise TypeError(
            "upper_bound expects a list of rate Expressions (perhaps you need to "
            "call wrap_rate() to create an Expression from a Tensor?)")

    # The dtype of the predictions may be unknown at this point (e.g. in eager
    # mode), so the slack variable is always float32 with auto_cast=True rather
    # than trying to match the predictions' dtype.
    slack_variable = deferred_tensor.DeferredVariable(
        0.0,
        trainable=True,
        name="tfco_upper_bound",
        dtype=tf.float32,
        auto_cast=True)
    slack_basic_expression = basic_expression.BasicExpression(
        [term.TensorTerm(slack_variable)])

    def make_slack_expression():
        """Returns a new `ExplicitExpression` over the slack variable."""
        return expression.ExplicitExpression(
            penalty_expression=slack_basic_expression,
            constraint_expression=slack_basic_expression)

    # One "candidate <= slack" constraint per input expression.
    slack_expression = make_slack_expression()
    extra_constraints = [
        candidate <= slack_expression for candidate in expressions
    ]

    # The result is wrapped in a BoundedExpression so that an attempt to
    # maximize or lower-bound it is detected and reported as an error.
    forbidden_side = expression.InvalidExpression(
        "the result of a call to upper_bound() can only be minimized or "
        "upper-bounded; it *cannot* be maximized or lower-bounded")
    permitted_side = expression.ConstrainedExpression(
        make_slack_expression(), extra_constraints=extra_constraints)
    return expression.BoundedExpression(lower_bound=forbidden_side,
                                        upper_bound=permitted_side)
示例#2
0
def lower_bound(expressions):
  """Builds an `Expression` that lower-bounds all of the given expressions.

  A trainable slack variable is introduced, along with one constraint per
  element of `expressions` requiring that element to be at least the slack
  variable. The returned `Expression` stands for the slack variable.

  As long as the result is maximized or lower-bounded, it can be thought of as
  the `min` of its arguments. The result should *never* be minimized or
  upper-bounded, since that would only push the slack variable downward
  without influencing the given expressions.

  Args:
    expressions: list of `Expression`s, the quantities to lower-bound.

  Returns:
    An `Expression` representing a lower bound on the given expressions.

  Raises:
    ValueError: if the expressions list is empty.
    TypeError: if the expressions list contains a non-`Expression`.
  """
  if not expressions:
    raise ValueError("lower_bound cannot be given an empty expression list")
  if any(not isinstance(candidate, expression.Expression)
         for candidate in expressions):
    raise TypeError(
        "lower_bound expects a list of rate Expressions (perhaps you need to "
        "call wrap_rate() to create an Expression from a Tensor?)")

  # The dtype of the predictions may be unknown at this point (e.g. in eager
  # mode), so the slack variable is always float32 with auto_cast=True rather
  # than trying to match the predictions' dtype.
  slack_variable = deferred_tensor.DeferredVariable(
      0.0,
      trainable=True,
      name="tfco_lower_bound",
      dtype=tf.float32,
      auto_cast=True)
  slack_basic_expression = basic_expression.BasicExpression(
      terms=[], tensor=slack_variable)

  def make_slack_expression(**extra_kwargs):
    """Returns a new `Expression` over the slack variable."""
    # A fresh extra_variables list is built on every call, so that the
    # expressions created here never share a list object.
    return expression.Expression(
        penalty_expression=slack_basic_expression,
        constraint_expression=slack_basic_expression,
        extra_variables=[slack_variable],
        **extra_kwargs)

  # One "candidate >= slack" constraint per input expression.
  slack_expression = make_slack_expression()
  extra_constraints = [
      candidate >= slack_expression for candidate in expressions
  ]
  return make_slack_expression(extra_constraints=extra_constraints)
示例#3
0
    def test_extra_variables(self):
        """Checks that arithmetic on `Expression`s carries extra variables."""

        def make_expression(extra_variables=None):
            """Returns a term-less `Expression` with the given extra variables."""
            penalty_part = basic_expression.BasicExpression([])
            constraint_part = basic_expression.BasicExpression([])
            return expression.Expression(penalty_part,
                                         constraint_part,
                                         extra_variables=extra_variables)

        first_variable, second_variable, third_variable = [
            deferred_tensor.DeferredVariable(initial_value)
            for initial_value in (2.718, 3.142, -1.0)
        ]

        first = make_expression([first_variable])
        second = make_expression([second_variable])
        third = make_expression([third_variable])

        # Combine the expressions through scaling, addition, subtraction,
        # division and negation.
        first_and_second = first * 0.5 + second
        second_and_third = second - third / 1.3
        all_three = -first_and_second + 0.6 * second_and_third

        # Each combined expression is paired with the extra variables it is
        # expected to report, in order.
        expectations = (
            (first_and_second, [first_variable, second_variable]),
            (second_and_third, [second_variable, third_variable]),
            (all_three, [first_variable, second_variable, third_variable]),
        )
        for combined, expected_variables in expectations:
            self.assertEqual(combined.extra_variables, expected_variables)
    def test_deferred_variable(self):
        """Checks the create-then-read lifecycle of a `DeferredVariable`."""
        memoizer = {
            defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
            defaults.GLOBAL_STEP_KEY: tf.compat.v2.Variable(0, dtype=tf.int32),
        }

        deferred = deferred_tensor.DeferredVariable(42, dtype=tf.int32)

        # Reading the variable before it has been created must fail.
        with self.assertRaises(RuntimeError):
            deferred(memoizer)

        deferred.create(memoizer)

        # Creating the same variable a second time must fail as well.
        with self.assertRaises(RuntimeError):
            deferred.create(memoizer)

        # Once created, the variable evaluates to its initial value.
        with self.wrapped_session() as session:
            actual_value = session.run(deferred(memoizer))
            self.assertAllEqual(42, actual_value)
示例#5
0
    def test_variables(self):
        """Checks the `variables` reported by evaluated `Expression` parts.

        Three `ExplicitExpression`s are built, each with its own penalty and
        constraint variable, and are then combined arithmetically. Evaluating
        the penalty (resp. constraint) part of each combination must report
        exactly the penalty (resp. constraint) variables that went into it.
        """
        memoizer = {
            defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
            defaults.GLOBAL_STEP_KEY: tf.compat.v2.Variable(0, dtype=tf.int32),
        }

        def make_expression(penalty_variable, constraint_variable):
            """Returns an `ExplicitExpression` wrapping the two variables."""
            penalty_part = basic_expression.BasicExpression(
                [term.TensorTerm(penalty_variable)])
            constraint_part = basic_expression.BasicExpression(
                [term.TensorTerm(constraint_variable)])
            return expression.ExplicitExpression(penalty_part, constraint_part)

        penalty_variables = [
            deferred_tensor.DeferredVariable(initial_value)
            for initial_value in (1, 2, 3)
        ]
        constraint_variables = [
            deferred_tensor.DeferredVariable(initial_value)
            for initial_value in (-1, -2, -3)
        ]

        first, second, third = [
            make_expression(penalty_variable, constraint_variable)
            for penalty_variable, constraint_variable in zip(
                penalty_variables, constraint_variables)
        ]

        first_and_second = first * 0.5 + second
        second_and_third = second - third / 1.3
        all_three = -first_and_second + 0.6 * second_and_third
        combinations = (first_and_second, second_and_third, all_three)

        # All penalty parts are evaluated first, then all constraint parts.
        penalty_values = [
            combined.penalty_expression.evaluate(memoizer)
            for combined in combinations
        ]
        constraint_values = [
            combined.constraint_expression.evaluate(memoizer)
            for combined in combinations
        ]

        # Positions (within the variable lists above) of the variables that
        # each entry of `combinations` is expected to report, in order.
        expected_positions = ((0, 1), (1, 2), (0, 1, 2))

        for value, positions in zip(penalty_values, expected_positions):
            self.assertEqual(
                value.variables,
                [penalty_variables[position] for position in positions])

        for value, positions in zip(constraint_values, expected_positions):
            self.assertEqual(
                value.variables,
                [constraint_variables[position] for position in positions])
    def _evaluate_denominator(self, denominator, memoizer):
        """Evaluates the denominator portion of a ratio.

        Recall that a `_RatioWeights` object is responsible for computing:
          ratio_weights[j] = weights[j] 1{j in numerator_subset}
              / (mean_i weights[i] 1{i in denominator_subset})
        This method returns (an approximation of) the denominator portion of
        this ratio. The numerator is calculated in the `_RatioWeights`.evaluate
        method.

        The implementation is complicated by the fact that, although the
        denominators of our ratios should evaluate to "the average weight of
        the examples included in the ratio's denominator", we don't have access
        to the entire dataset (instead, we will typically just get a sequence
        of minibatches). Hence, we can't compute the average weight across the
        entire dataset directly. Instead, we keep running sums of the total
        weight of examples included in the denominator, and the number of
        examples seen, and update them before each minibatch (in the update_ops
        associated with the running sum variables).

        The result is stored in `memoizer` under the key
        `(_RatioWeights, denominator)`, so calling this method again with the
        same `denominator` and `memoizer` returns the stored object instead of
        building a new one.

        Args:
          denominator: (`DeferredTensor`, `Predicate`) pair, the first being
            the example weights, and the second the predicate indicating which
            examples are included in the denominator.
          memoizer: dict, which memoizes portions of the calculation to
            simplify the resulting TensorFlow graph. It must contain the keys
            "denominator_lower_bound" and "global_step", with the corresponding
            values being the minimum allowed value of a rate denominator (a
            float), and the current iterate (a non-negative integer, starting
            at zero), respectively.

        Returns:
          A `DeferredTensor` containing the (approximate) denominator.
        """
        # The denominator is built at most once per (memoizer, denominator)
        # pair; later calls reuse the stored result.
        key = (_RatioWeights, denominator)
        if key not in memoizer:
            # We use double precision arithmetic for the running sums because we
            # don't want numerical errors to ruin our estimates if we perform a very
            # large number of iterations.
            running_dtype = tf.float64

            def update_ops_fn(running_averages_variable, memoizer):
                """Updates the running sums before each call to the train_op.

                Note that the `memoizer` parameter shadows the one belonging to
                the enclosing method; `denominator` and `running_dtype` are
                taken from the enclosing scope.

                Returns a list of ops: a non-negativity assertion on the
                weights, followed by the assignment that updates the
                two-element running-averages variable.

                Raises TypeError if the weights are not floating-point.
                """
                weights, denominator_predicate = denominator
                weights = helpers.convert_to_1d_tensor(weights(memoizer),
                                                       name="weights")
                dtype = weights.dtype.base_dtype
                if not dtype.is_floating:
                    raise TypeError("weights must be floating-point")

                update_ops = []
                update_ops.append(
                    tf.debugging.assert_non_negative(
                        weights, message="weights must be non-negative"))

                # Multiply the weights by the predicate (cast to the weights'
                # dtype), so that only the denominator's examples contribute.
                denominator_weights = weights * tf.cast(
                    denominator_predicate.tensor(memoizer), dtype=dtype)

                # We take convex combinations (with parameter running_proportion) to
                # make sure that both running_average_sum and running_average_count
                # are divided by the number of minibatches, as explained below.
                # running_proportion is 1 / (max(global_step, 0) + 1), so when the
                # global step is zero it equals one, and the variable's previous
                # contents receive zero weight in the combinations below.
                running_proportion = 1.0 / (tf.maximum(
                    tf.cast(memoizer[defaults.GLOBAL_STEP_KEY],
                            dtype=running_dtype), 0.0) + 1.0)
                running_average_sum = (
                    running_averages_variable[0] * (1.0 - running_proportion) +
                    tf.cast(tf.reduce_sum(denominator_weights),
                            dtype=running_dtype) * running_proportion)
                # tf.size counts every element of denominator_weights, i.e. all
                # examples in the minibatch, not only those in the denominator.
                running_average_count = (
                    running_averages_variable[1] * (1.0 - running_proportion) +
                    tf.cast(tf.size(denominator_weights),
                            dtype=running_dtype) * running_proportion)

                # Both running quantities are written back in a single assign.
                update_ops.append(
                    running_averages_variable.assign(
                        [running_average_sum, running_average_count]))

                return update_ops

            # The first element of the running_averages variable will contain the sum
            # of the weights included in the denominator that we've seen so far,
            # divided by the number of minibatches that we've seen so far. Similarly,
            # the second element will contain the average size of the minibatches
            # we've seen so far. Their ratio will therefore be the sum of the weights
            # included in the denominator, divided by the number of examples we've
            # seen so far. The reason for dividing both quantities by the number of
            # minibatches is to prevent them from growing without bound during
            # training.
            running_averages = deferred_tensor.DeferredVariable(
                [1.0, 1.0],
                trainable=False,
                name="tfco_running_average_sum_and_count",
                dtype=running_dtype,
                update_ops_fn=update_ops_fn)

            def average_denominator_weight_fn(running_averages_variable):
                """Returns the average denominator weight `Tensor`.

                Unlike `update_ops_fn`, this function takes no `memoizer`
                parameter: it reads the enclosing method's `memoizer` through
                its closure.
                """
                # This code calculates max(denominator_lower_bound, running_average_sum
                # / running_average_count) safely, even when running_average_count is
                # zero (including when running_average_sum is also zero, in which case
                # the result will be denominator_lower_bound). We use a tf.cond to make
                # sure that we only perform the division if we know that it will result
                # in a quantity larger than denominator_lower_bound.
                running_denominator_lower_bound = tf.cast(
                    memoizer[defaults.DENOMINATOR_LOWER_BOUND_KEY],
                    dtype=running_dtype)
                running_average_sum = running_averages_variable[0]
                running_average_count = running_averages_variable[1]
                # The predicate "count * lower_bound < sum" is the comparison
                # "lower_bound < sum / count" rearranged to avoid dividing.
                return tf.cond(
                    running_average_count * running_denominator_lower_bound <
                    running_average_sum,
                    true_fn=lambda: running_average_sum /
                    running_average_count,
                    false_fn=lambda: running_denominator_lower_bound)

            # Wrap the computation so that it is applied to the running-averages
            # variable, and store the resulting `DeferredTensor` for reuse.
            memoizer[key] = deferred_tensor.DeferredTensor.apply(
                average_denominator_weight_fn, running_averages)

        return memoizer[key]