def evaluate(self, memoizer):
    """Evaluates this `BasicExpression` as the sum of its parts.

    The stored tensor and the value of every term are gathered into one list,
    and added together by a single deferred operation.

    Args:
      memoizer: dict, which memoizes portions of the calculation to simplify
        the resulting TensorFlow graph. It must contain the keys
        "denominator_lower_bound" and "global_step", with the corresponding
        values being the minimum allowed value of a rate denominator (a python
        float), and the current iterate (starting at zero), respectively. It
        is passed unchanged to the `evaluate` method of every term.

    Returns:
      A (`DeferredTensor`, list) pair containing (i) the value of this
      `BasicExpression`, and (ii) a list of `DeferredVariable`s containing the
      internal state upon which the `BasicExpression` evaluation depends.
    """

    def add_all(*summands):
        """Returns the sum of all of the given values."""
        return sum(summands)

    summands = [self._tensor]
    state = deferred_tensor.DeferredVariableList()
    for current_term in self._terms:
        value, term_state = current_term.evaluate(memoizer)
        summands.append(value)
        state += term_state

    # All of the summands are handed to one apply() call, rather than being
    # accumulated pairwise while looping, so that the closures inside the
    # resulting DeferredTensor do not become deeply nested.
    total = deferred_tensor.DeferredTensor.apply(add_all, *summands)
    return total, state.list
def test_arithmetic(self):
    """Tests `Expression`'s arithmetic operators."""
    memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
        defaults.GLOBAL_STEP_KEY: tf.compat.v2.Variable(0, dtype=tf.int32)
    }

    penalty_values = [-3.6, 1.5, 0.4]
    constraint_values = [-0.2, -0.5, 2.3]

    def constant_expression(penalty_constant, constraint_constant):
        """Returns an `Expression` wrapping the two given constants."""
        penalty_part = basic_expression.BasicExpression(
            [],
            deferred_tensor.DeferredTensor(
                tf.constant(penalty_constant, dtype=tf.float32)))
        constraint_part = basic_expression.BasicExpression(
            [],
            deferred_tensor.DeferredTensor(tf.constant(constraint_constant)))
        return expression.Expression(penalty_part, constraint_part)

    def formula(first, second, third):
        """Exercises all of the operators on the three given operands."""
        return (0.3 - (first / 2.3 + 0.7 * second) -
                (1.2 + third - 0.1) * 0.6 + 0.8)

    # Three expressions, holding the constants in "penalty_values" in their
    # penalty_expressions, and those in "constraint_values" in their
    # constraint_expressions.
    expression_objects = [
        constant_expression(penalty_constant, constraint_constant)
        for penalty_constant, constraint_constant in zip(
            penalty_values, constraint_values)
    ]

    combined = formula(*expression_objects)

    actual_penalty_value, penalty_variables = (
        combined.penalty_expression.evaluate(memoizer))
    actual_constraint_value, constraint_variables = (
        combined.constraint_expression.evaluate(memoizer))

    # The variables must exist before the wrapped session is created.
    all_variables = deferred_tensor.DeferredVariableList(
        penalty_variables + constraint_variables)
    for deferred_variable in all_variables:
        deferred_variable.create(memoizer)

    # The very same formula, this time applied directly to the python floats.
    expected_penalty_value = formula(*penalty_values)
    expected_constraint_value = formula(*constraint_values)

    with self.wrapped_session() as session:
        self.assertNear(
            expected_penalty_value,
            session.run(actual_penalty_value(memoizer)),
            err=1e-6)
        self.assertNear(
            expected_constraint_value,
            session.run(actual_constraint_value(memoizer)),
            err=1e-6)
def test_arithmetic(self):
    """Tests `Expression`'s arithmetic operators."""
    structure_memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
        defaults.GLOBAL_STEP_KEY: tf.Variable(0, dtype=tf.int32),
        defaults.VARIABLE_FN_KEY: tf.Variable
    }

    def constant_basic_expression(constant):
        """Wraps a float32 constant in a single-term `BasicExpression`."""
        tensor = tf.constant(constant, dtype=tf.float32)
        return basic_expression.BasicExpression([term.TensorTerm(tensor)])

    def constant_expression(penalty_constant, constraint_constant=None):
        """Returns an `ExplicitExpression` holding the given constants.

        When no constraint constant is given, the penalty portion is reused as
        the constraint portion.
        """
        penalty_part = constant_basic_expression(penalty_constant)
        if constraint_constant is not None:
            constraint_part = constant_basic_expression(constraint_constant)
        else:
            constraint_part = penalty_part
        return expression.ExplicitExpression(penalty_part, constraint_part)

    # Together, these four pieces exercise all of the operators.
    leading = constant_expression(0.3)
    scaled_sum = (
        constant_expression(-3.6, -0.2) / 2.3 +
        0.7 * constant_expression(1.5, -0.5))
    scaled_group = (
        constant_expression(1.2) + constant_expression(0.4, 2.3) -
        constant_expression(0.1)) * 0.6
    trailing = constant_expression(0.8)
    combined = leading - scaled_sum - scaled_group + trailing

    actual_penalty_value = combined.penalty_expression.evaluate(
        structure_memoizer)
    actual_constraint_value = combined.constraint_expression.evaluate(
        structure_memoizer)

    # The variables must exist before the wrapped session is created.
    all_variables = deferred_tensor.DeferredVariableList(
        actual_penalty_value.variables + actual_constraint_value.variables)
    for deferred_variable in all_variables:
        deferred_variable.create(structure_memoizer)

    # The same expression as above, written out directly in python floats.
    expected_penalty_value = (
        0.3 - (-3.6 / 2.3 + 0.7 * 1.5) - (1.2 + 0.4 - 0.1) * 0.6 + 0.8)
    expected_constraint_value = (
        0.3 - (-0.2 / 2.3 + 0.7 * -0.5) - (1.2 + 2.3 - 0.1) * 0.6 + 0.8)

    with self.wrapped_session() as session:
        self.assertNear(
            expected_penalty_value,
            session.run(actual_penalty_value(structure_memoizer)),
            err=1e-6)
        self.assertNear(
            expected_constraint_value,
            session.run(actual_constraint_value(structure_memoizer)),
            err=1e-6)
def __init__(self,
             penalty_expression,
             constraint_expression,
             extra_variables=None,
             extra_constraints=None):
    """Creates a new `Expression`.

    An `Expression` is a quantity that will be minimized/maximized or
    constrained. Under the hood it is stored as *two* `BasicExpression`s:

    * the "penalty" portion, which is used when the expression is being
      minimized (in the objective function) or penalized (to satisfy a
      constraint), and
    * the "constraint" portion, which is used when the expression is being
      constrained.

    These are the first two parameters. The third, "extra_variables", should
    hold any `DeferredVariable`s that are used (perhaps even indirectly) by
    this `Expression`; its most common use is for slack variables.

    The fourth, "extra_constraints", specifies additional constraints that
    should be added to any optimization problem involving this `Expression`.
    Technically these can be anything: they are simply additional constraints,
    which may or may not have anything to do with the `Expression` to which
    they are attached. In practice, they will usually represent conditions
    that are required for the associated `Expression` to make sense. For
    example, an `Expression` representing "the false positive rate of f(x) at
    the threshold for which the true positive rate is at least 0.9" could be
    built with the expression being "the false positive rate of f(x) - t", and
    the extra constraint being "the true positive rate of f(x) - t is at least
    0.9", where "t" is an implicit threshold. Such extra constraints will
    ultimately be included in any optimization problem that includes the
    associated `Expression` (or an `Expression` derived from it).

    Args:
      penalty_expression: `BasicExpression` that will be used for the
        "penalty" portion of the optimization (i.e. when optimizing the model
        parameters). It should be {sub,semi}differentiable.
      constraint_expression: `BasicExpression` that will be used for the
        "constraint" portion of the optimization (i.e. when optimizing the
        constraints). It does not need to be {sub,semi}differentiable.
      extra_variables: optional collection of `DeferredVariable`s upon which
        this `Expression` depends.
      extra_constraints: optional collection of `Constraint`s required by this
        `Expression`.
    """
    # Both optional collections are normalized into the project's list
    # wrappers, so the rest of the class never sees a raw collection or None.
    variable_list = deferred_tensor.DeferredVariableList(extra_variables)
    constraint_list = constraint.ConstraintList(extra_constraints)

    self._penalty_expression = penalty_expression
    self._constraint_expression = constraint_expression
    self._extra_variables = variable_list
    self._extra_constraints = constraint_list
def evaluate(self, memoizer):
    """Evaluates this `BinaryClassificationTerm`.

    The positive and negative ratio weights are evaluated first, and the
    result is the (deferred) mean of the loss computed from the predictions
    and those weights.

    Args:
      memoizer: dict, which memoizes portions of the calculation to simplify
        the resulting TensorFlow graph. It must contain the keys
        "denominator_lower_bound" and "global_step", with the corresponding
        values being the minimum allowed value of a rate denominator (a python
        float), and the current iterate (starting at zero), respectively.

    Returns:
      A (`DeferredTensor`, list) pair containing (i) the value of this
      `BinaryClassificationTerm`, and (ii) a list of `DeferredVariable`s
      containing the internal state upon which the
      `BinaryClassificationTerm` evaluation depends.
    """
    # Evaluate the weights on the positive and negative approximate
    # indicators.
    positive_weights, positive_state = (
        self._positive_ratio_weights.evaluate(memoizer))
    negative_weights, negative_state = (
        self._negative_ratio_weights.evaluate(memoizer))

    state = deferred_tensor.DeferredVariableList()
    for weights_state in (positive_state, negative_state):
        state += weights_state

    def average_loss_fn(positive, negative, predictions):
        """Returns the average loss."""
        # Adding zeros shaped like the *other* weights Tensor broadcasts the
        # two to a common shape (inelegant, but effective). The _RatioWeights
        # object has already checked that both are rank-1, so all that is left
        # is to make them the same size before they are stacked.
        positive += tf.zeros_like(negative)
        negative += tf.zeros_like(positive)
        stacked_weights = tf.stack([positive, negative], axis=1)
        per_example_losses = self._loss.evaluate_binary_classification(
            predictions, stacked_weights)
        return tf.reduce_mean(per_example_losses)

    average_loss = deferred_tensor.DeferredTensor.apply(
        average_loss_fn, positive_weights, negative_weights,
        self._predictions)
    return average_loss, state.list
def _evaluate_expression(self, expression, extra_update_ops_fn=None):
    """Evaluates and returns both portions of an Expression.

    Both portions are evaluated against a freshly built structure memoizer,
    every variable they depend upon is created, the update ops are run once
    inside a wrapped session, and then the two values are fetched.

    Args:
      expression: `Expression` to evaluate.
      extra_update_ops_fn: optional function that takes the structure memoizer
        (a dict) and the list of `DeferredVariable`s, and returns a collection
        of ops to execute before evaluation. The returned collection is copied,
        never modified.

    Returns:
      A two-element list [penalty, constraint] containing the values of the
      penalty and constraint portions of the `Expression`.
    """
    structure_memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
        defaults.GLOBAL_STEP_KEY: tf.Variable(0, dtype=tf.int32),
        defaults.VARIABLE_FN_KEY: tf.Variable
    }

    penalty_value = expression.penalty_expression.evaluate(
        structure_memoizer)
    constraint_value = expression.constraint_expression.evaluate(
        structure_memoizer)

    # We need to explicitly create the variables before creating the wrapped
    # session.
    variables = deferred_tensor.DeferredVariableList(
        penalty_value.variables + constraint_value.variables).list
    for variable in variables:
        variable.create(structure_memoizer)

    def update_ops_fn():
        """Returns the extra ops followed by every variable's update ops."""
        # Always accumulate into a list that we own. Extending the object
        # returned by extra_update_ops_fn in place would silently grow a list
        # that the caller may still hold (and would fail outright for a
        # tuple).
        update_ops = []
        if extra_update_ops_fn:
            update_ops.extend(
                extra_update_ops_fn(structure_memoizer, variables))
        for variable in variables:
            update_ops.extend(variable.update_ops(structure_memoizer))
        return update_ops

    with self.wrapped_session() as session:
        session.run_ops(update_ops_fn)
        return [
            session.run(penalty_value(structure_memoizer)),
            session.run(constraint_value(structure_memoizer))
        ]
def _check_rates(self, expected_penalty_value, expected_constraint_value,
                 actual_expression):
    """Asserts that both portions of an expression have the expected values.

    Args:
      expected_penalty_value: expected value of the penalty portion.
      expected_constraint_value: expected value of the constraint portion.
      actual_expression: the expression whose `penalty_expression` and
        `constraint_expression` are evaluated and compared (with an absolute
        tolerance of 1e-6) against the expected values.
    """
    structure_memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
        defaults.GLOBAL_STEP_KEY: tf.Variable(0, dtype=tf.int32),
        defaults.VARIABLE_FN_KEY: tf.Variable
    }

    penalty_tensor = actual_expression.penalty_expression.evaluate(
        structure_memoizer)
    constraint_tensor = actual_expression.constraint_expression.evaluate(
        structure_memoizer)

    # The variables must exist before the wrapped session is created.
    all_variables = deferred_tensor.DeferredVariableList(
        penalty_tensor.variables + constraint_tensor.variables).list
    for deferred_variable in all_variables:
        deferred_variable.create(structure_memoizer)

    def update_ops_fn():
        """Collects the update ops of every variable."""
        collected_ops = []
        for deferred_variable in all_variables:
            collected_ops.extend(
                deferred_variable.update_ops(structure_memoizer))
        return collected_ops

    with self.wrapped_session() as session:
        # Running the update ops once is enough: the entire dataset is
        # contained within the Tensors, so the denominators will be correct.
        session.run_ops(update_ops_fn)

        actual_penalty_value = session.run(
            penalty_tensor(structure_memoizer))
        self.assertAllClose(
            expected_penalty_value, actual_penalty_value, rtol=0, atol=1e-6)

        actual_constraint_value = session.run(
            constraint_tensor(structure_memoizer))
        self.assertAllClose(
            expected_constraint_value,
            actual_constraint_value,
            rtol=0,
            atol=1e-6)
def evaluate(self, memoizer):
    """Evaluates the ratio weights, returning them as a deferred `Tensor`.

    Every stored (denominator key, numerator) pair contributes one ratio, i.e.
    the numerator divided by its evaluated denominator; the result is the sum
    of all such ratios, with gradients stopped.

    Args:
      memoizer: dict, which memoizes portions of the calculation to simplify
        the resulting TensorFlow graph. It must contain the keys
        "denominator_lower_bound" and "global_step", with the corresponding
        values being the minimum allowed value of a rate denominator (a python
        float), and the current iterate (starting at zero), respectively.

    Returns:
      A (`DeferredTensor`, list) pair containing (i) the weights associated
      with each example, and (ii) a list of `DeferredVariable`s containing the
      internal state upon which the `_RatioWeights` evaluation depends.
    """

    def stopped_sum_fn(*ratio_values):
        """Returns the gradient-stopped sum of the given ratios."""
        # The sum is seeded with the one-element tuple (0.0,) instead of the
        # default integer zero.
        # NOTE(review): presumably this keeps the result rank-1 even when
        # there are no ratios at all -- confirm.
        return tf.stop_gradient(sum(ratio_values, (0.0, )))

    deferred_ratios = []
    state = deferred_tensor.DeferredVariableList()
    for key, numerator in six.iteritems(self._ratios):
        denominator, denominator_state = self._evaluate_denominator(
            key, memoizer)

        def ratio_fn(numerator_value, denominator_value):
            """Returns the value of the current ratio as a `Tensor`."""
            # The denominator is cast to the numerator's base dtype before
            # dividing.
            numerator_dtype = numerator_value.dtype.base_dtype
            return numerator_value / tf.cast(
                denominator_value, dtype=numerator_dtype)

        deferred_ratios.append(
            deferred_tensor.DeferredTensor.apply(
                ratio_fn, numerator, denominator))
        state += denominator_state

    # Calling stop_gradient on the ratio weights is probably paranoid, but it
    # shouldn't do any harm, and might prevent failure if someone's doing
    # something weird.
    weights = deferred_tensor.DeferredTensor.apply(
        stopped_sum_fn, *deferred_ratios)
    return weights, state.list
def add_dependencies(self, extra_variables=None, extra_constraints=None):
    """Returns a new `Expression` with extra dependencies.

    The returned `Expression` has the same penalty and constraint portions as
    this one, and depends on the same variables and constraints, but *also*
    depends on those given in the extra_variables and extra_constraints
    parameters. `self` is *not* changed: a *new* `Expression` is built.

    Args:
      extra_variables: optional collection of `DeferredVariable`s to add to
        the list of variables upon which the resulting `Expression` depends.
      extra_constraints: optional collection of `Constraint`s to add to the
        list of constraints required by the resulting `Expression`.

    Returns:
      A new `Expression` whose extra variables and extra constraints are those
      of this `Expression` followed by the newly given ones.
    """
    added_variables = deferred_tensor.DeferredVariableList(extra_variables)
    added_constraints = constraint.ConstraintList(extra_constraints)

    merged_variables = self._extra_variables + added_variables
    merged_constraints = self._extra_constraints + added_constraints

    return Expression(
        self._penalty_expression,
        self._constraint_expression,
        extra_variables=merged_variables,
        extra_constraints=merged_constraints)
def test_precision(self):
    """Checks `precision`."""
    bisection_epsilon = 1e-6
    structure_memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
        defaults.GLOBAL_STEP_KEY: tf.Variable(0, dtype=tf.int32),
        defaults.VARIABLE_FN_KEY: tf.Variable
    }

    precision_expression = general_rates.precision(self._context)

    # Extract the constraints and the associated variables.
    constraint_values = []
    variable_list = deferred_tensor.DeferredVariableList()
    for extra_constraint in precision_expression.extra_constraints:
        value = extra_constraint.expression.constraint_expression.evaluate(
            structure_memoizer)
        constraint_values.append(value)
        variable_list += value.variables
    all_variables = variable_list.list
    self.assertEqual(2, len(constraint_values))

    def stack_fn(*args):
        """Stacks all of the constraint values into one `Tensor`."""
        return tf.stack(args)

    stacked_constraints = deferred_tensor.DeferredTensor.apply(
        stack_fn, *constraint_values)

    # Every variable included in the expression has to be created before we
    # can try to extract the ratio_bounds.
    for deferred_variable in all_variables:
        deferred_variable.create(structure_memoizer)

    # The find_zeros_of_functions() helper will perform a bisection search
    # over the ratio_bounds, so we need to extract the Tensor containing them
    # from the graph. Exactly one variable may match the name prefix.
    ratio_bounds = None
    for deferred_variable in all_variables:
        candidate = deferred_variable(structure_memoizer)
        if candidate.name.startswith("tfco_ratio_bounds"):
            self.assertIsNone(ratio_bounds)
            ratio_bounds = candidate
    self.assertIsNotNone(ratio_bounds)

    def update_ops_fn():
        """Collects the update ops of every variable."""
        collected_ops = []
        for deferred_variable in all_variables:
            collected_ops.extend(
                deferred_variable.update_ops(structure_memoizer))
        return collected_ops

    with self.wrapped_session() as session:
        session.run_ops(update_ops_fn)

        def evaluate_fn(values):
            """Assigns the variables and evaluates the constraints."""
            session.run_ops(lambda: ratio_bounds.assign(values))
            return session.run(stacked_constraints(structure_memoizer))

        actual_ratio_bounds = test_helpers.find_zeros_of_functions(
            2, evaluate_fn, epsilon=bisection_epsilon)

    actual_numerator = actual_ratio_bounds[0]
    actual_denominator = actual_ratio_bounds[1]

    # Indicator (as 0, 0.5 or 1) of the prediction being positive.
    predicted_positive = 0.5 * (1.0 + np.sign(self._predictions))
    total_weight = np.sum(self._weights)

    expected_numerator = np.sum(
        predicted_positive * (self._labels > 0.0) * self._weights
    ) / total_weight
    expected_denominator = np.sum(
        predicted_positive * self._weights) / total_weight

    self.assertAllClose(
        expected_numerator, actual_numerator, rtol=0, atol=bisection_epsilon)
    self.assertAllClose(
        expected_denominator,
        actual_denominator,
        rtol=0,
        atol=bisection_epsilon)
def test_roc_auc(self):
    """Tests that roc_auc's constraints give correct thresholds."""
    # roc_auc_upper_bound is not checked, since most of the code is shared,
    # and the test is too slow already.
    bins = 3
    bisection_epsilon = 1e-6
    memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
        defaults.GLOBAL_STEP_KEY: tf.compat.v2.Variable(0, dtype=tf.int32)
    }

    roc_auc_expression = binary_rates.roc_auc(self._context, bins)

    # Extract the constraints and the associated variables.
    constraint_values = []
    variable_list = deferred_tensor.DeferredVariableList()
    for extra_constraint in roc_auc_expression.extra_constraints:
        value = extra_constraint.expression.constraint_expression.evaluate(
            memoizer)
        constraint_values.append(value)
        variable_list += value.variables
    all_variables = variable_list.list
    self.assertEqual(bins, len(constraint_values))

    def stack_fn(*args):
        """Stacks all of the constraint values into one `Tensor`."""
        return tf.stack(args)

    stacked_constraints = deferred_tensor.DeferredTensor.apply(
        stack_fn, *constraint_values)

    # Every variable included in the expression has to be created before we
    # can try to extract the roc_auc_thresholds.
    for deferred_variable in all_variables:
        deferred_variable.create(memoizer)

    # The find_zeros_of_functions() helper will perform a bisection search
    # over the roc_auc_thresholds, so we need to extract the Tensor containing
    # them from the graph. Exactly one variable may match the name prefix.
    roc_auc_thresholds = None
    for deferred_variable in all_variables:
        candidate = deferred_variable(memoizer)
        if candidate.name.startswith("tfco_roc_auc_thresholds"):
            self.assertIsNone(roc_auc_thresholds)
            roc_auc_thresholds = candidate
    self.assertIsNotNone(roc_auc_thresholds)

    def update_ops_fn():
        """Collects the update ops of every variable."""
        collected_ops = []
        for deferred_variable in all_variables:
            collected_ops.extend(deferred_variable.update_ops(memoizer))
        return collected_ops

    with self.wrapped_session() as session:
        session.run_ops(update_ops_fn)

        def evaluate_fn(values):
            """Assigns the variables and evaluates the constraints."""
            session.run_ops(lambda: roc_auc_thresholds.assign(values))
            return session.run(stacked_constraints(memoizer))

        actual_thresholds = find_zeros_of_functions(
            bins, evaluate_fn, epsilon=bisection_epsilon)

    expected_thresholds = self._find_roc_auc_thresholds(bins)
    self.assertAllClose(
        expected_thresholds,
        actual_thresholds,
        rtol=0,
        atol=bisection_epsilon)
def test_f_score_upper_bound(self):
    """Checks that `f_score_upper_bound` calculates the right quantities."""
    # f_score_lower_bound is not checked, since most of the code is shared,
    # and the test is too slow already.
    beta = 1.6
    bisection_epsilon = 1e-6
    memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
        defaults.GLOBAL_STEP_KEY: tf.compat.v2.Variable(0, dtype=tf.int32)
    }

    f_score_expression = binary_rates.f_score_upper_bound(
        self._split_context, beta)

    # Extract the constraints and the associated variables, starting from the
    # expression's own extra variables.
    constraint_values = []
    variable_list = deferred_tensor.DeferredVariableList(
        f_score_expression.extra_variables)
    for extra_constraint in f_score_expression.extra_constraints:
        value, value_variables = (
            extra_constraint.expression.constraint_expression.evaluate(
                memoizer))
        constraint_values.append(value)
        variable_list += value_variables
        variable_list += extra_constraint.expression.extra_variables
    all_variables = variable_list.list
    self.assertEqual(2, len(constraint_values))

    def stack_fn(*args):
        """Stacks all of the constraint values into one `Tensor`."""
        return tf.stack(args)

    stacked_constraints = deferred_tensor.DeferredTensor.apply(
        stack_fn, *constraint_values)

    # Every variable included in the expression has to be created before we
    # can try to extract the ratio_bounds.
    for deferred_variable in all_variables:
        deferred_variable.create(memoizer)

    # The find_zeros_of_functions() helper will perform a bisection search
    # over the ratio_bounds, so we need to extract the Tensor containing them
    # from the graph. Exactly one variable may match the name prefix.
    ratio_bounds = None
    for deferred_variable in all_variables:
        candidate = deferred_variable(memoizer)
        if candidate.name.startswith("tfco_ratio_bounds"):
            self.assertIsNone(ratio_bounds)
            ratio_bounds = candidate
    self.assertIsNotNone(ratio_bounds)

    def update_ops_fn():
        """Collects the update ops of every variable."""
        collected_ops = []
        for deferred_variable in all_variables:
            collected_ops.extend(deferred_variable.update_ops(memoizer))
        return collected_ops

    with self.wrapped_session() as session:
        session.run_ops(update_ops_fn)

        def evaluate_fn(values):
            """Assigns the variables and evaluates the constraints."""
            session.run_ops(lambda: ratio_bounds.assign(values))
            return session.run(stacked_constraints(memoizer))

        actual_ratio_bounds = find_zeros_of_functions(
            2, evaluate_fn, epsilon=bisection_epsilon)

    actual_numerator = actual_ratio_bounds[0]
    actual_denominator = actual_ratio_bounds[1]

    # Indicators (as 0, 0.5 or 1) of the prediction being positive/negative.
    predicted_positive = 0.5 * (1.0 + np.sign(self._constraint_predictions))
    predicted_negative = 0.5 * (1.0 - np.sign(self._constraint_predictions))
    labeled_positive = self._constraint_labels > 0.0
    labeled_negative = self._constraint_labels <= 0.0
    beta_squared = beta * beta

    # Weighted counts, restricted to the examples selected by the predicate.
    true_positives = np.sum(
        predicted_positive * labeled_positive * self._constraint_weights *
        self._constraint_predicate)
    false_negatives = np.sum(
        predicted_negative * labeled_positive * self._constraint_weights *
        self._constraint_predicate)
    false_positives = np.sum(
        predicted_positive * labeled_negative * self._constraint_weights *
        self._constraint_predicate)
    total_weight = np.sum(
        self._constraint_weights * self._constraint_predicate)

    expected_numerator = (
        (1.0 + beta_squared) * true_positives / total_weight)
    expected_denominator = (
        ((1.0 + beta_squared) * true_positives +
         beta_squared * false_negatives + false_positives) / total_weight)

    self.assertAllClose(
        expected_numerator, actual_numerator, rtol=0, atol=bisection_epsilon)
    self.assertAllClose(
        expected_denominator,
        actual_denominator,
        rtol=0,
        atol=bisection_epsilon)
def __init__(self, objective, constraints=None, denominator_lower_bound=1e-3):
    """Creates a rate constrained optimization problem.

    In addition to an objective function to minimize and a list of constraints
    to satisfy, this method also takes a "denominator_lower_bound" parameter.
    At a high level, a rate is "the proportion of training examples satisfying
    some property for which some event occurs, divided by the proportion of
    training examples satisfying the property", i.e. is a numerator divided by
    a denominator. To avoid dividing by zero (or quantities that are close to
    zero), the "denominator_lower_bound" parameter is used to impose a lower
    bound on the denominator of a rate.

    However, this parameter is a last resort. If you're calculating a rate on
    a small subset of the data (i.e. with a property that is rarely true,
    resulting in a small denominator), then the speed of optimization could
    suffer greatly: you'd almost certainly be better off splitting the subset
    of interest off into its own dataset, with its own placeholder tensors.

    Construction evaluates the penalty portion of the objective, then both
    portions of every constraint (including extra constraints discovered along
    the way), and finally creates every variable that was collected.

    Args:
      objective: an `Expression` to minimize.
      constraints: a collection of `Constraint`s to impose.
      denominator_lower_bound: float, the smallest permitted value of the
        denominator of a rate.

    Raises:
      ValueError: if the "penalty" portion of the objective or a constraint is
        non-differentiable, or if denominator_lower_bound is negative.
    """
    # We do permit denominator_lower_bound to be zero. In this case, division
    # by zero is possible, and it's the user's responsibility to ensure that
    # it doesn't happen.
    if denominator_lower_bound < 0.0:
        raise ValueError("denominator lower bound must be non-negative")

    # The objective needs to be differentiable. So do the penalty portions of
    # the constraints, but we'll check those later.
    if not objective.penalty_expression.is_differentiable:
        raise ValueError(
            "non-differentiable losses (e.g. the zero-one loss) "
            "cannot be optimized--they can only be constrained")

    # Accumulates every variable that the objective and the constraints
    # depend upon; "constraints" is rebound to a ConstraintList so that it can
    # be extended with extra constraints below.
    variables = deferred_tensor.DeferredVariableList()
    constraints = constraint.ConstraintList(constraints)

    # We make our own global_step, for keeping track of the denominators. We
    # don't take one as a parameter since we want complete ownership, to avoid
    # any shenanigans: it has to start at zero, and be incremented after every
    # minibatch.
    self._global_step = tf.compat.v2.Variable(
        0,
        trainable=False,
        name="tfco_global_step",
        dtype=tf.int64,
        aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)

    # This memoizer will remember and re-use certain intermediate values,
    # causing the TensorFlow graph we construct to contain fewer redundancies
    # than it would otherwise. Additionally, it will store any slack variables
    # or denominator variables that need to be created for the optimization
    # problem.
    self._memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: denominator_lower_bound,
        defaults.GLOBAL_STEP_KEY: self._global_step
    }

    # We ignore the "constraint_expression" field here, since we're not inside
    # a constraint (this is the objective function).
    self._objective, objective_variables = (
        objective.penalty_expression.evaluate(self._memoizer))
    variables += objective_variables
    variables += objective.extra_variables
    constraints += objective.extra_constraints

    # Evaluating expressions can result in extra constraints being introduced,
    # so we keep track of the number of constraints that we've already
    # evaluated in "checked_constraints", append new constraints to
    # "constraints" (which will automatically ignore attempts to add
    # duplicates, since it's a ConstraintList), and repeatedly check the
    # newly-added constraints until none are left.
    #
    # In light of the fact that constraints can depend on other constraints,
    # we can view the structure of constraints as a tree, in which case this
    # code will enumerate over the constraints in breadth-first order.
    self._proxy_constraints = []
    self._constraints = []
    checked_constraints = 0
    while len(constraints) > checked_constraints:
        # Take only the constraints that were appended since the last pass.
        new_constraints = constraints[checked_constraints:]
        checked_constraints = len(constraints)
        for new_constraint in new_constraints:
            if not new_constraint.expression.penalty_expression.is_differentiable:
                raise ValueError(
                    "non-differentiable losses (e.g. the zero-one loss) "
                    "cannot be optimized--they can only be constrained")
            penalty_value, penalty_variables = (
                new_constraint.expression.penalty_expression.evaluate(
                    self._memoizer))
            constraint_value, constraint_variables = (
                new_constraint.expression.constraint_expression.evaluate(
                    self._memoizer))
            # The penalty values are stored as the proxy constraints, and the
            # constraint values as the constraints proper, in matching order.
            self._proxy_constraints.append(penalty_value)
            self._constraints.append(constraint_value)
            variables += penalty_variables
            variables += constraint_variables
            variables += new_constraint.expression.extra_variables
            constraints += new_constraint.expression.extra_constraints

    # Explicitly create all of the variables. This also functions as a sanity
    # check: before this point, no variable should have been accessed
    # directly, and since their storage didn't exist yet, they couldn't have
    # been.
    self._variables = variables.list
    for variable in self._variables:
        variable.create(self._memoizer)
def test_arithmetic(self):
    """Tests `BasicExpression`'s arithmetic operators.

    Builds one expression out of constant and ratio sub-expressions using
    every supported operator, then checks (i) how the resulting terms were
    merged, and (ii) the value of the constant term and of the positive and
    negative ratio weights of each remaining binary classification term.
    """
    structure_memoizer = {
        defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
        defaults.GLOBAL_STEP_KEY: tf.compat.v2.Variable(0, dtype=tf.int32)
    }

    dummy_predictions = deferred_tensor.ExplicitDeferredTensor(
        tf.constant(0, dtype=tf.float32, shape=(1, )))
    dummy_weights = deferred_tensor.ExplicitDeferredTensor(1.0)
    true_predicate = predicate.Predicate(True)

    def ratio_expression(positive_coefficient, negative_coefficient,
                         loss_function):
        """Returns a one-term `BasicExpression` wrapping a ratio term."""
        term_object = term.BinaryClassificationTerm.ratio(
            positive_coefficient, negative_coefficient, dummy_predictions,
            dummy_weights, true_predicate, true_predicate, loss_function)
        return basic_expression.BasicExpression([term_object])

    def constant_expression(constant):
        """Returns a one-term `BasicExpression` wrapping a constant."""
        return basic_expression.BasicExpression(
            [term.TensorTerm(tf.constant(constant, dtype=tf.float32))])

    # This expression exercises all of the operators. The first and third
    # ratio_expression()s will have the same losses (and everything else
    # except the coefficients), and will therefore be compatible. The second
    # has a different loss, and will be incompatible with the other two.
    expression_object = (
        constant_expression(0.3) -
        (ratio_expression(1.0, 0.0, loss.ZeroOneLoss()) / 2.3 +
         0.7 * ratio_expression(0.5, 0.5, loss.HingeLoss())) +
        (constant_expression(1.2) +
         ratio_expression(0.0, 1.0, loss.ZeroOneLoss()) -
         constant_expression(0.1)) * 0.6 + constant_expression(0.8))

    expected_constant = 0.3 + (1.2 - 0.1) * 0.6 + 0.8
    # Overall multiplier applied to each of the three ratio expressions.
    coefficients = np.array([-1.0 / 2.3, -0.7, 0.6], dtype=np.float32)
    positive_coefficients = np.array(
        [1.0, 0.5, 0.0], dtype=np.float32) * coefficients
    negative_coefficients = np.array(
        [0.0, 0.5, 1.0], dtype=np.float32) * coefficients

    # The expected weights for the two zero-one terms will be merged, since
    # they're compatible. There is only one hinge term.
    expected_zero_one_positive_weights = (
        positive_coefficients[0] + positive_coefficients[2])
    expected_zero_one_negative_weights = (
        negative_coefficients[0] + negative_coefficients[2])
    expected_hinge_positive_weights = positive_coefficients[1]
    expected_hinge_negative_weights = negative_coefficients[1]

    # We should have three terms, since the two compatible
    # BinaryClassificationTerms will be merged, and we'll have one TensorTerm.
    expression_terms = expression_object._terms
    expression_binary_classification_terms = [
        tt for tt in expression_terms
        if isinstance(tt, term.BinaryClassificationTerm)
    ]
    expression_tensor_terms = [
        tt for tt in expression_terms if isinstance(tt, term.TensorTerm)
    ]
    self.assertEqual(3, len(expression_terms))
    self.assertEqual(2, len(expression_binary_classification_terms))
    self.assertEqual(1, len(expression_tensor_terms))

    # The order of the two binary classification terms is not relied upon:
    # swap them if the zero-one term happens to come second.
    zero_one_term, hinge_term = expression_binary_classification_terms
    if zero_one_term.loss != loss.ZeroOneLoss():
        zero_one_term, hinge_term = hinge_term, zero_one_term
    self.assertEqual(zero_one_term.loss, loss.ZeroOneLoss())
    self.assertEqual(hinge_term.loss, loss.HingeLoss())

    actual_constant = expression_tensor_terms[0].evaluate(structure_memoizer)
    actual_zero_one_positive_weights = (
        zero_one_term.positive_ratio_weights.evaluate(structure_memoizer))
    actual_zero_one_negative_weights = (
        zero_one_term.negative_ratio_weights.evaluate(structure_memoizer))
    actual_hinge_positive_weights = (
        hinge_term.positive_ratio_weights.evaluate(structure_memoizer))
    actual_hinge_negative_weights = (
        hinge_term.negative_ratio_weights.evaluate(structure_memoizer))

    # We need to explicitly create the variables before creating the wrapped
    # session.
    variables = deferred_tensor.DeferredVariableList(
        actual_constant.variables +
        actual_zero_one_positive_weights.variables +
        actual_zero_one_negative_weights.variables +
        actual_hinge_positive_weights.variables +
        actual_hinge_negative_weights.variables)
    for variable in variables:
        variable.create(structure_memoizer)

    with self.wrapped_session() as session:
        # Like every other value checked below, the constant is fetched
        # through the wrapped session, instead of handing the un-evaluated
        # result straight to assertAllClose.
        self.assertAllClose(
            expected_constant,
            session.run(actual_constant(structure_memoizer)),
            rtol=0,
            atol=1e-6)
        self.assertAllClose(
            np.array([expected_zero_one_positive_weights]),
            session.run(actual_zero_one_positive_weights(structure_memoizer)),
            rtol=0,
            atol=1e-6)
        self.assertAllClose(
            np.array([expected_zero_one_negative_weights]),
            session.run(actual_zero_one_negative_weights(structure_memoizer)),
            rtol=0,
            atol=1e-6)
        self.assertAllClose(
            np.array([expected_hinge_positive_weights]),
            session.run(actual_hinge_positive_weights(structure_memoizer)),
            rtol=0,
            atol=1e-6)
        self.assertAllClose(
            np.array([expected_hinge_negative_weights]),
            session.run(actual_hinge_negative_weights(structure_memoizer)),
            rtol=0,
            atol=1e-6)
def test_f_score(self):
  """Checks `f_score`.

  Evaluates the single extra constraint attached to the F-score expression,
  bisects over the "tfco_denominator_bound" variable to find the value at
  which that constraint is zero, and compares the result against the
  denominator computed directly with NumPy from the test fixture's
  predictions, labels and weights.
  """
  beta = 1.6
  bisection_epsilon = 1e-6

  memoizer = {
      defaults.DENOMINATOR_LOWER_BOUND_KEY: 0.0,
      defaults.GLOBAL_STEP_KEY: tf.Variable(0, dtype=tf.int32),
      defaults.VARIABLE_FN_KEY: tf.Variable
  }

  f_score_expression = general_rates.f_score(self._context, beta)

  # Evaluate every extra constraint, then gather the variables on which the
  # evaluated constraints depend.
  constraint_values = [
      extra.expression.constraint_expression.evaluate(memoizer)
      for extra in f_score_expression.extra_constraints
  ]
  deferred_variables = deferred_tensor.DeferredVariableList()
  for constraint_value in constraint_values:
    deferred_variables += constraint_value.variables
  deferred_variables = deferred_variables.list
  self.assertEqual(1, len(constraint_values))
  stacked_constraints = deferred_tensor.DeferredTensor.apply(
      lambda *args: tf.stack(args), *constraint_values)

  # Every variable has to be created explicitly before we can go looking for
  # the denominator bound among them.
  for deferred_variable in deferred_variables:
    deferred_variable.create(memoizer)

  # find_zeros_of_functions() bisects over the denominator bound, so we need
  # a handle on the (unique) variable whose name starts with
  # "tfco_denominator_bound".
  denominator_bound = None
  for deferred_variable in deferred_variables:
    candidate = deferred_variable(memoizer)
    if not candidate.name.startswith("tfco_denominator_bound"):
      continue
    self.assertIsNone(denominator_bound)
    denominator_bound = candidate
  self.assertIsNotNone(denominator_bound)

  def update_ops_fn():
    """Collects the update ops of every variable into one flat list."""
    return [
        op for deferred_variable in deferred_variables
        for op in deferred_variable.update_ops(memoizer)
    ]

  with self.wrapped_session() as session:
    session.run_ops(update_ops_fn)

    def evaluate_fn(values):
      """Assigns the variables and evaluates the constraints."""
      # "values" holds a single element, but the denominator bound has
      # shape (), not (1,), so we assign that one element.
      session.run_ops(lambda: denominator_bound.assign(values[0]))
      return session.run(stacked_constraints(memoizer))

    # Only one function is being bisected, so take the first (and only)
    # element of the result, for the same reason as above.
    zeros = test_helpers.find_zeros_of_functions(
        1, evaluate_fn, epsilon=bisection_epsilon)
    actual_denominator_bound = zeros[0]

  # Recompute the expected denominator directly from the raw arrays.
  prediction_signs = np.sign(self._predictions)
  predicted_positive = 0.5 * (1.0 + prediction_signs)
  predicted_negative = 0.5 * (1.0 - prediction_signs)
  labeled_positive = self._labels > 0.0
  labeled_negative = self._labels <= 0.0
  beta_squared = beta * beta

  true_positive_mass = np.sum(
      predicted_positive * labeled_positive * self._weights)
  false_negative_mass = np.sum(
      predicted_negative * labeled_positive * self._weights)
  false_positive_mass = np.sum(
      predicted_positive * labeled_negative * self._weights)

  expected_denominator = (
      ((1.0 + beta_squared) * true_positive_mass +
       beta_squared * false_negative_mass + false_positive_mass) /
      np.sum(self._weights))

  self.assertAllClose(
      expected_denominator,
      actual_denominator_bound,
      rtol=0,
      atol=bisection_epsilon)
def __init__(self,
             objective,
             constraints=None,
             denominator_lower_bound=1e-3,
             variable_fn=tf.Variable,
             name=None):
  """Creates a rate constrained optimization problem.

  Besides the objective to minimize and the constraints to satisfy, this
  constructor accepts a "denominator_lower_bound". Informally, a rate is "the
  proportion of training examples satisfying some property for which some
  event occurs, divided by the proportion of training examples satisfying
  the property", i.e. a numerator divided by a denominator. The
  "denominator_lower_bound" parameter imposes a lower bound on that
  denominator, so that we avoid dividing by zero (or by something close to
  zero).

  Treat this parameter as a last resort. If a rate is computed over a small
  subset of the data (i.e. the property is rarely true, so the denominator is
  small), optimization could slow down dramatically: you would almost
  certainly be better off splitting that subset into its own dataset, with
  its own placeholder tensors.

  Args:
    objective: an `Expression` to minimize.
    constraints: a collection of `Constraint`s to impose.
    denominator_lower_bound: float, the smallest permitted value of the
      denominator of a rate.
    variable_fn: optional function with the same signature as the
      `tf.Variable` constructor, that returns a new variable with the
      specified properties.
    name: optional string, the name of this object.

  Raises:
    ValueError: if the "penalty" portion of the objective or a constraint is
      non-differentiable, or if denominator_lower_bound is negative.
  """
  super(RateMinimizationProblem, self).__init__(name=name)

  # Shared by the objective check and the per-constraint checks below.
  non_differentiable_message = (
      "non-differentiable losses (e.g. the zero-one loss) "
      "cannot be optimized--they can only be constrained")

  # A denominator_lower_bound of exactly zero is allowed. Division by zero
  # then becomes possible, and it is up to the user to make sure that it
  # does not happen.
  if denominator_lower_bound < 0.0:
    raise ValueError("denominator lower bound must be non-negative")

  # The objective must be differentiable. The same holds for the penalty
  # portions of the constraints, which are checked further down.
  if not objective.penalty_expression.is_differentiable:
    raise ValueError(non_differentiable_message)

  all_inputs = deferred_tensor.DeferredTensorInputList()
  all_variables = deferred_tensor.DeferredVariableList()
  constraints = constraint.ConstraintList(constraints)

  # The global_step tracks the denominators. We create it ourselves rather
  # than accepting one as a parameter, because we want complete ownership:
  # it has to start at zero, and be incremented after every minibatch.
  self._global_step = variable_fn(
      0,
      trainable=False,
      name="tfco_global_step",
      dtype=tf.int64,
      aggregation=tf.VariableAggregation.ONLY_FIRST_REPLICA)

  # The structure memoizer remembers and re-uses certain intermediate
  # values, so that the TensorFlow graph we build contains fewer
  # redundancies. It also stores any slack variables or denominator
  # variables that need to be created for the optimization problem.
  #
  # Every DeferredVariable has an associated key which maps (in the
  # structure memoizer) to the tf.Variable itself. Dicts with tuple keys
  # cannot be tracked by tf.Trackable (upon which tf.Module is based), so
  # "self._structure_memoizer" is excluded from tracking via
  # "self._no_dependency" and "_TF_MODULE_IGNORED_PROPERTIES". The raw
  # tf.Variables still need to be tracked, which is why the
  # "self._raw_variables" list is created at the end of this method.
  memoizer_contents = {
      defaults.DENOMINATOR_LOWER_BOUND_KEY: denominator_lower_bound,
      defaults.GLOBAL_STEP_KEY: self._global_step,
      defaults.VARIABLE_FN_KEY: variable_fn
  }
  self._structure_memoizer = self._no_dependency(memoizer_contents)
  memoizer = self._structure_memoizer

  # Only the "penalty_expression" is used for the objective: the
  # "constraint_expression" is irrelevant since we are not inside a
  # constraint.
  self._objective = objective.penalty_expression.evaluate(memoizer)
  all_inputs += self._objective.inputs
  all_variables += self._objective.variables
  constraints += objective.extra_constraints

  # Evaluating an expression may introduce extra constraints. "num_checked"
  # counts the constraints already evaluated; newly-introduced ones are
  # appended to "constraints" (a ConstraintList, which automatically ignores
  # attempts to add duplicates), and we keep processing the not-yet-checked
  # tail until nothing new shows up.
  #
  # Since constraints can depend on other constraints, their structure can
  # be viewed as a tree, which this loop walks in breadth-first order.
  self._proxy_constraints = []
  self._constraints = []
  num_checked = 0
  while num_checked < len(constraints):
    pending = constraints[num_checked:]
    num_checked = len(constraints)
    for pending_constraint in pending:
      pending_expression = pending_constraint.expression
      if not pending_expression.penalty_expression.is_differentiable:
        raise ValueError(non_differentiable_message)

      penalty_value = pending_expression.penalty_expression.evaluate(
          memoizer)
      constraint_value = pending_expression.constraint_expression.evaluate(
          memoizer)
      self._proxy_constraints.append(penalty_value)
      self._constraints.append(constraint_value)

      all_inputs += penalty_value.inputs
      all_inputs += constraint_value.inputs
      all_variables += penalty_value.variables
      all_variables += constraint_value.variables
      constraints += pending_expression.extra_constraints

  # The list of all input `Tensor`-like objects (or nullary functions
  # returning such).
  self._inputs = all_inputs.list

  # Explicitly create every variable. This doubles as a sanity check: no
  # variable should have been accessed directly before this point, and
  # since their storage did not exist yet, none could have been.
  #
  # "self._variables" holds the DeferredVariables needed by this problem,
  # while "self._raw_variables" holds the tf.Variables that they create. The
  # latter list exists only to help tf.Module checkpoint them.
  self._variables = all_variables.list
  with self.name_scope:
    self._raw_variables = [
        deferred_variable.create(memoizer)
        for deferred_variable in self._variables
    ]