def _initial_state(self, num_constraints):
     """Returns the all-zero initial state, with one entry per constraint."""
     # The only internal state an AdditiveExternalRegretOptimizer keeps is its
     # vector of Lagrange multipliers, so for m constraints the state is a
     # tensor of shape (m,).
     #
     # FUTURE WORK: make the dtype a parameter.
     multipliers_shape = (num_constraints,)
     return standard_ops.zeros(multipliers_shape, dtype=dtypes.float32)
 def _initial_state(self, num_constraints):
   """Creates the zero-valued Lagrange multipliers used as initial state."""
   # An AdditiveExternalRegretOptimizer stores one Lagrange multiplier per
   # constraint and nothing else, hence a float32 tensor of shape (m,) where m
   # is the number of constraints.
   #
   # FUTURE WORK: make the dtype a parameter.
   initial_multipliers = standard_ops.zeros(
       (num_constraints,), dtype=dtypes.float32)
   return initial_multipliers
 def _initial_state(self, num_constraints):
   """Returns the initial (m+1, m+1) state matrix for m constraints."""
   # An AdditiveSwapRegretOptimizer keeps a left-stochastic matrix of shape
   # (m+1, m+1) as its internal state, where m is the number of constraints.
   size = num_constraints + 1
   # A first row of ones on top of all-zero rows: every column places its
   # whole weight on the objective and none on the constraints.
   objective_row = standard_ops.ones((1, size))
   constraint_rows = standard_ops.zeros((size - 1, size))
   return standard_ops.concat((objective_row, constraint_rows), axis=0)
 def _initial_state(self, num_constraints):
   """Builds the starting left-stochastic state matrix."""
   # For an AdditiveSwapRegretOptimizer the state has shape (m+1, m+1), with m
   # being the number of constraints.
   side = num_constraints + 1
   # Start with all of the weight on the objective (the row of ones) and none
   # on the constraints (the remaining rows of zeros).
   ones_row = standard_ops.ones((1, side))
   zero_rows = standard_ops.zeros((side - 1, side))
   stacked_rows = (ones_row, zero_rows)
   return standard_ops.concat(stacked_rows, axis=0)
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Builds the `Operation` performing one constrained-minimization step.

    The model parameters are updated with `self.optimizer`. The internal state
    (a constraint/objective weight matrix, the analogue of Lagrange
    multipliers) is updated with `self.constraint_optimizer` when that is not
    None, and with `self.optimizer` otherwise. Whether the matrix updates are
    additive or multiplicative depends on the derived class. Once the updates
    have run, the state is projected with `self._projection_op` and, if a
    `global_step` was given, that step is incremented by one.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.compat.v1.train.Optimizer`'s `minimize`
        method.
      colocate_gradients_with_ops: as in `tf.compat.v1.train.Optimizer`'s
        `minimize` method.
      name: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.compat.v1.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the objective, constraints and proxy constraints of
        `minimization_problem` do not all have the same base dtype.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    # A problem without proxy constraints uses the constraints themselves.
    if proxy_constraints is None:
      proxy_constraints = constraints

    # The objective, constraints and proxy constraints must agree on dtype.
    objective_dtype = objective.dtype.base_dtype
    if (objective_dtype != constraints.dtype.base_dtype or
        objective_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Both constraint tensors are flattened to 1d.
    num_constraints = minimization_problem.num_constraints
    flat_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=flat_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=flat_shape)

    # The initial value is handed over as a callable so that, if this function
    # call is inside the scope of a tf.control_dependencies() block, the
    # dependencies will not be applied to the initializer.
    def initial_state_fn():
      return self._initial_state(num_constraints)

    state = standard_ops.Variable(
        initial_state_fn,
        trainable=False,
        name="swap_regret_optimizer_state")

    # Prepend a zero to the constraints and the objective to the proxy
    # constraints, so both vectors have num_constraints + 1 entries.
    leading_zero = standard_ops.zeros((1,), dtype=constraints.dtype)
    zero_and_constraints = standard_ops.concat(
        (leading_zero, constraints), axis=0)
    objective_as_vector = standard_ops.expand_dims(objective, 0)
    objective_and_proxy_constraints = standard_ops.concat(
        (objective_as_vector, proxy_constraints), axis=0)

    distribution = self._distribution(state)
    # The loss contracts the distribution with the objective and proxy
    # constraints; it is what the model parameters are differentiated against.
    loss = standard_ops.tensordot(
        standard_ops.cast(distribution, objective_and_proxy_constraints.dtype),
        objective_and_proxy_constraints, 1)
    # The gradient for the state is the matrix product of zero_and_constraints
    # (as a column) with the distribution (given a new leading axis).
    constraints_column = standard_ops.expand_dims(
        standard_ops.cast(zero_and_constraints, distribution.dtype), 1)
    distribution_row = standard_ops.expand_dims(distribution, 0)
    matrix_gradient = standard_ops.matmul(constraints_column, distribution_row)

    single_optimizer = self.constraint_optimizer is None

    # Either way, self.optimizer computes the model-parameter gradients, and
    # the derived class pairs the state with its gradient.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    state_grad_and_var = self._constraint_grad_and_var(state, matrix_gradient)

    if single_optimizer:
      # No separate constraint_optimizer: self.optimizer applies the updates
      # of both the model parameters and the internal state in one go.
      grads_and_vars.append(state_grad_and_var)
      update_ops = [
          self.optimizer.apply_gradients(grads_and_vars, name="update")
      ]
    else:
      # Separate constraint_optimizer: self.optimizer updates the model
      # parameters and self.constraint_optimizer updates the internal state.
      matrix_grads_and_vars = [state_grad_and_var]
      gradients = [
          gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
          if gradient is not None
      ]
      # Both apply ops wait for every gradient before running.
      with ops.control_dependencies(gradients):
        model_update_op = self.optimizer.apply_gradients(
            grads_and_vars, name="update")
        state_update_op = self.constraint_optimizer.apply_gradients(
            matrix_grads_and_vars, name="optimizer_state_update")
      update_ops = [model_update_op, state_update_op]

    with ops.control_dependencies(update_ops):
      if global_step is None:
        # Without a global step, the projection is the whole train_op.
        return self._projection_op(state, name=name)

      # With a global step, it is incremented in addition to projecting.
      projection_op = self._projection_op(state, name="project")
      with ops.colocate_with(global_step):
        increment_op = state_ops.assign_add(
            global_step, 1, name="global_step_increment")
      return control_flow_ops.group(projection_op, increment_op, name=name)
Пример #6
0
  def _minimize_constrained(self,
                            minimization_problem,
                            global_step=None,
                            var_list=None,
                            gate_gradients=train_optimizer.Optimizer.GATE_OP,
                            aggregation_method=None,
                            colocate_gradients_with_ops=False,
                            name=None,
                            grad_loss=None):
    """Creates the train `Operation` for the constrained problem.

    `self.optimizer` updates the model parameters. The constraint/objective
    weight matrix held as internal state (the analogue of Lagrange multipliers)
    is updated by `self.constraint_optimizer` if it is not None, and by
    `self.optimizer` if it is. Whether the matrix updates are additive or
    multiplicative depends on the derived class. The returned op runs the
    updates, then projects the state via `self._projection_op`, and also
    increments `global_step` by one when a `global_step` is supplied.

    Args:
      minimization_problem: ConstrainedMinimizationProblem, the problem to
        optimize.
      global_step: as in `tf.train.Optimizer`'s `minimize` method.
      var_list: as in `tf.train.Optimizer`'s `minimize` method.
      gate_gradients: as in `tf.train.Optimizer`'s `minimize` method.
      aggregation_method: as in `tf.train.Optimizer`'s `minimize` method.
      colocate_gradients_with_ops: as in `tf.train.Optimizer`'s `minimize`
        method.
      name: as in `tf.train.Optimizer`'s `minimize` method.
      grad_loss: as in `tf.train.Optimizer`'s `minimize` method.

    Raises:
      ValueError: If the base dtypes of the problem's objective, constraints
        and proxy constraints are not all the same.

    Returns:
      `Operation`, the train_op.
    """
    objective = minimization_problem.objective
    constraints = minimization_problem.constraints
    proxy_constraints = minimization_problem.proxy_constraints
    # Fall back on the constraints when no proxy constraints are provided.
    if proxy_constraints is None:
      proxy_constraints = constraints

    # Reject problems whose three tensors do not share one base dtype.
    expected_dtype = objective.dtype.base_dtype
    if (expected_dtype != constraints.dtype.base_dtype or
        expected_dtype != proxy_constraints.dtype.base_dtype):
      raise ValueError("objective, constraints and proxy_constraints must "
                       "have the same dtype")

    # Reshape both constraint tensors into vectors of length num_constraints.
    num_constraints = minimization_problem.num_constraints
    vector_shape = (num_constraints,)
    constraints = standard_ops.reshape(constraints, shape=vector_shape)
    proxy_constraints = standard_ops.reshape(
        proxy_constraints, shape=vector_shape)

    # Passing a callable as the initial value means that, if this function
    # call is inside the scope of a tf.control_dependencies() block, the
    # dependencies will not be applied to the initializer.
    def make_initial_state():
      return self._initial_state(num_constraints)

    state = standard_ops.Variable(
        make_initial_state,
        trainable=False,
        name="swap_regret_optimizer_state")

    # [0, constraints...] and [objective, proxy_constraints...].
    zero_entry = standard_ops.zeros((1,), dtype=constraints.dtype)
    zero_and_constraints = standard_ops.concat(
        (zero_entry, constraints), axis=0)
    objective_entry = standard_ops.expand_dims(objective, 0)
    objective_and_proxy_constraints = standard_ops.concat(
        (objective_entry, proxy_constraints), axis=0)

    distribution = self._distribution(state)
    # Loss used for the model parameters: the distribution contracted with
    # the objective and proxy constraints.
    typed_distribution = standard_ops.cast(
        distribution, objective_and_proxy_constraints.dtype)
    loss = standard_ops.tensordot(
        typed_distribution, objective_and_proxy_constraints, 1)
    # Gradient used for the state: zero_and_constraints as a column, times
    # the distribution with a new leading axis.
    matrix_gradient = standard_ops.matmul(
        standard_ops.expand_dims(
            standard_ops.cast(zero_and_constraints, distribution.dtype), 1),
        standard_ops.expand_dims(distribution, 0))

    has_constraint_optimizer = self.constraint_optimizer is not None

    # The model-parameter gradients always come from self.optimizer; the
    # (gradient, variable) pair for the state comes from the derived class.
    grads_and_vars = self.optimizer.compute_gradients(
        loss,
        var_list=var_list,
        gate_gradients=gate_gradients,
        aggregation_method=aggregation_method,
        colocate_gradients_with_ops=colocate_gradients_with_ops,
        grad_loss=grad_loss)
    state_pair = self._constraint_grad_and_var(state, matrix_gradient)

    update_ops = []
    if has_constraint_optimizer:
      # self.optimizer handles the model parameters while
      # self.constraint_optimizer handles the internal state.
      matrix_grads_and_vars = [state_pair]
      gradients = [
          gradient for gradient, _ in grads_and_vars + matrix_grads_and_vars
          if gradient is not None
      ]
      # Neither update may start before all of the gradients are available.
      with ops.control_dependencies(gradients):
        update_ops.append(
            self.optimizer.apply_gradients(grads_and_vars, name="update"))
        update_ops.append(
            self.constraint_optimizer.apply_gradients(
                matrix_grads_and_vars, name="optimizer_state_update"))
    else:
      # self.optimizer handles the model parameters and the internal state
      # together in a single apply_gradients call.
      grads_and_vars.append(state_pair)
      update_ops.append(
          self.optimizer.apply_gradients(grads_and_vars, name="update"))

    with ops.control_dependencies(update_ops):
      if global_step is None:
        # No global step to maintain: projecting the state finishes the step.
        return self._projection_op(state, name=name)

      # Otherwise group the projection with the global step increment.
      projection_op = self._projection_op(state, name="project")
      with ops.colocate_with(global_step):
        global_step_op = state_ops.assign_add(
            global_step, 1, name="global_step_increment")
      return control_flow_ops.group(projection_op, global_step_op, name=name)