Example #1
    def test_add_fairness_metrics_baseline_model(self):
        config = tf_estimator.RunConfig(model_dir=self.model_dir,
                                        save_checkpoints_steps=2)
        feature_columns, _, _, label_column_name = self.load_dataset.get_feature_columns(include_sensitive_columns=True)  # pylint: disable=line-too-long
        estimator = baseline_model.get_estimator(
            feature_columns=feature_columns,
            label_column_name=label_column_name,
            config=config,
            model_dir=self.model_dir,
            hidden_units=self.hidden_units,
            batch_size=self.batch_size)
        self.assertIsInstance(estimator, tf_estimator.Estimator)

        # Adds additional fairness metrics to the estimator.
        eval_metrics_fn = self.fairness_metrics.create_fairness_metrics_fn()
        estimator = tf_estimator.add_metrics(estimator, eval_metrics_fn)

        train_input_fn, test_input_fn = self._get_train_test_input_fn()
        estimator.train(input_fn=train_input_fn, steps=self.train_steps)
        eval_results = estimator.evaluate(input_fn=test_input_fn,
                                          steps=self.test_steps)
        self.assertNotEmpty(eval_results)
        # Checks that the AUC metric is computed for all subgroups.
        for subgroup in self.subgroups:
            self.assertIn('auc subgroup {}'.format(subgroup), eval_results)
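
For context, `tf.estimator.add_metrics` wraps an estimator so that an extra
metrics function runs at evaluation time. Below is a minimal sketch of the
shape `create_fairness_metrics_fn` could return; the binary subgroup-indicator
features and the 'logistic' prediction key are illustrative assumptions, not
details taken from the test above.

import tensorflow.compat.v1 as tf

def create_fairness_metrics_fn(subgroups=('sex', 'race')):
    """Returns a metric_fn computing AUC per subgroup (sketch only)."""

    def metric_fn(features, labels, predictions):
        metrics = {}
        for subgroup in subgroups:
            # Restricts the AUC computation to examples in the subgroup by
            # zeroing out the weights of all other examples.
            weights = tf.cast(tf.equal(features[subgroup], 1), tf.float32)
            metrics['auc subgroup {}'.format(subgroup)] = tf.metrics.auc(
                labels=labels,
                predictions=predictions['logistic'],
                weights=weights)
        return metrics

    return metric_fn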
Example #2
    def test_create_baseline_estimator_without_demographics(self):
        config = tf_estimator.RunConfig(model_dir=self.model_dir,
                                        save_checkpoints_steps=2)
        feature_columns, _, _, label_column_name = self.load_dataset.get_feature_columns(include_sensitive_columns=False)  # pylint: disable=line-too-long
        estimator = baseline_model.get_estimator(
            feature_columns=feature_columns,
            label_column_name=label_column_name,
            config=config,
            model_dir=self.model_dir,
            hidden_units=self.hidden_units,
            batch_size=self.batch_size,
            learning_rate=0.01,
            optimizer='Adagrad',
            activation=tf.nn.relu)
        self.assertIsInstance(estimator, tf_estimator.Estimator)
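
The tests above rely on fixture attributes such as self.model_dir,
self.hidden_units, and self.load_dataset. A hypothetical setUp along these
lines would provide them; the concrete values, the dataset wrapper, and the
fairness-metrics helper are placeholders, not the project's actual classes.

import tempfile

import tensorflow.compat.v1 as tf

class BaselineModelTest(tf.test.TestCase):

    def setUp(self):
        super(BaselineModelTest, self).setUp()
        self.model_dir = tempfile.mkdtemp()
        self.hidden_units = [16, 4]
        self.batch_size = 8
        self.train_steps = 10
        self.test_steps = 5
        self.subgroups = ['sex', 'race']
        # Placeholders: the real suite supplies a dataset wrapper exposing
        # get_feature_columns() and a helper exposing
        # create_fairness_metrics_fn().
        self.load_dataset = None
        self.fairness_metrics = None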
Example #3
    def test_global_steps_baseline_model(self):
        config = tf_estimator.RunConfig(model_dir=self.model_dir,
                                        save_checkpoints_steps=2)
        feature_columns, _, _, label_column_name = self.load_dataset.get_feature_columns(include_sensitive_columns=True)  # pylint: disable=line-too-long
        estimator = baseline_model.get_estimator(
            feature_columns=feature_columns,
            label_column_name=label_column_name,
            config=config,
            model_dir=self.model_dir,
            hidden_units=self.hidden_units,
            batch_size=self.batch_size)
        self.assertIsInstance(estimator, tf_estimator.Estimator)
        train_input_fn, test_input_fn = self._get_train_test_input_fn()
        estimator.train(input_fn=train_input_fn, steps=self.train_steps)
        eval_results = estimator.evaluate(input_fn=test_input_fn,
                                          steps=self.test_steps)
        # Checks that the global step has reached train_steps. train() runs
        # `steps` steps from the current global step, so this assumes a fresh
        # model_dir.
        self.assertIn('global_step', eval_results)
        self.assertEqual(eval_results['global_step'], self.train_steps)
Example #4
    def test_eval_results_baseline_model(self):
        config = tf_estimator.RunConfig(model_dir=self.model_dir,
                                        save_checkpoints_steps=2)
        feature_columns, _, _, label_column_name = self.load_dataset.get_feature_columns(include_sensitive_columns=True)  # pylint: disable=line-too-long
        estimator = baseline_model.get_estimator(
            feature_columns=feature_columns,
            label_column_name=label_column_name,
            config=config,
            model_dir=self.model_dir,
            hidden_units=self.hidden_units,
            batch_size=self.batch_size)
        self.assertIsInstance(estimator, tf_estimator.Estimator)
        train_input_fn, test_input_fn = self._get_train_test_input_fn()
        estimator.train(input_fn=train_input_fn, steps=self.train_steps)
        eval_results = estimator.evaluate(input_fn=test_input_fn,
                                          steps=self.test_steps)
        self.assertNotEmpty(eval_results)
        # Checks that the auc, tp, tn, fp, and fn keys are all present in the
        # eval_results dictionary.
        self.assertIn('auc', eval_results)
        self.assertIn('fp', eval_results)
        self.assertIn('fn', eval_results)
        self.assertIn('tp', eval_results)
        self.assertIn('tn', eval_results)
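
The tp, tn, fp, and fn keys would normally come from eval_metric_ops
registered inside the model_fn. A minimal sketch, assuming binary labels,
hard class predictions, and class-1 probabilities as inputs:

import tensorflow.compat.v1 as tf

def confusion_matrix_metrics(labels, class_ids, probabilities):
    """Returns the eval metrics checked by the test above (sketch only)."""
    return {
        'auc': tf.metrics.auc(labels, probabilities),
        'tp': tf.metrics.true_positives(labels, class_ids),
        'tn': tf.metrics.true_negatives(labels, class_ids),
        'fp': tf.metrics.false_positives(labels, class_ids),
        'fn': tf.metrics.false_negatives(labels, class_ids),
    }

Such a dictionary would be passed as the eval_metric_ops argument of
tf.estimator.EstimatorSpec in EVAL mode.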
Example #5
def get_estimator(model_dir, model_name, feature_columns, protected_groups,
                  label_column_name):
    """Instantiates and returns a model estimator.

  Args:
    model_dir: Directory to save model parameters, graph and etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
    model_name: (string) name of the estimator to instantiate.
    feature_columns: list of feature_columns.
    protected_groups: list of protected_groups. For example, ["sex","race"].
    label_column_name: (string) name of the target variable.

  Returns:
    An instance of `tf.estimator.Estimator'.

  Raises:
    ValueError: if estimator for model_name is not implemented.
    ValueError: if activation function is not implemented.
  """
    # Defines the activation function to be used for the model. Add new
    # activation functions here as needed.
    if FLAGS.activation == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.activation == "linear":
        # A linear activation is the identity function; DNN estimators treat
        # activation_fn=None as linear (tf.nn has no `linear` op).
        activation_fn = None
    else:
        raise ValueError("Activation {} is not supported.".format(
            FLAGS.activation))

    kwargs = {
        "feature_columns": feature_columns,
        "label_column_name": label_column_name,
        "config": tf.estimator.RunConfig(
            model_dir=model_dir,
            save_checkpoints_steps=FLAGS.min_eval_frequency),
        "model_dir": model_dir,
        "batch_size": FLAGS.batch_size,
        "activation": activation_fn,
        "optimizer": FLAGS.optimizer,
    }

    # Instantiates the requested estimator. Add new estimators that we want
    # to support here.
    if model_name == "baseline":
        estimator = baseline_model.get_estimator(
            hidden_units=FLAGS.primary_hidden_units,
            learning_rate=FLAGS.primary_learning_rate,
            **kwargs)
    elif model_name == "inverse_propensity_weighting":
        estimator = ips_reweighting_model.get_estimator(
            reweighting_type=FLAGS.reweighting_type,
            hidden_units=FLAGS.primary_hidden_units,
            learning_rate=FLAGS.primary_learning_rate,
            **kwargs)
    elif model_name == "robust_learning":
        estimator = robust_learning_model.get_estimator(
            adversary_loss_type=FLAGS.adversary_loss_type,
            adversary_include_label=FLAGS.adversary_include_label,
            upweight_positive_instance_only=(
                FLAGS.upweight_positive_instance_only),
            pretrain_steps=FLAGS.pretrain_steps,
            primary_hidden_units=FLAGS.primary_hidden_units,
            adversary_hidden_units=FLAGS.adversary_hidden_units,
            primary_learning_rate=FLAGS.primary_learning_rate,
            adversary_learning_rate=FLAGS.adversary_learning_rate,
            **kwargs)
    elif model_name == "adversarial_subgroup_reweighting":
        estimator = adversarial_subgroup_reweighting_model.get_estimator(
            protected_column_names=protected_groups,
            pretrain_steps=FLAGS.pretrain_steps,
            primary_hidden_units=FLAGS.primary_hidden_units,
            adversary_hidden_units=FLAGS.adversary_hidden_units,
            primary_learning_rate=FLAGS.primary_learning_rate,
            adversary_learning_rate=FLAGS.adversary_learning_rate,
            **kwargs)
    else:
        raise ValueError("Model {} is not implemented.".format(model_name))
    return estimator
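
An illustrative invocation of the dispatcher above, assuming the referenced
absl flags (batch_size, optimizer, activation, and so on) have been defined
and parsed elsewhere; the feature columns, column names, and train_input_fn
are placeholders.

feature_columns = [
    tf.feature_column.numeric_column("age"),
    tf.feature_column.indicator_column(
        tf.feature_column.categorical_column_with_vocabulary_list(
            "sex", ["Female", "Male"])),
]
estimator = get_estimator(
    model_dir="/tmp/baseline_model",
    model_name="baseline",
    feature_columns=feature_columns,
    protected_groups=["sex", "race"],
    label_column_name="income")
estimator.train(input_fn=train_input_fn, steps=1000)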