Example #1
  def test_evaluate(self):
    es_size_2 = ensemble_selection.EnsembleSelection(
        problem_statement=ps_pb2.ProblemStatement(tasks=[
            ps_pb2.Task(
                type=ps_pb2.Type(
                    one_dimensional_regression=ps_pb2.OneDimensionalRegression(
                        label='label')))
        ]),
        saved_model_paths=self.saved_model_paths,
        predict_fn=_test_predict_fn,
        ensemble_size=2,
        metric=tf.keras.metrics.MeanSquaredError(),
        goal='minimize')
    es_size_4 = ensemble_selection.EnsembleSelection(
        problem_statement=ps_pb2.ProblemStatement(tasks=[
            ps_pb2.Task(
                type=ps_pb2.Type(
                    one_dimensional_regression=ps_pb2.OneDimensionalRegression(
                        label='label')))
        ]),
        saved_model_paths=self.saved_model_paths,
        predict_fn=_test_predict_fn,
        ensemble_size=4,
        metric=tf.keras.metrics.MeanSquaredError(),
        goal='minimize')
    metrics = [tf.keras.metrics.MeanSquaredError()]

    es_size_2.fit(self.fit_examples, self.fit_label)
    es_size_4.fit(self.fit_examples, self.fit_label)
    es_2_mse = es_size_2.evaluate(self.fit_examples, self.fit_label, metrics)[0]
    es_4_mse = es_size_4.evaluate(self.fit_examples, self.fit_label, metrics)[0]

    self.assertLessEqual(es_4_mse, es_2_mse)
Example #2
    def _get_task_type(self):
        """Creates a `ps_pb2.Type` from the number of classes."""

        if self.num_classes == 0:
            return ps_pb2.Type(
                one_dimensional_regression=ps_pb2.OneDimensionalRegression(
                    label=self._label_key))
        if self.num_classes == 2:
            return ps_pb2.Type(
                binary_classification=ps_pb2.BinaryClassification(
                    label=self._label_key))
        return ps_pb2.Type(
            multi_class_classification=ps_pb2.MultiClassClassification(
                label=self._label_key))
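
Each branch above populates a different sub-message of ps_pb2.Type, so a caller can check which variant was set with HasField. A minimal sketch (the label key 'y' is illustrative):

    task_type = ps_pb2.Type(
        binary_classification=ps_pb2.BinaryClassification(label='y'))
    assert task_type.HasField('binary_classification')
    assert not task_type.HasField('one_dimensional_regression')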
Example #3
    def testDoWithMajorityVoting(self):

        exec_properties = self._exec_properties.copy()
        exec_properties['tuner_fn'] = '%s.%s' % (
            tuner_module.tuner_fn.__module__, tuner_module.tuner_fn.__name__)
        exec_properties['metalearning_algorithm'] = 'majority_voting'

        input_dict = self._input_dict.copy()

        ps_type = ps_pb2.Type(
            binary_classification=ps_pb2.BinaryClassification(label='class'))
        ps = ps_pb2.ProblemStatement(
            owner=['nitroml'],
            tasks=[ps_pb2.Task(
                name='mockdata_1',
                type=ps_type,
            )])

        exec_properties['custom_config'] = json_utils.dumps({
            'problem_statement':
                text_format.MessageToString(message=ps, as_utf8=True),
        })
        hps_artifact = artifacts.KCandidateHyperParameters()
        hps_artifact.uri = os.path.join(self._testdata_dir,
                                        'MetaLearner.majority_voting',
                                        'hparams_out')
        input_dict['warmup_hyperparameters'] = [hps_artifact]

        tuner = executor.Executor(self._context)
        tuner.Do(input_dict=input_dict,
                 output_dict=self._output_dict,
                 exec_properties=exec_properties)
        self._verify_output()
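
The 'problem_statement' entry in custom_config round-trips through protobuf text format: text_format.MessageToString serializes the proto, and the consuming executor can restore it with text_format.Parse. A minimal sketch of that round trip (the parse side is an assumption about the consumer, shown for illustration):

    from google.protobuf import text_format

    serialized = text_format.MessageToString(message=ps, as_utf8=True)
    restored = text_format.Parse(serialized, ps_pb2.ProblemStatement())
    assert restored.tasks[0].name == 'mockdata_1'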
Example #4
    def testDoWithTunerFn(self):

        self._exec_properties['tuner_fn'] = '%s.%s' % (
            tuner_module.tuner_fn.__module__, tuner_module.tuner_fn.__name__)

        ps_type = ps_pb2.Type(
            binary_classification=ps_pb2.BinaryClassification(label='class'))
        ps = ps_pb2.ProblemStatement(
            owner=['nitroml'],
            tasks=[ps_pb2.Task(
                name='mockdata_1',
                type=ps_type,
            )])

        self._exec_properties['custom_config'] = json_utils.dumps({
            'problem_statement':
                text_format.MessageToString(message=ps, as_utf8=True),
        })

        tuner = executor.Executor(self._context)
        tuner.Do(input_dict=self._input_dict,
                 output_dict=self._output_dict,
                 exec_properties=self._exec_properties)

        self._verify_output()
Example #5
    def problem_statement(self) -> ps_pb2.ProblemStatement:
        """Returns the ProblemStatement associated with this BenchmarkTask."""

        return ps_pb2.ProblemStatement(
            owner=['nitroml'],
            tasks=[
                ps_pb2.Task(
                    name='Test',
                    type=ps_pb2.Type(
                        one_dimensional_regression=(
                            ps_pb2.OneDimensionalRegression(label='test'))),
                )
            ])
Example #6
  def test_lifecycle(self):
    es = ensemble_selection.EnsembleSelection(
        problem_statement=ps_pb2.ProblemStatement(tasks=[
            ps_pb2.Task(
                type=ps_pb2.Type(
                    one_dimensional_regression=ps_pb2.OneDimensionalRegression(
                        label='label')))
        ]),
        saved_model_paths=self.saved_model_paths,
        predict_fn=_test_predict_fn,
        ensemble_size=3,
        metric=tf.keras.metrics.MeanSquaredError(),
        goal='minimize')
    test_dir = os.path.join(self.data_path, 'test_examples.tfrecord')
    test_examples = np.asarray(
        list(tf.data.TFRecordDataset(test_dir).as_numpy_iterator()))
    test_examples_tensor = tf.convert_to_tensor(test_examples)
    model_predictions = {}
    for model_id, path in self.saved_model_paths.items():
      reloaded_model = tf.saved_model.load(path)
      model_predictions[model_id] = reloaded_model.signatures[
          'serving_default'](test_examples_tensor)['output_0'].numpy()
    want_weights = {'2': 0.3333333333333333, '4': 0.6666666666666666}
    want_prediction = want_weights['2'] * model_predictions['2'] + want_weights[
        '4'] * model_predictions['4']
    mse = tf.keras.metrics.MeanSquaredError()
    mse_scores = []
    for pred in model_predictions.values():
      mse_scores.append(mse(self.fit_label, pred))
      mse.reset_states()
    export_dir = os.path.join(
        tempfile.mkdtemp(dir=absltest.get_default_test_tmpdir()),
        'from_estimator')

    es.fit(self.fit_examples, self.fit_label)
    ensemble_predictions = es.predict(test_examples)
    ensemble_mse = es.evaluate(self.fit_examples, self.fit_label, [mse])[0]
    ensemble_path = es.save(export_dir)
    reloaded_ensemble = tf.saved_model.load(ensemble_path)
    loaded_ensemble_prediction = reloaded_ensemble.signatures[
        'serving_default'](input=test_examples_tensor)['output'].numpy()

    self.assertEqual(want_weights, es.weights)
    self.assertEqual((10, 1), ensemble_predictions.shape)
    np.testing.assert_array_almost_equal(want_prediction, ensemble_predictions,
                                         1)
    self.assertLessEqual(ensemble_mse, min(mse_scores))
    np.testing.assert_array_almost_equal(ensemble_predictions,
                                         loaded_ensemble_prediction, 1)
Example #7
  def problem_statement(self) -> ps_pb2.ProblemStatement:
    """Returns the ProblemStatement associated with this Task."""

    # Supervised keys is a two-tuple.
    _, target_key = self._dataset_builder.info.supervised_keys
    return ps_pb2.ProblemStatement(
        owner=['nitroml'],
        tasks=[
            ps_pb2.Task(
                name=self.name,
                type=ps_pb2.Type(
                    binary_classification=ps_pb2.BinaryClassification(
                        label=target_key)),
            )
        ])
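
For context: supervised_keys is the (features, target) pair that a TFDS DatasetBuilder exposes on its info object, and the method above keeps only the target. A minimal sketch (the dataset name is illustrative):

    import tensorflow_datasets as tfds

    builder = tfds.builder('mnist')
    _, target_key = builder.info.supervised_keys  # ('image', 'label') -> 'label'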
Example #8
  def test_get_predictions(self):
    # TODO(liumich): improve test predictions with the following steps
    # - reduce the number of samples to 4-5
    # - manually compute MSE after each iteration for each partial ensemble
    # - also output the ground truth (labels) so that we can verify
    es = ensemble_selection.EnsembleSelection(
        problem_statement=ps_pb2.ProblemStatement(tasks=[
            ps_pb2.Task(
                type=ps_pb2.Type(
                    one_dimensional_regression=ps_pb2.OneDimensionalRegression(
                        label='label')))
        ]),
        saved_model_paths=self.saved_model_paths,
        predict_fn=_test_predict_fn,
        ensemble_size=3,
        metric=tf.keras.metrics.MeanSquaredError(),
        goal='minimize')
    want_predictions = {
        '0':
            np.array([[268520.7], [172055.8], [172840.52], [203374.36],
                      [629715.5], [160393.], [242507.27], [156286.08],
                      [262261.7], [221169.3]]),
        '1':
            np.array([[262822.53], [168104.17], [168874.69], [198855.67],
                      [617477.56], [156652.53], [237280.08], [152620.],
                      [256676.81], [216328.45]]),
        '2':
            np.array([[247936.98], [158487.19], [159214.84], [187528.2],
                      [582864.9], [147672.52], [223815.3], [143864.3],
                      [242133.11], [204029.08]]),
        '3':
            np.array([[268206.75], [171761.81], [172546.36], [203073.86],
                      [629326.7], [160101.38], [242198.66], [155995.36],
                      [261948.98], [220865.14]]),
        '4':
            np.array([[257493.5], [164639.19], [165394.55], [194785.5],
                      [605169.06], [153412.9], [232453.73], [149459.73],
                      [251468.73], [211914.42]])
    }

    predictions = es._get_predictions_dict(self.fit_examples)

    for model_id in predictions.keys():
      np.testing.assert_array_almost_equal(want_predictions[model_id],
                                           predictions[model_id], 1)
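
A hedged sketch of what _get_predictions_dict plausibly computes, mirroring the per-model prediction loop in the lifecycle test above (the standalone signature and the 'output_0' key are assumptions carried over from that test):

    def get_predictions_dict(saved_model_paths, examples_tensor):
      # Run every saved model over the same examples; key results by model id.
      predictions = {}
      for model_id, path in saved_model_paths.items():
        model = tf.saved_model.load(path)
        predictions[model_id] = model.signatures['serving_default'](
            examples_tensor)['output_0'].numpy()
      return predictions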
Example #9
  def test_predict_before_fit(self):
    es = ensemble_selection.EnsembleSelection(
        problem_statement=ps_pb2.ProblemStatement(tasks=[
            ps_pb2.Task(
                type=ps_pb2.Type(
                    one_dimensional_regression=ps_pb2.OneDimensionalRegression(
                        label='label')))
        ]),
        saved_model_paths=self.saved_model_paths,
        predict_fn=_test_predict_fn,
        ensemble_size=3,
        metric=tf.keras.metrics.MeanSquaredError(),
        goal='minimize')

    with self.assertRaisesRegex(
        ValueError,
        'Weights cannot be empty. Must call `fit` before `predict`.'):
      _ = es.predict(self.fit_examples)
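
A guard of roughly this shape (an assumption, not the library's verbatim code) would raise the error the test asserts:

    def _check_fitted(weights):
      """Illustrative guard; raises until `fit` has populated weights."""
      if not weights:
        raise ValueError(
            'Weights cannot be empty. Must call `fit` before `predict`.')

    _check_fitted({'2': 0.5, '4': 0.5})  # passes
    # _check_fitted({})  # raises ValueError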
Example #10
  def test_calculate_weights(self):
    es = ensemble_selection.EnsembleSelection(
        problem_statement=ps_pb2.ProblemStatement(tasks=[
            ps_pb2.Task(
                type=ps_pb2.Type(
                    one_dimensional_regression=ps_pb2.OneDimensionalRegression(
                        label='label')))
        ]),
        saved_model_paths=self.saved_model_paths,
        predict_fn=_test_predict_fn,
        ensemble_size=4,
        metric=tf.keras.metrics.MeanSquaredError(),
        goal='minimize')
    ensemble_count = {'model_1': 1, 'model_2': 2, 'model_3': 1}
    want_weights = {'model_1': 0.25, 'model_2': 0.5, 'model_3': 0.25}

    es._calculate_weights(ensemble_count)

    self.assertEqual(want_weights, es.weights)
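
The expected weights are just the selection counts normalized by their total (here 4, matching ensemble_size). A minimal sketch of that arithmetic, assumed to match what _calculate_weights computes:

    counts = {'model_1': 1, 'model_2': 2, 'model_3': 1}
    total = sum(counts.values())  # 4
    weights = {model_id: n / total for model_id, n in counts.items()}
    assert weights == {'model_1': 0.25, 'model_2': 0.5, 'model_3': 0.25}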
Example #11
  def test_evaluate_metrics(self):
    es = ensemble_selection.EnsembleSelection(
        problem_statement=ps_pb2.ProblemStatement(tasks=[
            ps_pb2.Task(
                type=ps_pb2.Type(
                    one_dimensional_regression=ps_pb2.OneDimensionalRegression(
                        label='label')))
        ]),
        saved_model_paths=self.saved_model_paths,
        predict_fn=_test_predict_fn,
        ensemble_size=3,
        metric=tf.keras.metrics.MeanSquaredError(),
        goal='minimize')
    metrics = [
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.MeanAbsoluteError(),
        tf.keras.metrics.RootMeanSquaredError()
    ]

    es.fit(self.fit_examples, self.fit_label)
    ensemble_metrics = es.evaluate(self.fit_examples, self.fit_label, metrics)

    self.assertLen(ensemble_metrics, len(metrics))
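
evaluate returns one score per metric, in input order. An illustrative sketch of that contract (assumed, not the actual implementation):

    def evaluate_scores(y_true, y_pred, metrics):
      scores = []
      for metric in metrics:
        metric.reset_states()  # clear state accumulated by earlier calls
        metric.update_state(y_true, y_pred)
        scores.append(float(metric.result().numpy()))
      return scores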