Exemplo n.º 1
0
    def testEntireEnsembleBuilder(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  # Binary Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=2,
        )
        ensbuilder.SAVE2DISC = False

        ensbuilder.score_ensemble_preds()

        d2 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2_0.0.npy"
        )

        sel_keys = ensbuilder.get_n_best_preds()
        self.assertGreater(len(sel_keys), 0)

        ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys)
        print(ensemble, sel_keys)

        n_sel_valid, n_sel_test = ensbuilder.get_valid_test_preds(
            selected_keys=sel_keys)

        # both valid and test prediction files are available
        self.assertGreater(len(n_sel_valid), 0)
        self.assertEqual(n_sel_valid, n_sel_test)

        y_valid = ensbuilder.predict(
            set_="valid",
            ensemble=ensemble,
            selected_keys=n_sel_valid,
            n_preds=len(sel_keys),
            index_run=1,
        )
        y_test = ensbuilder.predict(
            set_="test",
            ensemble=ensemble,
            selected_keys=n_sel_test,
            n_preds=len(sel_keys),
            index_run=1,
        )

        # predictions for valid and test are the same
        # --> should results in the same predictions
        np.testing.assert_array_almost_equal(y_valid, y_test)

        # since d2 provides perfect predictions
        # it should get a higher weight
        # so that y_valid should be exactly y_valid_d2
        y_valid_d2 = ensbuilder.read_preds[d2][Y_VALID][:, 1]
        np.testing.assert_array_almost_equal(y_valid, y_valid_d2)
Exemplo n.º 2
0
    def testEntireEnsembleBuilder(self):
        
        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=1,  #Binary Classification
            metric=roc_auc,
            limit=-1, # not used,
            seed=0, # important to find the test files
            ensemble_nbest=2,
        )
        ensbuilder.SAVE2DISC = False
        
        ensbuilder.read_ensemble_preds()

        d2 = os.path.join(
            self.backend.temporary_directory,
            ".auto-sklearn/predictions_ensemble/predictions_ensemble_0_2.npy"
        )

        sel_keys = ensbuilder.get_n_best_preds()
        self.assertGreater(len(sel_keys), 0)
        
        ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys)
        print(ensemble, sel_keys)
        
        n_sel_valid, n_sel_test = ensbuilder.get_valid_test_preds(selected_keys=sel_keys)
        
        # both valid and test prediction files are available
        self.assertGreater(len(n_sel_valid), 0)
        self.assertEqual(n_sel_valid, n_sel_test)

        y_valid = ensbuilder.predict(
            set_="valid",
            ensemble=ensemble,
            selected_keys=n_sel_valid,
            n_preds=len(sel_keys),
            index_run=1,
        )
        y_test = ensbuilder.predict(
            set_="test",
            ensemble=ensemble,
            selected_keys=n_sel_test,
            n_preds=len(sel_keys),
            index_run=1,
        )

        # predictions for valid and test are the same
        # --> should results in the same predictions
        np.testing.assert_array_almost_equal(y_valid, y_test)

        # since d2 provides perfect predictions
        # it should get a higher weight
        # so that y_valid should be exactly y_valid_d2
        y_valid_d2 = ensbuilder.read_preds[d2][Y_VALID][:, 1]
        np.testing.assert_array_almost_equal(y_valid, y_valid_d2)
Exemplo n.º 3
0
def testEntireEnsembleBuilder(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=BINARY_CLASSIFICATION,
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=2,
    )
    ensbuilder.SAVE2DISC = False

    ensbuilder.compute_loss_per_model()

    d2 = os.path.join(
        ensemble_backend.temporary_directory,
        ".auto-sklearn/runs/0_2_0.0/predictions_ensemble_0_2_0.0.npy")

    sel_keys = ensbuilder.get_n_best_preds()
    assert len(sel_keys) > 0

    ensemble = ensbuilder.fit_ensemble(selected_keys=sel_keys)
    print(ensemble, sel_keys)

    n_sel_valid, n_sel_test = ensbuilder.get_valid_test_preds(
        selected_keys=sel_keys)

    # both valid and test prediction files are available
    assert len(n_sel_valid) > 0
    assert n_sel_valid == n_sel_test

    y_valid = ensbuilder.predict(
        set_="valid",
        ensemble=ensemble,
        selected_keys=n_sel_valid,
        n_preds=len(sel_keys),
        index_run=1,
    )
    y_test = ensbuilder.predict(
        set_="test",
        ensemble=ensemble,
        selected_keys=n_sel_test,
        n_preds=len(sel_keys),
        index_run=1,
    )

    # predictions for valid and test are the same
    # --> should results in the same predictions
    np.testing.assert_array_almost_equal(y_valid, y_test)

    # since d2 provides perfect predictions
    # it should get a higher weight
    # so that y_valid should be exactly y_valid_d2
    y_valid_d2 = ensbuilder.read_preds[d2][Y_VALID][:, 1]
    np.testing.assert_array_almost_equal(y_valid, y_valid_d2)
Exemplo n.º 4
0
def test_read_pickle_read_preds(ensemble_backend):
    """
    This procedure test that we save the read predictions before
    destroying the ensemble builder and that we are able to read
    them safely after
    """
    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=MULTILABEL_CLASSIFICATION,  # Multilabel Classification
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=2,
        max_models_on_disc=None,
    )
    ensbuilder.SAVE2DISC = False

    ensbuilder.main(time_left=np.inf, iteration=1, return_predictions=False)

    # Check that the memory was created
    ensemble_memory_file = os.path.join(ensemble_backend.internals_directory,
                                        'ensemble_read_preds.pkl')
    assert os.path.exists(ensemble_memory_file)

    # Make sure we pickle the correct read preads and hash
    with (open(ensemble_memory_file, "rb")) as memory:
        read_preds, last_hash = pickle.load(memory)

    compare_read_preds(read_preds, ensbuilder.read_preds)
    assert last_hash == ensbuilder.last_hash

    ensemble_memory_file = os.path.join(ensemble_backend.internals_directory,
                                        'ensemble_read_scores.pkl')
    assert os.path.exists(ensemble_memory_file)

    # Make sure we pickle the correct read scores
    with (open(ensemble_memory_file, "rb")) as memory:
        read_scores = pickle.load(memory)

    compare_read_preds(read_scores, ensbuilder.read_scores)

    # Then create a new instance, which should automatically read this file
    ensbuilder2 = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=MULTILABEL_CLASSIFICATION,  # Multilabel Classification
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=2,
        max_models_on_disc=None,
    )
    compare_read_preds(ensbuilder2.read_preds, ensbuilder.read_preds)
    compare_read_preds(ensbuilder2.read_scores, ensbuilder.read_scores)
    assert ensbuilder2.last_hash == ensbuilder.last_hash
Exemplo n.º 5
0
def test_main(ensemble_backend):

    ensbuilder = EnsembleBuilder(
        backend=ensemble_backend,
        dataset_name="TEST",
        task_type=MULTILABEL_CLASSIFICATION,  # Multilabel Classification
        metric=roc_auc,
        seed=0,  # important to find the test files
        ensemble_nbest=2,
        max_models_on_disc=None,
    )
    ensbuilder.SAVE2DISC = False

    run_history, ensemble_nbest, _, _, _ = ensbuilder.main(
        time_left=np.inf,
        iteration=1,
        return_predictions=False,
    )

    assert len(ensbuilder.read_preds) == 3
    assert ensbuilder.last_hash is not None
    assert ensbuilder.y_true_ensemble is not None

    # Make sure the run history is ok

    # We expect at least 1 element to be in the ensemble
    assert len(run_history) > 0

    # As the data loader loads the same val/train/test
    # we expect 1.0 as score and all keys available
    expected_performance = {
        'ensemble_val_score': 1.0,
        'ensemble_test_score': 1.0,
        'ensemble_optimization_score': 1.0,
    }

    # Make sure that expected performance is a subset of the run history
    assert all(item in run_history[0].items()
               for item in expected_performance.items())
    assert 'Timestamp' in run_history[0]
    assert isinstance(run_history[0]['Timestamp'], pd.Timestamp)

    assert os.path.exists(
        os.path.join(ensemble_backend.internals_directory,
                     'ensemble_read_preds.pkl')), os.listdir(
                         ensemble_backend.internals_directory)
    assert os.path.exists(
        os.path.join(ensemble_backend.internals_directory,
                     'ensemble_read_scores.pkl')), os.listdir(
                         ensemble_backend.internals_directory)
    def testMain(self):

        ensbuilder = EnsembleBuilder(backend=self.backend,
                                    dataset_name="TEST",
                                    task_type=1,  #Binary Classification
                                    metric=roc_auc,
                                    limit=-1, # not used,
                                    seed=0, # important to find the test files
                                    ensemble_nbest=2,
                                    max_iterations=1 # prevents infinite loop
                                    )
        ensbuilder.SAVE2DISC = False

        ensbuilder.main()

        self.assertEqual(len(ensbuilder.read_preds), 2)
        self.assertIsNotNone(ensbuilder.last_hash)
        self.assertIsNotNone(ensbuilder.y_true_ensemble)
Exemplo n.º 7
0
 def testMain(self):
     
     ensbuilder = EnsembleBuilder(backend=self.backend, 
                                 dataset_name="TEST",
                                 task_type=1,  #Binary Classification
                                 metric=roc_auc,
                                 limit=-1, # not used,
                                 seed=0, # important to find the test files
                                 ensemble_nbest=2,
                                 max_iterations=1 # prevents infinite loop
                                 )
     ensbuilder.SAVE2DISC = False
     
     ensbuilder.main()
     
     self.assertEqual(len(ensbuilder.read_preds), 2)
     self.assertIsNotNone(ensbuilder.last_hash)
     self.assertIsNotNone(ensbuilder.y_true_ensemble)
Exemplo n.º 8
0
def test_run_end_at(ensemble_backend):
    with unittest.mock.patch('pynisher.enforce_limits') as pynisher_mock:
        ensbuilder = EnsembleBuilder(
            backend=ensemble_backend,
            dataset_name="TEST",
            task_type=MULTILABEL_CLASSIFICATION,  # Multilabel Classification
            metric=roc_auc,
            seed=0,  # important to find the test files
            ensemble_nbest=2,
            max_models_on_disc=None,
        )
        ensbuilder.SAVE2DISC = False

        current_time = time.time()

        ensbuilder.run(end_at=current_time + 10, iteration=1)
        # 4 seconds left because: 10 seconds - 5 seconds overhead - very little overhead,
        # but then rounded to an integer
        assert pynisher_mock.call_args_list[0][1]["wall_time_in_s"], 4
Exemplo n.º 9
0
    def testMain(self):

        ensbuilder = EnsembleBuilder(
            backend=self.backend,
            dataset_name="TEST",
            task_type=3,  # Multilabel Classification
            metric=roc_auc,
            limit=-1,  # not used,
            seed=0,  # important to find the test files
            ensemble_nbest=2,
            max_iterations=1,  # prevents infinite loop
            max_models_on_disc=None,
        )
        ensbuilder.SAVE2DISC = False

        ensbuilder.main()

        self.assertEqual(len(ensbuilder.read_preds), 3)
        self.assertIsNotNone(ensbuilder.last_hash)
        self.assertIsNotNone(ensbuilder.y_true_ensemble)

        # Make sure the run history is ok
        run_history = ensbuilder.get_ensemble_history()

        # We expect 1 element to be the ensemble
        self.assertEqual(len(run_history), 1)

        # As the data loader loads the same val/train/test
        # we expect 1.0 as score and all keys available
        expected_performance = {
            'ensemble_val_score': 1.0,
            'ensemble_test_score': 1.0,
            'ensemble_optimization_score': 1.0,
        }
        self.assertDictContainsSubset(expected_performance, run_history[0])
        self.assertIn('Timestamp', run_history[0])
        self.assertIsInstance(run_history[0]['Timestamp'], pd.Timestamp)