Example #1
    def test_file_output(self):
        self.output_dir = os.path.join(os.getcwd(), '.test_file_output')

        D = get_regression_datamanager()
        D.name = 'test'

        configuration_space = get_configuration_space(D.info)

        configuration = configuration_space.sample_configuration()
        backend_api = backend.create(self.output_dir, self.output_dir)
        evaluator = HoldoutEvaluator(D,
                                     backend_api,
                                     configuration,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_y_test=True)

        loss, Y_optimization_pred, Y_valid_pred, Y_test_pred = \
            evaluator.fit_predict_and_loss()
        evaluator.file_output(loss, Y_optimization_pred, Y_valid_pred,
                              Y_test_pred)

        self.assertTrue(
            os.path.exists(
                os.path.join(self.output_dir, '.auto-sklearn',
                             'true_targets_ensemble.npy')))
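
A note on cleanup: the test above writes a .test_file_output directory into the current working directory, so repeated runs need a matching teardown. A minimal sketch of a hypothetical tearDown (the class name and lifecycle are assumptions based on the standard unittest pattern, not taken from the source):

import os
import shutil
import unittest

class HoldoutEvaluatorTest(unittest.TestCase):  # hypothetical class name

    def tearDown(self):
        # Remove the temporary output directory created by
        # test_file_output so later runs start from a clean state.
        if getattr(self, 'output_dir', None) and os.path.exists(self.output_dir):
            shutil.rmtree(self.output_dir)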
Example #2
    def test_subsample_indices_regression(self, mock, backend_mock):
        D = get_regression_datamanager()

        configuration = unittest.mock.Mock(spec=Configuration)
        queue_ = multiprocessing.Queue()
        kfold = ShuffleSplit(random_state=1, n_splits=1)
        evaluator = TrainEvaluator(D,
                                   backend_mock,
                                   queue_,
                                   configuration=configuration,
                                   cv=kfold,
                                   subsample=30,
                                   metric=accuracy)
        train_indices = np.arange(69, dtype=int)
        train_indices3 = evaluator.subsample_indices(train_indices)
        evaluator.subsample = 67
        train_indices4 = evaluator.subsample_indices(train_indices)
        # Common cases
        for ti in train_indices3:
            self.assertIn(ti, train_indices4)

        # Corner cases
        evaluator.subsample = 0
        train_indices5 = evaluator.subsample_indices(train_indices)
        np.testing.assert_allclose(train_indices5, np.array([]))
        # With equal or greater it should return a non-shuffled array of indices
        evaluator.subsample = 69
        train_indices6 = evaluator.subsample_indices(train_indices)
        np.testing.assert_allclose(train_indices6, train_indices)
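
The assertions above pin down the contract of subsample_indices: an empty array when subsample is 0, and the input returned unshuffled once subsample reaches the number of indices. A minimal sketch of a function honoring those corner cases (a hypothetical stand-in, not auto-sklearn's actual implementation):

import numpy as np
from sklearn.model_selection import ShuffleSplit

def subsample_indices(train_indices, subsample, random_state=1):
    # Corner cases asserted in the test above: keep nothing, or
    # return the full index set untouched.
    if subsample == 0:
        return np.array([])
    if subsample >= len(train_indices):
        return train_indices
    # Otherwise draw `subsample` indices with a reproducible shuffle split.
    splitter = ShuffleSplit(n_splits=1, train_size=subsample,
                            random_state=random_state)
    keep, _ = next(splitter.split(train_indices))
    return train_indices[keep]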
Example #3
    def test_file_output(self, makedirs_mock, backend_mock):

        D = get_regression_datamanager()
        D.name = 'test'
        configuration = unittest.mock.Mock(spec=Configuration)
        queue_ = multiprocessing.Queue()

        kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
        evaluator = TrainEvaluator(D,
                                   backend_mock,
                                   queue=queue_,
                                   configuration=configuration,
                                   cv=kfold,
                                   all_scoring_functions=True,
                                   output_y_hat_optimization=True,
                                   metric=accuracy)

        backend_mock.get_model_dir.return_value = True
        evaluator.model = 'model'
        evaluator.Y_optimization = D.data['Y_train']
        rval = evaluator.file_output(D.data['Y_train'], D.data['Y_valid'],
                                     D.data['Y_test'])

        self.assertEqual(rval, (None, None))
        self.assertEqual(backend_mock.save_targets_ensemble.call_count, 1)
        self.assertEqual(backend_mock.save_predictions_as_npy.call_count, 3)
        self.assertEqual(makedirs_mock.call_count, 1)
        self.assertEqual(backend_mock.save_model.call_count, 1)

        # Check that predictions containing NaNs are rejected - the model
        # must not predict nonsense for unseen data
        D.data['Y_valid'][0] = np.NaN
        rval = evaluator.file_output(D.data['Y_train'], D.data['Y_valid'],
                                     D.data['Y_test'])
        self.assertEqual(
            rval,
            (1.0, {'error': 'Model predictions for validation set contains NaNs.'}))
        D.data['Y_train'][0] = np.NaN
        rval = evaluator.file_output(D.data['Y_train'], D.data['Y_valid'],
                                     D.data['Y_test'])
        self.assertEqual(
            rval,
            (1.0, {'error': 'Model predictions for optimization set contains NaNs.'}))
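
The error tuples compared above imply that file_output validates each prediction array before writing anything to disk. A hedged sketch of such a guard (a hypothetical helper; only the message text is taken from the test):

import numpy as np

def _check_predictions(predictions, subset_name):
    # Mirror the (loss, additional_run_info) error tuple the test
    # above expects; return None when the predictions are usable.
    if predictions is not None and np.any(np.isnan(predictions)):
        return 1.0, {'error': 'Model predictions for %s set contains NaNs.'
                              % subset_name}
    return None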
Example #4
    def test_file_output(self):
        self.output_dir = os.path.join(os.getcwd(), '.test')

        D = get_regression_datamanager()
        D.name = 'test'

        configuration_space = get_configuration_space(D.info)

        configuration = configuration_space.sample_configuration()
        evaluator = HoldoutEvaluator(D, self.output_dir, configuration,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_y_test=True)

        loss, Y_optimization_pred, Y_valid_pred, Y_test_pred = \
            evaluator.fit_predict_and_loss()
        evaluator.file_output(loss, Y_optimization_pred, Y_valid_pred,
                              Y_test_pred)

        self.assertTrue(os.path.exists(os.path.join(
            self.output_dir, '.auto-sklearn', 'true_targets_ensemble.npy')))
Example #5
    def test_file_output(self, loss_mock):

        D = get_regression_datamanager()
        D.name = 'test'
        self.backend_mock.load_datamanager.return_value = D
        configuration = unittest.mock.Mock(spec=Configuration)
        queue_ = multiprocessing.Queue()
        loss_mock.return_value = None

        evaluator = TrainEvaluator(self.backend_mock,
                                   queue_,
                                   configuration=configuration,
                                   metric=accuracy,
                                   budget=0)

        self.backend_mock.get_model_dir.return_value = True
        evaluator.pipeline = 'model'
        evaluator.Y_optimization = D.train_tensors[1]
        rval = evaluator.file_output(
            D.train_tensors[1],
            None,
            D.test_tensors[1],
        )

        self.assertEqual(rval, (None, {}))
        self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 1)
        self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 1)
        self.assertEqual(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(),
            {
                'seed', 'idx', 'budget', 'model', 'cv_model',
                'ensemble_predictions', 'valid_predictions', 'test_predictions'
            })
        self.assertIsNotNone(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
            ['model'])
        self.assertIsNone(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
            ['cv_model'])

        evaluator.pipelines = ['model2', 'model2']
        rval = evaluator.file_output(
            D.train_tensors[1],
            None,
            D.test_tensors[1],
        )
        self.assertEqual(rval, (None, {}))
        self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 2)
        self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 2)
        self.assertEqual(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(),
            {
                'seed', 'idx', 'budget', 'model', 'cv_model',
                'ensemble_predictions', 'valid_predictions', 'test_predictions'
            })
        self.assertIsNotNone(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
            ['model'])
        self.assertIsNotNone(
            self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]
            ['cv_model'])

        # Check that predictions containing NaNs are rejected - the model
        # must not predict nonsense for unseen data
        D.train_tensors[1][0] = np.NaN
        rval = evaluator.file_output(
            D.train_tensors[1],
            None,
            D.test_tensors[1],
        )
        self.assertEqual(
            rval,
            (1.0, {'error': 'Model predictions for optimization set contains NaNs.'}))
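
For reference, the call_args_list[-1][1] lookups used throughout this example read the keyword arguments of the most recent call on a mock. A small self-contained illustration of that pattern (plain unittest.mock usage, independent of auto-sklearn):

import unittest.mock

backend = unittest.mock.Mock()
backend.save_numrun_to_dir(seed=1, idx=0, budget=0.0, model='m',
                           cv_model=None, ensemble_predictions=None,
                           valid_predictions=None, test_predictions=None)

# call_args_list[-1] is the most recent call; index [1] is its kwargs
# dict, so keys() and item lookups behave exactly as asserted above.
kwargs = backend.save_numrun_to_dir.call_args_list[-1][1]
assert kwargs['model'] == 'm' and kwargs['cv_model'] is None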