def test_multilabel(tmp_dir, output_dir, dask_client):
    """Fit AutoSklearnClassifier on the multilabel 'iris' data and check it.

    Checks, in order: the prediction shape is (50, 3), ``count_succeses``
    returns a positive value for ``cv_results_``, the two
    ``performance_over_time_`` helpers return ``True``, the macro F1 on the
    test split is at least 0.9, and the mean predicted probability on the
    training split is approximately 0.33.
    """
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris',
        make_multilabel=True,
    )

    classifier = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    classifier.fit(X_train, Y_train)

    y_pred = classifier.predict(X_test)
    # The debug information is only computed when an assertion fails.
    assert y_pred.shape == (50, 3), print_debug_information(classifier)

    n_successes = count_succeses(classifier.cv_results_)
    assert n_successes > 0, print_debug_information(classifier)

    has_train_scores = includes_train_scores(
        classifier.performance_over_time_.columns)
    assert has_train_scores is True

    is_plausible = performance_over_time_is_plausible(
        classifier.performance_over_time_)
    assert is_plausible is True

    macro_f1 = f1_macro(Y_test, y_pred)
    assert macro_f1 >= 0.9, print_debug_information(classifier)

    train_probabilities = classifier.predict_proba(X_train)
    mean_probability = np.mean(train_probabilities)
    assert mean_probability == pytest.approx(0.33, rel=1e-1)
def test_multilabel(self):
    """Fit AutoSklearnClassifier on the multilabel 'iris' data and check it.

    One folder next to ``self.test_dir`` is used as both the temporary and
    the output folder. The test asserts that the predictions have shape
    (50, 3) and that the macro F1 on the test split is at least 0.9.
    """
    work_dir = os.path.join(self.test_dir, '..', '.tmp_multilabel_fit')
    self._setUp(work_dir)

    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris',
        make_multilabel=True,
    )

    classifier = AutoSklearnClassifier(
        time_left_for_this_task=20,
        per_run_time_limit=5,
        tmp_folder=work_dir,
        output_folder=work_dir,
    )
    classifier.fit(X_train, Y_train)

    y_pred = classifier.predict(X_test)
    self.assertEqual(y_pred.shape, (50, 3))

    macro_f1 = f1_macro(Y_test, y_pred)
    self.assertGreaterEqual(macro_f1, 0.9)
def test_multilabel(self):
    """Fit AutoSklearnClassifier on the multilabel 'iris' data and check it.

    Separate temporary and output folders next to ``self.test_dir`` are
    passed through ``self._setUp`` before fitting. The test asserts the
    prediction shape (50, 3), a macro F1 of at least 0.9 on the test split,
    and a mean training-set probability of 0.33 to one decimal place.
    """
    parent_dir = os.path.join(self.test_dir, '..')
    tmp_folder = os.path.join(parent_dir, '.tmp_multilabel_fit')
    output_folder = os.path.join(parent_dir, '.out_multilabel_fit')
    for folder in (tmp_folder, output_folder):
        self._setUp(folder)

    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris',
        make_multilabel=True,
    )

    classifier = AutoSklearnClassifier(
        time_left_for_this_task=20,
        per_run_time_limit=5,
        tmp_folder=tmp_folder,
        output_folder=output_folder,
    )
    classifier.fit(X_train, Y_train)

    y_pred = classifier.predict(X_test)
    self.assertEqual(y_pred.shape, (50, 3))

    macro_f1 = f1_macro(Y_test, y_pred)
    self.assertGreaterEqual(macro_f1, 0.9)

    train_probabilities = classifier.predict_proba(X_train)
    mean_probability = np.mean(train_probabilities)
    self.assertAlmostEqual(mean_probability, 0.33, places=1)
def test_multilabel(self):
    """Fit AutoSklearnClassifier on the multilabel 'iris' data and check it.

    Separate temporary and output folders next to ``self.test_dir`` are
    passed through ``self._setUp`` before fitting. The test asserts the
    prediction shape (50, 3), a positive ``self._count_succeses`` value for
    ``cv_results_``, a macro F1 of at least 0.9 on the test split, and a
    mean training-set probability of 0.33 to one decimal place.
    """
    parent_dir = os.path.join(self.test_dir, '..')
    tmp_folder = os.path.join(parent_dir, '.tmp_multilabel_fit')
    output_folder = os.path.join(parent_dir, '.out_multilabel_fit')
    for folder in (tmp_folder, output_folder):
        self._setUp(folder)

    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris',
        make_multilabel=True,
    )

    classifier = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_folder,
        output_folder=output_folder,
    )
    classifier.fit(X_train, Y_train)

    y_pred = classifier.predict(X_test)
    self.assertEqual(y_pred.shape, (50, 3))

    n_successes = self._count_succeses(classifier.cv_results_)
    self.assertGreater(n_successes, 0)

    macro_f1 = f1_macro(Y_test, y_pred)
    self.assertGreaterEqual(macro_f1, 0.9)

    train_probabilities = classifier.predict_proba(X_train)
    mean_probability = np.mean(train_probabilities)
    self.assertAlmostEqual(mean_probability, 0.33, places=1)
def test_multilabel(tmp_dir, output_dir, dask_client):
    """Fit AutoSklearnClassifier on the multilabel 'iris' data and check it.

    Checks, in order: the prediction shape is (50, 3), ``count_succeses``
    returns a positive value for ``cv_results_``, the macro F1 on the test
    split is at least 0.9, and the mean predicted probability on the
    training split is approximately 0.33. Failure messages are extracted
    from the first ``AutoML*.log`` file found in ``tmp_dir``.
    """
    X_train, Y_train, X_test, Y_test = putil.get_dataset(
        'iris',
        make_multilabel=True,
    )

    classifier = AutoSklearnClassifier(
        time_left_for_this_task=30,
        per_run_time_limit=5,
        tmp_folder=tmp_dir,
        dask_client=dask_client,
        output_folder=output_dir,
    )
    classifier.fit(X_train, Y_train)

    # Take the first log file matching the pattern inside the tmp folder.
    log_pattern = os.path.join(tmp_dir, 'AutoML*.log')
    matching_logs = glob.glob(log_pattern)
    log_file_path = matching_logs[0]

    y_pred = classifier.predict(X_test)
    # The log message is only extracted when an assertion fails.
    assert y_pred.shape == (50, 3), extract_msg_from_log(log_file_path)

    n_successes = count_succeses(classifier.cv_results_)
    assert n_successes > 0, extract_msg_from_log(log_file_path)

    macro_f1 = f1_macro(Y_test, y_pred)
    assert macro_f1 >= 0.9, extract_msg_from_log(log_file_path)

    train_probabilities = classifier.predict_proba(X_train)
    mean_probability = np.mean(train_probabilities)
    assert mean_probability == pytest.approx(0.33, rel=1e-1)