def test_balancing_get_weights_treed_single_label(self):
    """Check the sample weights returned for an imbalanced single-label target.

    An 80/20 binary target is handed to ``Balancing.get_weights`` together
    with the ``'adaboost'`` classifier name; the resulting
    ``classifier:sample_weight`` fit parameter must be close to the
    expected per-sample weights.
    """
    labels = np.array([0] * 80 + [1] * 20)
    expected_weights = np.array([0.4] * 80 + [1.6] * 20)

    balancer = Balancing(strategy='weighting')
    # Only the fit parameters are inspected; the init parameters are ignored.
    _, fit_params = balancer.get_weights(labels, 'adaboost', None, None, None)

    observed_weights = fit_params['classifier:sample_weight']
    weights_match = np.allclose(observed_weights, expected_weights)
    self.assertTrue(weights_match)
def test_balancing_get_weights_treed_multilabel(self):
    """Check the sample weights returned for an imbalanced multilabel target.

    Five label combinations occur 100 times each and a sixth only 10 times.
    The ``classifier:sample_weight`` fit parameter returned for the
    ``'adaboost'`` classifier must be close to the expected weights.
    """
    # (label row, number of repetitions), in the order the rows are stacked.
    row_counts = (
        ([0, 0, 0], 100),
        ([1, 0, 0], 100),
        ([0, 1, 0], 100),
        ([1, 1, 0], 100),
        ([0, 0, 1], 100),
        ([1, 0, 1], 10),
    )
    labels = np.array(
        [row for row, count in row_counts for _ in range(count)]
    )
    expected_weights = np.array([0.4] * 500 + [4.0] * 10)

    balancer = Balancing(strategy='weighting')
    # Only the fit parameters are inspected; the init parameters are ignored.
    _, fit_params = balancer.get_weights(labels, 'adaboost', None, None, None)

    observed_weights = fit_params['classifier:sample_weight']
    weights_match = np.allclose(observed_weights, expected_weights)
    self.assertTrue(weights_match)
def test_balancing_get_weights_svm_sgd(self):
    """Check the init parameters returned for class-weight based components.

    For the ``'libsvm_svc'`` classifier the first init parameter must be
    ``classifier:class_weight = 'balanced'``; for the
    ``'liblinear_svc_preprocessor'`` preprocessor it must be
    ``preprocessor:class_weight = 'balanced'``.
    """
    labels = np.array([0] * 80 + [1] * 20)
    balancer = Balancing(strategy='weighting')

    # Component passed in the classifier slot.
    classifier_init, _ = balancer.get_weights(
        labels, 'libsvm_svc', None, None, None,
    )
    first_classifier_item = list(classifier_init.items())[0]
    self.assertEqual(
        ("classifier:class_weight", "balanced"),
        first_classifier_item,
    )

    # Component passed in the preprocessor slot.
    preprocessor_init, _ = balancer.get_weights(
        labels, None, 'liblinear_svc_preprocessor', None, None,
    )
    first_preprocessor_item = list(preprocessor_init.items())[0]
    self.assertEqual(
        ("preprocessor:class_weight", "balanced"),
        first_preprocessor_item,
    )
def test_balancing_get_weights_treed_single_label(self):
    """Check mean and values of the sample weights for a single-label target.

    For an 80/20 binary target and the ``'adaboost'`` classifier, the
    ``classifier:sample_weight`` fit parameter must average to 1 and match
    the expected per-sample weights.
    """
    labels = np.array([0] * 80 + [1] * 20)

    balancer = Balancing(strategy='weighting')
    # Only the fit parameters are inspected; the init parameters are ignored.
    _, fit_params = balancer.get_weights(labels, 'adaboost', None, None, None)

    # The weights are expected to average to one.
    mean_weight = np.mean(fit_params['classifier:sample_weight'])
    self.assertAlmostEqual(mean_weight, 1)

    expected_weights = np.array([0.625] * 80 + [2.5] * 20)
    np.testing.assert_allclose(
        fit_params['classifier:sample_weight'],
        expected_weights,
    )
def test_balancing_get_weights_treed_multilabel(self):
    """Check mean and values of the sample weights for a multilabel target.

    Five label combinations occur 100 times each and a sixth only 10 times.
    For the ``'adaboost'`` classifier, the ``classifier:sample_weight`` fit
    parameter must average to 1 and match the expected per-sample weights.
    """
    Y = np.array(
        [[0, 0, 0]] * 100
        + [[1, 0, 0]] * 100
        + [[0, 1, 0]] * 100
        + [[1, 1, 0]] * 100
        + [[0, 0, 1]] * 100
        + [[1, 0, 1]] * 10
    )
    balancing = Balancing(strategy='weighting')
    # Only the fit parameters are inspected; the init parameters are ignored.
    _, fit_params = balancing.get_weights(Y, 'adaboost', None, None, None)
    sample_weight = fit_params['classifier:sample_weight']

    # No debug print of the weights here: assert_allclose already reports
    # the mismatching values when the comparison fails.
    self.assertAlmostEqual(np.mean(sample_weight), 1)
    np.testing.assert_allclose(
        sample_weight,
        np.array([0.85] * 500 + [8.5] * 10),
    )
def _get_pipeline(self):
    """Return the ordered ``[name, component]`` steps of the pipeline.

    Every choice component receives the same default dataset properties
    dictionary, which marks the target type as classification.

    Returns:
        A list of two-element lists ``[step_name, component_instance]``.
    """
    properties = {'target_type': 'classification'}

    return [
        # Preprocessing steps that are always part of the pipeline.
        ["one_hot_encoding", OneHotEncoder()],
        ["imputation", Imputation()],
        ["rescaling", rescaling_components.RescalingChoice(properties)],
        ["balancing", Balancing()],
        # Choice among the feature preprocessors.
        [
            'preprocessor',
            feature_preprocessing_components.FeaturePreprocessorChoice(
                properties),
        ],
        # Choice among the classifiers.
        [
            'classifier',
            classification_components.ClassifierChoice(properties),
        ],
    ]
def _get_pipeline_steps(self, dataset_properties):
    """Return the ordered ``[name, component]`` steps of the pipeline.

    Args:
        dataset_properties: Optional dict whose entries override or extend
            the defaults (``{'target_type': 'classification'}``). Any
            value that is not a dict, including ``None``, is ignored.

    Returns:
        A list of two-element lists ``[step_name, component_instance]``.
    """
    properties = {'target_type': 'classification'}
    # isinstance() is already False for None, so no separate None check.
    if isinstance(dataset_properties, dict):
        properties.update(dataset_properties)

    data_preprocessor = DataPreprocessor(dataset_properties=properties)
    balancing = Balancing()
    feature_preprocessor = (
        feature_preprocessing_components.FeaturePreprocessorChoice(properties)
    )
    classifier = classification_components.ClassifierChoice(properties)

    return [
        ["data_preprocessing", data_preprocessor],
        ["balancing", balancing],
        ["feature_preprocessor", feature_preprocessor],
        ['classifier', classifier],
    ]
def fit_transformer(self, X, y, fit_params=None):
    """Apply balancing-derived parameters, then defer to the parent class.

    When the configured balancing strategy is ``'weighting'``, the init and
    fit parameters returned by ``Balancing.get_weights`` are merged in:
    the init parameters (overridden by ``self._init_params``) are passed to
    ``set_hyperparameters`` and the fit parameters are added to
    ``fit_params``.

    Args:
        X: Data forwarded to the parent ``fit_transformer``.
        y: Targets; also passed to ``Balancing.get_weights``.
        fit_params: Optional dict of fit parameters. A dict supplied by the
            caller is updated in place.

    Returns:
        The ``(X, fit_params)`` pair produced by the parent
        ``fit_transformer``.
    """
    fit_params = {} if fit_params is None else fit_params

    if self.configuration['balancing:strategy'] == 'weighting':
        balancer = Balancing(strategy='weighting')
        weight_init_params, weight_fit_params = balancer.get_weights(
            y,
            self.configuration['classifier:__choice__'],
            self.configuration['preprocessor:__choice__'],
            {},
            {},
        )
        # Entries in self._init_params take precedence over balancing ones.
        weight_init_params.update(self._init_params)
        self.set_hyperparameters(
            configuration=self.configuration,
            init_params=weight_init_params,
        )

        if weight_fit_params is not None:
            fit_params.update(weight_fit_params)

    transformed, updated_fit_params = super().fit_transformer(
        X, y, fit_params=fit_params,
    )
    return transformed, updated_fit_params
def pre_transform(self, X, y, fit_params=None):
    """Record the target count, apply balancing parameters, then defer.

    ``self.num_targets`` is set to 1 when ``y.shape`` has a single
    dimension and to ``y.shape[1]`` otherwise. When the configured
    balancing strategy is ``'weighting'``, the init parameters returned by
    ``Balancing.get_weights`` are passed to ``set_hyperparameters`` and the
    fit parameters are added to ``fit_params``.

    Args:
        X: Data forwarded to the parent ``pre_transform``.
        y: Targets; also passed to ``Balancing.get_weights``.
        fit_params: Optional dict of fit parameters. A dict supplied by the
            caller is updated in place.

    Returns:
        The ``(X, fit_params)`` pair produced by the parent
        ``pre_transform``.
    """
    if len(y.shape) == 1:
        self.num_targets = 1
    else:
        self.num_targets = y.shape[1]

    fit_params = {} if fit_params is None else fit_params

    if self.configuration['balancing:strategy'] == 'weighting':
        balancer = Balancing(strategy='weighting')
        weight_init_params, weight_fit_params = balancer.get_weights(
            y,
            self.configuration['classifier:__choice__'],
            self.configuration['preprocessor:__choice__'],
            {},
            {},
        )
        self.set_hyperparameters(
            configuration=self.configuration,
            init_params=weight_init_params,
        )

        if weight_fit_params is not None:
            fit_params.update(weight_fit_params)

    parent = super(SimpleClassificationPipeline, self)
    transformed, updated_fit_params = parent.pre_transform(
        X, y, fit_params=fit_params,
    )
    return transformed, updated_fit_params
def _get_pipeline_steps(self):
    """Return the ordered ``[name, component]`` steps of the pipeline.

    Every component that takes dataset properties receives the same
    default dictionary, which marks the target type as classification.

    Returns:
        A list of two-element lists ``[step_name, component_instance]``.
    """
    # The unconditional debug print that used to run here was removed:
    # building the step list should not write to stdout.
    default_dataset_properties = {'target_type': 'classification'}

    # NOTE(review): the feature preprocessor is placed before data
    # preprocessing and balancing here -- confirm this order is intended.
    return [
        [
            "feature_preprocessor",
            feature_preprocessing_components.FeaturePreprocessorChoice(
                default_dataset_properties),
        ],
        [
            "data_preprocessing",
            DataPreprocessor(dataset_properties=default_dataset_properties),
        ],
        ["balancing", Balancing()],
        [
            'classifier',
            classification_components.ClassifierChoice(
                default_dataset_properties),
        ],
    ]