def test_default_configuration(self):
    """Exercise SelectPercentileClassification with its default configuration.

    Three scenarios are checked: dense input, sparse input, and a manual
    fit/transform on standard-scaled digits data. Each one must keep the
    row count and return int(n_columns / 2) columns.
    """
    # Dense input
    dense_out, dense_in = _test_preprocessing(SelectPercentileClassification)
    self.assertEqual(dense_out.shape[0], dense_in.shape[0])
    self.assertEqual(dense_out.shape[1], int(dense_in.shape[1] / 2))
    self.assertFalse((dense_out == 0).all())

    # Sparse input: the result has to stay sparse
    sparse_out, sparse_in = _test_preprocessing(
        SelectPercentileClassification, make_sparse=True
    )
    self.assertTrue(scipy.sparse.issparse(sparse_out))
    self.assertEqual(sparse_out.shape[0], sparse_in.shape[0])
    self.assertEqual(sparse_out.shape[1], int(sparse_in.shape[1] / 2))

    # Custom preprocessing test to check if clipping to zero works
    X_train, Y_train, _, _ = get_dataset(dataset='digits')
    unscaled_X_train = X_train.copy()
    scaler = sklearn.preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)

    search_space = SelectPercentileClassification.get_hyperparameter_search_space()
    default = search_space.get_default_configuration()
    # Only forward hyperparameters that are actually set in the default
    init_kwargs = {
        name: default[name] for name in default if default[name] is not None
    }
    preprocessor = SelectPercentileClassification(random_state=1, **init_kwargs)
    fitted = preprocessor.fit(X_train, Y_train)
    custom_out = fitted.transform(X_train)

    self.assertEqual(custom_out.shape[0], unscaled_X_train.shape[0])
    self.assertEqual(custom_out.shape[1], int(unscaled_X_train.shape[1] / 2))
def test_default_configuration(self):
    """Exercise SelectRates with its default configuration.

    Checks dense input (3 columns expected), sparse input (half of the
    columns expected) and a manual fit/transform on standard-scaled digits
    data (52 columns expected). The row count must never change.
    """
    # Dense input
    dense_out, dense_in = _test_preprocessing(SelectRates)
    self.assertEqual(dense_out.shape[0], dense_in.shape[0])
    self.assertEqual(dense_out.shape[1], 3)
    self.assertFalse((dense_out == 0).all())

    # Sparse input: the result has to stay sparse
    sparse_out, sparse_in = _test_preprocessing(SelectRates, make_sparse=True)
    self.assertTrue(scipy.sparse.issparse(sparse_out))
    self.assertEqual(sparse_out.shape[0], sparse_in.shape[0])
    self.assertEqual(sparse_out.shape[1], int(sparse_in.shape[1] / 2))

    # Custom preprocessing test to check if clipping to zero works
    X_train, Y_train, _, _ = get_dataset(dataset='digits')
    unscaled_X_train = X_train.copy()
    scaler = sklearn.preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)

    search_space = SelectRates.get_hyperparameter_search_space()
    default = search_space.get_default_configuration()
    # Only forward hyperparameters that are actually set in the default
    init_kwargs = {
        name: default[name] for name in default if default[name] is not None
    }
    preprocessor = SelectRates(random_state=1, **init_kwargs)
    fitted = preprocessor.fit(X_train, Y_train)
    custom_out = fitted.transform(X_train)

    self.assertEqual(custom_out.shape[0], unscaled_X_train.shape[0])
    # It is unclear why 52 columns are kept here rather than 32, which
    # would be half of the number of features. It seems to be related to a
    # runtime warning raised by sklearn.
    self.assertEqual(custom_out.shape[1], 52)
def test_default_configuration(self):
    """Run SelectRates with default hyperparameters on three inputs.

    Dense data must come back with 3 columns, sparse data with half of its
    columns (and still sparse), and standard-scaled digits data with 52
    columns. The number of rows is preserved throughout.
    """
    # --- dense ---
    result, source = _test_preprocessing(SelectRates)
    self.assertEqual(result.shape[0], source.shape[0])
    self.assertEqual(result.shape[1], 3)
    self.assertFalse((result == 0).all())

    # --- sparse ---
    result, source = _test_preprocessing(SelectRates, make_sparse=True)
    self.assertTrue(scipy.sparse.issparse(result))
    self.assertEqual(result.shape[0], source.shape[0])
    self.assertEqual(result.shape[1], int(source.shape[1] / 2))

    # --- custom preprocessing test to check if clipping to zero works ---
    X_train, Y_train, _, _ = get_dataset(dataset="digits")
    source = X_train.copy()
    X_train = sklearn.preprocessing.StandardScaler().fit_transform(X_train)

    default = SelectRates.get_hyperparameter_search_space().get_default_configuration()
    # Skip hyperparameters that the default configuration leaves unset
    active_params = {}
    for hp_name in default:
        if default[hp_name] is not None:
            active_params[hp_name] = default[hp_name]

    transformer = SelectRates(random_state=1, **active_params).fit(X_train, Y_train)
    result = transformer.transform(X_train)

    self.assertEqual(result.shape[0], source.shape[0])
    # It is unclear why 52 columns are kept here rather than 32, which
    # would be half of the number of features. It seems to be related to a
    # runtime warning raised by sklearn.
    self.assertEqual(result.shape[1], 52)
def test_default_configuration(self):
    """Exercise SelectRegressionRates with its default configuration.

    Checks dense input (4 columns expected), sparse input (half of the
    columns expected) and a manual fit/transform on standard-scaled digits
    data (21 columns expected). The row count must never change.
    """
    # Dense input
    dense_out, dense_in = _test_preprocessing(SelectRegressionRates)
    self.assertEqual(dense_out.shape[0], dense_in.shape[0])
    self.assertEqual(dense_out.shape[1], 4)
    self.assertFalse((dense_out == 0).all())

    # Sparse input: the result has to stay sparse
    sparse_out, sparse_in = _test_preprocessing(
        SelectRegressionRates, make_sparse=True
    )
    self.assertTrue(scipy.sparse.issparse(sparse_out))
    self.assertEqual(sparse_out.shape[0], sparse_in.shape[0])
    self.assertEqual(sparse_out.shape[1], int(sparse_in.shape[1] / 2))

    # Makes sure that the features are reduced, not the number of samples
    X_train, Y_train, _, _ = get_dataset(dataset='digits')
    unscaled_X_train = X_train.copy()
    scaler = sklearn.preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)

    search_space = SelectRegressionRates.get_hyperparameter_search_space()
    default = search_space.get_default_configuration()
    # Only forward hyperparameters that are actually set in the default
    init_kwargs = {
        name: default[name] for name in default if default[name] is not None
    }
    preprocessor = SelectRegressionRates(random_state=1, **init_kwargs)
    fitted = preprocessor.fit(X_train, Y_train)
    custom_out = fitted.transform(X_train)

    self.assertEqual(custom_out.shape[0], unscaled_X_train.shape[0])
    self.assertEqual(custom_out.shape[1], 21)
def test_default_configuration(self):
    """Exercise Nystroem with its default configuration.

    Both the helper-driven run and a manual fit/transform on
    standard-scaled digits data must keep the row count and produce
    exactly 100 columns.
    """
    helper_out, helper_in = _test_preprocessing(Nystroem)
    self.assertEqual(helper_out.shape[0], helper_in.shape[0])
    self.assertEqual(helper_out.shape[1], 100)
    self.assertFalse((helper_out == 0).all())

    # Custom preprocessing test to check if clipping to zero works
    X_train, Y_train, _, _ = get_dataset(dataset='digits')
    unscaled_X_train = X_train.copy()
    scaler = sklearn.preprocessing.StandardScaler()
    X_train = scaler.fit_transform(X_train)

    search_space = Nystroem.get_hyperparameter_search_space()
    default = search_space.get_default_configuration()
    # Only forward hyperparameters that are actually set in the default
    init_kwargs = {
        name: default[name] for name in default if default[name] is not None
    }
    preprocessor = Nystroem(random_state=1, **init_kwargs)
    fitted = preprocessor.fit(X_train, Y_train)
    custom_out = fitted.transform(X_train)

    self.assertEqual(custom_out.shape[0], unscaled_X_train.shape[0])
    self.assertEqual(custom_out.shape[1], 100)
def test_default_configuration(self):
    """Check the default RandomTreesEmbedding output.

    The result must keep the row count, have 216 columns, be a sparse
    matrix whose stored entries are all 1, and the input returned by the
    helper must be a numpy array.
    """
    embedded, source = _test_preprocessing(RandomTreesEmbedding)

    rows_out, rows_in = embedded.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)
    self.assertEqual(embedded.shape[1], 216)

    self.assertIsInstance(source, np.ndarray)
    self.assertTrue(scipy.sparse.issparse(embedded))
    # Every explicitly stored value has to be exactly one
    self.assertTrue((embedded.data == 1).all())
def test_default_configuration(self):
    """Check the default RandomTreesEmbedding output.

    The result must keep the row count, have 213 columns, be a sparse
    matrix whose stored entries are all 1, and the input returned by the
    helper must be a numpy array.
    """
    embedded, source = _test_preprocessing(RandomTreesEmbedding)

    rows_out, rows_in = embedded.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)
    self.assertEqual(embedded.shape[1], 213)

    self.assertIsInstance(source, np.ndarray)
    self.assertTrue(scipy.sparse.issparse(embedded))
    # Every explicitly stored value has to be exactly one
    self.assertTrue((embedded.data == 1).all())
def test_default_configuration_sparse_data(self):
    """Check the default VarianceThreshold on input built with make_sparse=True.

    The output must have shape (100, 3), equal the input with its first
    column dropped, and be a CSR matrix.
    """
    transformation, original = _test_preprocessing(VarianceThreshold,
                                                   make_sparse=True)
    self.assertEqual(transformation.shape, (100, 3))
    # Everything except the first input column must pass through unchanged
    self.assertTrue(
        (transformation.toarray() == original.toarray()[:, 1:]).all())
    self.assertIsInstance(transformation, sparse.csr_matrix)
def test_default_configuration_sparse_data(self):
    """Check the default Imputation on input built with make_sparse=True.

    The output must keep the input shape, carry the same stored values as
    the input, and be a CSC matrix.
    """
    transformation, original = _test_preprocessing(Imputation,
                                                   make_sparse=True)
    self.assertEqual(transformation.shape, original.shape)
    # Compare the explicitly stored entries of both sparse matrices
    self.assertTrue((transformation.data == original.data).all())
    self.assertIsInstance(transformation, sparse.csc_matrix)
def test_default_configuration(self):
    """Check the default SelectPercentileRegression on the "boston" dataset.

    The row count must be preserved, int(n_columns / 2) columns must be
    kept, and the output must not be entirely zero.
    """
    selected, source = _test_preprocessing(
        Preprocessor=SelectPercentileRegression,
        dataset="boston",
    )
    expected_columns = int(source.shape[1] / 2)

    self.assertEqual(selected.shape[0], source.shape[0])
    self.assertEqual(selected.shape[1], expected_columns)
    self.assertFalse((selected == 0).all())
def test_default_configuration_sparse_data(self):
    """Check the default NumericalImputation on input built with make_sparse=True.

    The output must keep the input shape, carry the same stored values as
    the input, and be a CSC matrix.
    """
    transformation, original = _test_preprocessing(NumericalImputation,
                                                   make_sparse=True)
    self.assertEqual(transformation.shape, original.shape)
    # Compare the explicitly stored entries of both sparse matrices
    self.assertTrue((transformation.data == original.data).all())
    self.assertIsInstance(transformation, sparse.csc_matrix)
def test_default_configuration(self):
    """Check that PCA with its default configuration is reproducible.

    The preprocessing is run twice. Each run must keep the input shape and
    must actually change the data; the second run must agree with the
    first one up to a relative tolerance of 1e-4.
    """
    # Imported locally so the block does not rely on a module-level alias.
    import numpy as np

    previous = None
    for _ in range(2):
        transformation, original = _test_preprocessing(PCA)
        self.assertEqual(transformation.shape, original.shape)
        self.assertFalse((transformation == original).all())
        if previous is not None:
            # Bit-for-bit equality of floating-point output is brittle,
            # so consecutive runs are compared with a tolerance instead.
            np.testing.assert_allclose(transformation, previous, rtol=1e-4)
        previous = transformation
def test_default_configuration(self):
    """Check that PCA with its default configuration is reproducible.

    The preprocessing is run ten times. Each run must keep the input shape
    and must actually change the data; every run after the first must
    agree with the preceding one up to a relative tolerance of 1e-4.
    """
    # Imported locally so the block does not rely on a module-level alias.
    import numpy as np

    previous = None
    for _ in range(10):
        transformation, original = _test_preprocessing(PCA)
        self.assertEqual(transformation.shape, original.shape)
        self.assertFalse((transformation == original).all())
        if previous is not None:
            # Bit-for-bit equality of floating-point output is brittle,
            # so consecutive runs are compared with a tolerance instead.
            np.testing.assert_allclose(transformation, previous, rtol=1e-4)
        previous = transformation
def test_default_configuration_regression(self):
    """Check the default SelectRegressionRates on the 'boston' dataset.

    The row count must be preserved, 12 columns must remain, and the
    output must not be entirely zero.
    """
    selected, source = _test_preprocessing(SelectRegressionRates, dataset='boston')

    self.assertEqual(selected.shape[0], source.shape[0])
    # From 13 to 12 features
    remaining_columns = selected.shape[1]
    self.assertEqual(remaining_columns, 12)
    self.assertFalse((selected == 0).all())
def test_default_configuration(self):
    """Check that NoPreprocessing leaves the data untouched.

    Shape and the summary statistics sum, min, max, std and mean must be
    identical before and after; the output must not be entirely zero.
    """
    passthrough, source = _test_preprocessing(NoPreprocessing)

    for axis in (0, 1):
        self.assertEqual(passthrough.shape[axis], source.shape[axis])
    self.assertFalse((passthrough == 0).all())

    # Same statistics, in the same order, on input and output
    for statistic in (np.sum, np.min, np.max, np.std, np.mean):
        self.assertEqual(statistic(source), statistic(passthrough))
def test_default_configuration_no_encoding(self):
    """Check that the default OneHotEncoder run returns its input unchanged.

    The preprocessing is run twice; each output must equal the helper's
    input element-wise and the second output must equal the first.
    """
    previous = None
    for _ in range(2):
        encoded, source = _test_preprocessing(OneHotEncoder)
        self.assertEqual(encoded.shape, source.shape)
        self.assertTrue((encoded == source).all())
        if previous is not None:
            # Consecutive runs have to produce identical output
            self.assertTrue((encoded == previous).all())
        previous = encoded
def test_default_configuration_no_encoding(self):
    """Check that the default OneHotEncoder run returns its input unchanged.

    The preprocessing is run ten times; each output must equal the
    helper's input element-wise and every output after the first must
    equal the preceding one.
    """
    previous = None
    for _ in range(10):
        encoded, source = _test_preprocessing(OneHotEncoder)
        self.assertEqual(encoded.shape, source.shape)
        self.assertTrue((encoded == source).all())
        if previous is not None:
            # Consecutive runs have to produce identical output
            self.assertTrue((encoded == previous).all())
        previous = encoded
def test_default_configuration(self):
    """Check that PCA with its default configuration is reproducible.

    The preprocessing is run twice. Each run must keep the input shape and
    must actually change the data; the second run must match the first
    within a relative tolerance of 1e-4.
    """
    previous = None
    for _ in range(2):
        projected, source = _test_preprocessing(PCA)
        self.assertEqual(projected.shape, source.shape)
        self.assertFalse((projected == source).all())
        if previous is not None:
            # Tolerant comparison of this run against the preceding one
            np.testing.assert_allclose(projected, previous, rtol=1e-4)
        previous = projected
def test_default_configuration_sparse_no_encoding(self):
    """Check the default OneHotEncoder on input built with make_sparse=True.

    The preprocessing is run ten times; each densified output must equal
    the densified input and every output after the first must equal the
    preceding one.
    """
    previous = None
    for _ in range(10):
        encoded, source = _test_preprocessing(OneHotEncoder, make_sparse=True)
        self.assertEqual(encoded.shape, source.shape)
        self.assertTrue((encoded.todense() == source.todense()).all())
        if previous is not None:
            # Consecutive runs have to produce identical dense content
            self.assertTrue((encoded.todense() == previous.todense()).all())
        previous = encoded
def test_default_configuration_sparse_no_encoding(self):
    """Check the default NoEncoding on input built with make_sparse=True.

    The preprocessing is run twice; each densified output must equal the
    densified input and the two outputs must not differ in any entry.
    """
    previous = None
    for _ in range(2):
        encoded, source = _test_preprocessing(NoEncoding, make_sparse=True)
        self.assertEqual(encoded.shape, source.shape)
        self.assertTrue((encoded.todense() == source.todense()).all())
        if previous is not None:
            # Count the entries in which the two runs disagree
            differing = (encoded != previous).count_nonzero()
            self.assertEqual(differing, 0)
        previous = encoded
def test_default_configuration(self):
    """Run Nystroem with default hyperparameters on two inputs.

    The helper-driven run and the manual run on standard-scaled digits
    data must both preserve the row count and yield 100 columns.
    """
    # --- helper-driven run ---
    result, source = _test_preprocessing(Nystroem)
    self.assertEqual(result.shape[0], source.shape[0])
    self.assertEqual(result.shape[1], 100)
    self.assertFalse((result == 0).all())

    # --- custom preprocessing test to check if clipping to zero works ---
    X_train, Y_train, _, _ = get_dataset(dataset='digits')
    source = X_train.copy()
    X_train = sklearn.preprocessing.StandardScaler().fit_transform(X_train)

    default = Nystroem.get_hyperparameter_search_space().get_default_configuration()
    # Skip hyperparameters that the default configuration leaves unset
    active_params = {}
    for hp_name in default:
        if default[hp_name] is not None:
            active_params[hp_name] = default[hp_name]

    transformer = Nystroem(random_state=1, **active_params).fit(X_train, Y_train)
    result = transformer.transform(X_train)

    self.assertEqual(result.shape[0], source.shape[0])
    self.assertEqual(result.shape[1], 100)
def test_default_configuration_sparse(self):
    """Check the default KernelPCA on 'digits' built with make_sparse=True.

    The row count must be preserved and the output must not be entirely
    zero.
    """
    projected, source = _test_preprocessing(
        KernelPCA,
        dataset='digits',
        make_sparse=True,
    )
    rows_out, rows_in = projected.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)

    only_zeros = (projected == 0).all()
    self.assertFalse(only_zeros)
def test_default_configuration(self):
    """Check the default Densifier on input built with make_sparse=True.

    The output must be a numpy array with the same shape as the input.
    """
    transformation, original = _test_preprocessing(Densifier, make_sparse=True)
    self.assertIsInstance(transformation, np.ndarray)
    self.assertEqual(transformation.shape, original.shape)
def test_default_configuration(self):
    """Check the default RandomKitchenSinks output.

    The row count must be preserved, the output must have 100 columns and
    must not be entirely zero.
    """
    features, source = _test_preprocessing(RandomKitchenSinks)

    rows_out, rows_in = features.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)
    self.assertEqual(features.shape[1], 100)

    only_zeros = (features == 0).all()
    self.assertFalse(only_zeros)
def test_default_configuration(self):
    """Check the default FeatureAgglomeration output.

    The row count must be preserved and the output must not be entirely
    zero.
    """
    merged, source = _test_preprocessing(FeatureAgglomeration)

    rows_out, rows_in = merged.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)

    only_zeros = (merged == 0).all()
    self.assertFalse(only_zeros)
def test_default_configuration(self):
    """Check the default LibLinear_Preprocessor output.

    The row count must be preserved and the output must not be entirely
    zero.
    """
    selected, source = _test_preprocessing(LibLinear_Preprocessor)

    rows_out, rows_in = selected.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)

    only_zeros = (selected == 0).all()
    self.assertFalse(only_zeros)
def test_default_configuration(self):
    """Check the default KernelPCA on the 'digits' dataset.

    The helper is called with train_size_maximum=2000. The row count must
    be preserved and the output must not be entirely zero.
    """
    projected, source = _test_preprocessing(
        KernelPCA,
        train_size_maximum=2000,
        dataset='digits',
    )
    rows_out, rows_in = projected.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)

    only_zeros = (projected == 0).all()
    self.assertFalse(only_zeros)
def test_default_configuration(self):
    """Run SelectPercentileRegression with defaults on the "boston" dataset.

    Expects an unchanged row count, int(n_columns / 2) remaining columns
    and an output that is not entirely zero.
    """
    helper_kwargs = {
        "dataset": "boston",
        "Preprocessor": SelectPercentileRegression,
    }
    result, source = _test_preprocessing(**helper_kwargs)

    self.assertEqual(result.shape[0], source.shape[0])
    half_of_columns = int(source.shape[1] / 2)
    self.assertEqual(result.shape[1], half_of_columns)
    self.assertFalse((result == 0).all())
def test_default_configuration(self):
    """Check the default ExtraTreesPreprocessorClassification output.

    The row count must be preserved and the output must not be entirely
    zero.
    """
    selected, source = _test_preprocessing(ExtraTreesPreprocessorClassification)

    rows_out, rows_in = selected.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)

    only_zeros = (selected == 0).all()
    self.assertFalse(only_zeros)
def test_default_configuration(self):
    """Check the default FastICA output on the "diabetes" dataset.

    The row count must be preserved and the output must not be entirely
    zero.
    """
    components, source = _test_preprocessing(FastICA, dataset="diabetes")

    rows_out, rows_in = components.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)

    only_zeros = (components == 0).all()
    self.assertFalse(only_zeros)
def test_default_configuration(self):
    """Check the default TruncatedSVD output.

    The row count must be preserved and the output must not be entirely
    zero.
    """
    reduced, source = _test_preprocessing(TruncatedSVD)

    rows_out, rows_in = reduced.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)

    only_zeros = (reduced == 0).all()
    self.assertFalse(only_zeros)
def test_default_configuration(self):
    """Check the default ExtraTreesPreprocessorRegression output.

    The row count must be preserved and the output must not be entirely
    zero.
    """
    selected, source = _test_preprocessing(ExtraTreesPreprocessorRegression)

    rows_out, rows_in = selected.shape[0], source.shape[0]
    self.assertEqual(rows_out, rows_in)

    only_zeros = (selected == 0).all()
    self.assertFalse(only_zeros)