示例#1
0
 def test_variance_drop_all(self):
     # Runs a variance filter with threshold=100000 on the Boston features.
     # Presumably that threshold is high enough to drop every column (per
     # the test name) -- TODO confirm.
     # NOTE(review): nothing is asserted here; a ValueError from fit() or
     # transform() is swallowed, and the test also passes when no error is
     # raised. Confirm whether assertRaises(ValueError) was intended.
     features, _ = get_data_label(load_boston())
     selector = Selective(SelectionMethod.Variance(threshold=100000))
     try:
         selector.fit(features)
         selector.transform(features)
     except ValueError:
         # Tolerated outcome of fitting/transforming with this threshold.
         pass
示例#2
0
    def test_correlation(self):
        # Correlation filter built with 0.60 on the full Boston features;
        # checks the exact, ordered column names left after fit + transform.
        features, _ = get_data_label(load_boston())

        selector = Selective(SelectionMethod.Correlation(0.60))
        selector.fit(features)
        kept = selector.transform(features)

        expected_columns = [
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'RM', 'PTRATIO', 'B'
        ]
        self.assertListEqual(list(kept.columns), expected_columns)
示例#3
0
    def test_correlation_small(self):
        # Correlation filter built with 0.60, run on the Boston features
        # after eight columns have been removed up front.
        features, _ = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        selector = Selective(SelectionMethod.Correlation(0.60))
        selector.fit(features)
        kept = selector.transform(features)

        # Exact, ordered set of surviving columns
        expected_columns = ['CRIM', 'ZN', 'AGE', 'B']
        self.assertListEqual(list(kept.columns), expected_columns)
示例#4
0
    def test_tree_invalid_num_features(self):
        # Asking the tree-based method for 100 features on the trimmed Boston
        # frame must make fit() raise ValueError.
        features, target = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        selector = Selective(SelectionMethod.TreeBased(num_features=100))

        # Only the fit call is expected to raise
        self.assertRaises(ValueError, selector.fit, features, target)
示例#5
0
    def test_variance_zero_threshold(self):
        # Variance filter with threshold=0 on the Boston features: the test
        # expects the column count to stay at 13 after fit + transform.
        features, _ = get_data_label(load_boston())

        selector = Selective(SelectionMethod.Variance(threshold=0))
        selector.fit(features)
        kept = selector.transform(features)

        # No column is removed: 13 in, 13 out
        self.assertEqual(features.shape[1], 13)
        self.assertEqual(kept.shape[1], 13)
示例#6
0
    def test_anova_classif_top_percentile(self):
        # Statistical selection with num_features=0.5 on iris, using the
        # method's default scoring (presumably ANOVA, per the test name --
        # TODO confirm). Two petal columns are expected to remain.
        features, target = get_data_label(load_iris())

        selector = Selective(SelectionMethod.Statistical(num_features=0.5))
        selector.fit(features, target)
        kept = selector.transform(features)

        # Two columns survive, in this order
        self.assertEqual(kept.shape[1], 2)
        expected_columns = ['petal length (cm)', 'petal width (cm)']
        self.assertListEqual(list(kept.columns), expected_columns)
示例#7
0
    def test_chi_regress_top_percentile_invalid(self):
        # Fitting the "chi_square" statistical method (num_features=0.6) on
        # the trimmed Boston frame and its label must raise TypeError.
        features, target = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        chi_square = SelectionMethod.Statistical(num_features=0.6,
                                                 method="chi_square")
        selector = Selective(chi_square)

        # Only the fit call is expected to raise
        self.assertRaises(TypeError, selector.fit, features, target)
示例#8
0
    def test_tree_estimator_lightgbm_classif_top_k(self):
        # Tree-based selection of 2 features on iris with a caller-supplied
        # estimator; only the number of selected columns is checked.
        # NOTE(review): the test name says "lightgbm" but the estimator built
        # here is an XGBClassifier -- confirm which one was intended.
        features, target = get_data_label(load_iris())

        estimator = XGBClassifier(random_state=Constants.default_seed)
        tree_method = SelectionMethod.TreeBased(num_features=2,
                                                estimator=estimator)
        selector = Selective(tree_method)
        selector.fit(features, target)
        kept = selector.transform(features)

        # Exactly two columns remain
        self.assertEqual(kept.shape[1], 2)
示例#9
0
    def test_variance_drop_target(self):
        # Variance filter with threshold=85 on the Boston features: the test
        # expects 4 of the 13 columns to remain.
        features, _ = get_data_label(load_boston())

        selector = Selective(SelectionMethod.Variance(threshold=85))
        selector.fit(features)
        kept = selector.transform(features)

        # 13 columns in, 4 named columns out
        self.assertEqual(features.shape[1], 13)
        self.assertEqual(kept.shape[1], 4)
        expected_columns = ['ZN', 'AGE', 'TAX', 'B']
        self.assertListEqual(list(kept.columns), expected_columns)
示例#10
0
    def test_tree_classif_top_k(self):
        # Tree-based selection of 2 features on iris with the method's
        # default estimator; the two petal columns are expected to remain.
        features, target = get_data_label(load_iris())

        selector = Selective(SelectionMethod.TreeBased(num_features=2))
        selector.fit(features, target)
        kept = selector.transform(features)

        # Two columns survive, in this order
        self.assertEqual(kept.shape[1], 2)
        expected_columns = ['petal length (cm)', 'petal width (cm)']
        self.assertListEqual(list(kept.columns), expected_columns)
示例#11
0
    def test_anova_regress_top_percentile_all(self):
        # Statistical selection with num_features=1.0 on the trimmed Boston
        # frame: the test expects every remaining column to be kept, in the
        # same order.
        features, target = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        selector = Selective(SelectionMethod.Statistical(num_features=1.0))
        selector.fit(features, target)
        kept = selector.transform(features)

        # Nothing is removed: same width and same column names
        self.assertEqual(features.shape[1], kept.shape[1])
        self.assertListEqual(list(features.columns), list(kept.columns))
示例#12
0
    def test_vif_top_k_no_label(self):
        # "variance_inflation" statistical selection of 2 features on iris,
        # fitted on the features alone (no label is passed to fit).
        features, _ = get_data_label(load_iris())

        vif_method = SelectionMethod.Statistical(num_features=2,
                                                 method="variance_inflation")
        selector = Selective(vif_method)
        selector.fit(features)
        kept = selector.transform(features)

        # Two columns survive, in this order
        self.assertEqual(kept.shape[1], 2)
        expected_columns = ['sepal width (cm)', 'petal width (cm)']
        self.assertListEqual(list(kept.columns), expected_columns)
示例#13
0
    def test_linear_regress_top_k_all(self):
        # Linear selection of 5 features on the trimmed Boston frame: the
        # test expects the selected columns to equal the input columns.
        features, target = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        selector = Selective(SelectionMethod.Linear(num_features=5))
        selector.fit(features, target)
        kept = selector.transform(features)

        # Nothing is removed: column names match the input exactly
        self.assertListEqual(list(features.columns), list(kept.columns))
示例#14
0
    def test_tree_regress_top_percentile(self):
        # Tree-based selection with num_features=0.6 on the trimmed Boston
        # frame; three named columns are expected to remain.
        features, target = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        selector = Selective(SelectionMethod.TreeBased(num_features=0.6))
        selector.fit(features, target)
        kept = selector.transform(features)

        # Three columns survive, in this order
        self.assertEqual(kept.shape[1], 3)
        expected_columns = ['CRIM', 'AGE', 'LSTAT']
        self.assertListEqual(list(kept.columns), expected_columns)
示例#15
0
    def test_tree_estimator_adaboost_classif_top_k(self):
        # Tree-based selection of 2 features on iris, driven by an
        # AdaBoostClassifier seeded with Constants.default_seed.
        features, target = get_data_label(load_iris())

        estimator = AdaBoostClassifier(random_state=Constants.default_seed)
        tree_method = SelectionMethod.TreeBased(num_features=2,
                                                estimator=estimator)
        selector = Selective(tree_method)
        selector.fit(features, target)
        kept = selector.transform(features)

        # Two columns survive, in this order
        self.assertEqual(kept.shape[1], 2)
        expected_columns = ['petal length (cm)', 'petal width (cm)']
        self.assertListEqual(list(kept.columns), expected_columns)
示例#16
0
    def test_linear_classif_top_percentile_all(self):
        # Linear selection with num_features=1.0 on iris: the test expects
        # all four columns to be kept.
        features, target = get_data_label(load_iris())

        selector = Selective(SelectionMethod.Linear(num_features=1.0))
        selector.fit(features, target)
        kept = selector.transform(features)

        # All four columns remain, in this order
        self.assertEqual(kept.shape[1], 4)
        expected_columns = [
            'sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
            'petal width (cm)'
        ]
        self.assertListEqual(list(kept.columns), expected_columns)
示例#17
0
    def test_ridge_regress_top_k(self):
        # Linear selection of 3 features with regularization="ridge" on the
        # trimmed Boston frame; three named columns are expected to remain.
        features, target = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        ridge = SelectionMethod.Linear(num_features=3, regularization="ridge")
        selector = Selective(ridge)
        selector.fit(features, target)
        kept = selector.transform(features)

        # Three columns survive, in this order
        self.assertEqual(kept.shape[1], 3)
        expected_columns = ['CRIM', 'AGE', 'LSTAT']
        self.assertListEqual(list(kept.columns), expected_columns)
示例#18
0
    def test_tree_estimator_xgboost_regress_top_k(self):
        # Tree-based selection of 3 features on the trimmed Boston frame,
        # driven by an XGBRegressor seeded with Constants.default_seed.
        # Only the number of selected columns is checked.
        features, target = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        estimator = XGBRegressor(random_state=Constants.default_seed)
        tree_method = SelectionMethod.TreeBased(num_features=3,
                                                estimator=estimator)
        selector = Selective(tree_method)
        selector.fit(features, target)
        kept = selector.transform(features)

        # Exactly three columns remain
        self.assertEqual(kept.shape[1], 3)
示例#19
0
    def test_mutual_classif_top_k_all(self):
        # "mutual_info" statistical selection of 4 features on iris: the
        # test expects all four columns to be kept.
        features, target = get_data_label(load_iris())

        mutual_info = SelectionMethod.Statistical(num_features=4,
                                                  method="mutual_info")
        selector = Selective(mutual_info)
        selector.fit(features, target)
        kept = selector.transform(features)

        # All four columns remain, in this order
        self.assertEqual(kept.shape[1], 4)
        expected_columns = [
            'sepal length (cm)', 'sepal width (cm)', 'petal length (cm)',
            'petal width (cm)'
        ]
        self.assertListEqual(list(kept.columns), expected_columns)
示例#20
0
    def test_variance_lt1(self):
        # Variance filter with threshold=1.0 on the Boston features: the
        # test expects 10 of the 13 columns to remain.
        features, _ = get_data_label(load_boston())

        selector = Selective(SelectionMethod.Variance(threshold=1.0))
        selector.fit(features)
        kept = selector.transform(features)

        # 13 columns in, 10 named columns out
        self.assertEqual(features.shape[1], 13)
        self.assertEqual(kept.shape[1], 10)
        expected_columns = [
            'CRIM', 'ZN', 'INDUS', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
            'LSTAT'
        ]
        self.assertListEqual(list(kept.columns), expected_columns)
示例#21
0
    def test_vif_top_k_regression(self):
        # "variance_inflation" statistical selection of 2 features on the
        # trimmed Boston frame, fitted with the label supplied.
        features, target = get_data_label(load_boston())
        removed = [
            "CHAS", "NOX", "RM", "DIS", "RAD", "TAX", "PTRATIO", "INDUS"
        ]
        features = features.drop(columns=removed)

        vif_method = SelectionMethod.Statistical(num_features=2,
                                                 method="variance_inflation")
        selector = Selective(vif_method)
        selector.fit(features, target)
        kept = selector.transform(features)

        # Two columns survive, in this order
        self.assertEqual(kept.shape[1], 2)
        expected_columns = ['CRIM', 'ZN']
        self.assertListEqual(list(kept.columns), expected_columns)