示例#1
0
    def test_FeatureReducer(self):
        """Run FeatureReducer in three configurations and check the output.

        Covers, in order:
          * the default constructor: the output must have fewer columns than
            the fixture, the target must not be listed in
            ``retained_features`` and ``removed_features`` must have exactly
            two entries;
          * ``('corr', 'rebate')`` with ``n_rebate_features=40``: the output
            must have 41 columns and still contain the target;
          * ``('rebate', )`` with ``n_rebate_features=0.2``: the output must
            have 84 columns, and ``transform`` on the fixture must yield the
            same columns as ``fit_transform`` did.
        """
        target = 'gap expt'
        fr = FeatureReducer()

        # ultra-basic case: are we reducing at least 1 feature?
        df = fr.fit_transform(self.test_df, target)
        self.assertLess(df.shape[1], self.test_df.shape[1])

        # ensure metadata is being written correctly
        self.assertNotIn(target, fr.retained_features)
        self.assertEqual(len(fr.removed_features), 2)

        # ensure other combinations of feature reducers are working
        fr = FeatureReducer(reducers=('corr', 'rebate'), n_rebate_features=40)
        df = fr.fit_transform(self.test_df, target)
        self.assertEqual(df.shape[1], 41)  # 40 features + target
        self.assertIn(target, df.columns)

        # ensure the same thing works when fraction is used
        fr = FeatureReducer(reducers=('rebate', ), n_rebate_features=0.2)
        df = fr.fit_transform(self.test_df, target)
        self.assertEqual(df.shape[1], 83 + 1)

        # test transferability
        # Hand transform() a copy so the fixture object itself is not the
        # frame being transformed (a plain assignment would only alias it).
        df2 = fr.transform(self.test_df.copy(), target)
        self.assertListEqual(df.columns.tolist(), df2.columns.tolist())
示例#2
0
 def test_FeatureReducer_classification(self):
     """Check that the "corr" reducer runs on a string-labelled target.

     The target column is replaced by the labels "semiconductor" (values
     strictly between 0.0 and 3.0) and "nonmetal" (everything else), then
     ``fit_transform`` is run; the result is expected to keep 200 rows.
     """
     # test classification with corr matrix (no errors)
     fr = FeatureReducer(reducers=("corr", ))
     # Work on a copy: the target column is overwritten below, and doing
     # that through an alias would rewrite the column on self.test_df itself.
     df_class = self.test_df.copy()
     df_class[self.target] = [
         "semiconductor" if 0.0 < g < 3.0 else "nonmetal"
         for g in df_class[self.target]
     ]
     df_class = fr.fit_transform(df_class, self.target)
     self.assertEqual(df_class.shape[0], 200)
示例#3
0
    def test_FeatureReducer_transferability(self):
        """Check that a fitted "rebate" reducer yields the same columns again.

        The reducer is fitted with ``n_rebate_features`` given as a fraction
        (0.2); the fitted output is expected to be 84 columns wide
        (presumably 83 features plus the target). ``transform`` is then run
        on a deep copy of the fixture and must return the same column list.
        """
        # Fit with a fractional feature count.
        reducer = FeatureReducer(reducers=("rebate", ), n_rebate_features=0.2)
        fitted = reducer.fit_transform(self.test_df, self.target)
        expected_n_columns = 83 + 1
        self.assertEqual(fitted.shape[1], expected_n_columns)

        # Apply the already-fitted reducer to an independent copy of the data.
        fresh_input = deepcopy(self.test_df)
        transformed = reducer.transform(fresh_input, self.target)
        fitted_columns = fitted.columns.tolist()
        transformed_columns = transformed.columns.tolist()
        self.assertListEqual(fitted_columns, transformed_columns)
示例#4
0
    def test_FeatureReducer_basic(self):
        """Check the "corr" + "tree" reducers drop columns and record metadata.

        After ``fit_transform`` the output must have fewer columns than the
        fixture, the target must not be listed in ``retained_features`` and
        ``removed_features`` must have exactly two entries.
        """
        fr = FeatureReducer(reducers=("corr", "tree"))

        # ultra-basic case: are we reducing at least 1 feature?
        df = fr.fit_transform(self.test_df, self.target)
        self.assertLess(df.shape[1], self.test_df.shape[1])

        # ensure metadata is being written correctly
        self.assertNotIn(self.target, fr.retained_features)
        self.assertEqual(len(fr.removed_features), 2)
示例#5
0
    def test_manual_feature_reduction(self):
        """Check manual removal through the ``remove_features`` argument.

        With no reducers configured, the named column must be absent from the
        output and recorded under ``removed_features["manual"]``. Asking for
        a column name that does not exist must not raise; the name is
        expected to show up in the captured log output instead.
        """
        fr = FeatureReducer(reducers=[], remove_features=["LUMO_element_Th"])

        # the manually listed feature must be dropped and recorded
        df = fr.fit_transform(self.test_df, self.target)
        self.assertNotIn("LUMO_element_Th", df.columns)
        self.assertEqual(fr.removed_features["manual"], ["LUMO_element_Th"])

        # test removing feature that doesn't exist
        fr = FeatureReducer(reducers=[], remove_features=["abcdefg12345!!"])

        with self.assertLogs(AMM_LOGGER_BASENAME, level="INFO") as cm:
            # should give log warning but not throw an error
            fr.fit_transform(self.test_df, self.target)
        # Checked after the context exits so that "nothing was logged" is
        # reported by assertLogs itself rather than by this assertion.
        self.assertIn("abcdefg12345!!", " ".join(cm.output))
示例#6
0
    def test_FeatureReducer_pca(self):
        """Check output shapes of the "pca" reducer on three inputs.

        1. The first 10 rows of the fixture with ``n_pca_features='auto'``:
           expected shape (10, 11).
        2. The same reducer on an 18-feature subset plus the target:
           expected to keep 200 rows.
        3. A new reducer with ``n_pca_features=0.5`` on the full fixture:
           expected shape (200, 201).
        """
        # 1) n_samples < n_features: only ten rows are passed in.
        auto_pca = FeatureReducer(reducers=("pca", ), n_pca_features='auto')
        few_rows = self.test_df.iloc[:10]
        reduced = auto_pca.fit_transform(few_rows, self.target)
        self.assertTupleEqual(reduced.shape, (10, 11))

        # 2) n_samples > n_features: a small hand-picked set of columns,
        #    refitting the same 'auto' reducer.
        feature_names = [
            'HOMO_energy', 'LUMO_energy', 'gap_AO',
            'minimum X', 'maximum X', 'range X', 'mean X', 'std_dev X',
            'minimum row', 'maximum row', 'range row', 'mean row',
            'std_dev row',
            'minimum group', 'maximum group', 'range group', 'mean group',
            'std_dev group',
        ]
        selected_columns = feature_names + [self.target]
        few_columns = self.test_df[selected_columns]
        reduced = auto_pca.fit_transform(few_columns, self.target)
        n_rows = reduced.shape[0]
        self.assertEqual(n_rows, 200)

        # 3) explicitly requested n_pca_features=0.5 on the whole fixture.
        half_pca = FeatureReducer(reducers=('pca', ), n_pca_features=0.5)
        reduced = half_pca.fit_transform(self.test_df, self.target)
        self.assertTupleEqual(reduced.shape, (200, 201))
示例#7
0
    def test_FeatureReducer_pca(self):
        """Verify the shapes produced by the "pca" reducer.

        Three scenarios are run: ten rows of the fixture with
        ``n_pca_features="auto"`` (expects shape (10, 11)); the same reducer
        on 18 selected features plus the target (expects 200 rows); and a
        reducer with ``n_pca_features=0.5`` on the whole fixture (expects
        shape (200, 201)).
        """
        target = self.target

        # Scenario 1: n_samples < n_features.
        ten_rows = self.test_df.iloc[:10]
        reducer = FeatureReducer(reducers=("pca", ), n_pca_features="auto")
        out = reducer.fit_transform(ten_rows, target)
        self.assertTupleEqual(out.shape, (10, 11))

        # Scenario 2: n_samples > n_features. The subset is three orbital
        # columns followed by five statistics for each of X, row and group,
        # generated in the order "<stat> <property>".
        stats = ("minimum", "maximum", "range", "mean", "std_dev")
        properties = ("X", "row", "group")
        subset = ["HOMO_energy", "LUMO_energy", "gap_AO"]
        subset.extend(f"{stat} {prop}" for prop in properties for stat in stats)
        narrow = self.test_df[subset + [target]]
        out = reducer.fit_transform(narrow, target)
        self.assertEqual(out.shape[0], 200)

        # Scenario 3: manually specified n_pca_features on the full fixture.
        reducer = FeatureReducer(reducers=("pca", ), n_pca_features=0.5)
        out = reducer.fit_transform(self.test_df, target)
        self.assertTupleEqual(out.shape, (200, 201))
示例#8
0
    def test_saving_feature_from_removal(self):
        """Check that a column named in ``keep_features`` is still present.

        The "corr" reducer is run with ``keep_features=["maximum X"]`` and the
        output is required to still contain the "maximum X" column.
        """
        fr = FeatureReducer(reducers=("corr", ), keep_features=["maximum X"])

        # the explicitly kept feature must still be in the output
        df = fr.fit_transform(self.test_df, self.target)
        self.assertIn("maximum X", df.columns)
示例#9
0
 def test_FeatureReducer_combinations(self):
     """Check that chaining "pca", "rebate" and "tree" narrows the frame.

     The output of ``fit_transform`` must have strictly fewer columns than
     the fixture it was given.
     """
     reducer = FeatureReducer(reducers=("pca", "rebate", "tree"))
     original = self.test_df
     reduced = reducer.fit_transform(original, self.target)
     n_columns_before = original.shape[1]
     n_columns_after = reduced.shape[1]
     self.assertGreater(n_columns_before, n_columns_after)
示例#10
0
 def test_FeatureReducer_advanced(self):
     """Check "corr" + "rebate" with an absolute ``n_rebate_features`` of 40.

     The output is expected to have exactly 41 columns and to still
     contain the target column.
     """
     # ensure other combinations of feature reducers are working
     fr = FeatureReducer(reducers=("corr", "rebate"), n_rebate_features=40)
     df = fr.fit_transform(self.test_df, self.target)
     self.assertEqual(df.shape[1], 41)  # 40 features + self.target
     self.assertIn(self.target, df.columns)