def test2_ig(self):
    """Compare ``tree_based_filter`` with ``metric='IG'`` to the stored result.

    The input CSV is expanded with ``direct_type_generator`` on the ``'uri'``
    column (``hierarchy=True``), filtered against the ``'europe'`` label
    column, and the outcome is checked against the expected CSV.
    """
    # Arrange: typed input frame and the reference frame.
    typed_frame = direct_type_generator(
        pd.read_csv("test/data/feature_selection/tree_based_test_input.csv"),
        ['uri'],
        hierarchy=True,
    )
    reference_frame = pd.read_csv("test/data/feature_selection/tree_based_test2_expected.csv")

    # Act: run the filter under test.
    filtered_frame = tree_based_filter(typed_frame, 'europe', metric='IG')

    # Assert: check_like=True makes the comparison ignore index/column order.
    pd.testing.assert_frame_equal(filtered_frame, reference_frame, check_like=True)
Example #2
0
 def transform(self, X, y=None):
     """Run ``direct_type_generator`` on ``X`` with this instance's settings.

     Args:
         X: Data handed to ``direct_type_generator`` as its first argument.
         y: Accepted but never read by this method (presumably kept for
             transformer-API compatibility; confirm against the class).

     Returns:
         Whatever ``direct_type_generator`` returns for ``X``.
     """
     # Positional order matters: it must match the callee's signature.
     positional_config = (
         self.columns,
         self.endpoint,
         self.uri_data_model,
         self.progress,
         self.prefix,
         self.regex_filter,
         self.result_type,
         self.bundled_mode,
         self.hierarchy,
         self.prefix_lookup,
     )
     return direct_type_generator(X, *positional_config, caching=self.caching)
 def test2_no_pruning_correlation(self):
     """Check ``hierarchy_based_filter`` with the correlation metric, no pruning.

     A four-row frame of entity names and DBpedia resource links is expanded
     with ``direct_type_generator`` on the ``"link"`` column, then filtered
     with ``threshold=0.99``, ``metric="correlation"`` and ``pruning=False``.
     The result is compared with the stored expected CSV.
     """
     df = pd.DataFrame({
         'entities': ['Paris', 'Buenos Aires', 'Mannheim', "München"],
         'link': ['http://dbpedia.org/resource/Paris', 'http://dbpedia.org/resource/Buenos_Aires',
                  'http://dbpedia.org/resource/Mannheim', 'http://dbpedia.org/resource/Munich']
         })

     # Forward slashes on purpose: in a normal string literal "\f" is a form
     # feed, so a backslash-separated path here would not name the fixture
     # file. Forward slashes also work on every platform.
     expected_df = pd.read_csv("test/data/feature_selection/hierarchy_based_test2_expected.csv")

     input_df = direct_type_generator(df, ["link"], regex_filter=['A'], result_type="boolean", bundled_mode=True, hierarchy=True)

     # NOTE(review): input_DG is not assigned inside this method; presumably
     # it is defined elsewhere in the module. Confirm it is in scope.
     output_df = hierarchy_based_filter(input_df, "link", threshold=0.99, G=input_DG, metric="correlation", pruning=False)

     # check_like=True makes the comparison ignore index/column order.
     pd.testing.assert_frame_equal(output_df, expected_df, check_like=True)