def test2_ig(self):
    """Compare ``tree_based_filter`` output for ``metric='IG'`` with a stored CSV.

    The shared tree-based input CSV is loaded, its ``uri`` column is run
    through ``direct_type_generator`` with ``hierarchy=True``, and the result
    is filtered with ``'europe'`` as the second positional argument. The
    filtered frame must equal the expected CSV.
    """
    raw_frame = pd.read_csv(
        "test/data/feature_selection/tree_based_test_input.csv"
    )
    typed_frame = direct_type_generator(raw_frame, ['uri'], hierarchy=True)

    expected = pd.read_csv(
        "test/data/feature_selection/tree_based_test2_expected.csv"
    )
    actual = tree_based_filter(typed_frame, 'europe', metric='IG')

    # check_like=True: index/column order is ignored in the comparison.
    pd.testing.assert_frame_equal(actual, expected, check_like=True)
def transform(self, X, y=None):
    """Apply ``direct_type_generator`` to ``X`` with this instance's settings.

    Args:
        X: Data forwarded as the first argument of ``direct_type_generator``.
        y: Not used by this method (presumably kept for scikit-learn style
            transformer signatures -- TODO confirm).

    Returns:
        The value returned by ``direct_type_generator``.
    """
    # Arguments stay positional (except ``caching``) exactly as in the
    # original call, so the mapping onto the callee's parameters is unchanged.
    return direct_type_generator(
        X,
        self.columns,
        self.endpoint,
        self.uri_data_model,
        self.progress,
        self.prefix,
        self.regex_filter,
        self.result_type,
        self.bundled_mode,
        self.hierarchy,
        self.prefix_lookup,
        caching=self.caching,
    )
def test2_no_pruning_correlation(self):
    """Compare unpruned, correlation-based ``hierarchy_based_filter`` output with a stored CSV.

    A four-row frame of city names and DBpedia resource links is expanded by
    ``direct_type_generator`` (``regex_filter=['A']``, boolean results,
    bundled mode, ``hierarchy=True``). The result is filtered with
    ``threshold=0.99``, ``metric="correlation"`` and ``pruning=False``, and
    must equal the expected CSV.
    """
    df = pd.DataFrame({
        'entities': ['Paris', 'Buenos Aires', 'Mannheim', "München"],
        'link': [
            'http://dbpedia.org/resource/Paris',
            'http://dbpedia.org/resource/Buenos_Aires',
            'http://dbpedia.org/resource/Mannheim',
            'http://dbpedia.org/resource/Munich',
        ],
    })

    # Forward slashes on purpose: with backslash separators in a normal
    # (non-raw) literal, "\f" is parsed as a form-feed character and corrupts
    # the path, while "\d" and "\h" are invalid escape sequences.
    expected_df = pd.read_csv(
        "test/data/feature_selection/hierarchy_based_test2_expected.csv"
    )

    input_df = direct_type_generator(
        df,
        ["link"],
        regex_filter=['A'],
        result_type="boolean",
        bundled_mode=True,
        hierarchy=True,
    )

    # NOTE(review): ``input_DG`` is not assigned anywhere in this method; it
    # must come from an enclosing/module scope -- confirm it is defined there.
    output_df = hierarchy_based_filter(
        input_df,
        "link",
        threshold=0.99,
        G=input_DG,
        metric="correlation",
        pruning=False,
    )

    # check_like=True: index/column order is ignored in the comparison.
    pd.testing.assert_frame_equal(output_df, expected_df, check_like=True)