def test1_smallset(self):
        """Check that all four matchers return the same URI pairs on the small set.

        Each matcher is run on the same input frame. Only the ``uri_1`` and
        ``uri_2`` columns are compared, after sorting by them and resetting
        the index, so row order in a matcher's output does not matter.
        """
        frame = pd.read_csv("test/data/schema_matching/default_matches_cities_input.csv")
        uri_columns = ["uri_1", "uri_2"]

        def sorted_pairs(matcher):
            # Reduce a matcher's output to its URI pairs in a canonical order.
            pairs = matcher(frame)[uri_columns]
            pairs = pairs.sort_values(by=uri_columns)
            return pairs.reset_index(drop=True)

        from_relational = sorted_pairs(relational_matching)
        from_string = sorted_pairs(string_similarity_matching)
        from_schema = sorted_pairs(label_schema_matching)
        from_overlap = sorted_pairs(value_overlap_matching)

        # The relational result is the reference every other matcher must equal.
        candidates = (from_string, from_schema, from_overlap)
        assert all(from_relational.equals(candidate) for candidate in candidates)
    def test3_diffpredicate_diffmetric(self):
        """Compare string matching on a non-default predicate/metric to a stored result.

        Calls ``string_similarity_matching`` with ``predicate="dbo:abstract"``,
        ``similarity_metric="token_set_levenshtein"`` and the ``to_lowercase``,
        ``remove_prefixes`` and ``remove_punctuation`` flags all set to
        ``False``, then checks the output against the expected CSV.
        """
        source = pd.read_csv("test/data/schema_matching/string_matching_input_t3.csv")
        expected = pd.read_csv("test/data/schema_matching/string_matching_output_t3.csv")

        options = {
            "predicate": "dbo:abstract",
            "to_lowercase": False,
            "remove_prefixes": False,
            "remove_punctuation": False,
            "similarity_metric": "token_set_levenshtein",
        }
        actual = string_similarity_matching(source, **options)

        # check_like=True: the order of index and columns is ignored.
        pd.testing.assert_frame_equal(actual, expected, check_like=True)
    def test2_highthreshold(self):
        """Compare string matching with ``prefix_threshold=10`` to a stored result.

        All other arguments of ``string_similarity_matching`` are left at
        their defaults.
        """
        source = pd.read_csv("test/data/schema_matching/string_matching_input_t1t2.csv")
        expected = pd.read_csv("test/data/schema_matching/string_matching_output_t2.csv")

        actual = string_similarity_matching(source, prefix_threshold=10)

        # check_like=True: the order of index and columns is ignored.
        pd.testing.assert_frame_equal(
            actual,
            expected,
            check_like=True,
        )
    def test2_bigset(self):
        """Check that all four matchers return the same URI pairs on the big set.

        Each matcher is run on the same input frame. Only the ``uri_1`` and
        ``uri_2`` columns are compared, after sorting by them and resetting
        the index.
        """
        # WARNING: Takes long to run!

        frame = pd.read_csv("test/data/schema_matching/pair_equality_test2_bigset.csv")
        uri_columns = ["uri_1", "uri_2"]

        # Run the matchers in a fixed order; the first result is the baseline.
        matchers = (
            relational_matching,
            string_similarity_matching,
            label_schema_matching,
            value_overlap_matching,
        )
        normalised = []
        for matcher in matchers:
            pairs = matcher(frame)[uri_columns]
            normalised.append(pairs.sort_values(by=uri_columns).reset_index(drop=True))

        baseline, *others = normalised
        assert all(baseline.equals(other) for other in others)