def test7_no_http_input(self):
    """Run relational_matching on a small all-integer frame.

    The input has two integer columns, 'a' and 'b'. The expected result
    is a frame with the columns uri_1, uri_2 and value and no rows.
    Judging by the test name, the point is that none of the input
    columns look like HTTP URIs -- confirm against relational_matching.
    """
    # Arrange: plain numeric data and the empty match table we expect.
    frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
    empty_matches = pd.DataFrame(columns=["uri_1", "uri_2", "value"])

    # Act
    result = relational_matching(frame)

    # Assert: check_like=True ignores row/column ordering differences.
    pd.testing.assert_frame_equal(result, empty_matches, check_like=True)
def test1_smallset(self):
    """Check that all four matchers yield the same URI pairs (small set).

    Reads the default cities input CSV, runs relational_matching,
    string_similarity_matching, label_schema_matching and
    value_overlap_matching on it, and compares only the (uri_1, uri_2)
    pairs of each result -- the 'value' column is deliberately ignored.
    Each of the other matchers is compared against relational_matching.
    """

    def _sorted_pairs(matches):
        # Reduce a match table to its URI pairs in a canonical row order
        # with a fresh index, so that DataFrame.equals compares content
        # only.
        pairs = matches[["uri_1", "uri_2"]]
        pairs = pairs.sort_values(by=["uri_1", "uri_2"])
        return pairs.reset_index(drop=True)

    path_input = "test/data/schema_matching/default_matches_cities_input.csv"
    df = pd.read_csv(path_input)

    # All matchers are evaluated up front, in the same order as before,
    # so a crash inside any of them surfaces before the comparisons.
    pairs_relational = _sorted_pairs(relational_matching(df))
    candidates = [
        ("string_similarity_matching",
         _sorted_pairs(string_similarity_matching(df))),
        ("label_schema_matching",
         _sorted_pairs(label_schema_matching(df))),
        ("value_overlap_matching",
         _sorted_pairs(value_overlap_matching(df))),
    ]

    # One assertion per matcher so a failure names the diverging matcher
    # instead of reporting a bare "assert False".
    for matcher_name, pairs in candidates:
        assert pairs_relational.equals(pairs), (
            matcher_name
            + " returned different URI pairs than relational_matching"
        )
def test4_uri_querier_no_matches(self):
    """Compare relational_matching(uri_data_model=True) with a fixture.

    Loads the "no matches" cities input CSV, runs relational_matching
    with uri_data_model=True, converts the resulting 'value' column to
    numeric and checks the frame against the stored expected CSV.
    """
    source = pd.read_csv(
        "test/data/schema_matching/no_matches_cities_input.csv")
    wanted = pd.read_csv(
        "test/data/schema_matching/no_matches_cities_expected.csv")

    result = relational_matching(source, uri_data_model=True)

    # Make the score column numeric before comparing with the CSV
    # fixture, whose dtypes are whatever read_csv inferred.
    numeric_scores = pd.to_numeric(result['value'])
    result['value'] = numeric_scores

    # check_like=True ignores row/column ordering differences.
    pd.testing.assert_frame_equal(result, wanted, check_like=True)
def test1_default(self):
    """Compare relational_matching with default arguments to a fixture.

    Loads the default cities input CSV, runs relational_matching on it,
    converts the resulting 'value' column to numeric and checks the
    frame against the stored expected CSV.
    """
    source = pd.read_csv(
        "test/data/schema_matching/default_matches_cities_input.csv")
    wanted = pd.read_csv(
        "test/data/schema_matching/default_matches_cities_expected.csv")

    result = relational_matching(source)

    # Make the score column numeric before comparing with the CSV
    # fixture, whose dtypes are whatever read_csv inferred.
    numeric_scores = pd.to_numeric(result['value'])
    result['value'] = numeric_scores

    # check_like=True ignores row/column ordering differences.
    pd.testing.assert_frame_equal(result, wanted, check_like=True)
def test5_match_score(self):
    """Check that match_score replaces the value 1 in the output.

    Reuses the default cities fixture: every expected 'value' equal to 1
    is rewritten to the custom score (0.76), then the adjusted fixture
    is compared with relational_matching(df, match_score=score).
    """
    score = 0.76

    source = pd.read_csv(
        "test/data/schema_matching/default_matches_cities_input.csv")
    wanted = pd.read_csv(
        "test/data/schema_matching/default_matches_cities_expected.csv")

    # Rows whose stored value is exactly 1 get the custom score; all
    # other rows keep their stored value.
    holds_one = wanted['value'] == 1
    wanted['value'] = np.where(holds_one, score, wanted['value'])

    result = relational_matching(source, match_score=score)

    # Make the score column numeric before comparing with the fixture.
    numeric_scores = pd.to_numeric(result['value'])
    result['value'] = numeric_scores

    # check_like=True ignores row/column ordering differences.
    pd.testing.assert_frame_equal(result, wanted, check_like=True)
def test2_bigset(self):
    """Check that all four matchers yield the same URI pairs (big set).

    Reads the big pair-equality input CSV, runs relational_matching,
    string_similarity_matching, label_schema_matching and
    value_overlap_matching on it, and compares only the (uri_1, uri_2)
    pairs of each result -- the 'value' column is deliberately ignored.
    Each of the other matchers is compared against relational_matching.
    """
    # WARNING: Takes long to run!

    def _sorted_pairs(matches):
        # Reduce a match table to its URI pairs in a canonical row order
        # with a fresh index, so that DataFrame.equals compares content
        # only.
        pairs = matches[["uri_1", "uri_2"]]
        pairs = pairs.sort_values(by=["uri_1", "uri_2"])
        return pairs.reset_index(drop=True)

    path_input = "test/data/schema_matching/pair_equality_test2_bigset.csv"
    df = pd.read_csv(path_input)

    # All matchers are evaluated up front, in the same order as before,
    # so a crash inside any of them surfaces before the comparisons.
    pairs_relational = _sorted_pairs(relational_matching(df))
    candidates = [
        ("string_similarity_matching",
         _sorted_pairs(string_similarity_matching(df))),
        ("label_schema_matching",
         _sorted_pairs(label_schema_matching(df))),
        ("value_overlap_matching",
         _sorted_pairs(value_overlap_matching(df))),
    ]

    # One assertion per matcher so a failure names the diverging matcher
    # instead of reporting a bare "assert False".
    for matcher_name, pairs in candidates:
        assert pairs_relational.equals(pairs), (
            matcher_name
            + " returned different URI pairs than relational_matching"
        )