def test_add_feature_name_already_present(self):
    """Register the same feature name in a feature table twice.

    The feature is built from an ``exact_match`` expression over the
    ``zipcode`` attribute and added under the name ``'test'`` both times.

    NOTE(review): no assertion is visible inside this method; presumably
    the expected outcome of the second call is declared outside it (for
    example by a decorator) -- confirm.
    """
    # The loaded tables are not used below. The calls are kept because
    # read_csv_metadata may have side effects -- TODO confirm and drop.
    read_csv_metadata(path_a)
    read_csv_metadata(path_b, key='ID')

    feature_table = create_feature_table()
    feature_string = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
    f_dict = get_feature_fn(feature_string,
                            get_tokenizers_for_matching(),
                            get_sim_funs_for_matching())

    add_feature(feature_table, 'test', f_dict)
    # Second registration under the same name is the case under test.
    add_feature(feature_table, 'test', f_dict)
def test_add_bb_feature_name_already_present(self):
    """Register the same black-box feature name in a feature table twice.

    A constant black-box function is added under the name ``'test'`` both
    times.

    NOTE(review): no assertion is visible inside this method; presumably
    the expected outcome of the second call is declared outside it (for
    example by a decorator) -- confirm.
    """
    # The loaded tables are not used below. The calls are kept because
    # read_csv_metadata may have side effects -- TODO confirm and drop.
    read_csv_metadata(path_a)
    read_csv_metadata(path_b, key='ID')

    feature_table = create_feature_table()

    def bb_fn(ltuple, rtuple):
        # Black-box feature that ignores its inputs.
        return 1.0

    add_blackbox_feature(feature_table, 'test', bb_fn)
    # Second registration under the same name is the case under test.
    add_blackbox_feature(feature_table, 'test', bb_fn)
def test_add_bb_feature_valid_2(self):
    """Add one black-box feature and check that it is stored and callable.

    Verifies that the feature table grows by exactly one row and that the
    function stored in the last row returns 1.0 for a pair of tuples taken
    from the two input tables.

    NOTE(review): this file contains another method with the same name; if
    both live in the same class, the later definition replaces the earlier
    one -- confirm and deduplicate.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = create_feature_table()

    def bb_fn(ltuple, rtuple):
        # Black-box feature that ignores its inputs.
        return 1.0

    len1 = len(feature_table)
    add_blackbox_feature(feature_table, 'test', bb_fn)
    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)

    # DataFrame.ix was removed in pandas 1.0; .loc is the label-based
    # replacement. Assumes the feature table and both input tables carry
    # integer row labels -- TODO confirm.
    last_row = len(feature_table) - 1
    stored_fn = feature_table.loc[last_row, 'function']
    self.assertEqual(stored_fn(A.loc[1], B.loc[2]), 1.0)
def test_add_feature_empty_df(self):
    """Add one declarative feature to a freshly created feature table.

    Verifies that the feature table grows by exactly one row and that the
    function stored in the last row returns 1.0 for a pair of tuples taken
    from the two input tables.

    NOTE(review): this file contains another method with the same name; if
    both live in the same class, the later definition replaces the earlier
    one -- confirm and deduplicate.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = create_feature_table()
    len1 = len(feature_table)

    feature_string = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
    f_dict = get_feature_fn(feature_string,
                            get_tokenizers_for_matching(),
                            get_sim_funs_for_matching())
    add_feature(feature_table, 'test', f_dict)

    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)

    # DataFrame.ix was removed in pandas 1.0; .loc is the label-based
    # replacement. Assumes the feature table and both input tables carry
    # integer row labels -- TODO confirm.
    last_row = len(feature_table) - 1
    stored_fn = feature_table.loc[last_row, 'function']
    self.assertEqual(stored_fn(A.loc[1], B.loc[2]), 1.0)
def test_add_feature_empty_df(self):
    """Add one declarative feature to a freshly created feature table.

    Checks that the table gains exactly one row and that calling the
    function stored in that last row on a tuple from each input table
    yields 1.0.

    NOTE(review): this file contains another method with the same name; if
    both live in the same class, the later definition replaces the earlier
    one -- confirm and deduplicate.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = create_feature_table()
    len1 = len(feature_table)

    feature_string = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
    f_dict = get_feature_fn(feature_string,
                            get_tokenizers_for_matching(),
                            get_sim_funs_for_matching())
    add_feature(feature_table, 'test', f_dict)

    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)

    # DataFrame.ix was removed in pandas 1.0; .loc is the label-based
    # replacement. Assumes the feature table and both input tables carry
    # integer row labels -- TODO confirm.
    last_row = len(feature_table) - 1
    stored_fn = feature_table.loc[last_row, 'function']
    self.assertEqual(stored_fn(A.loc[1], B.loc[2]), 1.0)
def test_add_bb_feature_valid_2(self):
    """Add one black-box feature and check that it is stored and callable.

    Checks that the table gains exactly one row and that calling the
    function stored in that last row on a tuple from each input table
    yields 1.0.

    NOTE(review): this file contains another method with the same name; if
    both live in the same class, the later definition replaces the earlier
    one -- confirm and deduplicate.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = create_feature_table()

    def bb_fn(ltuple, rtuple):
        # Black-box feature that ignores its inputs.
        return 1.0

    len1 = len(feature_table)
    add_blackbox_feature(feature_table, 'test', bb_fn)
    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)

    # DataFrame.ix was removed in pandas 1.0; .loc is the label-based
    # replacement. Assumes the feature table and both input tables carry
    # integer row labels -- TODO confirm.
    last_row = len(feature_table) - 1
    stored_fn = feature_table.loc[last_row, 'function']
    self.assertEqual(stored_fn(A.loc[1], B.loc[2]), 1.0)