def test_feature_fn_valid_nosim_tok(self):
    """Call ``get_feature_fn`` with empty tokenizer and sim-function dicts.

    Both input tables are loaded and the default matching features are
    generated before the feature string is compiled.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    generated = get_features_for_matching(left, right)
    initial_count = len(generated)
    expression = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
    # No tokenizers and no similarity functions are made available.
    compiled = get_feature_fn(expression, {}, {})
def test_vis_debug_matcher_rf_ex_attrs_notin_test(self):
    """Run ``_vis_debug_rf`` when an excluded attribute is absent from test.

    ``'_id'`` is dropped from the test split but is still listed in
    ``exclude_attrs``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Seven 0 labels followed by eight 1 labels.
    pairs['labels'] = [0] * 7 + [1] * 8
    features = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs, feature_table=features, attrs_after='labels')
    matcher = RFMatcher()
    split = mu.split_train_test(vectors)
    train_part = split['train']
    test_part = split['test']
    test_part.drop('_id', inplace=True, axis=1)
    _vis_debug_rf(
        matcher,
        train_part,
        test_part,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
        target_attr='labels',
        show_window=False,
    )
def test_feature_fn_valid_nosim_tok(self):
    """Call ``get_feature_fn`` with empty tokenizer and sim-function dicts.

    The default matching features are generated first, with
    ``validate_inferred_attr_types=False``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    generated = get_features_for_matching(
        left, right, validate_inferred_attr_types=False)
    initial_count = len(generated)
    expression = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
    # No tokenizers and no similarity functions are made available.
    compiled = get_feature_fn(expression, {}, {})
def test_vis_debug_matcher_dt_valid_1(self):
    """Run ``_vis_debug_dt`` on a train/test split of labelled vectors.

    Feature vectors are extracted with a ``labels`` column appended,
    split with ``mu.split_train_test`` and passed on with
    ``show_window=False``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Seven 0 labels followed by eight 1 labels.
    pairs['labels'] = [0] * 7 + [1] * 8
    features = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs, feature_table=features, attrs_after='labels')
    matcher = DTMatcher()
    split = mu.split_train_test(vectors)
    train_part = split['train']
    test_part = split['test']
    _vis_debug_dt(
        matcher,
        train_part,
        test_part,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
        target_attr='labels',
        show_window=False,
    )
def test_get_features_for_matching_valid(self):
    """Check that generated features form a DataFrame of callable functions.

    Every entry of the ``function`` column is applied to one row from
    each table and must return a value that is >= 0.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feat_table = afg.get_features_for_matching(A, B)
    self.assertIsInstance(feat_table, pd.DataFrame)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes both tables carry an integer index -- TODO confirm.
    ltuple = A.loc[1]
    rtuple = B.loc[2]
    for feature_fn in feat_table['function']:
        self.assertGreaterEqual(feature_fn(ltuple, rtuple), 0)
def test_get_features_for_matching_validate_types_valid_no(self):
    """Expect ``None`` when ``'n'`` is supplied through ``mockInput``.

    Feature generation is invoked with
    ``validate_inferred_attr_types=True`` inside the ``mockInput('n')``
    context.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    with mockInput('n'):
        feat_table = afg.get_features_for_matching(
            A, B, validate_inferred_attr_types=True)
    # An identity check fails cleanly even if a DataFrame comes back;
    # ``assertEqual(df, None)`` would raise ValueError on the ambiguous
    # truth value of the element-wise comparison instead.
    self.assertIsNone(feat_table)
def test_get_features_for_matching_valid(self):
    """Check that generated features form a DataFrame of callable functions.

    Features are generated with ``validate_inferred_attr_types=False``.
    Every entry of the ``function`` column is applied to one row from
    each table and must return a value that is >= 0.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feat_table = afg.get_features_for_matching(
        A, B, validate_inferred_attr_types=False)
    self.assertIsInstance(feat_table, pd.DataFrame)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes both tables carry an integer index -- TODO confirm.
    ltuple = A.loc[1]
    rtuple = B.loc[2]
    for feature_fn in feat_table['function']:
        self.assertGreaterEqual(feature_fn(ltuple, rtuple), 0)
def test_extract_feature_vecs_invalid_feature_table(self):
    """Call ``extract_feature_vecs`` with ``feature_table=None``.

    A ``label`` column of zeros is appended to the candidate set first.
    The generated feature table is built but not forwarded to the
    extraction call.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Append the constant label column after the last existing column.
    pairs.insert(len(pairs.columns), 'label', [0] * len(pairs))
    generated = get_features_for_matching(
        left, right, validate_inferred_attr_types=False)
    vectors = extract_feature_vecs(
        pairs,
        attrs_before='ltable_name',
        feature_table=None,
        attrs_after=['label', '_id'],
    )
def test_add_features_valid_1(self):
    """Add a compiled feature and check that it is appended and callable.

    The feature table must grow by exactly one row, and the function
    stored in that new row must return 1.0 for the chosen tuple pair.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = get_features_for_matching(
        A, B, validate_inferred_attr_types=False)
    len1 = len(feature_table)
    feature_string = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
    f_dict = get_feature_fn(feature_string,
                            get_tokenizers_for_matching(),
                            get_sim_funs_for_matching())
    add_feature(feature_table, 'test', f_dict)
    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes integer row labels 0..n-1 -- TODO confirm.
    added_fn = feature_table.loc[len2 - 1, 'function']
    self.assertEqual(added_fn(A.loc[1], B.loc[2]), 1.0)
def test_add_bb_feature_valid_1(self):
    """Add a blackbox feature and check that it is appended and callable.

    The feature table must grow by exactly one row, and the function
    stored in that new row must return 1.0 for the chosen tuple pair.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = get_features_for_matching(
        A, B, validate_inferred_attr_types=False)

    def bb_fn(ltuple, rtuple):
        # Constant blackbox feature: ignores both tuples.
        return 1.0

    len1 = len(feature_table)
    add_blackbox_feature(feature_table, 'test', bb_fn)
    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes integer row labels 0..n-1 -- TODO confirm.
    added_fn = feature_table.loc[len2 - 1, 'function']
    self.assertEqual(added_fn(A.loc[1], B.loc[2]), 1.0)
def test_extract_feature_vecs_invalid_feature_table(self):
    """Call ``extract_feature_vecs`` with ``feature_table=None``.

    A ``label`` column of zeros is appended to the candidate set first.
    The generated feature table is built but not forwarded to the
    extraction call.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Append the constant label column after the last existing column.
    pairs.insert(len(pairs.columns), 'label', [0] * len(pairs))
    generated = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs,
        attrs_before='ltable_name',
        feature_table=None,
        attrs_after=['label', '_id'],
    )
def test_extract_feature_vecs_invalid_attrs_after(self):
    """Call ``extract_feature_vecs`` with an ``attrs_after`` name not inserted.

    The candidate set receives a ``label`` column, while ``attrs_after``
    asks for ``'label1'``. The feature table passed in is an empty frame
    that reuses the generated table's columns.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Append the constant label column after the last existing column.
    pairs.insert(len(pairs.columns), 'label', [0] * len(pairs))
    generated = get_features_for_matching(
        left, right, validate_inferred_attr_types=False)
    empty_features = pd.DataFrame(columns=generated.columns)
    vectors = extract_feature_vecs(
        pairs,
        attrs_before='ltable_name',
        feature_table=empty_features,
        attrs_after=['label1', '_id'],
    )
def test_get_features_for_matching_validate_types_valid_yes(self):
    """Expect a usable feature table when ``'y'`` is given via ``mockInput``.

    Feature generation is invoked with
    ``validate_inferred_attr_types=True``. The result must be a DataFrame
    whose ``function`` entries each return a value >= 0 for one tuple
    pair.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    with mockInput('y'):
        feat_table = afg.get_features_for_matching(
            A, B, validate_inferred_attr_types=True)
    self.assertIsInstance(feat_table, pd.DataFrame)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes both tables carry an integer index -- TODO confirm.
    ltuple = A.loc[1]
    rtuple = B.loc[2]
    for feature_fn in feat_table['function']:
        self.assertGreaterEqual(feature_fn(ltuple, rtuple), 0)
def test_add_features_valid_1(self):
    """Add a compiled feature and check that it is appended and callable.

    The feature table must grow by exactly one row, and the function
    stored in that new row must return 1.0 for the chosen tuple pair.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = get_features_for_matching(A, B)
    len1 = len(feature_table)
    feature_string = "exact_match(ltuple['zipcode'], rtuple['zipcode'])"
    f_dict = get_feature_fn(feature_string,
                            get_tokenizers_for_matching(),
                            get_sim_funs_for_matching())
    add_feature(feature_table, 'test', f_dict)
    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes integer row labels 0..n-1 -- TODO confirm.
    added_fn = feature_table.loc[len2 - 1, 'function']
    self.assertEqual(added_fn(A.loc[1], B.loc[2]), 1.0)
def test_extract_feature_vecs_with_parralel_job_count_less_than_zero(self):
    """Extract feature vectors with ``n_jobs=-1`` and check the layout.

    The result must be a DataFrame that starts with ``_id`` and the two
    foreign-key columns, has ``rtable_name`` at position 4, does not end
    with ``label``, and carries the same catalog properties as the
    candidate set.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Append a constant label column after the last existing column.
    C.insert(len(C.columns), 'label', [0] * len(C))
    feature_table = get_features_for_matching(
        A, B, validate_inferred_attr_types=False)
    F = extract_feature_vecs(C,
                             attrs_before=['ltable_name', 'rtable_name'],
                             feature_table=feature_table,
                             n_jobs=-1)
    # Dedicated assert methods report the offending values on failure.
    self.assertIsInstance(F, pd.DataFrame)
    self.assertEqual(F.columns[0], '_id')
    self.assertEqual(F.columns[1], cm.get_fk_ltable(C))
    self.assertEqual(F.columns[2], cm.get_fk_rtable(C))
    self.assertEqual(F.columns[4], 'rtable_name')
    # 'label' was not requested via attrs_after, so it must not be last.
    self.assertNotEqual(F.columns[-1], 'label')
    self.assertTrue(cm.get_all_properties(C) == cm.get_all_properties(F))
def test_add_bb_feature_valid_1(self):
    """Add a blackbox feature and check that it is appended and callable.

    The feature table must grow by exactly one row, and the function
    stored in that new row must return 1.0 for the chosen tuple pair.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = get_features_for_matching(A, B)

    def bb_fn(ltuple, rtuple):
        # Constant blackbox feature: ignores both tuples.
        return 1.0

    len1 = len(feature_table)
    add_blackbox_feature(feature_table, 'test', bb_fn)
    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes integer row labels 0..n-1 -- TODO confirm.
    added_fn = feature_table.loc[len2 - 1, 'function']
    self.assertEqual(added_fn(A.loc[1], B.loc[2]), 1.0)
def test_visualize_tree_invalid_df(self):
    """Call ``visualize_tree`` with a column index in place of a table.

    A decision-tree matcher is fitted on labelled feature vectors, then
    its ``clf`` is passed to ``visualize_tree`` together with
    ``feature_vectors.columns``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Seven 0 labels followed by eight 1 labels.
    pairs['labels'] = [0] * 7 + [1] * 8
    features = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs, feature_table=features, attrs_after='labels')
    matcher = DTMatcher()
    matcher.fit(
        table=vectors,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
        target_attr='labels',
    )
    visualize_tree(
        matcher.clf,
        vectors.columns,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
    )
def test_debug_dt_matcher_valid(self):
    """Debug a fitted decision-tree matcher on a single tuple pair.

    The matcher is trained on labelled feature vectors and
    ``debug_decisiontree_matcher`` is then called with one row from each
    input table.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Seven 0 labels followed by eight 1 labels.
    C['labels'] = [0] * 7 + [1] * 8
    feature_table = get_features_for_matching(A, B)
    feature_vectors = extract_feature_vecs(C, feature_table=feature_table,
                                           attrs_after='labels')
    dt = DTMatcher()
    dt.fit(table=feature_vectors,
           exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
           target_attr='labels')
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes both tables carry an integer index -- TODO confirm.
    debug_decisiontree_matcher(
        dt, A.loc[1], B.loc[2],
        feature_table=feature_table,
        table_columns=feature_vectors.columns,
        exclude_attrs=['ltable_ID', 'rtable_ID', '_id', 'labels'])
def test_extract_feature_vecs_valid_8(self):
    """Extract vectors using an empty feature table and check the layout.

    The feature table passed in is an empty frame reusing the generated
    table's columns. The result must be a DataFrame that starts with
    ``_id`` and the two foreign-key columns, ends with ``label``, and
    carries the same catalog properties as the candidate set.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Append a constant label column after the last existing column.
    C.insert(len(C.columns), 'label', [0] * len(C))
    feature_table = get_features_for_matching(
        A, B, validate_inferred_attr_types=False)
    F = extract_feature_vecs(
        C,
        feature_table=pd.DataFrame(columns=feature_table.columns),
        attrs_after=['label', '_id'])
    # Dedicated assert methods report the offending values on failure.
    self.assertIsInstance(F, pd.DataFrame)
    self.assertEqual(F.columns[0], '_id')
    self.assertEqual(F.columns[1], cm.get_fk_ltable(C))
    self.assertEqual(F.columns[2], cm.get_fk_rtable(C))
    # No check on position 3: this call passes no attrs_before
    # (presumably why the 'ltable_name' assertion is not applied here).
    self.assertEqual(F.columns[-1], 'label')
    self.assertTrue(cm.get_all_properties(C) == cm.get_all_properties(F))
def test_debug_rf_matcher_invalid_feat_table(self):
    """Call ``debug_randomforest_matcher`` with ``feature_table=None``.

    A random-forest matcher is fitted on labelled feature vectors before
    the debugger is invoked with one row from each input table.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Seven 0 labels followed by eight 1 labels.
    C['labels'] = [0] * 7 + [1] * 8
    feature_table = get_features_for_matching(A, B)
    feature_vectors = extract_feature_vecs(C, feature_table=feature_table,
                                           attrs_after='labels')
    rf = RFMatcher()
    rf.fit(table=feature_vectors,
           exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
           target_attr='labels')
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes both tables carry an integer index -- TODO confirm.
    debug_randomforest_matcher(
        rf, A.loc[1], B.loc[2],
        feature_table=None,
        table_columns=feature_vectors.columns,
        exclude_attrs=['ltable_ID', 'rtable_ID', '_id', 'labels'])
def test_extract_feature_vecs_valid_2(self):
    """Extract vectors with two ``attrs_before`` columns and check the layout.

    The result must be a DataFrame that starts with ``_id`` and the two
    foreign-key columns, followed by ``ltable_name`` and ``rtable_name``.
    It must not end with ``label`` and must carry the same catalog
    properties as the candidate set.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Append a constant label column after the last existing column.
    C.insert(len(C.columns), 'label', [0] * len(C))
    feature_table = get_features_for_matching(A, B)
    F = extract_feature_vecs(C,
                             attrs_before=['ltable_name', 'rtable_name'],
                             feature_table=feature_table)
    # Dedicated assert methods report the offending values on failure.
    self.assertIsInstance(F, pd.DataFrame)
    self.assertEqual(F.columns[0], '_id')
    self.assertEqual(F.columns[1], cm.get_fk_ltable(C))
    self.assertEqual(F.columns[2], cm.get_fk_rtable(C))
    self.assertEqual(F.columns[3], 'ltable_name')
    self.assertEqual(F.columns[4], 'rtable_name')
    # 'label' was not requested via attrs_after, so it must not be last.
    self.assertNotEqual(F.columns[-1], 'label')
    self.assertTrue(cm.get_all_properties(C) == cm.get_all_properties(F))
def test_add_bb_feature_with_attrs(self):
    """Add a blackbox feature with attribute metadata and verify the new row.

    The feature table must grow by one row whose ``feature_name``,
    ``left_attribute`` and ``right_attribute`` match the supplied values
    and whose ``simfunction`` is ``None``.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    feature_table = get_features_for_matching(
        A, B, validate_inferred_attr_types=False)

    def bb_fn(ltuple, rtuple):
        # Length of the shorter of the two 'name' values.
        return min(len(ltuple['name']), len(rtuple['name']))

    len1 = len(feature_table)
    attrs = {'left_attribute': 'name', 'right_attribute': 'name'}
    add_blackbox_feature(feature_table, 'bb_attr_test', bb_fn, **attrs)
    len2 = len(feature_table)
    self.assertEqual(len1 + 1, len2)
    # The new feature is the last row, by position.
    added_feature = feature_table.iloc[-1]
    self.assertEqual(added_feature.feature_name, 'bb_attr_test')
    self.assertEqual(added_feature.left_attribute, 'name')
    self.assertEqual(added_feature.right_attribute, 'name')
    # Identity check is the idiomatic way to assert on None.
    self.assertIsNone(added_feature.simfunction)
def test_vis_tuple_debug_rf_matcher_valid_1(self):
    """Debug a fitted random-forest matcher on one feature-vector row.

    The first feature vector is reshaped into a single-row DataFrame and
    passed to ``vis_tuple_debug_rf_matcher`` together with the matcher
    object and the list of attributes to exclude.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Seven 0 labels followed by eight 1 labels.
    C['labels'] = [0] * 7 + [1] * 8
    feature_table = get_features_for_matching(A, B)
    feature_vectors = extract_feature_vecs(C, feature_table=feature_table,
                                           attrs_after='labels')
    rf = RFMatcher()
    rf.fit(table=feature_vectors,
           exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
           target_attr='labels')
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes the vectors have an integer index -- TODO confirm.
    # Series -> one-column frame -> transpose gives a one-row frame.
    s1 = pd.DataFrame(feature_vectors.loc[0]).T
    vis_tuple_debug_rf_matcher(
        rf, s1,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'])
def test_extract_feature_vecs_valid_8(self):
    """Extract vectors using an empty feature table and check the layout.

    The feature table passed in is an empty frame reusing the generated
    table's columns. The result must be a DataFrame that starts with
    ``_id`` and the two foreign-key columns, ends with ``label``, and
    carries the same catalog properties as the candidate set.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Append a constant label column after the last existing column.
    C.insert(len(C.columns), 'label', [0] * len(C))
    feature_table = get_features_for_matching(
        A, B, validate_inferred_attr_types=False)
    F = extract_feature_vecs(
        C,
        feature_table=pd.DataFrame(columns=feature_table.columns),
        attrs_after=['label', '_id'])
    # Dedicated assert methods report the offending values on failure.
    self.assertIsInstance(F, pd.DataFrame)
    self.assertEqual(F.columns[0], '_id')
    self.assertEqual(F.columns[1], cm.get_fk_ltable(C))
    self.assertEqual(F.columns[2], cm.get_fk_rtable(C))
    # No check on position 3: this call passes no attrs_before
    # (presumably why the 'ltable_name' assertion is not applied here).
    self.assertEqual(F.columns[-1], 'label')
    self.assertTrue(cm.get_all_properties(C) == cm.get_all_properties(F))
def test_vis_debug_matcher_rf_label_col_wi_sp_name(self):
    """Run ``_vis_debug_rf`` with the label column named ``'_predicted'``.

    The same name is used for ``attrs_after`` during extraction and for
    ``target_attr`` in the debugger call; it is not listed in
    ``exclude_attrs``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Seven 0 labels followed by eight 1 labels.
    pairs['_predicted'] = [0] * 7 + [1] * 8
    features = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs, feature_table=features, attrs_after='_predicted')
    matcher = RFMatcher()
    split = mu.split_train_test(vectors)
    train_part = split['train']
    test_part = split['test']
    _vis_debug_rf(
        matcher,
        train_part,
        test_part,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID'],
        target_attr='_predicted',
        show_window=False,
    )
def test_extract_feature_vecs_with_parralel_job_count_more_than_one(self):
    """Extract feature vectors with ``n_jobs=2`` and check the layout.

    The result must be a DataFrame that starts with ``_id`` and the two
    foreign-key columns, has ``rtable_name`` at position 4, does not end
    with ``label``, and carries the same catalog properties as the
    candidate set.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Append a constant label column after the last existing column.
    C.insert(len(C.columns), 'label', [0] * len(C))
    feature_table = get_features_for_matching(
        A, B, validate_inferred_attr_types=False)
    F = extract_feature_vecs(C,
                             attrs_before=['ltable_name', 'rtable_name'],
                             feature_table=feature_table,
                             n_jobs=2)
    # Dedicated assert methods report the offending values on failure.
    self.assertIsInstance(F, pd.DataFrame)
    self.assertEqual(F.columns[0], '_id')
    self.assertEqual(F.columns[1], cm.get_fk_ltable(C))
    self.assertEqual(F.columns[2], cm.get_fk_rtable(C))
    self.assertEqual(F.columns[4], 'rtable_name')
    # 'label' was not requested via attrs_after, so it must not be last.
    self.assertNotEqual(F.columns[-1], 'label')
    self.assertTrue(cm.get_all_properties(C) == cm.get_all_properties(F))
def test_vis_tuple_debug_dt_matcher_valid_3(self):
    """Debug the raw ``dt.clf`` on one row with ``exclude_attrs=None``.

    After fitting, the id, foreign-key and label columns are dropped from
    the feature vectors in place, so the single-row frame handed to
    ``vis_tuple_debug_dt_matcher`` holds only the remaining columns.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Seven 0 labels followed by eight 1 labels.
    C['labels'] = [0] * 7 + [1] * 8
    feature_table = get_features_for_matching(A, B)
    feature_vectors = extract_feature_vecs(C, feature_table=feature_table,
                                           attrs_after='labels')
    dt = DTMatcher()
    dt.fit(table=feature_vectors,
           exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
           target_attr='labels')
    feature_vectors.drop(['_id', 'ltable_ID', 'rtable_ID', 'labels'],
                         axis=1, inplace=True)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes the vectors have an integer index -- TODO confirm.
    # Series -> one-column frame -> transpose gives a one-row frame.
    s1 = pd.DataFrame(feature_vectors.loc[0]).T
    vis_tuple_debug_dt_matcher(dt.clf, s1, exclude_attrs=None)
def test_vis_debug_matcher_dt_tar_attr_notin_train(self):
    """Run ``_vis_debug_dt`` with a ``target_attr`` that was never added.

    The label column is created as ``'labels'``, while the debugger is
    asked for ``'labels1'``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Seven 0 labels followed by eight 1 labels.
    pairs['labels'] = [0] * 7 + [1] * 8
    features = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs, feature_table=features, attrs_after='labels')
    matcher = DTMatcher()
    split = mu.split_train_test(vectors)
    train_part = split['train']
    test_part = split['test']
    _vis_debug_dt(
        matcher,
        train_part,
        test_part,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
        target_attr='labels1',
        show_window=False,
    )
def test_visualize_tree_invalid_df(self):
    """Call ``visualize_tree`` with a column index in place of a table.

    A decision-tree matcher is fitted on labelled feature vectors, then
    its ``clf`` is passed to ``visualize_tree`` together with
    ``feature_vectors.columns``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Seven 0 labels followed by eight 1 labels.
    pairs['labels'] = [0] * 7 + [1] * 8
    features = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs, feature_table=features, attrs_after='labels')
    matcher = DTMatcher()
    matcher.fit(
        table=vectors,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
        target_attr='labels',
    )
    visualize_tree(
        matcher.clf,
        vectors.columns,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
    )
def test_vis_debug_matcher_rf_ex_attrs_notin_test(self):
    """Run ``_vis_debug_rf`` when an excluded attribute is absent from test.

    ``'_id'`` is dropped from the test split but is still listed in
    ``exclude_attrs``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Seven 0 labels followed by eight 1 labels.
    pairs['labels'] = [0] * 7 + [1] * 8
    features = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs, feature_table=features, attrs_after='labels')
    matcher = RFMatcher()
    split = mu.split_train_test(vectors)
    train_part = split['train']
    test_part = split['test']
    test_part.drop('_id', inplace=True, axis=1)
    _vis_debug_rf(
        matcher,
        train_part,
        test_part,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
        target_attr='labels',
        show_window=False,
    )
def test_vis_tuple_debug_dt_matcher_valid_1(self):
    """Debug a fitted decision-tree matcher on one feature-vector row.

    The first feature vector is reshaped into a single-row DataFrame and
    passed to ``vis_tuple_debug_dt_matcher`` together with the matcher
    object and the list of attributes to exclude.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Seven 0 labels followed by eight 1 labels.
    C['labels'] = [0] * 7 + [1] * 8
    feature_table = get_features_for_matching(A, B)
    feature_vectors = extract_feature_vecs(C, feature_table=feature_table,
                                           attrs_after='labels')
    dt = DTMatcher()
    dt.fit(table=feature_vectors,
           exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
           target_attr='labels')
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes the vectors have an integer index -- TODO confirm.
    # Series -> one-column frame -> transpose gives a one-row frame.
    s1 = pd.DataFrame(feature_vectors.loc[0]).T
    vis_tuple_debug_dt_matcher(
        dt, s1,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'])
def test_debug_dt_matcher_valid(self):
    """Debug a fitted decision-tree matcher on a single tuple pair.

    The matcher is trained on labelled feature vectors and
    ``debug_decisiontree_matcher`` is then called with one row from each
    input table.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Seven 0 labels followed by eight 1 labels.
    C['labels'] = [0] * 7 + [1] * 8
    feature_table = get_features_for_matching(A, B)
    feature_vectors = extract_feature_vecs(C, feature_table=feature_table,
                                           attrs_after='labels')
    dt = DTMatcher()
    dt.fit(table=feature_vectors,
           exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
           target_attr='labels')
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes both tables carry an integer index -- TODO confirm.
    debug_decisiontree_matcher(
        dt, A.loc[1], B.loc[2],
        feature_table=feature_table,
        table_columns=feature_vectors.columns,
        exclude_attrs=['ltable_ID', 'rtable_ID', '_id', 'labels'])
def test_vis_tuple_debug_rf_matcher_valid_3(self):
    """Debug the raw ``rf.clf`` on one row with ``exclude_attrs=None``.

    After fitting, the id, foreign-key and label columns are dropped from
    the feature vectors in place, so the single-row frame handed to
    ``vis_tuple_debug_rf_matcher`` holds only the remaining columns.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Seven 0 labels followed by eight 1 labels.
    C['labels'] = [0] * 7 + [1] * 8
    feature_table = get_features_for_matching(A, B)
    feature_vectors = extract_feature_vecs(C, feature_table=feature_table,
                                           attrs_after='labels')
    rf = RFMatcher()
    rf.fit(table=feature_vectors,
           exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
           target_attr='labels')
    feature_vectors.drop(['_id', 'ltable_ID', 'rtable_ID', 'labels'],
                         axis=1, inplace=True)
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes the vectors have an integer index -- TODO confirm.
    # Series -> one-column frame -> transpose gives a one-row frame.
    s1 = pd.DataFrame(feature_vectors.loc[0]).T
    vis_tuple_debug_rf_matcher(rf.clf, s1, exclude_attrs=None)
def test_vis_debug_matcher_rf_label_col_wi_sp_name(self):
    """Run ``_vis_debug_rf`` with the label column named ``'_predicted'``.

    The same name is used for ``attrs_after`` during extraction and for
    ``target_attr`` in the debugger call; it is not listed in
    ``exclude_attrs``.
    """
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b, key='ID')
    pairs = read_csv_metadata(path_c, ltable=left, rtable=right)
    # Seven 0 labels followed by eight 1 labels.
    pairs['_predicted'] = [0] * 7 + [1] * 8
    features = get_features_for_matching(left, right)
    vectors = extract_feature_vecs(
        pairs, feature_table=features, attrs_after='_predicted')
    matcher = RFMatcher()
    split = mu.split_train_test(vectors)
    train_part = split['train']
    test_part = split['test']
    _vis_debug_rf(
        matcher,
        train_part,
        test_part,
        exclude_attrs=['_id', 'ltable_ID', 'rtable_ID'],
        target_attr='_predicted',
        show_window=False,
    )
def test_debug_rf_matcher_invalid_feat_table(self):
    """Call ``debug_randomforest_matcher`` with ``feature_table=None``.

    A random-forest matcher is fitted on labelled feature vectors before
    the debugger is invoked with one row from each input table.
    """
    A = read_csv_metadata(path_a)
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    # Seven 0 labels followed by eight 1 labels.
    C['labels'] = [0] * 7 + [1] * 8
    feature_table = get_features_for_matching(A, B)
    feature_vectors = extract_feature_vecs(C, feature_table=feature_table,
                                           attrs_after='labels')
    rf = RFMatcher()
    rf.fit(table=feature_vectors,
           exclude_attrs=['_id', 'ltable_ID', 'rtable_ID', 'labels'],
           target_attr='labels')
    # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
    # equivalent. Assumes both tables carry an integer index -- TODO confirm.
    debug_randomforest_matcher(
        rf, A.loc[1], B.loc[2],
        feature_table=None,
        table_columns=feature_vectors.columns,
        exclude_attrs=['ltable_ID', 'rtable_ID', '_id', 'labels'])
def test_get_features_for_matching_invalid_df2(self):
    """Call ``get_features_for_matching`` with ``None`` as the second table.

    Only the first table is loaded; type validation is switched off via
    ``validate_inferred_attr_types=False``.
    """
    left = read_csv_metadata(path_a)
    # The second table is not read; None is passed in its place.
    result = afg.get_features_for_matching(
        left,
        None,
        validate_inferred_attr_types=False,
    )
def test_get_features_for_matching_invalid_df1(self):
    """Call ``get_features_for_matching`` with ``None`` as the first table.

    Only the second table is loaded, keyed on ``'ID'``.
    """
    # The first table is not read; None is passed in its place.
    right = read_csv_metadata(path_b, key='ID')
    result = afg.get_features_for_matching(None, right)
def test_get_features_for_matching_invalid_df2(self):
    """Call ``get_features_for_matching`` with ``None`` as the second table.

    Only the first table is loaded; type validation is switched off via
    ``validate_inferred_attr_types=False``.
    """
    left = read_csv_metadata(path_a)
    # The second table is not read; None is passed in its place.
    result = afg.get_features_for_matching(
        left,
        None,
        validate_inferred_attr_types=False,
    )
def test_get_features_for_matching_invalid_df2(self):
    """Call ``get_features_for_matching`` with ``None`` as the second table.

    Only the first table is loaded before the call.
    """
    left = read_csv_metadata(path_a)
    # The second table is not read; None is passed in its place.
    result = afg.get_features_for_matching(left, None)