Exemplo n.º 1
0
    def test_get_attr_corres_valid_1(self):
        """Check the result of get_attr_corres for two loaded tables.

        Each entry of ``ac['corres']`` must hold two equal items, and the
        ``ltable`` / ``rtable`` entries must match the tables passed in.
        """
        # NOTE(review): assumes read_csv_metadata returns pandas DataFrames
        # (the original element-wise ``==`` comparison implies this) - confirm.
        A = read_csv_metadata(path_a)
        B = read_csv_metadata(path_b, key='ID')
        ac = get_attr_corres(A, B)
        for c in ac['corres']:
            self.assertEqual(c[0], c[1])

        # ``all(frame == other)`` walks the column labels of the boolean
        # frame rather than its cells, so it passes even when values differ.
        # ``equals`` compares shape, labels and values instead.
        self.assertTrue(ac['ltable'].equals(A))
        self.assertTrue(ac['rtable'].equals(B))
Exemplo n.º 2
0
def get_features_for_matching(A, B):
    """Build the feature table used for matching tables A and B.

    Both inputs must be pandas DataFrames; otherwise the problem is logged
    and an AssertionError is raised. The similarity functions, tokenizers,
    attribute types and attribute correspondences used to build the table
    are also stored on ``mg`` before returning.

    Args:
        A: First input table.
        B: Second input table.

    Returns:
        The value produced by ``get_features`` for the two tables.

    Raises:
        AssertionError: If A or B is not a pandas DataFrame.
    """
    if not isinstance(A, pd.DataFrame):
        message = 'Input table A is not of type pandas dataframe'
        logger.error(message)
        raise AssertionError(message)

    if not isinstance(B, pd.DataFrame):
        message = 'Input table B is not of type pandas dataframe'
        logger.error(message)
        raise AssertionError(message)

    similarity_functions = get_sim_funs_for_matching()
    tokenizers = get_tokenizers_for_matching()
    types_a = get_attr_types(A)
    types_b = get_attr_types(B)
    correspondences = get_attr_corres(A, B)
    features = get_features(
        A,
        B,
        types_a,
        types_b,
        correspondences,
        tokenizers,
        similarity_functions,
    )

    # Publish the intermediate results on mg only after the feature table
    # has been built successfully.
    mg._match_t = tokenizers
    mg._match_s = similarity_functions
    mg._atypes1 = types_a
    mg._atypes2 = types_b
    mg._match_c = correspondences
    return features
Exemplo n.º 3
0
def get_features_for_matching(A, B):
    """Create the matching feature table for the input tables A and B.

    A is validated before B: each must be a pandas DataFrame, and the first
    one that is not causes an error to be logged and an AssertionError to be
    raised. On success the tokenizers, similarity functions, attribute types
    and attribute correspondences are recorded on ``mg``.

    Args:
        A: First input table.
        B: Second input table.

    Returns:
        The result of ``get_features`` for A and B.

    Raises:
        AssertionError: If A or B is not a pandas DataFrame.
    """
    checks = (
        (A, 'Input table A is not of type pandas dataframe'),
        (B, 'Input table B is not of type pandas dataframe'),
    )
    for table, problem in checks:
        if not isinstance(table, pd.DataFrame):
            logger.error(problem)
            raise AssertionError(problem)

    sim = get_sim_funs_for_matching()
    tok = get_tokenizers_for_matching()
    a_types = get_attr_types(A)
    b_types = get_attr_types(B)
    corres = get_attr_corres(A, B)
    table_of_features = get_features(A, B, a_types, b_types, corres, tok, sim)

    # Expose the pieces used above through the mg namespace.
    mg._match_t = tok
    mg._match_s = sim
    mg._atypes1 = a_types
    mg._atypes2 = b_types
    mg._match_c = corres
    return table_of_features
Exemplo n.º 4
0
 def test_get_attr_corres_invalid_df2(self):
     """Call get_attr_corres with None as the second table."""
     # NOTE(review): no failure expectation is visible in this excerpt;
     # presumably a decorator outside it declares the expected error - confirm.
     empty_ltable = pd.DataFrame()
     get_attr_corres(empty_ltable, None)
Exemplo n.º 5
0
 def test_get_attr_corres_invalid_df1(self):
     """Call get_attr_corres with None as the first table."""
     # NOTE(review): no failure expectation is visible in this excerpt;
     # presumably a decorator outside it declares the expected error - confirm.
     empty_rtable = pd.DataFrame()
     get_attr_corres(None, empty_rtable)