def test_get_record_id_to_index_map_2(self):
     """Map record ids to indexes for a table whose ``ID`` values repeat.

     Both rows of the table carry the id ``'a1'``. The frame is registered
     with ``em.set_key`` and then passed to
     ``db._get_record_id_to_index_map``; the return value is not inspected.
     """
     # NOTE(review): nothing is asserted here -- presumably the duplicate id
     # is expected to make the call fail; confirm how that expectation is
     # declared for this test.
     key = 'ID'
     rows = [['a1', 'hello'], ['a1', 'world']]
     frame = pd.DataFrame(rows, columns=[key, 'title'])
     em.set_key(frame, key)
     db._get_record_id_to_index_map(frame, key)
    def test_index_candidate_set_3(self):
        """Index a candidate set after overwriting its first ``rtable_ID``.

        Tables A and B are read with ``ID`` as key and C is read as their
        candidate set. The ``rtable_ID`` of the row labelled ``0`` in C is
        replaced by ``'bbbb'`` before ``db._index_candidate_set`` is called
        with the id-to-index maps of A and B. The result is not inspected.
        """
        # NOTE(review): nothing is asserted here -- presumably 'bbbb' is not
        # a valid id in B and the call is expected to fail; confirm how that
        # expectation is declared for this test.
        A = read_csv_metadata(path_a, key='ID')
        B = read_csv_metadata(path_b, key='ID')
        C = read_csv_metadata(path_c, ltable=A, rtable=B)
        l_key = cm.get_key(A)
        r_key = cm.get_key(B)

        # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` is the
        # label-based replacement. Assumes C has the default integer index,
        # so label 0 is the first row -- TODO confirm.
        C.loc[0, 'rtable_ID'] = 'bbbb'

        lrecord_id_to_index_map = db._get_record_id_to_index_map(A, l_key)
        rrecord_id_to_index_map = db._get_record_id_to_index_map(B, r_key)

        db._index_candidate_set(C, lrecord_id_to_index_map,
                                rrecord_id_to_index_map, False)
 def test_index_candidate_set_6(self):
     """Indexing an empty candidate set yields an empty set.

     Two small keyed tables are built in memory, their id-to-index maps
     are computed, and an empty ``DataFrame`` is passed as the candidate
     set to ``db._index_candidate_set``.
     """
     column_names = ['ID', 'f1', 'f2']

     ltable = pd.DataFrame([[1, 'asdf', 'fdas'], [2, 'fdsa', 'asdf']],
                           columns=column_names)
     em.set_key(ltable, 'ID')

     rtable = pd.DataFrame([['B002', 'qqqq', 'wwww'],
                            ['B003', 'rrrr', 'fdsa']],
                           columns=column_names)
     em.set_key(rtable, 'ID')

     empty_candset = pd.DataFrame()

     l_map = db._get_record_id_to_index_map(ltable, 'ID')
     r_map = db._get_record_id_to_index_map(rtable, 'ID')

     indexed = db._index_candidate_set(empty_candset, l_map, r_map, False)
     self.assertEqual(indexed, set())
    def test_index_candidate_set_1(self):
        """Index the candidate set read from ``path_c`` and check the pairs.

        Tables A and B are read with ``ID`` as key and C is read as their
        candidate set. The set returned by ``db._index_candidate_set`` must
        equal the fifteen expected index pairs listed below.
        """
        ltable = read_csv_metadata(path_a, key='ID')
        rtable = read_csv_metadata(path_b, key='ID')
        candset = read_csv_metadata(path_c, ltable=ltable, rtable=rtable)

        l_map = db._get_record_id_to_index_map(ltable, cm.get_key(ltable))
        r_map = db._get_record_id_to_index_map(rtable, cm.get_key(rtable))

        # Same pairs as before, listed in sorted order for readability.
        expected_pairs = {
            (0, 0), (0, 1), (0, 5),
            (1, 2), (1, 3), (1, 4),
            (2, 0), (2, 1), (2, 5),
            (3, 2), (3, 3), (3, 4),
            (4, 2), (4, 3), (4, 4),
        }
        actual_pairs = db._index_candidate_set(candset, l_map, r_map, False)
        self.assertEqual(expected_pairs, actual_pairs)
 def test_index_candidate_set_5(self):
     """Index a candidate set that references an id missing from B.

     A and B are small keyed tables built in memory. The candidate set C
     pairs ``ltable_ID`` 1 with ``rtable_ID`` ``'B001'``, which does not
     appear in B's ``ID`` column (``'B002'``, ``'B003'``). C is registered
     with ``cm.set_candset_properties`` and passed to
     ``db._index_candidate_set``; the result is not inspected.
     """
     # NOTE(review): nothing is asserted here -- presumably the unknown id
     # is expected to make the call fail; confirm how that expectation is
     # declared for this test.
     table_columns = ['ID', 'f1', 'f2']

     ltable = pd.DataFrame([[1, 'asdf', 'fdas'], [2, 'fdsa', 'asdf']],
                           columns=table_columns)
     em.set_key(ltable, 'ID')

     rtable = pd.DataFrame([['B002', 'qqqq', 'wwww'],
                            ['B003', 'rrrr', 'fdsa']],
                           columns=table_columns)
     em.set_key(rtable, 'ID')

     candset = pd.DataFrame([[0, 1, 'B001'], [1, 2, 'B002']],
                            columns=['_id', 'ltable_ID', 'rtable_ID'])
     cm.set_candset_properties(candset, '_id', 'ltable_ID', 'rtable_ID',
                               ltable, rtable)

     l_map = db._get_record_id_to_index_map(ltable, 'ID')
     r_map = db._get_record_id_to_index_map(rtable, 'ID')
     db._index_candidate_set(candset, l_map, r_map, False)
 def test_get_record_id_to_index_map_1(self):
     """Check the id-to-index map built for the table read from ``path_a``.

     The table is read with ``ID`` as key, the key is looked up through
     ``em.get_key``, and the resulting map must send ``'a1'``..``'a5'`` to
     ``0``..``4``.
     """
     table = read_csv_metadata(path_a, key='ID')
     mapping = db._get_record_id_to_index_map(table, em.get_key(table))
     expected_mapping = {
         'a1': 0,
         'a2': 1,
         'a3': 2,
         'a4': 3,
         'a5': 4,
     }
     self.assertEqual(mapping, expected_mapping)