def test_get_record_id_to_index_map_2(self):
    """Build the id-to-index map for a table whose key column repeats a value.

    Both rows carry the ID 'a1'. The body makes no assertion of its own.
    NOTE(review): presumably an expected-exception decorator outside this
    view checks the outcome -- confirm against the full file.
    """
    key_column = 'ID'
    rows = [
        ['a1', 'hello'],
        ['a1', 'world'],
    ]
    frame = pd.DataFrame(rows, columns=['ID', 'title'])
    em.set_key(frame, key_column)
    db._get_record_id_to_index_map(frame, key_column)
def test_index_candidate_set_3(self):
    """Index a candidate set after overwriting one of its right-table ids.

    Tables A, B and the candidate set C are loaded from CSV, then the
    'rtable_ID' value in the row labelled 0 of C is replaced with 'bbbb'
    before the candidate set is indexed. The body makes no assertion.
    NOTE(review): presumably 'bbbb' is not a key of B and an
    expected-exception decorator outside this view checks the failure --
    confirm against the full file.
    """
    A = read_csv_metadata(path_a, key='ID')
    B = read_csv_metadata(path_b, key='ID')
    C = read_csv_metadata(path_c, ltable=A, rtable=B)
    l_key = cm.get_key(A)
    r_key = cm.get_key(B)

    # `DataFrame.ix` was removed in pandas 1.0; `.loc` performs the same
    # label-based assignment.
    C.loc[0, 'rtable_ID'] = 'bbbb'

    lrecord_id_to_index_map = db._get_record_id_to_index_map(A, l_key)
    rrecord_id_to_index_map = db._get_record_id_to_index_map(B, r_key)
    db._index_candidate_set(
        C, lrecord_id_to_index_map, rrecord_id_to_index_map, False)
def test_index_candidate_set_6(self):
    """Indexing an empty candidate set must yield an empty set."""
    left_table = pd.DataFrame(
        [[1, 'asdf', 'fdas'], [2, 'fdsa', 'asdf']],
        columns=['ID', 'f1', 'f2'],
    )
    em.set_key(left_table, 'ID')

    right_table = pd.DataFrame(
        [['B002', 'qqqq', 'wwww'], ['B003', 'rrrr', 'fdsa']],
        columns=['ID', 'f1', 'f2'],
    )
    em.set_key(right_table, 'ID')

    # A frame with no rows and no columns stands in for the candidate set.
    empty_candset = pd.DataFrame()

    left_id_to_index = db._get_record_id_to_index_map(left_table, 'ID')
    right_id_to_index = db._get_record_id_to_index_map(right_table, 'ID')
    indexed = db._index_candidate_set(
        empty_candset, left_id_to_index, right_id_to_index, False)
    self.assertEqual(indexed, set())
def test_index_candidate_set_1(self):
    """Index the candidate set loaded from CSV and compare it with the
    expected set of (left index, right index) pairs.
    """
    left_table = read_csv_metadata(path_a, key='ID')
    right_table = read_csv_metadata(path_b, key='ID')
    candset = read_csv_metadata(
        path_c, ltable=left_table, rtable=right_table)

    left_id_to_index = db._get_record_id_to_index_map(
        left_table, cm.get_key(left_table))
    right_id_to_index = db._get_record_id_to_index_map(
        right_table, cm.get_key(right_table))

    # Expected pairs, grouped by left index for readability.
    expected_pairs = {
        (0, 0), (0, 1), (0, 5),
        (1, 2), (1, 3), (1, 4),
        (2, 0), (2, 1), (2, 5),
        (3, 2), (3, 3), (3, 4),
        (4, 2), (4, 3), (4, 4),
    }
    actual_pairs = db._index_candidate_set(
        candset, left_id_to_index, right_id_to_index, False)
    self.assertEqual(expected_pairs, actual_pairs)
def test_index_candidate_set_5(self):
    """Index a hand-built candidate set whose first row names 'B001'.

    'B001' does not appear in the ID column of the right table built here.
    The body makes no assertion of its own.
    NOTE(review): presumably an expected-exception decorator outside this
    view checks the outcome -- confirm against the full file.
    """
    left_table = pd.DataFrame(
        [[1, 'asdf', 'fdas'], [2, 'fdsa', 'asdf']],
        columns=['ID', 'f1', 'f2'],
    )
    em.set_key(left_table, 'ID')

    right_table = pd.DataFrame(
        [['B002', 'qqqq', 'wwww'], ['B003', 'rrrr', 'fdsa']],
        columns=['ID', 'f1', 'f2'],
    )
    em.set_key(right_table, 'ID')

    candset = pd.DataFrame(
        [[0, 1, 'B001'], [1, 2, 'B002']],
        columns=['_id', 'ltable_ID', 'rtable_ID'],
    )
    cm.set_candset_properties(
        candset, '_id', 'ltable_ID', 'rtable_ID', left_table, right_table)

    left_id_to_index = db._get_record_id_to_index_map(left_table, 'ID')
    right_id_to_index = db._get_record_id_to_index_map(right_table, 'ID')
    db._index_candidate_set(
        candset, left_id_to_index, right_id_to_index, False)
def test_get_record_id_to_index_map_1(self):
    """The id-to-index map for the CSV-loaded table must match the
    expected record-id -> index mapping.
    """
    table = read_csv_metadata(path_a, key='ID')
    observed_map = db._get_record_id_to_index_map(table, em.get_key(table))

    wanted_map = {
        'a1': 0,
        'a2': 1,
        'a3': 2,
        'a4': 3,
        'a5': 4,
    }
    self.assertEqual(observed_map, wanted_map)