def test_assemble_topk_table_2(self):
    """Assemble a three-entry top-k heap and check the resulting table.

    Verifies the number of rows, the exact column order, and the content
    of each of the three assembled records.
    """
    A = read_csv_metadata(path_a, key='ID')
    B = read_csv_metadata(path_b, key='ID')
    A_key = em.get_key(A)
    B_key = em.get_key(B)
    # NOTE(review): each heap entry looks like
    # (score, row position in A, row position in B); the expected records
    # below are consistent with that reading -- confirm against
    # db._assemble_topk_table.
    topk_heap = [(0.2727272727272727, 1, 0),
                 (0.23076923076923078, 0, 4),
                 (0.16666666666666666, 0, 3)]
    ret_dataframe = db._assemble_topk_table(topk_heap, A, B, A_key, B_key)
    expected_columns = ['_id', 'ltable_ID', 'rtable_ID',
                        'ltable_name', 'ltable_birth_year',
                        'ltable_hourly_wage', 'ltable_address',
                        'ltable_zipcode', 'rtable_name',
                        'rtable_birth_year', 'rtable_hourly_wage',
                        'rtable_address', 'rtable_zipcode']
    self.assertEqual(len(ret_dataframe), 3)
    self.assertEqual(list(ret_dataframe.columns), expected_columns)
    expected_recs = [[0, 'a2', 'b1', 'Michael Franklin', 1988, 27.5,
                      '1652 Stockton St, San Francisco', 94122,
                      'Mark Levene', 1987, 29.5,
                      '108 Clement St, San Francisco', 94107],
                     [1, 'a1', 'b5', 'Kevin Smith', 1989, 30.0,
                      '607 From St, San Francisco', 94107,
                      'Alfons Kemper', 1984, 35.0,
                      '170 Post St, Apt 4, San Francisco', 94122],
                     [2, 'a1', 'b4', 'Kevin Smith', 1989, 30.0,
                      '607 From St, San Francisco', 94107,
                      'Joseph Kuan', 1982, 26.0,
                      '108 South Park, San Francisco', 94122]]
    # `DataFrame.ix` was removed in pandas 1.0; `.loc` does the label
    # lookup that `.ix` performed here (assumes the returned frame has
    # integer row labels 0..2 -- TODO confirm).
    self.assertEqual(list(ret_dataframe.loc[0]), expected_recs[0])
    self.assertEqual(list(ret_dataframe.loc[1]), expected_recs[1])
    self.assertEqual(list(ret_dataframe.loc[2]), expected_recs[2])
def test_assemble_topk_table_2(self):
    """Check the table assembled from a heap holding three candidates.

    The assembled frame must have three rows, the expected column order,
    and rows labelled 0, 1 and 2 that match the expected records.
    """
    ltable = read_csv_metadata(path_a, key='ID')
    rtable = read_csv_metadata(path_b, key='ID')
    l_key = em.get_key(ltable)
    r_key = em.get_key(rtable)

    heap_entries = [
        (0.2727272727272727, 1, 0),
        (0.23076923076923078, 0, 4),
        (0.16666666666666666, 0, 3),
    ]
    assembled = db._assemble_topk_table(
        heap_entries, ltable, rtable, l_key, r_key)

    wanted_columns = [
        '_id', 'ltable_ID', 'rtable_ID',
        'ltable_name', 'ltable_birth_year', 'ltable_hourly_wage',
        'ltable_address', 'ltable_zipcode',
        'rtable_name', 'rtable_birth_year', 'rtable_hourly_wage',
        'rtable_address', 'rtable_zipcode',
    ]
    self.assertEqual(len(assembled), 3)
    self.assertEqual(list(assembled.columns), wanted_columns)

    wanted_rows = [
        [0, 'a2', 'b1',
         'Michael Franklin', 1988, 27.5,
         '1652 Stockton St, San Francisco', 94122,
         'Mark Levene', 1987, 29.5,
         '108 Clement St, San Francisco', 94107],
        [1, 'a1', 'b5',
         'Kevin Smith', 1989, 30.0,
         '607 From St, San Francisco', 94107,
         'Alfons Kemper', 1984, 35.0,
         '170 Post St, Apt 4, San Francisco', 94122],
        [2, 'a1', 'b4',
         'Kevin Smith', 1989, 30.0,
         '607 From St, San Francisco', 94107,
         'Joseph Kuan', 1982, 26.0,
         '108 South Park, San Francisco', 94122],
    ]
    # Rows are compared one at a time, in label order, so the first
    # mismatching row is the one reported.
    for row_label, wanted in enumerate(wanted_rows):
        self.assertEqual(list(assembled.loc[row_label]), wanted)
def test_assemble_topk_table_1(self):
    """Assemble an empty top-k heap and expect an empty table.

    The result must have no rows and no columns.
    """
    A = read_csv_metadata(path_a, key='ID')
    B = read_csv_metadata(path_b, key='ID')
    # Pass the table keys explicitly, matching the other
    # _assemble_topk_table calls in this test suite.
    A_key = em.get_key(A)
    B_key = em.get_key(B)
    topk_heap = []
    ret_dataframe = db._assemble_topk_table(topk_heap, A, B, A_key, B_key)
    self.assertEqual(len(ret_dataframe), 0)
    self.assertEqual(list(ret_dataframe.columns), [])
def test_assemble_topk_table_1(self):
    """Check that an empty heap is assembled into an empty frame.

    The returned frame is expected to have zero rows and an empty column
    list.
    """
    ltable = read_csv_metadata(path_a, key='ID')
    rtable = read_csv_metadata(path_b, key='ID')
    l_key = em.get_key(ltable)
    r_key = em.get_key(rtable)

    no_candidates = []
    assembled = db._assemble_topk_table(
        no_candidates, ltable, rtable, l_key, r_key)

    self.assertEqual(len(assembled), 0)
    self.assertEqual(list(assembled.columns), [])