def test_debugblocker_7(self):
    """Run debug_blocker with correspondences given as a tuple and a list.

    Both tables are read from ``path_a``/``path_b`` keyed on ``ID``; the
    candidate set is read from ``path_c`` and linked to them. An output
    size of 200 is passed positionally.
    """
    # NOTE(review): nothing is asserted on the result in this block; the
    # mixed tuple/list pair types are presumably the point of the test and
    # any expected-exception wrapper would live outside it -- confirm.
    left = read_csv_metadata(path_a, key='ID')
    right = read_csv_metadata(path_b, key='ID')
    candidates = read_csv_metadata(
        path_c,
        ltable=left,
        rtable=right,
        fk_ltable='ltable_ID',
        fk_rtable='rtable_ID',
        key='_id',
    )
    mixed_pairs = [('ID', 'ID'), ['ID', 'ID']]
    db.debug_blocker(candidates, left, right, 200, mixed_pairs)
def test_debugblocker_14(self):
    """Check the columns and first record returned by debug_blocker.

    Reads the ltable, rtable and candidate-set CSV fixtures found under
    ``debugblocker_datasets_path``, asks for a single output row with an
    explicit attribute correspondence, then verifies the returned column
    list and entries 2 and 3 of the row labelled 0.
    """
    path_ltable = os.sep.join(
        [debugblocker_datasets_path, 'test_debugblocker_ltable.csv'])
    path_rtable = os.sep.join(
        [debugblocker_datasets_path, 'test_debugblocker_rtable.csv'])
    path_cand = os.sep.join(
        [debugblocker_datasets_path, 'test_debugblocker_cand.csv'])

    ltable = read_csv_metadata(path_ltable, key='ID')
    rtable = read_csv_metadata(path_rtable, key='book_id')
    cand_set = read_csv_metadata(path_cand, ltable=ltable, rtable=rtable,
                                 fk_ltable='ltable_ID',
                                 fk_rtable='rtable_book_id', key='_id')

    attr_corres = [('title', 'book_title'), ('price', 'price'),
                   ('desc', 'description'), ('genre', 'book_genre'),
                   ('year', 'pub_year'), ('lang', 'language'),
                   ('author', 'author'), ('publisher', 'publisher')]
    output_size = 1

    ret_dataframe = db.debug_blocker(cand_set, ltable, rtable,
                                     output_size, attr_corres)

    expected_columns = ['_id', 'ltable_ID', 'rtable_book_id',
                        'ltable_title', 'ltable_desc', 'ltable_year',
                        'ltable_lang', 'ltable_author', 'ltable_publisher',
                        'rtable_book_title', 'rtable_description',
                        'rtable_pub_year', 'rtable_language',
                        'rtable_author', 'rtable_publisher']
    self.assertEqual(list(ret_dataframe.columns), expected_columns)

    # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` performs the
    # same label lookup for the row labelled 0.
    ret_record = list(ret_dataframe.loc[0])
    expected_record = [0, 1, 'B001', 'data analysis',
                       'introduction to data analysis', 2015, 'ENG',
                       'Jane Doe', 'BCD publisher',
                       'introduction to data analysis', float('nan'),
                       'English', 'introduction to data analysis',
                       'John Doe', 'ABC publisher10.00']

    # Diagnostic output kept from the original test.
    print(ret_record)
    print(expected_record)

    # Only positions 2 and 3 are compared; the remaining entries of
    # expected_record are not checked by this test.
    self.assertEqual(expected_record[2], ret_record[2])
    self.assertEqual(expected_record[3], ret_record[3])
def test_debugblocker_14(self):
    """Verify debug_blocker's column list and two fields of its first row.

    The three CSV fixtures (ltable, rtable, candidate set) are located
    under ``debugblocker_datasets_path``. One output row is requested with
    an explicit attribute correspondence.
    """
    fixture_names = (
        'test_debugblocker_ltable.csv',
        'test_debugblocker_rtable.csv',
        'test_debugblocker_cand.csv',
    )
    left_path, right_path, cand_path = [
        os.sep.join([debugblocker_datasets_path, fixture])
        for fixture in fixture_names
    ]

    left = read_csv_metadata(left_path, key='ID')
    right = read_csv_metadata(right_path, key='book_id')
    candidates = read_csv_metadata(
        cand_path,
        ltable=left,
        rtable=right,
        fk_ltable='ltable_ID',
        fk_rtable='rtable_book_id',
        key='_id',
    )

    correspondences = [
        ('title', 'book_title'),
        ('price', 'price'),
        ('desc', 'description'),
        ('genre', 'book_genre'),
        ('year', 'pub_year'),
        ('lang', 'language'),
        ('author', 'author'),
        ('publisher', 'publisher'),
    ]
    wanted_rows = 1
    result = db.debug_blocker(candidates, left, right, wanted_rows,
                              correspondences)

    columns_wanted = [
        '_id', 'ltable_ID', 'rtable_book_id',
        'ltable_title', 'ltable_desc', 'ltable_year',
        'ltable_lang', 'ltable_author', 'ltable_publisher',
        'rtable_book_title', 'rtable_description', 'rtable_pub_year',
        'rtable_language', 'rtable_author', 'rtable_publisher',
    ]
    self.assertEqual(list(result.columns), columns_wanted)

    first_row = list(result.loc[0])
    row_wanted = [
        0, 1, 'B001', 'data analysis', 'introduction to data analysis',
        2015, 'ENG', 'Jane Doe', 'BCD publisher',
        'introduction to data analysis', float('nan'), 'English',
        'introduction to data analysis', 'John Doe', 'ABC publisher10.00',
    ]
    print(first_row)
    print(row_wanted)

    # Only positions 2 and 3 of the record are compared.
    for position in (2, 3):
        self.assertEqual(row_wanted[position], first_row[position])
def test_debugblocker_12(self):
    """Run debug_blocker on one-row tables that contain only an ID column.

    Both tables hold the single value 0 under ``ID``; the candidate set
    holds one row ``(0, 0, 0)`` linking them. No result is asserted.
    """
    key_name = 'ID'

    left = pd.DataFrame([[0]], columns=[key_name])
    right = pd.DataFrame([[0]], columns=[key_name])
    em.set_key(left, key_name)
    em.set_key(right, key_name)

    candidates = pd.DataFrame(
        [[0, 0, 0]], columns=['_id', 'ltable_ID', 'rtable_ID'])
    cm.set_candset_properties(candidates, '_id', 'ltable_ID', 'rtable_ID',
                              left, right)

    db.debug_blocker(candidates, left, right)
def test_debugblocker_18(self):
    """Run debug_blocker with ``n_jobs=2`` on the A/B/C fixture tables.

    The returned table is bound to a local but not inspected.
    """
    left = read_csv_metadata(path_a, key='ID')
    right = read_csv_metadata(path_b, key='ID')
    candidates = read_csv_metadata(
        path_c,
        ltable=left,
        rtable=right,
        fk_ltable='ltable_ID',
        fk_rtable='rtable_ID',
        key='_id',
    )
    result = db.debug_blocker(candidates, left, right, n_jobs=2)
def test_debugblocker_13(self):
    """Compare the row count of debug_blocker's output with a stored CSV.

    The expected table is read from ``test_debugblocker_13_out.csv`` under
    ``debugblocker_datasets_path``; only the lengths are compared.
    """
    left = read_csv_metadata(path_a, key='ID')
    right = read_csv_metadata(path_b, key='ID')
    candidates = read_csv_metadata(
        path_c,
        ltable=left,
        rtable=right,
        fk_ltable='ltable_ID',
        fk_rtable='rtable_ID',
        key='_id',
    )
    produced = db.debug_blocker(candidates, left, right)

    reference_path = os.sep.join(
        [debugblocker_datasets_path, 'test_debugblocker_13_out.csv'])
    reference = read_csv_metadata(
        reference_path,
        ltable=left,
        rtable=right,
        fk_ltable='ltable_ID',
        fk_rtable='rtable_ID',
        key='_id',
    )

    self.assertEqual(len(reference), len(produced))
def test_debugblocker_14(self):
    """Check the columns and first record returned by debug_blocker.

    Reads the ltable, rtable and candidate-set CSV fixtures found under
    ``debugblocker_datasets_path``, asks for a single output row with an
    explicit attribute correspondence, then verifies the returned column
    list (which here includes a ``similarity`` column) and entries 2 and 3
    of the row labelled 0.
    """
    path_ltable = os.sep.join(
        [debugblocker_datasets_path, 'test_debugblocker_ltable.csv'])
    path_rtable = os.sep.join(
        [debugblocker_datasets_path, 'test_debugblocker_rtable.csv'])
    path_cand = os.sep.join(
        [debugblocker_datasets_path, 'test_debugblocker_cand.csv'])

    ltable = read_csv_metadata(path_ltable, key='ID')
    rtable = read_csv_metadata(path_rtable, key='book_id')
    cand_set = read_csv_metadata(path_cand, ltable=ltable, rtable=rtable,
                                 fk_ltable='ltable_ID',
                                 fk_rtable='rtable_book_id', key='_id')

    attr_corres = [('title', 'book_title'), ('price', 'price'),
                   ('desc', 'description'), ('genre', 'book_genre'),
                   ('year', 'pub_year'), ('lang', 'language'),
                   ('author', 'author'), ('publisher', 'publisher')]
    output_size = 1

    ret_dataframe = db.debug_blocker(cand_set, ltable, rtable,
                                     output_size, attr_corres)

    expected_columns = [
        '_id', 'similarity', 'ltable_ID', 'rtable_book_id',
        'ltable_title', 'ltable_desc', 'ltable_year', 'ltable_lang',
        'ltable_author', 'ltable_publisher', 'rtable_book_title',
        'rtable_description', 'rtable_pub_year', 'rtable_language',
        'rtable_author', 'rtable_publisher'
    ]
    self.assertEqual(list(ret_dataframe.columns), expected_columns)

    # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` performs the
    # same label lookup for the row labelled 0.
    ret_record = list(ret_dataframe.loc[0])

    # The ``pd.np`` alias was removed in pandas 2.0; a plain float NaN is
    # an equivalent placeholder and needs no extra import.
    nan = float('nan')
    expected_record = [
        0, 0.33333333333333331, 2, 'B002', 'Thinking in Java',
        'learn how to program in Java', 2000, 'ENG', 'Johnnie Doe',
        nan, 'Thinking in C', 'learn programming in C++', '1990',
        nan, 'Jane Doe', 'BCD publisher'
    ]

    # Only positions 2 and 3 are compared; the remaining entries of
    # expected_record are not checked by this test.
    self.assertEqual(expected_record[2], ret_record[2])
    self.assertEqual(expected_record[3], ret_record[3])
def test_debugblocker_4(self):
    """Call debug_blocker with the output size given as the string '200'."""
    # NOTE(review): nothing is asserted in this block; any
    # expected-exception wrapper would live outside it -- confirm.
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b)
    candidates = read_csv_metadata(path_c, ltable=left, rtable=right)
    size_as_text = '200'
    db.debug_blocker(candidates, left, right, size_as_text)
def test_debugblocker_2(self):
    """Call debug_blocker with a real A table but empty lists for B and C."""
    # NOTE(review): nothing is asserted in this block; any
    # expected-exception wrapper would live outside it -- confirm.
    left = read_csv_metadata(path_a)
    right, candidates = [], []
    db.debug_blocker(candidates, left, right)
def test_debugblocker_1(self):
    """Call debug_blocker with empty lists for all three table arguments."""
    # NOTE(review): nothing is asserted in this block; any
    # expected-exception wrapper would live outside it -- confirm.
    left, right, candidates = [], [], []
    db.debug_blocker(candidates, left, right)
def test_debugblocker_3(self):
    """Call debug_blocker with ``None`` in place of the candidate set."""
    # NOTE(review): nothing is asserted in this block; any
    # expected-exception wrapper would live outside it -- confirm.
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b)
    candidates = None
    db.debug_blocker(candidates, left, right)
def test_debugblocker_11(self):
    """Call debug_blocker with an empty candidate DataFrame and size 0."""
    # NOTE(review): nothing is asserted in this block; any
    # expected-exception wrapper would live outside it -- confirm.
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b)
    empty_candidates = pd.DataFrame([])
    zero_rows = 0
    db.debug_blocker(empty_candidates, left, right, zero_rows)
def test_debugblocker_5(self):
    """Call debug_blocker with an empty ``set`` as the correspondence arg.

    An output size of 200 is passed positionally.
    """
    # NOTE(review): nothing is asserted in this block; any
    # expected-exception wrapper would live outside it -- confirm.
    left = read_csv_metadata(path_a)
    right = read_csv_metadata(path_b)
    candidates = read_csv_metadata(path_c, ltable=left, rtable=right)
    empty_pairs = set()
    db.debug_blocker(candidates, left, right, 200, empty_pairs)
def test_debugblocker_10(self):
    """Call debug_blocker with empty DataFrames for B and the candidates."""
    # NOTE(review): nothing is asserted in this block; any
    # expected-exception wrapper would live outside it -- confirm.
    left = read_csv_metadata(path_a)
    right, candidates = pd.DataFrame([]), pd.DataFrame([])
    db.debug_blocker(candidates, left, right)
def time_debug_blocking(self):
    """Invoke debug_blocker with the instance's ``A``, ``B`` and ``C``."""
    # NOTE(review): the arguments are passed in A, B, C order; which
    # attribute holds the candidate set is not visible from this block --
    # verify the order against the setup code and debug_blocker's signature.
    first, second, third = self.A, self.B, self.C
    db.debug_blocker(first, second, third)
def time_debug_blocking(self):
    """Invoke debug_blocker with the instance's tables, size and pairs.

    Passes ``A``, ``B``, ``C``, ``output_size`` and ``attr_corres`` from
    the instance, positionally and in that order.
    """
    # NOTE(review): which of A/B/C holds the candidate set is not visible
    # from this block -- verify the order against the setup code.
    call_args = (
        self.A,
        self.B,
        self.C,
        self.output_size,
        self.attr_corres,
    )
    db.debug_blocker(*call_args)