Python debug_blocker示例，py_entitymatching.debugblocker.debugblocker.debug_blocker Python示例

示例#1

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_7(self):
     """Run ``debug_blocker`` with ID-only attribute correspondences.

     The correspondence list holds one tuple entry and one list entry, both
     mapping 'ID' to 'ID'.

     NOTE(review): no assertion or expected-exception marker is visible in
     this snippet -- confirm how the outcome is checked.
     """
     ltable = read_csv_metadata(path_a, key='ID')
     rtable = read_csv_metadata(path_b, key='ID')
     cand_set = read_csv_metadata(
         path_c, key='_id',
         ltable=ltable, rtable=rtable,
         fk_ltable='ltable_ID', fk_rtable='rtable_ID')
     corres = [('ID', 'ID'), ['ID', 'ID']]
     db.debug_blocker(cand_set, ltable, rtable, 200, corres)

示例#2

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_7(self):
     """Invoke ``debug_blocker`` with a mixed tuple/list correspondence list.

     Tables A and B are keyed on 'ID'; the candidate set is keyed on '_id'
     and linked to them through 'ltable_ID' / 'rtable_ID'.

     NOTE(review): the snippet shows no assertion or expected-exception
     marker -- confirm how the result is verified.
     """
     left = read_csv_metadata(path_a, key='ID')
     right = read_csv_metadata(path_b, key='ID')
     candset_options = dict(ltable=left, rtable=right,
                            fk_ltable='ltable_ID',
                            fk_rtable='rtable_ID',
                            key='_id')
     candidates = read_csv_metadata(path_c, **candset_options)
     id_pairs = [('ID', 'ID'), ['ID', 'ID']]
     db.debug_blocker(candidates, left, right, 200, id_pairs)

示例#3

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_14(self):
     """Check the columns and first row ``debug_blocker`` returns for books.

     Loads the ltable/rtable/candidate-set CSV fixtures, asks for a single
     output row, and verifies the full column list plus the values at
     positions 2 and 3 of the first returned record.
     """
     path_ltable = os.sep.join([debugblocker_datasets_path,
                                'test_debugblocker_ltable.csv'])
     path_rtable = os.sep.join([debugblocker_datasets_path,
                                'test_debugblocker_rtable.csv'])
     path_cand = os.sep.join([debugblocker_datasets_path,
                              'test_debugblocker_cand.csv'])
     ltable = read_csv_metadata(path_ltable, key='ID')
     rtable = read_csv_metadata(path_rtable, key='book_id')
     cand_set = read_csv_metadata(path_cand, ltable=ltable, rtable=rtable,
                                  fk_ltable='ltable_ID',
                                  fk_rtable='rtable_book_id',
                                  key='_id')
     attr_corres = [('title', 'book_title'), ('price', 'price'),
                    ('desc', 'description'), ('genre', 'book_genre'),
                    ('year', 'pub_year'), ('lang', 'language'),
                    ('author', 'author'), ('publisher', 'publisher')]
     output_size = 1
     ret_dataframe = db.debug_blocker(cand_set, ltable, rtable,
                                      output_size, attr_corres)
     expected_columns = ['_id', 'ltable_ID', 'rtable_book_id',
                         'ltable_title', 'ltable_desc', 'ltable_year',
                         'ltable_lang', 'ltable_author', 'ltable_publisher',
                         'rtable_book_title', 'rtable_description',
                         'rtable_pub_year', 'rtable_language',
                         'rtable_author', 'rtable_publisher']
     self.assertEqual(list(ret_dataframe.columns), expected_columns)
     # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` is the
     # label-based accessor that replaces it for this row lookup.
     ret_record = list(ret_dataframe.loc[0])
     expected_record = [0, 1, 'B001', 'data analysis', 'introduction to data analysis',
         2015, 'ENG', 'Jane Doe', 'BCD publisher', 'introduction to data analysis',
         float('nan'), 'English', 'introduction to data analysis', 'John Doe', 'ABC publisher10.00']
     print(ret_record)
     print(expected_record)
     # Only positions 2 and 3 of the record are compared.
     self.assertEqual(expected_record[2], ret_record[2])
     self.assertEqual(expected_record[3], ret_record[3])

示例#4

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_14(self):
     """Verify the column layout and first record for the book fixtures.

     A single output row is requested from ``debug_blocker``; the returned
     column names are compared in full, and the first row is compared at
     positions 2 and 3 only.
     """
     ltable_csv, rtable_csv, cand_csv = (
         os.sep.join([debugblocker_datasets_path, file_name])
         for file_name in ('test_debugblocker_ltable.csv',
                           'test_debugblocker_rtable.csv',
                           'test_debugblocker_cand.csv'))
     left = read_csv_metadata(ltable_csv, key='ID')
     right = read_csv_metadata(rtable_csv, key='book_id')
     candidates = read_csv_metadata(
         cand_csv, key='_id',
         ltable=left, rtable=right,
         fk_ltable='ltable_ID', fk_rtable='rtable_book_id')
     pairs = [
         ('title', 'book_title'),
         ('price', 'price'),
         ('desc', 'description'),
         ('genre', 'book_genre'),
         ('year', 'pub_year'),
         ('lang', 'language'),
         ('author', 'author'),
         ('publisher', 'publisher'),
     ]
     result = db.debug_blocker(candidates, left, right, 1, pairs)
     wanted_columns = [
         '_id', 'ltable_ID', 'rtable_book_id',
         'ltable_title', 'ltable_desc', 'ltable_year',
         'ltable_lang', 'ltable_author', 'ltable_publisher',
         'rtable_book_title', 'rtable_description',
         'rtable_pub_year', 'rtable_language',
         'rtable_author', 'rtable_publisher',
     ]
     self.assertEqual(list(result.columns), wanted_columns)
     actual_row = list(result.loc[0])
     wanted_row = [
         0, 1, 'B001', 'data analysis', 'introduction to data analysis',
         2015, 'ENG', 'Jane Doe', 'BCD publisher',
         'introduction to data analysis', float('nan'), 'English',
         'introduction to data analysis', 'John Doe', 'ABC publisher10.00',
     ]
     print(actual_row)
     print(wanted_row)
     # Only the foreign-key-side value and the first attribute are compared.
     for position in (2, 3):
         self.assertEqual(wanted_row[position], actual_row[position])

示例#5

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

    def test_debugblocker_12(self):
        """Run ``debug_blocker`` with default options on one-row tables.

        Both input tables hold a single 'ID' column containing 0, and the
        candidate set holds the single row (0, 0, 0) under the columns
        '_id', 'ltable_ID' and 'rtable_ID'.
        """
        ltable = pd.DataFrame([[0]], columns=['ID'])
        rtable = pd.DataFrame([[0]], columns=['ID'])
        em.set_key(ltable, 'ID')
        em.set_key(rtable, 'ID')
        cand_set = pd.DataFrame([[0, 0, 0]],
                                columns=['_id', 'ltable_ID', 'rtable_ID'])
        cm.set_candset_properties(cand_set, '_id', 'ltable_ID', 'rtable_ID',
                                  ltable, rtable)

        db.debug_blocker(cand_set, ltable, rtable)

示例#6

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

    def test_debugblocker_18(self):
        """Run ``debug_blocker`` with ``n_jobs=2`` on the A/B/C fixtures.

        The returned table is bound to a local but not inspected here.
        """
        ltable = read_csv_metadata(path_a, key='ID')
        rtable = read_csv_metadata(path_b, key='ID')
        cand_set = read_csv_metadata(
            path_c, key='_id',
            ltable=ltable, rtable=rtable,
            fk_ltable='ltable_ID', fk_rtable='rtable_ID')

        result = db.debug_blocker(cand_set, ltable, rtable, n_jobs=2)

示例#7

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

    def test_debugblocker_12(self):
        """Exercise ``debug_blocker`` on minimal single-row inputs.

        Each input table has one 'ID' column with the value 0; the candidate
        set has one row of zeros for '_id', 'ltable_ID' and 'rtable_ID'.
        """
        key_name = 'ID'
        left = pd.DataFrame([[0]], columns=[key_name])
        right = pd.DataFrame([[0]], columns=[key_name])
        em.set_key(left, key_name)
        em.set_key(right, key_name)
        candidates = pd.DataFrame(
            [[0, 0, 0]], columns=['_id', 'ltable_ID', 'rtable_ID'])
        cm.set_candset_properties(candidates, '_id', 'ltable_ID',
                                  'rtable_ID', left, right)

        db.debug_blocker(candidates, left, right)

示例#8

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

    def test_debugblocker_18(self):
        """Call ``debug_blocker`` on the A/B/C fixtures using two jobs.

        The result is stored in a local and not checked in this snippet.
        """
        left = read_csv_metadata(path_a, key='ID')
        right = read_csv_metadata(path_b, key='ID')
        candset_options = dict(ltable=left, rtable=right,
                               fk_ltable='ltable_ID', fk_rtable='rtable_ID',
                               key='_id')
        candidates = read_csv_metadata(path_c, **candset_options)

        output = db.debug_blocker(candidates, left, right, n_jobs=2)

示例#9

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

    def test_debugblocker_13(self):
        """Compare the row count of ``debug_blocker`` output with a fixture.

        Runs ``debug_blocker`` with default options on the A/B/C tables and
        asserts that the result has as many rows as the table stored in
        'test_debugblocker_13_out.csv'.
        """
        candset_meta = dict(fk_ltable='ltable_ID', fk_rtable='rtable_ID',
                            key='_id')
        left = read_csv_metadata(path_a, key='ID')
        right = read_csv_metadata(path_b, key='ID')
        candidates = read_csv_metadata(path_c, ltable=left, rtable=right,
                                       **candset_meta)

        produced = db.debug_blocker(candidates, left, right)
        expected_path = os.sep.join(
            [debugblocker_datasets_path, 'test_debugblocker_13_out.csv'])
        expected = read_csv_metadata(expected_path, ltable=left, rtable=right,
                                     **candset_meta)
        self.assertEqual(len(expected), len(produced))

示例#10

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

    def test_debugblocker_13(self):
        """Assert ``debug_blocker`` returns as many rows as the stored output.

        Only the lengths of the actual and expected tables are compared; the
        expected table is read from 'test_debugblocker_13_out.csv'.
        """
        ltable = read_csv_metadata(path_a, key='ID')
        rtable = read_csv_metadata(path_b, key='ID')
        cand_set = read_csv_metadata(
            path_c, key='_id',
            ltable=ltable, rtable=rtable,
            fk_ltable='ltable_ID', fk_rtable='rtable_ID')

        actual = db.debug_blocker(cand_set, ltable, rtable)
        fixture_name = 'test_debugblocker_13_out.csv'
        fixture_path = os.sep.join([debugblocker_datasets_path, fixture_name])
        expected = read_csv_metadata(
            fixture_path, key='_id',
            ltable=ltable, rtable=rtable,
            fk_ltable='ltable_ID', fk_rtable='rtable_ID')
        self.assertEqual(len(expected), len(actual))

示例#11

0

显示文件

文件： test_debugblocker.py 项目： kjinxin/py_entitymatching

 def test_debugblocker_14(self):
     """Check the columns and first row ``debug_blocker`` returns for books.

     Loads the ltable/rtable/candidate-set CSV fixtures, asks for a single
     output row, and verifies the full column list (including the
     'similarity' column) plus positions 2 and 3 of the first record.
     """
     path_ltable = os.sep.join(
         [debugblocker_datasets_path, 'test_debugblocker_ltable.csv'])
     path_rtable = os.sep.join(
         [debugblocker_datasets_path, 'test_debugblocker_rtable.csv'])
     path_cand = os.sep.join(
         [debugblocker_datasets_path, 'test_debugblocker_cand.csv'])
     ltable = read_csv_metadata(path_ltable, key='ID')
     rtable = read_csv_metadata(path_rtable, key='book_id')
     cand_set = read_csv_metadata(path_cand,
                                  ltable=ltable,
                                  rtable=rtable,
                                  fk_ltable='ltable_ID',
                                  fk_rtable='rtable_book_id',
                                  key='_id')
     attr_corres = [('title', 'book_title'), ('price', 'price'),
                    ('desc', 'description'), ('genre', 'book_genre'),
                    ('year', 'pub_year'), ('lang', 'language'),
                    ('author', 'author'), ('publisher', 'publisher')]
     output_size = 1
     ret_dataframe = db.debug_blocker(cand_set, ltable, rtable, output_size,
                                      attr_corres)
     expected_columns = [
         '_id', 'similarity', 'ltable_ID', 'rtable_book_id', 'ltable_title',
         'ltable_desc', 'ltable_year', 'ltable_lang', 'ltable_author',
         'ltable_publisher', 'rtable_book_title', 'rtable_description',
         'rtable_pub_year', 'rtable_language', 'rtable_author',
         'rtable_publisher'
     ]
     self.assertEqual(list(ret_dataframe.columns), expected_columns)
     # ``DataFrame.ix`` was removed in pandas 1.0; ``.loc`` is the
     # label-based accessor that replaces it for this row lookup.
     ret_record = list(ret_dataframe.loc[0])
     # ``pd.np`` is a deprecated alias that newer pandas no longer exposes;
     # ``float('nan')`` gives the same NaN without the alias.
     nan = float('nan')
     expected_record = [
         0, 0.33333333333333331, 2, 'B002', 'Thinking in Java',
         'learn how to program in Java', 2000, 'ENG', 'Johnnie Doe',
         nan, 'Thinking in C', 'learn programming in C++', '1990',
         nan, 'Jane Doe', 'BCD publisher'
     ]
     # Only positions 2 and 3 of the record are compared.
     self.assertEqual(expected_record[2], ret_record[2])
     self.assertEqual(expected_record[3], ret_record[3])

示例#12

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_4(self):
     """Call ``debug_blocker`` with the output size given as the string '200'.

     NOTE(review): no assertion or expected-exception marker is visible in
     this snippet -- confirm how the outcome is checked.
     """
     ltable = read_csv_metadata(path_a)
     rtable = read_csv_metadata(path_b)
     cand_set = read_csv_metadata(path_c, ltable=ltable, rtable=rtable)
     size_as_text = '200'
     db.debug_blocker(cand_set, ltable, rtable, size_as_text)

示例#13

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_2(self):
     """Call ``debug_blocker`` with plain lists for rtable and candidate set.

     Only the left table is loaded from CSV; the other two arguments are
     empty Python lists.

     NOTE(review): no assertion or expected-exception marker is visible in
     this snippet -- confirm how the outcome is checked.
     """
     ltable = read_csv_metadata(path_a)
     rtable, cand_set = [], []
     db.debug_blocker(cand_set, ltable, rtable)

示例#14

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_1(self):
     """Call ``debug_blocker`` with three empty Python lists.

     NOTE(review): no assertion or expected-exception marker is visible in
     this snippet -- confirm how the outcome is checked.
     """
     ltable, rtable, cand_set = [], [], []
     db.debug_blocker(cand_set, ltable, rtable)

示例#15

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_3(self):
     """Call ``debug_blocker`` with ``None`` as the candidate set.

     NOTE(review): no assertion or expected-exception marker is visible in
     this snippet -- confirm how the outcome is checked.
     """
     ltable = read_csv_metadata(path_a)
     rtable = read_csv_metadata(path_b)
     cand_set = None
     db.debug_blocker(cand_set, ltable, rtable)

示例#16

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_11(self):
     """Call ``debug_blocker`` with an empty candidate frame and size 0.

     NOTE(review): no assertion or expected-exception marker is visible in
     this snippet -- confirm how the outcome is checked.
     """
     ltable = read_csv_metadata(path_a)
     rtable = read_csv_metadata(path_b)
     empty_candset = pd.DataFrame([])
     db.debug_blocker(empty_candset, ltable, rtable, 0)

示例#17

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_1(self):
     """Pass empty lists as candidate set, ltable and rtable.

     NOTE(review): the snippet shows no assertion or expected-exception
     marker -- confirm how the result is verified.
     """
     left = []
     right = []
     candidates = []
     db.debug_blocker(candidates, left, right)

示例#18

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_5(self):
     """Call ``debug_blocker`` with an empty ``set`` as ``attr_corres``.

     NOTE(review): no assertion or expected-exception marker is visible in
     this snippet -- confirm how the outcome is checked.
     """
     ltable = read_csv_metadata(path_a)
     rtable = read_csv_metadata(path_b)
     cand_set = read_csv_metadata(path_c, ltable=ltable, rtable=rtable)
     empty_corres = set()
     db.debug_blocker(cand_set, ltable, rtable, 200, empty_corres)

示例#19

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_3(self):
     """Pass ``None`` where ``debug_blocker`` takes the candidate set.

     NOTE(review): the snippet shows no assertion or expected-exception
     marker -- confirm how the result is verified.
     """
     left = read_csv_metadata(path_a)
     right = read_csv_metadata(path_b)
     db.debug_blocker(None, left, right)

示例#20

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_2(self):
     """Pass a loaded ltable with empty lists for the other two tables.

     NOTE(review): the snippet shows no assertion or expected-exception
     marker -- confirm how the result is verified.
     """
     left = read_csv_metadata(path_a)
     right = []
     candidates = []
     db.debug_blocker(candidates, left, right)

示例#21

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_5(self):
     """Pass an empty ``set`` as the attribute correspondence argument.

     NOTE(review): the snippet shows no assertion or expected-exception
     marker -- confirm how the result is verified.
     """
     left = read_csv_metadata(path_a)
     right = read_csv_metadata(path_b)
     candidates = read_csv_metadata(path_c, ltable=left, rtable=right)
     db.debug_blocker(candidates, left, right, 200, set())

示例#22

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_4(self):
     """Pass the string '200' where ``debug_blocker`` takes the output size.

     NOTE(review): the snippet shows no assertion or expected-exception
     marker -- confirm how the result is verified.
     """
     left = read_csv_metadata(path_a)
     right = read_csv_metadata(path_b)
     candidates = read_csv_metadata(path_c, ltable=left, rtable=right)
     db.debug_blocker(candidates, left, right, '200')

示例#23

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_10(self):
     """Call ``debug_blocker`` with empty DataFrames for rtable and candset.

     Only the left table is loaded from CSV.

     NOTE(review): no assertion or expected-exception marker is visible in
     this snippet -- confirm how the outcome is checked.
     """
     ltable = read_csv_metadata(path_a)
     rtable, cand_set = pd.DataFrame([]), pd.DataFrame([])
     db.debug_blocker(cand_set, ltable, rtable)

示例#24

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_10(self):
     """Pass a loaded ltable with two separate empty DataFrames.

     NOTE(review): the snippet shows no assertion or expected-exception
     marker -- confirm how the result is verified.
     """
     left = read_csv_metadata(path_a)
     right = pd.DataFrame([])
     candidates = pd.DataFrame([])
     db.debug_blocker(candidates, left, right)

示例#25

0

显示文件

 def time_debug_blocking(self):
     """Invoke ``debug_blocker`` on the instance's A, B and C attributes.

     NOTE(review): the arguments are passed in the order A, B, C; which of
     them is the candidate set is not visible here -- confirm the order
     matches the ``debug_blocker`` signature in use.
     """
     first, second, third = self.A, self.B, self.C
     db.debug_blocker(first, second, third)

示例#26

0

显示文件

文件： test_debugblocker.py 项目： anhaidgroup/py_entitymatching

 def test_debugblocker_11(self):
     """Pass an empty DataFrame as candidate set with an output size of 0.

     NOTE(review): the snippet shows no assertion or expected-exception
     marker -- confirm how the result is verified.
     """
     left = read_csv_metadata(path_a)
     right = read_csv_metadata(path_b)
     candidates = pd.DataFrame([])
     requested_rows = 0
     db.debug_blocker(candidates, left, right, requested_rows)

示例#27

0

显示文件

 def time_debug_blocking(self):
     """Invoke ``debug_blocker`` with five attributes stored on the instance.

     The positional arguments are, in order: ``A``, ``B``, ``C``,
     ``output_size`` and ``attr_corres``.

     NOTE(review): which of A/B/C is the candidate set is not visible here
     -- confirm the order matches the ``debug_blocker`` signature in use.
     """
     call_args = (self.A, self.B, self.C, self.output_size,
                  self.attr_corres)
     db.debug_blocker(*call_args)