def test_keyblocking_couples(self):
     # Fit a KeyBlocking on the soundex fixtures (attribute index 1 on both
     # the reference and target side, callback = soundexcode with
     # language='english') and compare the id pairs it yields with
     # SOUNDEX_PAIRS.
     english_soundex = partial(soundexcode, language='english')
     key_blocking = KeyBlocking(
         ref_attr_index=1,
         target_attr_index=1,
         callback=english_soundex,
     )
     key_blocking.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
     id_pairs = list(key_blocking.iter_id_pairs())
     # Eight pairs are expected overall ...
     self.assertEqual(len(id_pairs), 8)
     # ... and each reference pair has to be one of them.
     for expected_pair in SOUNDEX_PAIRS:
         self.assertIn(expected_pair, id_pairs)
 def test_baseblocking_indice_blocks(self):
     # Check the blocks returned by iter_indice_blocks() after fitting a
     # KeyBlocking (attribute index 1 on both sides, callback = soundexcode
     # with language='english') on the soundex fixtures.
     english_soundex = partial(soundexcode, language='english')
     key_blocking = KeyBlocking(
         ref_attr_index=1,
         target_attr_index=1,
         callback=english_soundex,
     )
     key_blocking.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
     index_blocks = list(key_blocking.iter_indice_blocks())
     # Each expected block is a (list of ints, list of ints) tuple;
     # presumably (reference positions, target positions) -- confirm
     # against KeyBlocking.
     expected_blocks = (
         ([0, 6], [2, 5]),
         ([1, 4], [3]),
         ([2], [0, 1]),
     )
     self.assertEqual(len(index_blocks), 3)
     for expected_block in expected_blocks:
         self.assertIn(expected_block, index_blocks)
 def test_baseblocking_id_blocks(self):
     # Check the blocks returned by iter_id_blocks() after fitting a
     # KeyBlocking (attribute index 1 on both sides, callback = soundexcode
     # with language='english') on the soundex fixtures.
     english_soundex = partial(soundexcode, language='english')
     key_blocking = KeyBlocking(
         ref_attr_index=1,
         target_attr_index=1,
         callback=english_soundex,
     )
     key_blocking.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
     id_blocks = list(key_blocking.iter_id_blocks())
     # Each expected block is a (list of 'a..' strings, list of 'b..'
     # strings) tuple; presumably (reference ids, target ids) -- confirm
     # against the fixtures.
     expected_blocks = (
         (['a1', 'a7'], ['b3', 'b6']),
         (['a2', 'a5'], ['b4']),
         (['a3'], ['b1', 'b2']),
     )
     self.assertEqual(len(id_blocks), 3)
     for expected_block in expected_blocks:
         self.assertIn(expected_block, id_blocks)
 def test_baseblocking_blocks(self):
     # Check the blocks returned by iter_blocks() after fitting a
     # KeyBlocking (attribute index 1 on both sides, callback = soundexcode
     # with language='english') on the soundex fixtures.
     english_soundex = partial(soundexcode, language='english')
     key_blocking = KeyBlocking(
         ref_attr_index=1,
         target_attr_index=1,
         callback=english_soundex,
     )
     key_blocking.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
     full_blocks = list(key_blocking.iter_blocks())
     # Each expected block is a pair of lists of (int, str) tuples;
     # presumably (position, id) entries for the reference and target
     # side respectively -- confirm against KeyBlocking.
     expected_blocks = (
         ([(0, 'a1'), (6, 'a7')], [(2, 'b3'), (5, 'b6')]),
         ([(1, 'a2'), (4, 'a5')], [(3, 'b4')]),
         ([(2, 'a3')], [(0, 'b1'), (1, 'b2')]),
     )
     self.assertEqual(len(full_blocks), 3)
     for expected_block in expected_blocks:
         self.assertIn(expected_block, full_blocks)
 def test_baseblocking_pairs(self):
     # Check the pairs returned by iter_pairs() after fitting a KeyBlocking
     # (attribute index 1 on both sides, callback = soundexcode with
     # language='english') on the soundex fixtures. The expected pairs are
     # derived from SOUNDEX_PAIRS rather than written out literally.
     english_soundex = partial(soundexcode, language='english')
     key_blocking = KeyBlocking(
         ref_attr_index=1,
         target_attr_index=1,
         callback=english_soundex,
     )
     key_blocking.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
     found_pairs = list(key_blocking.iter_pairs())
     # Map element 0 of every record (presumably its identifier -- it is
     # looked up below with the entries of SOUNDEX_PAIRS) to the record's
     # position in its dataset.
     ref_positions = {
         record[0]: position
         for position, record in enumerate(SOUNDEX_REFSET)
     }
     target_positions = {
         record[0]: position
         for position, record in enumerate(SOUNDEX_TARGETSET)
     }
     # Turn each entry of SOUNDEX_PAIRS into
     # ((ref position, ref entry), (target position, target entry)).
     expected_pairs = []
     for id_pair in SOUNDEX_PAIRS:
         ref_key = id_pair[0]
         target_key = id_pair[1]
         expected_pairs.append(
             ((ref_positions[ref_key], ref_key),
              (target_positions[target_key], target_key))
         )
     self.assertEqual(len(found_pairs), len(expected_pairs))
     for expected_pair in expected_pairs:
         self.assertIn(expected_pair, found_pairs)