def test_ngram_blocks(self):
     # Fit an NGramBlocking (constructor defaults for everything but the
     # attribute indexes) on attribute index 1 of the Soundex fixtures and
     # check the id blocks it yields.
     # Each expected block is a (reference ids, target ids) tuple of lists.
     expected_blocks = [
         (['a3'], ['b1', 'b2']),
         (['a5'], ['b4']),
         (['a1', 'a4'], ['b3']),
     ]
     ngram_blocking = NGramBlocking(ref_attr_index=1, target_attr_index=1)
     ngram_blocking.fit(SOUNDEX_REFSET, SOUNDEX_TARGETSET)
     found_blocks = list(ngram_blocking.iter_id_blocks())
     # Exactly three blocks must come out, and each expected one must be
     # among them (the order of the blocks is not checked).
     self.assertEqual(len(found_blocks), 3)
     for expected in expected_blocks:
         self.assertIn(expected, found_blocks)
 def test_ngram_blocks_2(self):
     # Fit an NGramBlocking (ngram_size=2, depth=1) on attribute index 2 of
     # two small hand-written datasets and check the id pairs it yields.
     # Attribute index 1 is 'ccdd' for every record and is not the attribute
     # selected for blocking here.
     refset = [['3', 'ccdd', 'aabb'],
               ['4', 'ccdd', 'bbaa']]
     targetset = [['c', 'ccdd', 'aabb'],
                  ['d', 'ccdd', 'bbaa']]
     # Expected (reference id, target id) pairs: the records whose value at
     # attribute index 2 is identical in both datasets.
     true_pairs = [('3', 'c'), ('4', 'd')]
     blocking = NGramBlocking(ref_attr_index=2, target_attr_index=2,
                              ngram_size=2, depth=1)
     blocking.fit(refset, targetset)
     pairs = list(blocking.iter_id_pairs())
     self.assertEqual(len(pairs), len(true_pairs))
     # The count alone would also accept two *wrong* pairings (e.g.
     # ('3', 'd') and ('4', 'c')), so check the content of the pairs too.
     # NOTE(review): assumes iter_id_pairs() yields (ref_id, target_id)
     # tuples, as the shape of true_pairs suggests -- confirm.
     for expected_pair in true_pairs:
         self.assertIn(expected_pair, pairs)