def test_huge_dataset(self): m = ExternalMerger(self.agg, 5, partitions=3) m.mergeCombiners( map(lambda k_v: (k_v[0], [str(k_v[1])]), self.data * 10)) self.assertTrue(m.spills >= 1) self.assertEqual(sum(len(v) for k, v in m.items()), self.N * 10) m._cleanup()
def test_huge_dataset(self): m = ExternalMerger(self.agg, 10) m.mergeCombiners(map(lambda (k, v): (k, [str(v)]), self.data * 10)) self.assertTrue(m.spills >= 1) self.assertEqual(sum(len(v) for k, v in m._recursive_merged_items(0)), self.N * 10) m._cleanup()
def test_huge_dataset(self): m = ExternalMerger(self.agg, 5, partitions=3) m.mergeCombiners(map(lambda k_v: (k_v[0], [str(k_v[1])]), self.data * 10)) self.assertTrue(m.spills >= 1) self.assertEqual(sum(len(v) for k, v in m.items()), self.N * 10) m._cleanup()