示例#1
0
 def test_huge_dataset(self):
     m = ExternalMerger(self.agg, 5, partitions=3)
     m.mergeCombiners(
         map(lambda k_v: (k_v[0], [str(k_v[1])]), self.data * 10))
     self.assertTrue(m.spills >= 1)
     self.assertEqual(sum(len(v) for k, v in m.items()), self.N * 10)
     m._cleanup()
示例#2
0
    def test_stopiteration_is_raised(self):
        def stopit(*args, **kwargs):
            raise StopIteration()

        def legit_create_combiner(x):
            return [x]

        def legit_merge_value(x, y):
            return x.append(y) or x

        def legit_merge_combiners(x, y):
            return x.extend(y) or x

        data = [(x % 2, x) for x in range(100)]

        # wrong create combiner
        m = ExternalMerger(
            Aggregator(stopit, legit_merge_value, legit_merge_combiners), 20)
        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
            m.mergeValues(data)

        # wrong merge value
        m = ExternalMerger(
            Aggregator(legit_create_combiner, stopit, legit_merge_combiners),
            20)
        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
            m.mergeValues(data)

        # wrong merge combiners
        m = ExternalMerger(
            Aggregator(legit_create_combiner, legit_merge_value, stopit), 20)
        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
            m.mergeCombiners(map(lambda x_y1: (x_y1[0], [x_y1[1]]), data))
示例#3
0
文件: tests.py 项目: fireflyc/spark
 def test_huge_dataset(self):
     m = ExternalMerger(self.agg, 10)
     m.mergeCombiners(map(lambda (k, v): (k, [str(v)]), self.data * 10))
     self.assertTrue(m.spills >= 1)
     self.assertEqual(sum(len(v) for k, v in m._recursive_merged_items(0)),
             self.N * 10)
     m._cleanup()
示例#4
0
    def test_stopiteration_is_raised(self):

        def stopit(*args, **kwargs):
            raise StopIteration()

        def legit_create_combiner(x):
            return [x]

        def legit_merge_value(x, y):
            return x.append(y) or x

        def legit_merge_combiners(x, y):
            return x.extend(y) or x

        data = [(x % 2, x) for x in range(100)]

        # wrong create combiner
        m = ExternalMerger(Aggregator(stopit, legit_merge_value, legit_merge_combiners), 20)
        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
            m.mergeValues(data)

        # wrong merge value
        m = ExternalMerger(Aggregator(legit_create_combiner, stopit, legit_merge_combiners), 20)
        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
            m.mergeValues(data)

        # wrong merge combiners
        m = ExternalMerger(Aggregator(legit_create_combiner, legit_merge_value, stopit), 20)
        with self.assertRaises((Py4JJavaError, RuntimeError)) as cm:
            m.mergeCombiners(map(lambda x_y1: (x_y1[0], [x_y1[1]]), data))
示例#5
0
 def test_huge_dataset(self):
     m = ExternalMerger(self.agg, 5, partitions=3)
     m.mergeCombiners(map(lambda k_v: (k_v[0], [str(k_v[1])]), self.data * 10))
     self.assertTrue(m.spills >= 1)
     self.assertEqual(sum(len(v) for k, v in m.items()),
                      self.N * 10)
     m._cleanup()
示例#6
0
    def test_small_dataset(self):
        m = ExternalMerger(self.agg, 1000)
        m.mergeValues(self.data)
        self.assertEqual(m.spills, 0)
        self.assertEqual(sum(sum(v) for k, v in m.items()), sum(range(self.N)))

        m = ExternalMerger(self.agg, 1000)
        m.mergeCombiners(map(lambda x_y1: (x_y1[0], [x_y1[1]]), self.data))
        self.assertEqual(m.spills, 0)
        self.assertEqual(sum(sum(v) for k, v in m.items()), sum(range(self.N)))
示例#7
0
    def test_medium_dataset(self):
        m = ExternalMerger(self.agg, 20)
        m.mergeValues(self.data)
        self.assertTrue(m.spills >= 1)
        self.assertEqual(sum(sum(v) for k, v in m.items()), sum(range(self.N)))

        m = ExternalMerger(self.agg, 10)
        m.mergeCombiners(map(lambda x_y2: (x_y2[0], [x_y2[1]]), self.data * 3))
        self.assertTrue(m.spills >= 1)
        self.assertEqual(sum(sum(v) for k, v in m.items()),
                         sum(range(self.N)) * 3)
示例#8
0
文件: tests.py 项目: fireflyc/spark
    def test_medium_dataset(self):
        m = ExternalMerger(self.agg, 10)
        m.mergeValues(self.data)
        self.assertTrue(m.spills >= 1)
        self.assertEqual(sum(sum(v) for k, v in m.iteritems()),
                sum(xrange(self.N)))

        m = ExternalMerger(self.agg, 10)
        m.mergeCombiners(map(lambda (x, y): (x, [y]), self.data * 3))
        self.assertTrue(m.spills >= 1)
        self.assertEqual(sum(sum(v) for k, v in m.iteritems()),
                sum(xrange(self.N)) * 3)
示例#9
0
文件: tests.py 项目: fireflyc/spark
    def test_small_dataset(self):
        m = ExternalMerger(self.agg, 1000)
        m.mergeValues(self.data)
        self.assertEqual(m.spills, 0)
        self.assertEqual(sum(sum(v) for k, v in m.iteritems()),
                sum(xrange(self.N)))

        m = ExternalMerger(self.agg, 1000)
        m.mergeCombiners(map(lambda (x, y): (x, [y]), self.data))
        self.assertEqual(m.spills, 0)
        self.assertEqual(sum(sum(v) for k, v in m.iteritems()),
                sum(xrange(self.N)))