Exemplo n.º 1
0
 def test_distinct(self):
     # distinct() should keep exactly one copy of every input value.
     values = [1, 2, 5, 1, 6, 8, 2, 90]
     expected = set(values)
     collected = list(MockRDD.from_seq(values).distinct())
     # Equal length rules out surviving duplicates; equal sets rule out
     # missing or extra values.
     self.assertEqual(len(collected), len(expected))
     self.assertEqual(set(collected), expected)
Exemplo n.º 2
0
 def test_reduceByKey(self):
     # Values that share a key are folded together with the given function.
     def add(left, right):
         return left + right

     pairs = [('a', 1), ('b', 7), ('a', 2)]
     reduced = list(MockRDD.from_seq(pairs).reduceByKey(add))
     # Check the length before converting to a dict, since dict() would
     # silently collapse a key that was emitted more than once.
     self.assertEqual(len(reduced), 2)
     by_key = dict(reduced)
     self.assertEqual(by_key, {'a': 3, 'b': 7})
Exemplo n.º 3
0
    def test_combineByKey(self):
        # Both the value-merging and the combiner-merging callbacks must be
        # exercised. Value merging alone could produce the right answer, so
        # each callback records that it ran; otherwise a broken combiner
        # merge would go unnoticed.
        called = {'merge_value': False, 'merge_combiners': False}

        def create_combiner(first):
            # Start a new per-key accumulator from the first value seen.
            return {first}

        def merge_value(acc, item):
            called['merge_value'] = True
            acc.add(item)
            return acc

        def merge_combiners(left, right):
            called['merge_combiners'] = True
            left.update(right)
            return left

        pairs = [('a', 1), ('b', 3), ('a', 5), ('b', 3), ('a', 1)]
        combined = list(
            MockRDD.from_seq(pairs).combineByKey(
                create_combiner, merge_value, merge_combiners))

        # Exactly one output record per distinct key.
        self.assertEqual(len(combined), 2)
        by_key = dict(combined)
        self.assertEqual(by_key, {'a': {1, 5}, 'b': {3}})
        self.assertTrue(called['merge_value'])
        self.assertTrue(called['merge_combiners'])
Exemplo n.º 4
0
    def test_flatMap(self):
        # flatMap should call func on every element and concatenate the
        # yielded items in input order.
        x = [1, 2, 3]

        def func(el):
            # Use the TestCase assertion rather than a bare ``assert``: bare
            # asserts are removed when Python runs with -O, which would
            # silently disable this check. It also matches the style used by
            # test_flatMapValues.
            self.assertIn(el, x)
            for i in range(el):
                yield i

        rdd = MockRDD.from_seq(x).flatMap(func)
        # 1 -> [0], 2 -> [0, 1], 3 -> [0, 1, 2]
        self.assertEqual(list(rdd), [0, 0, 1, 0, 1, 2])
Exemplo n.º 5
0
    def test_flatMapValues(self):
        # Each value is expanded by the callback and every produced item is
        # paired with the original key.
        pairs = [('a', 1), ('b', 3)]

        def expand(value):
            # The callback must receive only the value part of each pair.
            self.assertIn(value, (1, 3))
            for n in range(1, value):
                yield n

        expanded = MockRDD.from_seq(pairs).flatMapValues(expand)
        # range(1, 1) is empty, so key 'a' contributes no output records.
        expected = [('b', 1), ('b', 2)]
        self.assertEqual(list(expanded), expected)
Exemplo n.º 6
0
    def test_groupByKey(self):
        # groupByKey should emit one record per key holding all its values.
        pairs = [('a', 1), ('b', 3), ('a', 5), ('b', 3)]
        grouped = list(MockRDD.from_seq(pairs).groupByKey())

        # Length is checked before dict() so a repeated key cannot be hidden.
        self.assertEqual(len(grouped), 2)
        by_key = dict(grouped)
        self.assertEqual(set(by_key), {'a', 'b'})
        # Values for 'a' are compared without regard to order...
        self.assertEqual(set(by_key['a']), {1, 5})
        # ...while 'b' is compared as a list, so duplicates must be kept.
        self.assertEqual(by_key['b'], [3, 3])
Exemplo n.º 7
0
    def test_aggregate(self):
        # aggregate() must use both the per-element op and the combine op.
        combine_ran = False

        def add_item(acc, item):
            acc.add(item)
            return acc

        def merge_sets(left, right):
            # Record the call so the test fails if combining never happens.
            nonlocal combine_ran
            combine_ran = True
            left.update(right)
            return left

        values = [1, 2, 3, 5, 2, 8, 3]
        empty = set()
        aggregated = MockRDD.from_seq(values).aggregate(
            empty, add_item, merge_sets)

        self.assertEqual(aggregated, set(values))
        self.assertTrue(combine_ran)
Exemplo n.º 8
0
 def test_count(self):
     # count() reports the number of elements, duplicates included.
     items = [1, 2, 1]
     total = MockRDD.from_seq(items).count()
     self.assertEqual(total, 3)
Exemplo n.º 9
0
 def test_sum(self):
     # sum() adds up all elements: 1 + 5 + 2.
     items = [1, 5, 2]
     total = MockRDD.from_seq(items).sum()
     self.assertEqual(total, 8)
Exemplo n.º 10
0
 def test_min(self):
     # min() returns the smallest element of the sequence.
     items = [1, 2, 3, 5, 2, 8, 3]
     smallest = MockRDD.from_seq(items).min()
     self.assertEqual(smallest, 1)
Exemplo n.º 11
0
 def test_fold(self):
     # fold() combines the zero value with every element; the expected
     # result counts the zero value exactly once.
     def add(acc, item):
         return acc + item

     values = [1, 2, 3, 5]
     zero = 7
     folded = MockRDD.from_seq(values).fold(zero, add)
     self.assertEqual(folded, zero + sum(values))
from mockrdd import MockRDD

from invalid_key_value_pairs import job

# One sample log line. It looks like comma-separated server, epoch timestamp,
# IP address and request path -- TODO confirm against the job's parser.
logs = [
    'server0,1539015865,127.0.0.1,/index.html',
]

# Run the job on an in-memory MockRDD built from the sample line.
# NOTE(review): the module name suggests this job deliberately emits invalid
# key/value pairs for MockRDD to reject -- confirm.
results = job(
    MockRDD.from_seq(logs)
)
print(results)
Exemplo n.º 13
0
 def test_reduce(self):
     # reduce() with addition must equal the built-in sum of the input.
     def add(left, right):
         return left + right

     values = [1, 2, 3, 5]
     reduced = MockRDD.from_seq(values).reduce(add)
     self.assertEqual(reduced, sum(values))
Exemplo n.º 14
0
from mockrdd import MockRDD

from invalid_callable import count_distinct_servers

# Two sample log lines for the same server, one second apart. They look like
# comma-separated server, epoch timestamp, IP address and request path --
# TODO confirm against the job's parser.
logs = ['server0,1539015865,127.0.0.1,/index.html',
        'server0,1539015866,127.0.0.1,/index.html']

# Run the job on an in-memory MockRDD built from the sample lines.
# NOTE(review): the module name suggests the job deliberately passes an
# invalid callable for MockRDD to reject -- confirm.
results = count_distinct_servers(
    MockRDD.from_seq(logs)
)
print(results)
Exemplo n.º 15
0
    def test_mapValues(self):
        # mapValues applies the function to each value and leaves keys alone.
        def increment(value):
            return value + 1

        pairs = [('a', 1), ('b', 3)]
        mapped = MockRDD.from_seq(pairs).mapValues(increment)
        expected = [('a', 2), ('b', 4)]
        self.assertEqual(list(mapped), expected)
Exemplo n.º 16
0
 def test_countByValue(self):
     # countByValue() maps each distinct element to its occurrence count.
     items = [1, 3, 1]
     counts = MockRDD.from_seq(items).countByValue()
     self.assertEqual(counts, {1: 2, 3: 1})
Exemplo n.º 17
0
 def test_keyBy(self):
     # keyBy pairs each element with the key computed from it: (key, element).
     def parity(n):
         return n % 2

     values = [1, 2]
     keyed = list(MockRDD.from_seq(values).keyBy(parity))
     self.assertEqual(keyed, [(1, 1), (0, 2)])
Exemplo n.º 18
0
 def test_values(self):
     # values() keeps only the second item of each pair, in input order.
     pairs = [(1, 2), (3, 4)]
     only_values = list(MockRDD.from_seq(pairs).values())
     self.assertEqual(only_values, [2, 4])
Exemplo n.º 19
0
 def test_keys(self):
     # keys() keeps only the first item of each pair, in input order.
     pairs = [(1, 2), (3, 4)]
     only_keys = list(MockRDD.from_seq(pairs).keys())
     self.assertEqual(only_keys, [1, 3])
Exemplo n.º 20
0
 def test_union(self):
     # union() yields the first RDD's elements followed by the second's.
     first = [1, 2]
     second = [7, 8]
     expected = first + second
     left = MockRDD.from_seq(first)
     right = MockRDD.from_seq(second)
     self.assertEqual(list(left.union(right)), expected)
Exemplo n.º 21
0
 def test_countByKey(self):
     # countByKey() counts records per key; the values are ignored.
     pairs = [(1, 'a'), (3, 'a'), (1, 'b')]
     counts = MockRDD.from_seq(pairs).countByKey()
     self.assertEqual(counts, {1: 2, 3: 1})
Exemplo n.º 22
0
from mockrdd import MockRDD

from invalid_flatmap import count_distinct_timestamps

# Two sample log lines whose timestamps differ by one second. They look like
# comma-separated server, epoch timestamp, IP address and request path --
# TODO confirm against the job's parser.
logs = [
    'server0,1539015865,127.0.0.1,/index.html',
    'server0,1539015866,127.0.0.1,/index.html',
]

# Run the job on an in-memory MockRDD built from the sample lines.
# NOTE(review): the module name suggests the job deliberately misuses flatMap
# for MockRDD to reject -- confirm.
results = count_distinct_timestamps(
    MockRDD.from_seq(logs)
)
print(results)