Пример #1
0
 def test_return_none(self):
     """Check that the wrapped `answer` yields an empty RDD for argument 3.

     A single input line is parallelized and handed to `answer` (wrapped
     by `timeout`) together with the value 3; the returned RDD is compared
     with an empty RDD via `assertRDDEqualsWithOrder`.
     """
     lines = ["hello hello world"]
     source_rdd = self.sc.parallelize(lines)
     # presumably 5 is a time limit in seconds -- confirm against `timeout`
     guarded_answer = timeout(answer, 5)
     actual = guarded_answer(source_rdd, 3)
     empty_rdd = self.sc.parallelize([])
     matches = self.assertRDDEqualsWithOrder(empty_rdd, actual)
     self.assertTrue(matches)
Пример #2
0
 def test_same_value(self):
     """Check the (word, count) pairs when both words occur twice.

     The input line holds "hello" and "world" twice each; the RDD returned
     by the `timeout`-wrapped `answer` is compared against the expected
     pairs with `assertRDDEquals`.
     """
     lines = ["hello hello world world"]
     rdd = self.sc.parallelize(lines)
     # presumably 5 is a time limit in seconds -- confirm against `timeout`
     timeout_ = timeout(answer, 5)
     result = timeout_(rdd)
     expected = self.sc.parallelize([('hello', 2), ('world', 2)])
     # Use the unittest assertion instead of a bare `assert ... == True`:
     # bare asserts are stripped under `python -O`, which would silently
     # turn this test into a no-op.
     self.assertTrue(self.assertRDDEquals(expected, result))
Пример #3
0
 def test_select_filter_by_count_distinct(self):
     """Check that calling the wrapped `answer` with 2 yields both words.

     Each word occurs twice in the input line; the expected RDD is
     ["hello", "world"], compared with `assertRDDEqualsWithOrder`.
     """
     text_lines = ["hello hello world world"]
     words_rdd = self.sc.parallelize(text_lines)
     # presumably 5 is a time limit in seconds -- confirm against `timeout`
     run_with_timeout = timeout(answer, 5)
     actual = run_with_timeout(words_rdd, 2)
     wanted = self.sc.parallelize(["hello", "world"])
     is_equal = self.assertRDDEqualsWithOrder(wanted, actual)
     self.assertTrue(is_equal)
 def test_order_by_value(self):
     """Check that the pair with the larger count comes first.

     "world" occurs twice and "hello" once; the expected RDD lists
     ('world', 2) before ('hello', 1) and is compared with
     `assertRDDEqualsWithOrder`.
     """
     text_lines = ["hello world world"]
     words_rdd = self.sc.parallelize(text_lines)
     # presumably 5 is a time limit in seconds -- confirm against `timeout`
     run_with_timeout = timeout(answer, 5)
     actual = run_with_timeout(words_rdd)
     wanted = self.sc.parallelize([('world', 2), ('hello', 1)])
     is_equal = self.assertRDDEqualsWithOrder(wanted, actual)
     self.assertTrue(is_equal)
Пример #5
0
 def test_order_by_key(self):
     """Check the (word, count) pairs for an input where every count is 1.

     The RDD returned by the `timeout`-wrapped `answer` is compared with
     [('world', 1), ('hello', 1)] using `assertRDDEquals`.
     """
     lines = ["hello world"]
     rdd = self.sc.parallelize(lines)
     # presumably 5 is a time limit in seconds -- confirm against `timeout`
     timeout_ = timeout(answer, 5)
     result = timeout_(rdd)
     expected = self.sc.parallelize([('world', 1), ('hello', 1)])
     # NOTE(review): the test name mentions ordering, but the comparison
     # uses assertRDDEquals rather than assertRDDEqualsWithOrder -- confirm
     # that element order is intentionally not checked here.
     # Use the unittest assertion instead of a bare `assert ... == True`:
     # bare asserts are stripped under `python -O`.
     self.assertTrue(self.assertRDDEquals(expected, result))
Пример #6
0
    def test_select_filter_number(self):
        """Check that calling the wrapped `answer` with 5 yields 'banana'.

        The input is a list of (name, number) pairs in which only
        'banana' carries the value 5; the result is compared with
        `assertRDDEquals`.
        """
        pairs = [('apple', 1), ('banana', 5), ('mac', 2), ('ipad', 3)]
        pairs_rdd = self.sc.parallelize(pairs)
        # presumably 5 is a time limit in seconds -- confirm against `timeout`
        run_with_timeout = timeout(answer, 5)
        actual = run_with_timeout(pairs_rdd, 5)
        wanted = self.sc.parallelize(['banana'])
        is_equal = self.assertRDDEquals(wanted, actual)
        self.assertTrue(is_equal)
Пример #7
0
    def test_select_without_dup(self):
        """Check that calling the wrapped `answer` with 'fruit' yields the fruits.

        The input is a list of (name, category) pairs; the expected RDD
        holds the two names paired with 'fruit' and is compared with
        `assertRDDEquals`.
        """
        pairs = [
            ('apple', 'fruit'),
            ('banana', 'fruit'),
            ('mac', '3c'),
            ('ipad', '3c'),
        ]
        pairs_rdd = self.sc.parallelize(pairs)
        # presumably 5 is a time limit in seconds -- confirm against `timeout`
        run_with_timeout = timeout(answer, 5)
        actual = run_with_timeout(pairs_rdd, 'fruit')
        wanted = self.sc.parallelize(['apple', 'banana'])
        is_equal = self.assertRDDEquals(wanted, actual)
        self.assertTrue(is_equal)
Пример #8
0
 def test_select_filter_by_string_3(self):
     """Check that the keyword 'XYZ' selects the single matching record.

     Records have the shape (name, (code, number)); only the 'Some2'
     record carries 'XYZ'. The result of the `timeout`-wrapped `answer`
     is compared with `assertRDDEqualsWithOrder`.
     """
     records = [
         (u'Some1', (u'ABC', 9989)),
         (u'Some2', (u'XYZ', 235)),
         (u'Some3', (u'BBB', 5379)),
         (u'Some4', (u'ABC', 5379)),
     ]
     search_term = 'XYZ'
     records_rdd = self.sc.parallelize(records)
     # presumably 5 is a time limit in seconds -- confirm against `timeout`
     run_with_timeout = timeout(answer, 5)
     actual = run_with_timeout(records_rdd, search_term)
     wanted = self.sc.parallelize([(u'Some2', (u'XYZ', 235))])
     is_equal = self.assertRDDEqualsWithOrder(wanted, actual)
     self.assertTrue(is_equal)
Пример #9
0
 def test_select_filter_by_string_2(self):
     """Check that a keyword matching no record makes the call return None.

     Records have the shape (name, (code, number)); none of them carries
     the keyword 'QQ', and the value returned by the `timeout`-wrapped
     `answer` is expected to be None.
     """
     records = [(u'Some1', (u'ABC', 9989)),
                (u'Some2', (u'XYZ', 235)),
                (u'Some3', (u'BBB', 5379)),
                (u'Some4', (u'ABC', 5379))]
     keyword = 'QQ'
     rdd = self.sc.parallelize(records)
     # presumably 5 is a time limit in seconds -- confirm against `timeout`
     timeout_ = timeout(answer, 5)
     result = timeout_(rdd, keyword)
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertIsNone states the expectation directly.
     self.assertIsNone(result)
Пример #10
0
    def test_count_by_category(self):
        """Check the (category, name, count) triples produced by `answer`.

        The input is a list of (name, category) pairs with repeats; the
        expected RDD lists one triple per distinct pair with its number
        of occurrences, and is compared with `assertRDDEqualsWithOrder`.
        """
        pairs = [
            ('apple', 'fruit'),
            ('apple', 'fruit'),
            ('banana', 'fruit'),
            ('mac', '3c'),
            ('ipad', '3c'),
            ('ipad', '3c'),
            ('ipad', '3c'),
        ]
        pairs_rdd = self.sc.parallelize(pairs)
        # presumably 5 is a time limit in seconds -- confirm against `timeout`
        run_with_timeout = timeout(answer, 5)
        actual = run_with_timeout(pairs_rdd)
        wanted_rows = [
            ('3c', 'ipad', 3),
            ('3c', 'mac', 1),
            ('fruit', 'apple', 2),
            ('fruit', 'banana', 1),
        ]
        wanted = self.sc.parallelize(wanted_rows)
        is_equal = self.assertRDDEqualsWithOrder(wanted, actual)
        self.assertTrue(is_equal)
Пример #11
0
    def test_basic_join(self):
        """Check the result of passing two RDDs to the wrapped `answer`.

        The first RDD holds (category, name) pairs and the second
        (name, number) pairs; the expected RDD holds
        (name, (category, number)) entries for names present in both,
        compared with `assertRDDEquals`.
        """
        category_pairs = [
            ('fruit', 'apple'),
            ('fruit', 'apple'),
            ('fruit', 'banana'),
            ('3c', 'mac'),
        ]
        number_pairs = [('apple', 5), ('banana', 3), ('kiwi', 10)]

        category_rdd = self.sc.parallelize(category_pairs)
        number_rdd = self.sc.parallelize(number_pairs)
        # presumably 5 is a time limit in seconds -- confirm against `timeout`
        run_with_timeout = timeout(answer, 5)
        actual = run_with_timeout(category_rdd, number_rdd)
        wanted_rows = [
            ('apple', ('fruit', 5)),
            ('apple', ('fruit', 5)),
            ('banana', ('fruit', 3)),
        ]
        wanted = self.sc.parallelize(wanted_rows)
        is_equal = self.assertRDDEquals(wanted, actual)
        self.assertTrue(is_equal)