示例#1
0
 def test_partition_number(self):
     """Assert ``BlockRDD.partitions`` for several generated RDDs.

     Each scenario checks that ``partitions`` equals the second argument
     passed to ``self.generate``, with and without an explicit ``bsize``.
     """
     # (first generate arg, second generate arg, extra BlockRDD keywords)
     scenarios = (
         (1000, 5, {"bsize": 50}),
         (621, 3, {"bsize": 45}),
         (100, 10, {}),
     )
     for n_items, n_parts, options in scenarios:
         blocked = BlockRDD(self.generate(n_items, n_parts), **options)
         assert_equal(blocked.partitions, n_parts)
示例#2
0
    def test_unblock(self):
        """Check that collecting ``unblock()`` gives ``list(range(1000))``.

        Exercised once with the default block dtype and once with
        ``dtype=tuple``.
        """
        # On Python 3 a ``range`` object never compares equal to a list, so
        # the expected value must be materialised; a bare ``range(1000)``
        # would make both assertions fail regardless of the collected data.
        expected = list(range(1000))

        blocked = BlockRDD(self.generate(1000, 5))
        unblocked = blocked.unblock()
        assert_is_instance(blocked, BlockRDD)
        assert_equal(unblocked.collect(), expected)

        blocked = BlockRDD(self.generate(1000, 5), dtype=tuple)
        unblocked = blocked.unblock()
        assert_is_instance(blocked, BlockRDD)
        assert_equal(unblocked.collect(), expected)
示例#3
0
 def test_blocks_number(self):
     """Assert ``BlockRDD.blocks`` for several inputs and ``bsize`` values."""
     # (positional args for self.generate, bsize, expected ``blocks`` value)
     scenarios = [
         ((1000,), 50, 20),
         ((621,), 45, 20),
         ((100,), 4, 30),
         ((79, 2), 9, 10),
         ((89, 2), 5, 18),
     ]
     for gen_args, block_size, n_blocks in scenarios:
         blocked = BlockRDD(self.generate(*gen_args), bsize=block_size)
         assert_equal(blocked.blocks, n_blocks)
示例#4
0
 def test_length(self):
     """Assert ``len(BlockRDD(...))`` equals the size given to ``generate``."""
     for n_items in (1000, 100, 79, 89, 62):
         blocked = BlockRDD(self.generate(n_items))
         assert_equal(len(blocked), n_items)
示例#5
0
 def test_dtypes(self):
     """Assert the type of the first block for each supported ``dtype``."""
     source = self.generate()
     # (dtype argument given to BlockRDD, type expected of the first block)
     type_pairs = (
         (list, list),
         (tuple, tuple),
         (set, set),
         (np.array, np.ndarray),
     )
     for requested, produced in type_pairs:
         blocked = BlockRDD(source, dtype=requested)
         assert_is_instance(blocked.first(), produced)
示例#6
0
    def test_tolist(self):
        """Assert ``tolist()`` returns ``list(range(1000))`` for each dtype.

        Covers the default dtype, ``tuple`` and ``np.array``.
        """
        wanted = list(range(1000))
        for options in ({}, {"dtype": tuple}, {"dtype": np.array}):
            blocked = BlockRDD(self.generate(1000, 5), **options)
            flattened = blocked.tolist()
            assert_is_instance(blocked, BlockRDD)
            assert_equal(flattened, wanted)
示例#7
0
    def test_creation(self):
        """Assert block contents for default blocking and for ``bsize=4``."""
        source = self.generate()

        # Default block size: first block is (0..9); all blocks together
        # match the rows of a 10x10 arange grid, each row as a tuple.
        default_blocks = BlockRDD(source)
        assert_is_instance(default_blocks, BlockRDD)
        assert_equal(default_blocks.first(), tuple(range(10)))
        grid = np.arange(100).reshape(10, 10)
        assert_equal(default_blocks.collect(), [tuple(row) for row in grid])

        # bsize=4: first block is (0..3); block lengths repeat 4, 4, 2.
        small_blocks = BlockRDD(source, bsize=4)
        assert_is_instance(small_blocks, BlockRDD)
        assert_equal(small_blocks.first(), tuple(range(4)))
        block_lengths = [len(block) for block in small_blocks.collect()]
        assert_equal(block_lengths, [4, 4, 2] * 10)
示例#8
0
    def test_unblock(self):
        """Assert that collecting ``unblock()`` gives ``list(range(1000))``.

        Covers the default block dtype and ``dtype=tuple``.
        """
        wanted = list(range(1000))
        for options in ({}, {"dtype": tuple}):
            blocked = BlockRDD(self.generate(1000, 5), **options)
            flat_rdd = blocked.unblock()
            assert_is_instance(blocked, BlockRDD)
            assert_equal(flat_rdd.collect(), wanted)
示例#9
0
    def test_creation(self):
        """Assert block contents for default blocking and for ``bsize=4``.

        Expected values are built as concrete tuples/lists rather than
        ``range`` objects so the comparisons are meaningful on Python 3.
        """
        rdd = self.generate()

        blocked = BlockRDD(rdd)
        assert_is_instance(blocked, BlockRDD)
        # On Python 3 a ``range`` object never compares equal to a list or
        # tuple, so asserting against ``range(10)`` could never pass.
        # NOTE(review): assumes the default block dtype is tuple -- confirm
        # against BlockRDD.
        assert_equal(blocked.first(), tuple(range(10)))
        expected = [tuple(row) for row in np.arange(100).reshape(10, 10)]
        assert_equal(blocked.collect(), expected)

        blocked = BlockRDD(rdd, bsize=4)
        assert_is_instance(blocked, BlockRDD)
        assert_equal(blocked.first(), tuple(range(4)))
        assert_equal([len(x) for x in blocked.collect()], [4, 4, 2] * 10)
示例#10
0
    def test_creation_from_blocked_rdds(self):
        """Assert ``DictRDD`` built from three blocked RDDs yields the
        expected first record.

        The record is checked for the plain constructor, with explicit
        ``columns``, and with an explicit per-column ``dtype``.
        """
        matrix = np.arange(80).reshape((40, 2))
        vector = np.arange(40)
        plain = list(range(40))
        x_rdd = ArrayRDD(self.sc.parallelize(matrix, 4))
        y_rdd = ArrayRDD(self.sc.parallelize(vector, 4))
        z_rdd = BlockRDD(self.sc.parallelize(plain, 4), dtype=list)

        expected = (
            np.arange(20).reshape(10, 2),
            np.arange(10),
            list(range(10)),
        )

        # Without and with column names: the first record is the same.
        for options in ({}, {"columns": ('x', 'y', 'z')}):
            combined = DictRDD([x_rdd, y_rdd, z_rdd], **options)
            assert_tuple_equal(combined.first(), expected)

        # With explicit dtypes the third column must also be a real list.
        combined = DictRDD([x_rdd, y_rdd, z_rdd], dtype=(None, None, list))
        head = combined.first()
        assert_tuple_equal(head, expected)
        assert_is_instance(head[2], list)
示例#11
0
    def test_tolist(self):
        """Check that ``tolist()`` returns ``list(range(1000))``.

        Exercised with the default block dtype, ``dtype=tuple`` and
        ``dtype=np.array``.
        """
        # On Python 3 a ``range`` object never compares equal to a list, so
        # the expected value must be materialised; a bare ``range(1000)``
        # would make every assertion below fail.
        expected = list(range(1000))

        blocked = BlockRDD(self.generate(1000, 5))
        unblocked = blocked.tolist()
        assert_is_instance(blocked, BlockRDD)
        assert_equal(unblocked, expected)

        blocked = BlockRDD(self.generate(1000, 5), dtype=tuple)
        unblocked = blocked.tolist()
        assert_is_instance(blocked, BlockRDD)
        assert_equal(unblocked, expected)

        blocked = BlockRDD(self.generate(1000, 5), dtype=np.array)
        unblocked = blocked.tolist()
        assert_is_instance(blocked, BlockRDD)
        assert_equal(unblocked, expected)
示例#12
0
    def test_creation(self):
        """Assert block contents for default blocking and for ``bsize=4``."""
        base_rdd = self.generate()

        # --- default block size -------------------------------------------
        blocks = BlockRDD(base_rdd)
        assert_is_instance(blocks, BlockRDD)
        first_expected = tuple(range(10))
        assert_equal(blocks.first(), first_expected)
        all_expected = list(map(tuple, np.arange(100).reshape(10, 10)))
        assert_equal(blocks.collect(), all_expected)

        # --- explicit block size of four ----------------------------------
        blocks = BlockRDD(base_rdd, bsize=4)
        assert_is_instance(blocks, BlockRDD)
        first_expected = tuple(range(4))
        assert_equal(blocks.first(), first_expected)
        sizes = [len(chunk) for chunk in blocks.collect()]
        assert_equal(sizes, [4, 4, 2] * 10)
示例#13
0
 def test_dtypes(self):
     """Assert the first block has the type implied by each ``dtype``."""
     base_rdd = self.generate()
     # Maps the dtype argument to the type expected of ``first()``.
     expectations = [
         (list, list),
         (tuple, tuple),
         (set, set),
         (np.array, np.ndarray),
     ]
     for dtype_arg, block_type in expectations:
         first_block = BlockRDD(base_rdd, dtype=dtype_arg).first()
         assert_is_instance(first_block, block_type)