Exemplo n.º 1
0
    def test_sortbykey(self):
        """Check the key order produced by ``sortByKey`` on tuple keys.

        Two inputs are exercised: six two-element keys, for which the
        expected result lists the keys with the second element varying
        slowest, and three one-element keys, expected in ascending order.
        """

        def sortedKeys(records):
            # Distribute the records, wrap them in Data, return sorted keys.
            wrapped = Data(self.sc.parallelize(records))
            return wrapped.sortByKey().keys().collect()

        # Keys (0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2); every value
        # is array([0]).
        pairRecords = [((i, j), array([0]))
                       for i in range(2) for j in range(3)]
        expectedPairs = [(0, 0), (1, 0), (0, 1), (1, 1), (0, 2), (1, 2)]
        assert array_equal(sortedKeys(pairRecords), expectedPairs)

        # Keys (0,), (1,), (2,); every value is array([0]).
        singleRecords = [((i, ), array([0])) for i in range(3)]
        assert array_equal(sortedKeys(singleRecords), [(0, ), (1, ), (2, )])
Exemplo n.º 2
0
 def setUp(self):
     """Create the shared fixture: four float32 records and their Data.

     ``self.dataLocal`` holds the local (key, value) pairs and
     ``self.series`` wraps the same records after parallelizing them.
     """
     super(TestSeriesGetters, self).setUp()
     keys = [(0, 0), (0, 1), (1, 0), (1, 1)]
     rows = [[1.0, 2.0, 3.0],
             [2.0, 2.0, 4.0],
             [4.0, 2.0, 1.0],
             [3.0, 1.0, 1.0]]
     # Pair each key with its row, stored as a float32 array.
     self.dataLocal = [(key, array(row, dtype='float32'))
                       for key, row in zip(keys, rows)]
     rdd = self.sc.parallelize(self.dataLocal)
     self.series = Data(rdd, dtype='float32')
Exemplo n.º 3
0
    def test_range_tuple_key(self):
        """Check ``range`` with tuple keys as start and stop arguments.

        With six records keyed (0, 0) .. (1, 2), ``range((0, 0), (1, 1))``
        is expected to return the four keys listed before (1, 1).
        """
        keys = [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)]
        # The n-th key is paired with the one-element array [n].
        records = [(key, array([n])) for n, key in enumerate(keys)]
        data = Data(self.sc.parallelize(records))

        selected = data.range((0, 0), (1, 1))
        out = selected.collectKeysAsArray()
        expected = [(0, 0), (0, 1), (0, 2), (1, 0)]
        assert array_equal(out, expected)
Exemplo n.º 4
0
    def test_collect(self):
        """Check ``collectKeysAsArray`` and ``collectValuesAsArray``.

        Six records keyed (0, 0) .. (1, 2) carry the one-element arrays
        [0] .. [5]; both collectors are compared against the keys and
        values in input order.
        """
        keys = [(i, j) for i in range(2) for j in range(3)]
        records = [(key, array([n])) for n, key in enumerate(keys)]
        data = Data(self.sc.parallelize(records))

        expectedKeys = [[0, 0], [0, 1], [0, 2], [1, 0], [1, 1], [1, 2]]
        collectedKeys = data.collectKeysAsArray()
        assert array_equal(collectedKeys, expectedKeys)

        expectedValues = [[n] for n in range(6)]
        collectedValues = data.collectValuesAsArray()
        assert array_equal(collectedValues, expectedValues)
Exemplo n.º 5
0
 def setUp(self):
     """Register the casting fixtures as attributes on the test case.

     For every named case three attributes are set: ``<name>`` (the raw
     value), ``<name>RDD`` (an RDD holding the single record
     ``(0, value)``) and ``<name>Data`` (a Data built from that RDD with
     ``nrecords=1`` and ``dtype='float32'``). ``self.allCases`` lists
     the case names in declaration order.
     """
     super(TestCasting, self).setUp()
     # float16 max value is 6.55040e+04 (np.finfo(np.float16))
     # "*Big*" values are too large to cast safely down to float16s
     cases = [
         ('float32Array', array([1.1, 2.2], dtype='float32')),
         ('float32BigArray', array([1.1e+05, 2.2e+05], dtype='float32')),
         ('float32Scalar', float32(1.1)),
         ('float32BigScalar', float32(4.4e+05)),
         ('pythonFloatScalar', 1.1),
         ('pythonFloatBigScalar', 5.5e+05),
     ]
     for name, value in cases:
         rdd = self.sc.parallelize([(0, value)])
         wrapped = Data(rdd, nrecords=1, dtype='float32')
         setattr(self, name, value)
         setattr(self, name + "RDD", rdd)
         setattr(self, name + "Data", wrapped)
     self.allCases = [name for name, _ in cases]
Exemplo n.º 6
0
    def test_range_int_key(self):
        """Check ``range`` with integer keys for several stop values.

        Six records keyed 0 .. 5 are queried with ``range(0, stop)`` for
        stop values 2, 5 and 6; each time the expected keys are
        0 .. stop - 1, so the stop key itself is excluded.
        """
        # Record n is keyed by n and carries the one-element array [n].
        records = [(n, array([n])) for n in range(6)]
        data = Data(self.sc.parallelize(records))

        for stop in (2, 5, 6):
            out = data.range(0, stop).collectKeysAsArray()
            assert array_equal(out, list(range(stop)))
Exemplo n.º 7
0
 def setUp(self):
     """Create two 2x2 int16 arrays and a Data wrapping both of them.

     The arrays are kept as ``self.ary1`` and ``self.ary2``; they are
     stored under keys 0 and 1 in the records behind ``self.images``.
     """
     super(TestImagesGetters, self).setUp()
     self.ary1 = array([[1, 2], [3, 4]], dtype='int16')
     self.ary2 = array([[5, 6], [7, 8]], dtype='int16')
     records = [(0, self.ary1), (1, self.ary2)]
     distributed = self.sc.parallelize(records)
     self.images = Data(distributed, dtype='int16')