示例#1
0
    def test_transform_with_dtype(self):
        """Check the dtype reported by a DictRDD after ``transform``.

        Three situations are exercised: no ``dtype`` argument, a single
        ``dtype`` for one column selected by index, and a tuple of dtypes
        for two columns listed in both orders.
        """
        def square(x):
            return x**2

        def square_as_tuple(x):
            return tuple((x**2).tolist())

        def square_and_root_list(a, b):
            return (a**2, (b**0.5).tolist())

        def root_list_and_square(b, a):
            return ((b**0.5).tolist(), a**2)

        # Two RDDs of 100 rows each (4 and 2 values per row), 4 partitions.
        first = self.sc.parallelize(np.arange(400).reshape((100, 4)), 4)
        second = self.sc.parallelize(np.arange(200).reshape((100, 2)), 4)
        X = DictRDD(first.zip(second), bsize=5)

        # No dtype passed: ndarray is expected for both columns.
        result = X.transform(square, column=0)
        assert_equal(result.dtype, (np.ndarray, np.ndarray))

        # Column 0 is turned into tuples and declared as such.
        result = X.transform(square_as_tuple, column=0, dtype=tuple)
        assert_equal(result.dtype, (tuple, np.ndarray))
        assert_true(check_rdd_dtype(result, {0: tuple, 1: np.ndarray}))

        # Column 1 is squared and declared as list.
        result = X.transform(square, column=1, dtype=list)
        assert_equal(result.dtype, (np.ndarray, list))
        assert_true(check_rdd_dtype(result, {0: np.ndarray, 1: list}))

        # Both columns at once, listed in natural order.
        result = X.transform(square_and_root_list,
                             column=[0, 1],
                             dtype=(np.ndarray, list))
        assert_true(check_rdd_dtype(result, {0: np.ndarray, 1: list}))

        # Both columns at once, listed in reverse order; the expected dtype
        # tuple is still ordered by column position.
        result = X.transform(root_list_and_square,
                             column=[1, 0],
                             dtype=(list, np.ndarray))
        assert_equal(result.dtype, (np.ndarray, list))
        assert_true(check_rdd_dtype(result, {0: np.ndarray, 1: list}))
示例#2
0
    def test_transform(self):
        """Compare ``DictRDD.transform`` against the same maths done locally.

        For every case the expected blocks are derived from ``X.collect()``
        and compared with the collected output of ``transform``: all
        columns, a single column by index, and two columns listed in both
        orders.
        """
        first = self.sc.parallelize(np.arange(400).reshape((100, 4)), 4)
        second = self.sc.parallelize(np.arange(200).reshape((100, 2)), 4)
        X = DictRDD(first.zip(second), bsize=5)

        # No column argument: the function receives both columns.
        expected = [(a, b ** 2) for a, b in X.collect()]
        actual = X.transform(lambda a, b: (a, b ** 2)).collect()
        assert_multiple_tuples_equal(expected, actual)

        # Only column 1 is squared.
        expected = [(a, b ** 2) for a, b in X.collect()]
        actual = X.transform(lambda x: x ** 2, column=1).collect()
        assert_multiple_tuples_equal(expected, actual)

        # Only column 0 is squared.
        expected = [(a ** 2, b) for a, b in X.collect()]
        actual = X.transform(lambda x: x ** 2, column=0).collect()
        assert_multiple_tuples_equal(expected, actual)

        # Both columns, listed in natural order.
        expected = [(a ** 2, b ** 0.5) for a, b in X.collect()]
        actual = X.transform(lambda a, b: (a ** 2, b ** 0.5),
                             column=[0, 1]).collect()
        assert_multiple_tuples_equal(expected, actual)

        # Both columns, listed in reverse order; the function's arguments
        # and return values follow the order given in ``column``.
        expected = [(a ** 2, b ** 0.5) for a, b in X.collect()]
        actual = X.transform(lambda b, a: (b ** 0.5, a ** 2),
                             column=[1, 0]).collect()
        assert_multiple_tuples_equal(expected, actual)
示例#3
0
    def test_transform(self):
        """Check ``DictRDD.transform`` for several column selections.

        Every case applies a function through ``transform`` and compares the
        collected result with the same computation applied locally to the
        blocks returned by ``X.collect()``.
        """
        data1 = np.arange(400).reshape((100, 4))
        data2 = np.arange(200).reshape((100, 2))
        zipped = self.sc.parallelize(data1, 4).zip(
            self.sc.parallelize(data2, 4))
        X = DictRDD(zipped, bsize=5)

        def check(local_fn, fn, **transform_kwargs):
            # Expected blocks are computed locally first, then the
            # transformed RDD is built and collected for the comparison.
            expected = [local_fn(block) for block in X.collect()]
            transformed = X.transform(fn, **transform_kwargs)
            assert_multiple_tuples_equal(expected, transformed.collect())

        # No column argument: the function receives both columns.
        check(lambda blk: (blk[0], blk[1]**2),
              lambda a, b: (a, b**2))

        # A single column selected by index.
        check(lambda blk: (blk[0], blk[1]**2),
              lambda x: x**2, column=1)
        check(lambda blk: (blk[0]**2, blk[1]),
              lambda x: x**2, column=0)

        # Two columns, in natural and in reverse order; the expected
        # blocks are identical for both orders.
        check(lambda blk: (blk[0]**2, blk[1]**0.5),
              lambda a, b: (a**2, b**0.5), column=[0, 1])
        check(lambda blk: (blk[0]**2, blk[1]**0.5),
              lambda b, a: (b**0.5, a**2), column=[1, 0])
    def test_transform_with_dtype(self):
        """Check the ``dtype`` of a DictRDD produced by ``transform``.

        Covers a call without ``dtype``, calls declaring the dtype of a
        single column, and calls declaring a dtype per column for two
        columns listed in both orders.
        """
        nd = np.ndarray
        sc = self.sc

        wide = np.arange(400).reshape((100, 4))
        narrow = np.arange(200).reshape((100, 2))
        pairs = sc.parallelize(wide, 4).zip(sc.parallelize(narrow, 4))
        X = DictRDD(pairs, bsize=5)

        # Without a dtype argument both columns are expected to be ndarray.
        out = X.transform(lambda col: col ** 2, column=0)
        assert_equal(out.dtype, (nd, nd))

        # Column 0 becomes tuples and is declared as tuple.
        out = X.transform(lambda col: tuple((col ** 2).tolist()),
                          column=0, dtype=tuple)
        assert_equal(out.dtype, (tuple, nd))
        assert_true(check_rdd_dtype(out, {0: tuple, 1: nd}))

        # Column 1 is squared and declared as list.
        out = X.transform(lambda col: col ** 2, column=1, dtype=list)
        assert_equal(out.dtype, (nd, list))
        assert_true(check_rdd_dtype(out, {0: nd, 1: list}))

        # Two columns in natural order with one dtype per column.
        out = X.transform(lambda a, b: (a ** 2, (b ** 0.5).tolist()),
                          column=[0, 1], dtype=(nd, list))
        assert_true(check_rdd_dtype(out, {0: nd, 1: list}))

        # Two columns in reverse order; the expected dtype tuple stays
        # ordered by column position rather than by the ``column`` list.
        out = X.transform(lambda b, a: ((b ** 0.5).tolist(), a ** 2),
                          column=[1, 0], dtype=(list, nd))
        assert_equal(out.dtype, (nd, list))
        assert_true(check_rdd_dtype(out, {0: nd, 1: list}))