Пример #1
0
    def testPercentileExecution(self):
        """Check ``percentile`` against ``np.percentile``.

        ``q`` is supplied first as a NumPy array and then as a tensor; the
        tensor variant is executed inside a ``LocalContext``.
        """
        data = np.random.rand(20, 10)
        quantiles = np.random.RandomState(0).randint(100, size=11)
        chunked = tensor(data, chunk_size=7)

        # q given as a plain NumPy array
        pct = percentile(chunked, quantiles)
        actual = self.executor.execute_tensor(pct, concat=True)[0]
        expected = np.percentile(data, quantiles)
        np.testing.assert_array_equal(actual, expected)

        # q given as a tensor
        quantile_tensor = tensor(quantiles)

        test_case = self

        class MockSession:
            def __init__(self):
                self.executor = test_case.executor

        local_ctx = LocalContext(MockSession())
        ctx_executor = ExecutorForTest('numpy', storage=local_ctx)
        with local_ctx:
            pct = percentile(chunked, quantile_tensor)
            actual = ctx_executor.execute_tensors([pct])[0]

            np.testing.assert_array_equal(actual, expected)
Пример #2
0
        def execute_size(t):
            """Mock-execute ``t`` while recording tensordot size estimates.

            The recording dicts (``chunk_sizes``, ``chunk_nbytes``,
            ``chunk_input_sizes`` and ``chunk_input_nbytes``) come from the
            enclosing scope and are emptied before the run.
            ``register_default(TensorTensorDot)`` is always called afterwards,
            even when execution raises.
            """
            def _tensordot_size_recorder(ctx, op):
                # Delegate to the real estimator, then snapshot what it stored.
                TensorTensorDot.estimate_size(ctx, op)

                out_key = op.outputs[0].key
                chunk_sizes[out_key] = ctx[out_key]
                chunk_nbytes[out_key] = op.outputs[0].nbytes

                # Keyed by the producing op's key, so inputs sharing an op key
                # contribute a single entry to each sum.
                estimated_by_op = {inp.op.key: ctx[inp.key][0]
                                   for inp in op.inputs}
                chunk_input_sizes[out_key] = sum(estimated_by_op.values())
                nbytes_by_op = {inp.op.key: inp.nbytes for inp in op.inputs}
                chunk_input_nbytes[out_key] = sum(nbytes_by_op.values())

            size_executor = ExecutorForTest(
                sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)
            try:
                for recorder in (chunk_sizes, chunk_nbytes,
                                 chunk_input_sizes, chunk_input_nbytes):
                    recorder.clear()
                register(TensorTensorDot,
                         size_estimator=_tensordot_size_recorder)
                size_executor.execute_tensor(t, mock=True)
            finally:
                register_default(TensorTensorDot)
Пример #3
0
class Test(TestBase):
    """Execution tests for ``imread``."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testImreadExecution(self):
        """Write random PNG files and read them back through ``imread``."""
        with tempfile.TemporaryDirectory() as tempdir:
            images = []
            for idx in range(10):
                pixels = np.random.randint(
                    0, 256, 2500, dtype=np.uint8).reshape((50, 50))
                images.append(pixels)
                Image.fromarray(pixels).save(
                    os.path.join(tempdir, f'random_{idx}.png'))

            # Single image
            single = imread(os.path.join(tempdir, 'random_0.png'))
            loaded = self.executor.execute_tensor(single, concat=True)[0]
            np.testing.assert_array_equal(loaded, images[0])

            # Glob pattern: default chunking, then chunk_size=4 twice.
            # Both sides are sorted along axis 0 before comparing.
            pattern = os.path.join(tempdir, 'random_*.png')
            for extra in ({}, {'chunk_size': 4}, {'chunk_size': 4}):
                stacked = imread(pattern, **extra)
                loaded = self.executor.execute_tensor(stacked, concat=True)[0]
                np.testing.assert_array_equal(np.sort(loaded, axis=0),
                                              np.sort(images, axis=0))
Пример #4
0
class TestIndexReduction(TestBase):
    """Reductions on ``md.Index`` compared with the same call on ``pd.Index``."""

    def setUp(self):
        self.executor = ExecutorForTest()

    def testIndexReduction(self):
        """Run min/max/all/any on two indexes, unchunked and with chunk_size=10."""
        rs = np.random.RandomState(0)
        first = pd.Index(rs.randint(0, 5, (100, )))
        second = pd.Index(rs.randint(1, 6, (100, )))

        for method in ['min', 'max', 'all', 'any']:
            for pd_index in (first, second):
                for extra in ({}, {'chunk_size': 10}):
                    mars_index = md.Index(pd_index, **extra)
                    reduced = getattr(mars_index, method)()
                    actual = self.executor.execute_dataframe(
                        reduced, concat=True)[0]
                    self.assertEqual(actual, getattr(pd_index, method)())
Пример #5
0
class Test(unittest.TestCase):
    """Execution tests for ``pairwise_distances``."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testPairwiseDistancesExecution(self):
        """Compare ``pairwise_distances`` with scikit-learn for several metrics."""
        raw_x = np.random.rand(20, 5)
        raw_y = np.random.rand(21, 5)

        x = mt.tensor(raw_x, chunk_size=11)
        y = mt.tensor(raw_y, chunk_size=12)

        def run(t):
            # Execute a tensor and return the concatenated result.
            return self.executor.execute_tensor(t, concat=True)[0]

        default_dist = pairwise_distances(x, y)
        np.testing.assert_almost_equal(
            run(default_dist), sk_pairwise_distances(raw_x, raw_y))

        # test precomputed
        negative = default_dist.copy()
        negative[0, 0] = -1
        precomputed = pairwise_distances(negative, y, metric='precomputed')
        with self.assertRaises(ValueError):
            _ = run(precomputed)

        # test cdist
        weight = np.random.rand(5)
        minkowski_kwargs = dict(metric='wminkowski', p=3, w=weight)
        weighted = pairwise_distances(x, y, **minkowski_kwargs)
        np.testing.assert_almost_equal(
            run(weighted),
            sk_pairwise_distances(raw_x, raw_y, **minkowski_kwargs))

        # test pdist
        hamming = pairwise_distances(x, metric='hamming')
        np.testing.assert_almost_equal(
            run(hamming), sk_pairwise_distances(raw_x, metric='hamming'))

        # test function metric
        m = lambda u, v: np.sqrt(((u - v)**2).sum())
        custom = pairwise_distances(x, y, metric=m)
        np.testing.assert_almost_equal(
            run(custom), sk_pairwise_distances(raw_x, raw_y, metric=m))

        assert_warns(DataConversionWarning, pairwise_distances, x, y,
                     metric='jaccard')

        with self.assertRaises(ValueError):
            _ = pairwise_distances(x, y, metric='unknown')
Пример #6
0
    def testRandintExecution(self):
        """Check the mock size estimate and the value range of ``randint``."""
        mock_executor = ExecutorForTest(
            sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

        arr = tensor.random.randint(0, 2, size=(10, 30), chunk_size=3)

        # The first entries of the mock results must add up to arr.nbytes.
        estimates = mock_executor.execute_tensor(arr, mock=True)
        estimated_total = sum(entry[0] for entry in estimates)
        self.assertEqual(arr.nbytes, estimated_total)

        values = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertEqual(values.shape, (10, 30))
        self.assertTrue(np.all(values >= 0))
        self.assertTrue(np.all(values < 2))
Пример #7
0
    def setUp(self) -> None:
        """Create a ``LocalContext``-backed test executor and enter the context."""
        test_case = self

        class MockSession:
            # Resolved lazily, so it returns whatever executor the test case
            # holds at access time.
            @property
            def executor(self):
                return test_case.executor

        local_ctx = LocalContext(MockSession())
        self.ctx = local_ctx
        self.executor = ExecutorForTest('numpy', storage=local_ctx)
        # NOTE(review): the matching __exit__ is not visible in this snippet;
        # presumably it lives in tearDown - confirm.
        local_ctx.__enter__()
Пример #8
0
    def testSparseRandintExecution(self):
        """Check size estimate, sparsity and value range of sparse ``randint``."""
        mock_executor = ExecutorForTest(
            sync_provider_type=ExecutorForTest.SyncProviderType.MOCK)

        arr = tensor.random.randint(
            1, 2, size=(30, 50), density=.1, chunk_size=10, dtype='f4')

        # The mock estimate is expected to be the dense size times the density.
        estimates = mock_executor.execute_tensor(arr, mock=True)
        estimated_total = sum(entry[0] for entry in estimates)
        self.assertAlmostEqual(arr.nbytes * 0.1, estimated_total)

        values = self.executor.execute_tensor(arr, concat=True)[0]
        self.assertTrue(issparse(values))
        self.assertEqual(values.shape, (30, 50))
        self.assertTrue(np.all(values.data >= 1))
        self.assertTrue(np.all(values.data < 2))
        filled = (values >= 1).toarray().sum()
        self.assertAlmostEqual(filled, 30 * 50 * .1, delta=20)
Пример #9
0
class Test(unittest.TestCase):
    """Execution tests for ``gammaln`` and ``erf``."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def _check_against_scipy(self, mars_func, scipy_func):
        """Run ``mars_func`` on dense and sparse input and compare with ``scipy_func``."""
        dense = np.random.rand(10, 8, 6)
        applied = mars_func(tensor(dense, chunk_size=3))

        actual = self.executor.execute_tensor(applied, concat=True)[0]
        np.testing.assert_array_equal(actual, scipy_func(dense))

        # test sparse
        sparse = sps.csr_matrix(np.array([0, 1.0, 1.01, np.nan]))
        applied = mars_func(tensor(sparse, chunk_size=3))

        actual = self.executor.execute_tensor(applied, concat=True)[0]

        # The reference applies the SciPy function to the stored values only
        # and keeps the original sparsity structure.
        reference = sps.csr_matrix(
            (scipy_func(sparse.data), sparse.indices, sparse.indptr),
            sparse.shape)

        np.testing.assert_array_equal(actual.toarray(), reference.toarray())

    def testGammalnExecution(self):
        self._check_against_scipy(gammaln, scipy_gammaln)

    def testErfExecution(self):
        self._check_against_scipy(erf, scipy_erf)
Пример #10
0
    def setUp(self):
        """Call ``register_mars_backend`` and install a test executor on a new default session."""
        register_mars_backend()

        self.session = new_session().as_default()
        # Remember the session's own executor before replacing it.
        self._old_executor = self.session._sess._executor
        test_executor = ExecutorForTest(
            'numpy', storage=self.session._sess._context)
        self.executor = test_executor
        self.session._sess._executor = test_executor
Пример #11
0
    def testInputTileable(self):
        """Spawn functions that take a tensor or a DataFrame as an argument."""
        def scaled_total(t, x):
            return (t * x).sum().to_numpy()

        rs = np.random.RandomState(0)
        raw = rs.rand(5, 4)

        column_sums = mt.tensor(raw, chunk_size=3).sum(axis=0)
        spawned = spawn(scaled_total, args=(column_sums, 3))

        sess = new_session()
        sess._sess._executor = ExecutorForTest('numpy', storage=sess._context)

        actual = spawned.execute(session=sess).fetch(session=sess)
        self.assertAlmostEqual(actual, (raw.sum(axis=0) * 3).sum())

        # DataFrame inputs: one executed directly, one produced by shuffle.
        plain_df = md.DataFrame(raw, chunk_size=3)
        plain_df.execute(session=sess)
        shuffled_df = shuffle(plain_df)
        shuffled_df.execute(session=sess)

        def sum_with_bonus(input_df):
            bonus = input_df.iloc[:, 0].fetch().sum()
            return input_df.sum().to_pandas() + bonus

        for df in [plain_df, shuffled_df]:
            spawned = spawn(sum_with_bonus, args=(df, ))

            actual = spawned.execute(session=sess).fetch(session=sess)
            reference = pd.DataFrame(raw).sum() + raw[:, 0].sum()
            pd.testing.assert_series_equal(actual, reference)
Пример #12
0
class Test(unittest.TestCase):
    """Tests for ``haversine_distances``."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testHaversineDistancesOp(self):
        """Invalid inputs must be rejected when the operand is built."""
        # shape[1] != 2
        with self.assertRaises(ValueError):
            haversine_distances(mt.random.rand(10, 3))

        # shape[1] != 2
        with self.assertRaises(ValueError):
            haversine_distances(mt.random.rand(10, 2), mt.random.rand(11, 3))

        # cannot support sparse
        with self.assertRaises(TypeError):
            haversine_distances(
                mt.random.randint(10, size=(10, 2), density=0.5))

    def testHaversineDistancesExecution(self):
        """Compare with scikit-learn for both values of ``op._use_sklearn``."""
        raw_x = np.random.rand(30, 2)
        raw_y = np.random.rand(21, 2)

        # one chunk
        single_x = mt.tensor(raw_x, chunk_size=30)
        single_y = mt.tensor(raw_y, chunk_size=30)

        # multiple chunks
        multi_x = mt.tensor(raw_x, chunk_size=(11, 1))
        multi_y = mt.tensor(raw_y, chunk_size=(17, 1))

        for x, y in [(single_x, single_y), (multi_x, multi_y)]:
            for use_sklearn in [True, False]:
                # (mars arguments, sklearn arguments); the second case
                # tests x is y
                cases = [((x, y), (raw_x, raw_y)), ((x, ), (raw_x, raw_x))]
                for mars_args, sk_args in cases:
                    distance = haversine_distances(*mars_args)
                    distance.op._use_sklearn = use_sklearn

                    actual = self.executor.execute_tensor(
                        distance, concat=True)[0]
                    reference = sk_haversine_distances(*sk_args)
                    np.testing.assert_array_equal(actual, reference)
Пример #13
0
    def setUp(self):
        """Load the iris data as a tensor and install a test executor on a new default session."""
        self.iris = mt.tensor(datasets.load_iris().data)
        # solver_list not includes arpack
        self.solver_list = ['full', 'randomized', 'auto']

        self.session = new_session().as_default()
        # Remember the session's own executor before replacing it.
        self._old_executor = self.session._sess._executor
        test_executor = ExecutorForTest(
            'numpy', storage=self.session._sess._context)
        self.executor = test_executor
        self.session._sess._executor = test_executor
Пример #14
0
class TestCustomAggregate(TestBase):
    """``agg`` with the custom reductions ``MockReduction1`` / ``MockReduction2``."""

    def setUp(self):
        self.executor = ExecutorForTest()

    def testDataFrameAggregate(self):
        """DataFrame ``agg`` must match pandas for unchunked and chunked input."""
        data = pd.DataFrame(np.random.rand(30, 20))

        def check(mars_df, reduction_cls):
            aggregated = mars_df.agg(reduction_cls())
            pd.testing.assert_series_equal(
                self.executor.execute_dataframe(aggregated, concat=True)[0],
                data.agg(reduction_cls()))

        unchunked = md.DataFrame(data)
        check(unchunked, MockReduction1)
        check(unchunked, MockReduction2)

        # NOTE(review): the chunked input runs MockReduction2 twice and never
        # MockReduction1 - possibly unintended; confirm.
        chunked = md.DataFrame(data, chunk_size=5)
        check(chunked, MockReduction2)
        check(chunked, MockReduction2)

    def testSeriesAggregate(self):
        """Series ``agg`` must match pandas; chunked results are compared approximately."""
        data = pd.Series(np.random.rand(20))

        def check(mars_series, reduction_cls, assert_fn):
            aggregated = mars_series.agg(reduction_cls())
            assert_fn(
                self.executor.execute_dataframe(aggregated, concat=True)[0],
                data.agg(reduction_cls()))

        unchunked = md.Series(data)
        check(unchunked, MockReduction1, self.assertEqual)
        check(unchunked, MockReduction2, self.assertEqual)

        # NOTE(review): the chunked input runs MockReduction2 twice and never
        # MockReduction1 - possibly unintended; confirm.
        chunked = md.Series(data, chunk_size=5)
        check(chunked, MockReduction2, self.assertAlmostEqual)
        check(chunked, MockReduction2, self.assertAlmostEqual)
Пример #15
0
class Test(unittest.TestCase):
    """Execution tests for ``entropy``."""

    def setUp(self):
        self.executor = ExecutorForTest('numpy')

    def testEntropyExecution(self):
        """Compare ``entropy`` with SciPy's implementation (``sp_entropy``)."""
        rs = np.random.RandomState(0)
        a = rs.rand(10)
        t1 = tensor(a, chunk_size=4)

        def check(mars_result, reference):
            actual = self.executor.execute_tensor(
                mars_result, concat=True)[0]
            np.testing.assert_array_almost_equal(actual, reference)

        # pk only
        check(entropy(t1), sp_entropy(a))

        # pk, qk and a scalar base, for two independent draws of qk
        for _ in range(2):
            b = rs.rand(10)
            base = 3.1

            t2 = tensor(b, chunk_size=4)
            check(entropy(t1, t2, base), sp_entropy(a, b, base))

        # base given as a tensor; reuses the last qk drawn above
        check(entropy(t1, t2, t1.sum()), sp_entropy(a, b, a.sum()))

        # pk and qk of different lengths
        with self.assertRaises(ValueError):
            entropy(t1, t2[:7])
Пример #16
0
    def testSeriesQuantileExecution(self):
        """Compare ``Series.quantile`` with pandas for scalar, list and tensor ``q``."""
        raw = pd.Series(np.random.rand(10), name='a')
        series = Series(raw, chunk_size=3)

        def run(obj):
            return self.executor.execute_dataframe(obj, concat=True)[0]

        # q = 0.5, scalar
        actual = run(series.quantile())
        self.assertEqual(actual, raw.quantile())

        # q is a list
        actual = run(series.quantile([0.3, 0.7]))
        pd.testing.assert_series_equal(actual, raw.quantile([0.3, 0.7]))

        # test interpolation
        actual = run(series.quantile([0.3, 0.7], interpolation='midpoint'))
        pd.testing.assert_series_equal(
            actual, raw.quantile([0.3, 0.7], interpolation='midpoint'))

        test_case = self

        class MockSession:
            def __init__(self):
                self.executor = test_case.executor

        local_ctx = LocalContext(MockSession())
        ctx_executor = ExecutorForTest('numpy', storage=local_ctx)
        with local_ctx:
            # q is a tensor
            q = tensor([0.3, 0.7])

            quantiles = series.quantile(q)
            actual = ctx_executor.execute_dataframes([quantiles])[0]
            reference = raw.quantile([0.3, 0.7])

            pd.testing.assert_series_equal(actual, reference)
Пример #17
0
class Test(TestBase):
    """Execution tests for ``DataFrame.to_csv``."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testToCSVExecution(self):
        """Write a chunked DataFrame to one file and to per-chunk files, then read back."""
        raw = pd.DataFrame(
            {
                'col1': np.random.rand(100),
                'col2': np.random.choice(['a', 'b', 'c'], (100, )),
                'col3': np.arange(100)
            },
            index=pd.RangeIndex(100, 0, -1, name='index'))
        df = DataFrame(raw, chunk_size=33)

        with tempfile.TemporaryDirectory() as base_path:
            # test one file
            single_path = os.path.join(base_path, 'out.csv')
            self.executor.execute_dataframe(df.to_csv(single_path))

            loaded = pd.read_csv(single_path, dtype=raw.dtypes.to_dict())
            loaded.set_index('index', inplace=True)
            pd.testing.assert_frame_equal(loaded, raw)

            # test multi files
            pattern = os.path.join(base_path, 'out-*.csv')
            self.executor.execute_dataframe(df.to_csv(pattern))

            # 100 rows with chunk_size=33 are expected in files 0..3
            parts = []
            for i in range(4):
                part_path = os.path.join(base_path, 'out-{}.csv'.format(i))
                parts.append(
                    pd.read_csv(part_path, dtype=raw.dtypes.to_dict()))
            loaded = pd.concat(parts, axis=0)
            loaded.set_index('index', inplace=True)
            pd.testing.assert_frame_equal(loaded, raw)
            # the second file must hold exactly the second chunk of rows
            pd.testing.assert_frame_equal(parts[1].set_index('index'),
                                          raw.iloc[33:66])
Пример #18
0
    def setUp(self):
        """Build random ``X``/``y`` tensors and install a test executor on a new default session."""
        n_rows, n_columns, chunk_size = 1000, 10, 20
        rs = mt.random.RandomState(0)
        self.X = rs.rand(n_rows, n_columns, chunk_size=chunk_size)
        self.y = rs.rand(n_rows, chunk_size=chunk_size)

        self.session = new_session().as_default()
        # Remember the session's own executor before replacing it.
        self._old_executor = self.session._sess._executor
        test_executor = ExecutorForTest(
            'numpy', storage=self.session._sess._context)
        self.executor = test_executor
        self.session._sess._executor = test_executor
Пример #19
0
class TestUnary(TestBase):
    """Execution tests for unary DataFrame operations."""

    def setUp(self):
        super().setUp()
        self.executor = ExecutorForTest()

    def testAbs(self):
        """``abs`` on a chunked DataFrame must equal ``DataFrame.abs`` in pandas."""
        raw = pd.DataFrame(np.random.uniform(low=-1, high=1, size=(10, 10)))
        chunked = from_pandas(raw, chunk_size=5)

        actual = self.executor.execute_dataframe(abs(chunked), concat=True)[0]
        reference = raw.abs()
        pd.testing.assert_frame_equal(reference, actual)
Пример #20
0
    def testHistogramBinEdgesExecution(self):
        """Compare ``histogram_bin_edges`` with NumPy for ranges, bin specs and inputs."""
        rs = np.random.RandomState(0)

        samples = rs.randint(10, size=(20,))
        chunked = tensor(samples, chunk_size=3)

        # range provided
        for range_ in [(0, 10), (3, 11), (3, 7)]:
            edges = histogram_bin_edges(chunked, range=range_)
            actual = self.executor.execute_tensor(edges)[0]
            reference = np.histogram_bin_edges(samples, range=range_)
            np.testing.assert_array_equal(actual, reference)

        test_case = self

        class MockSession:
            def __init__(self):
                self.executor = test_case.executor

        local_ctx = LocalContext(MockSession())
        ctx_executor = ExecutorForTest('numpy', storage=local_ctx)
        with local_ctx:
            one_sample = rs.randint(10, size=(1,))
            no_sample = rs.randint(10, size=(0,))
            inputs = [
                (chunked, samples),
                (tensor(one_sample), one_sample),
                (tensor(no_sample), no_sample),
                (sort(chunked), samples),
            ]
            estimated_bins = [10, 'stone', 'auto', 'doane', 'fd',
                              'rice', 'scott', 'sqrt', 'sturges']
            for t, r in inputs:
                for bins in estimated_bins:
                    edges = histogram_bin_edges(t, bins=bins)

                    # For non-empty input, execute_tensor is expected to
                    # raise TilesError, while execute_tensors succeeds.
                    if r.size > 0:
                        with self.assertRaises(TilesError):
                            ctx_executor.execute_tensor(edges)

                    actual = ctx_executor.execute_tensors([edges])[0]
                    reference = np.histogram_bin_edges(r, bins=bins)
                    np.testing.assert_array_equal(actual, reference)

                # explicit edges, given as a list and as a chunked tensor
                explicit_bins = [[0, 4, 8], tensor([0, 4, 8], chunk_size=2)]
                for bins in explicit_bins:
                    edges = histogram_bin_edges(t, bins=bins)
                    actual = ctx_executor.execute_tensors([edges])[0]
                    reference = np.histogram_bin_edges(r, bins=[0, 4, 8])
                    np.testing.assert_array_equal(actual, reference)

            # default arguments
            ordered = np.arange(5)
            edges = histogram_bin_edges(tensor(ordered, chunk_size=3))
            actual = ctx_executor.execute_tensors([edges])[0]
            reference = np.histogram_bin_edges(ordered)
            self.assertEqual(edges.shape, reference.shape)
            np.testing.assert_array_equal(actual, reference)
Пример #21
0
    def testHistogramExecution(self):
        """Compare the counts returned by ``histogram`` with ``np.histogram``."""
        rs = np.random.RandomState(0)

        samples = rs.randint(10, size=(20,))
        chunked = tensor(samples, chunk_size=3)
        raw_weights = rs.random(20)
        weights = tensor(raw_weights, chunk_size=4)

        # range provided
        for range_ in [(0, 10), (3, 11), (3, 7)]:
            hist = histogram(chunked, range=range_)[0]
            actual = self.executor.execute_tensor(hist)[0]
            reference = np.histogram(samples, range=range_)[0]
            np.testing.assert_array_equal(actual, reference)

        # weights given as a NumPy array and as a tensor
        for wt in (raw_weights, weights):
            for density in (True, False):
                bins = [1, 4, 6, 9]
                hist = histogram(
                    chunked, bins=bins, weights=wt, density=density)[0]
                actual = self.executor.execute_tensor(hist)[0]
                reference = np.histogram(
                    samples, bins=bins, weights=raw_weights,
                    density=density)[0]
                np.testing.assert_almost_equal(actual, reference)

        test_case = self

        class MockSession:
            def __init__(self):
                self.executor = test_case.executor

        local_ctx = LocalContext(MockSession())
        ctx_executor = ExecutorForTest('numpy', storage=local_ctx)
        with local_ctx:
            one_sample = rs.randint(10, size=(1,))
            no_sample = rs.randint(10, size=(0,))
            inputs = [
                (chunked, samples),
                (tensor(one_sample), one_sample),
                (tensor(no_sample), no_sample),
                (sort(chunked), samples),
            ]
            estimated_bins = [10, 'stone', 'auto', 'doane', 'fd',
                              'rice', 'scott', 'sqrt', 'sturges']
            for t, r in inputs:
                for density in (True, False):
                    for bins in estimated_bins:
                        hist = histogram(t, bins=bins, density=density)[0]

                        # For non-empty input, execute_tensor is expected to
                        # raise TilesError, while execute_tensors succeeds.
                        if r.size > 0:
                            with self.assertRaises(TilesError):
                                ctx_executor.execute_tensor(hist)

                        actual = ctx_executor.execute_tensors([hist])[0]
                        reference = np.histogram(
                            r, bins=bins, density=density)[0]
                        np.testing.assert_array_equal(actual, reference)

                    # explicit edges, given as a list and as a chunked tensor
                    explicit_bins = [[0, 4, 8],
                                     tensor([0, 4, 8], chunk_size=2)]
                    for bins in explicit_bins:
                        hist = histogram(t, bins=bins, density=density)[0]
                        actual = ctx_executor.execute_tensors([hist])[0]
                        reference = np.histogram(
                            r, bins=[0, 4, 8], density=density)[0]
                        np.testing.assert_array_equal(actual, reference)
Пример #22
0
class Test(unittest.TestCase):
    """Execution tests for ``cosine_distances``."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testCosineDistancesExecution(self):
        """Compare with scikit-learn for dense and sparse input at two chunk sizes."""
        dense_pair = (np.random.rand(25, 10), np.random.rand(17, 10))
        sparse_pair = (
            sps.random(25, 10, density=0.5, format='csr', random_state=0),
            sps.random(17, 10, density=0.4, format='csr', random_state=1),
        )

        def check(mars_result, reference):
            actual = self.executor.execute_tensor(
                mars_result, concat=True)[0]
            np.testing.assert_almost_equal(np.asarray(actual), reference)

        for raw_x, raw_y in [dense_pair, sparse_pair]:
            for chunk_size in (25, 6):
                x = mt.tensor(raw_x, chunk_size=chunk_size)
                y = mt.tensor(raw_y, chunk_size=chunk_size)

                # two operands
                check(cosine_distances(x, y),
                      sk_cosine_distances(raw_x, raw_y))

                # single operand
                check(cosine_distances(x), sk_cosine_distances(raw_x))
Пример #23
0
class Test(unittest.TestCase):
    """Tests for ``NDArrayIndexesHandler.aggregate_result``."""

    def setUp(self) -> None:
        self.executor = ExecutorForTest('numpy')

    def testAggregateResult(self):
        """Aggregate per-chunk results for a slice combined with a fancy index."""
        rs = np.random.RandomState(0)
        data = rs.rand(10, 10)
        t = tensor(data, chunk_size=6)

        step_slice = slice(None, None, 3)
        handler = NDArrayIndexesHandler()

        # test no reorder
        tiled = t[[step_slice, np.array([3, 6, 7])]].tiles()

        context = handler.handle(tiled.op, return_context=True)
        self.assertGreater(context.op.outputs[0].chunk_shape[-1], 1)
        per_chunk = self.executor.execute_tensor(tiled)
        chunk_results = [
            (chunk.index, chunk_data)
            for chunk, chunk_data in zip(get_tiled(tiled).chunks, per_chunk)
        ]
        expected = self.executor.execute_tensor(tiled, concat=True)[0]
        aggregated = handler.aggregate_result(context, chunk_results)
        np.testing.assert_array_equal(aggregated, expected)

        # test fancy index that requires reordering
        reordered = t[[step_slice, np.array([6, 7, 3])]].tiles()

        context = handler.handle(reordered.op, return_context=True)
        self.assertEqual(context.op.outputs[0].chunk_shape[-1], 1)
        # NOTE(review): chunk_results still holds the per-chunk data computed
        # for the first (non-reordered) index - confirm this reuse is intended.
        aggregated = handler.aggregate_result(context, chunk_results)
        expected = self.executor.execute_tensor(reordered, concat=True)[0]
        np.testing.assert_array_equal(aggregated, expected)
Пример #24
0
    def setUp(self) -> None:
        """Install a test executor on a new default session and list the estimators under test."""
        self.session = new_session().as_default()
        # Remember the session's own executor before replacing it.
        self._old_executor = self.session._sess._executor
        test_executor = ExecutorForTest(
            'numpy', storage=self.session._sess._context)
        self.executor = test_executor
        self.session._sess._executor = test_executor

        # (estimator class, constructor kwargs): rbf kernel, knn kernel and a
        # callable kernel.
        self.estimators = [
            (LabelPropagation, {'kernel': 'rbf'}),
            (LabelPropagation, {'kernel': 'knn', 'n_neighbors': 2}),
            (LabelPropagation, {
                'kernel': lambda x, y: rbf_kernel(x, y, gamma=20)
            }),
        ]
Пример #25
0
    def testInputTileable(self):
        """Spawn a function that receives a tensor as one of its arguments."""
        def scaled_total(t, x):
            return (t * x).sum().to_numpy()

        rs = np.random.RandomState(0)
        raw = rs.rand(5, 4)

        column_sums = mt.tensor(raw, chunk_size=3).sum(axis=0)
        spawned = spawn(scaled_total, args=(column_sums, 3))

        sess = new_session()
        sess._sess._executor = ExecutorForTest('numpy', storage=sess._context)

        actual = spawned.execute(session=sess).fetch(session=sess)
        reference = (raw.sum(axis=0) * 3).sum()
        self.assertAlmostEqual(actual, reference)
Пример #26
0
    def setUp(self):
        """Build dense, DataFrame and sparse fixtures and install a test executor."""
        n_rows, n_columns, chunk_size = 1000, 10, 20
        rs = mt.random.RandomState(0)
        self.X = rs.rand(n_rows, n_columns, chunk_size=chunk_size)
        self.y = rs.rand(n_rows, chunk_size=chunk_size)
        self.X_df = md.DataFrame(self.X)

        # One randomly chosen column per row is set to NaN, and NaN is then
        # passed to tosparse as the missing value.
        dense = np.random.rand(n_rows, n_columns)
        nan_columns = np.random.randint(n_columns, size=n_rows)
        dense[np.arange(n_rows), nan_columns] = np.nan
        self.X_sparse = mt.tensor(
            dense, chunk_size=chunk_size).tosparse(missing=np.nan)

        self.session = new_session().as_default()
        # Remember the session's own executor before replacing it.
        self._old_executor = self.session._sess._executor
        test_executor = ExecutorForTest(
            'numpy', storage=self.session._sess._context)
        self.executor = test_executor
        self.session._sess._executor = test_executor
Пример #27
0
    def setUp(self) -> None:
        """Install a test executor on a new default session, then build the fixtures."""
        self.session = new_session().as_default()
        # Remember the session's own executor before replacing it.
        self._old_executor = self.session._sess._executor
        test_executor = ExecutorForTest(
            'numpy', storage=self.session._sess._context)
        self.executor = test_executor
        self.session._sess._executor = test_executor

        rng = mt.random.RandomState(0)
        n_features = 30
        n_samples = 1000
        self.n_features = n_features
        self.n_samples = n_samples

        # Per-feature offsets and scales applied to standard-normal samples.
        offsets = rng.uniform(-1, 1, size=n_features)
        scales = rng.uniform(1, 10, size=n_features)
        X_2d = rng.randn(n_samples, n_features) * scales + offsets
        X_1row = X_2d[0, :].reshape(1, n_features)
        X_1col = X_2d[:, 0].reshape(n_samples, 1)
        self.X_2d = X_2d
        self.X_1row = X_1row
        self.X_1col = X_1col
        # Plain-list variants of the single-row and single-column inputs.
        self.X_list_1row = X_1row.to_numpy().tolist()
        self.X_list_1col = X_1col.to_numpy().tolist()

        self.iris = mt.tensor(load_iris().data)
Пример #28
0
class TestGPUReduction(TestBase):
    """Reductions on GPU-backed DataFrame and Series objects."""

    def setUp(self):
        self.executor = ExecutorForTest()

    def testGPUExecution(self):
        """Run sum/kurt/agg/unique on ``to_gpu`` data and compare with pandas."""
        df_raw = pd.DataFrame(np.random.rand(30, 3), columns=list('abc'))
        df = to_gpu(md.DataFrame(df_raw, chunk_size=6))

        r = df.sum()
        res = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(res.to_pandas(), df_raw.sum())

        r = df.kurt()
        res = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(res.to_pandas(), df_raw.kurt())

        r = df.agg(['sum', 'var'])
        res = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_frame_equal(res.to_pandas(),
                                      df_raw.agg(['sum', 'var']))

        s_raw = pd.Series(np.random.rand(30))
        s = to_gpu(md.Series(s_raw, chunk_size=6))

        r = s.sum()
        res = self.executor.execute_dataframe(r, concat=True)[0]
        self.assertAlmostEqual(res, s_raw.sum())

        r = s.kurt()
        res = self.executor.execute_dataframe(r, concat=True)[0]
        self.assertAlmostEqual(res, s_raw.kurt())

        r = s.agg(['sum', 'var'])
        res = self.executor.execute_dataframe(r, concat=True)[0]
        pd.testing.assert_series_equal(res.to_pandas(),
                                       s_raw.agg(['sum', 'var']))

        # Integer series with repeated values, so unique() has work to do.
        s_raw = pd.Series(
            np.random.randint(0, 3, size=(30, )) *
            np.random.randint(0, 5, size=(30, )))
        s = to_gpu(md.Series(s_raw, chunk_size=6))

        r = s.unique()
        res = self.executor.execute_dataframe(r, concat=True)[0]
        # ndarray.sort() sorts in place and returns None, which would make
        # this assertion compare None with None; np.sort returns the sorted
        # copies that actually need comparing.
        np.testing.assert_array_equal(
            np.sort(cp.asnumpy(res)),
            np.sort(s_raw.unique()))
Пример #29
0
    def testUnknownShapeInputs(self):
        """Spawn a function on a boolean-masked tensor and check the shape it receives."""
        def scaled_total(t, x):
            # No dimension of the received tensor may be NaN.
            assert all(not np.isnan(s) for s in t.shape)
            return (t * x).sum().to_numpy(check_nsplits=False)

        rs = np.random.RandomState(0)
        raw = rs.rand(5, 4)

        source = mt.tensor(raw, chunk_size=3)
        positive = source[source > 0]
        spawned = spawn(scaled_total, args=(positive, 3))

        sess = new_session()
        sess._sess._executor = ExecutorForTest('numpy', storage=sess._context)

        actual = spawned.execute(session=sess).fetch(session=sess)
        reference = (raw[raw > 0] * 3).sum()
        self.assertAlmostEqual(actual, reference)
Пример #30
0
    def setUp(self):
        """Build a sparse tf-idf-like matrix and install a test executor.

        Stores the CSR matrix as ``self.X``, its dense form as
        ``self.Xdense`` and the shape as ``self.n_samples`` /
        ``self.n_features``. The executor of a new default session is then
        replaced by an ``ExecutorForTest``.
        """
        # Make an X that looks somewhat like a small tf-idf matrix.
        # XXX newer versions of SciPy >0.16 have scipy.sparse.rand for this.
        shape = 60, 55
        n_samples, n_features = shape
        rng = check_random_state(42)
        # np.prod replaces np.product, a deprecated alias that was removed in
        # NumPy 2.0.
        X = rng.randint(-100, 20, np.prod(shape)).reshape(shape)
        # Negative draws are clipped to zero, so they are not stored.
        X = sp.csr_matrix(np.maximum(X, 0), dtype=np.float64)
        X.data[:] = 1 + np.log(X.data)
        self.X = X
        # toarray() replaces the `.A` shorthand, which is deprecated on SciPy
        # sparse matrices.
        self.Xdense = X.toarray()
        self.n_samples = n_samples
        self.n_features = n_features

        self.session = new_session().as_default()
        # Remember the session's own executor before replacing it.
        self._old_executor = self.session._sess._executor
        self.executor = self.session._sess._executor = \
            ExecutorForTest('numpy', storage=self.session._sess._context)